I am trying to web-scrape a Twitter user's following list, for example:<br>
https://twitter.com/AzGoneClr/following<br>
The function loads the page, logs in with cookies, then scrolls down until the array stops growing in size.<br>
I run into a problem where it gets stuck on loading as I scroll down.<br>
I tried focusing elements on the page,<br>
clicking on Home and then going back,<br>
and it still gets stuck on loading.<br>
Does anyone know how to bypass this? 😞<br>
Using: selenium, undetected_chromedriver
def fetch_data(owneraddress: str) -> list:
    """Collect the usernames a Twitter account follows.

    Opens ``<owneraddress>/following`` in an undetected-chromedriver
    session, logs in via an auth-token cookie if needed, then scrolls
    until no new usernames appear for 40 consecutive iterations.

    Args:
        owneraddress: Profile URL, e.g. ``https://twitter.com/SomeUser``.

    Returns:
        Usernames in the order they were discovered, or ``[]`` when the
        profile page does not exist.
    """
    options = uc.ChromeOptions()
    # NOTE(review): headless mode is presumably avoided because it is more
    # likely to trip Twitter's bot detection -- confirm.
    options.headless = False
    driver = uc.Chrome(use_subprocess=True, options=options)
    try:
        driver.set_window_size(400, 400)
        driver.get(f"{owneraddress}/following")
        time.sleep(3)

        # Twitter redirects away from /following when not logged in:
        # inject the auth cookie and reload.
        if 'following' not in driver.current_url:
            driver.add_cookie({'name': 'auth_token',
                               'value': str(config.twitter_auth)})
            driver.get(f"{owneraddress}/following")
            time.sleep(3)

        scroll_pos = 100
        usernames = []        # result, preserves discovery order
        seen = set()          # O(1) duplicate check (was O(n) list scan)
        last_count = 0
        stale_rounds = 0      # consecutive scrolls that found nothing new

        while True:
            driver.execute_script(f"window.scrollTo(0, {scroll_pos - 200})")
            time.sleep(0.1)
            scroll_pos += 200

            html_page = driver.page_source
            # Non-existent account: bail out with an empty result.
            if 'Hmm... this page doesn’t exist. Try searching for something else.' in html_page:
                return []

            # Each followed user is rendered inside a "cellInnerDiv" chunk;
            # the username sits in its Follow-button aria-label.
            cells = html_page.split('"cellInnerDiv"')
            for cell in cells:
                # The chunk containing </html> is trailing markup, not a user cell.
                if '</html>' in cell:
                    continue
                parts = cell.split('aria-label="Follow @')
                if len(parts) < 2:
                    continue
                username = parts[1].split('"')[0]
                if username and username not in seen:
                    seen.add(username)
                    usernames.append(username)

            if last_count == len(usernames):
                stale_rounds += 1
            else:
                stale_rounds = 0
            last_count = len(usernames)

            if stale_rounds == 40:
                # Heuristic: the last cell carries this class only at the
                # true end of the list. If it is absent, the infinite scroll
                # is stuck on its loading spinner -- nudge it by visiting
                # Home and coming back, then keep scrolling.
                if 'class="css-1dbjc4n r-o52ifk"' not in cells[-1]:
                    stale_rounds = 0
                    home_link = driver.find_element(
                        By.XPATH, '//a[contains(@href,"/home")]')
                    home_link.click()
                    time.sleep(10)
                    driver.back()
                    time.sleep(10)
                    continue
                break

        return usernames
    finally:
        # quit() (unlike close()) also terminates the chromedriver process,
        # and the finally block runs even on the early [] return or errors
        # -- the original leaked the driver on those paths.
        driver.quit()