I am trying to web-scrape a Twitter user's following list, for example:<br>
https://twitter.com/AzGoneClr/following<br>
The function loads the page, logs in with cookies, then scrolls down until the array stops growing in size.<br>
I run into a problem where it gets stuck on loading as I scroll down.<br>
I tried focusing elements on the page,<br>
clicking on Home and then going back,<br>
and it still gets stuck on loading.<br>
Does anyone know how to bypass this? 😞<br>
Using: selenium, undetected_chromedriver
def fetch_data(owneraddress: str) -> list:
    """Collect the usernames a Twitter account follows.

    Opens ``<owneraddress>/following`` in an undetected-chromedriver
    session, logs in via an auth-token cookie if needed, then scrolls
    until no new usernames appear for 40 consecutive iterations.

    Args:
        owneraddress: Profile URL, e.g. ``https://twitter.com/SomeUser``.

    Returns:
        Usernames in the order they were discovered, or ``[]`` when the
        profile page does not exist.
    """
    options = uc.ChromeOptions()
    # NOTE(review): headless mode is presumably avoided because it is more
    # likely to trip Twitter's bot detection -- confirm.
    options.headless = False
    driver = uc.Chrome(use_subprocess=True, options=options)
    try:
        driver.set_window_size(400, 400)
        driver.get(f"{owneraddress}/following")
        time.sleep(3)

        # Twitter redirects away from /following when not logged in:
        # inject the auth cookie and reload.
        if 'following' not in driver.current_url:
            driver.add_cookie({'name': 'auth_token',
                               'value': str(config.twitter_auth)})
            driver.get(f"{owneraddress}/following")
            time.sleep(3)

        scroll_pos = 100
        usernames = []        # result, preserves discovery order
        seen = set()          # O(1) duplicate check (was O(n) list scan)
        last_count = 0
        stale_rounds = 0      # consecutive scrolls that found nothing new

        while True:
            driver.execute_script(f"window.scrollTo(0, {scroll_pos - 200})")
            time.sleep(0.1)
            scroll_pos += 200

            html_page = driver.page_source
            # Non-existent account: bail out with an empty result.
            if 'Hmm... this page doesn’t exist. Try searching for something else.' in html_page:
                return []

            # Each followed user is rendered inside a "cellInnerDiv" chunk;
            # the username sits in its Follow-button aria-label.
            cells = html_page.split('"cellInnerDiv"')
            for cell in cells:
                # The chunk containing </html> is trailing markup, not a user cell.
                if '</html>' in cell:
                    continue
                parts = cell.split('aria-label="Follow @')
                if len(parts) < 2:
                    continue
                username = parts[1].split('"')[0]
                if username and username not in seen:
                    seen.add(username)
                    usernames.append(username)

            if last_count == len(usernames):
                stale_rounds += 1
            else:
                stale_rounds = 0
            last_count = len(usernames)

            if stale_rounds == 40:
                # Heuristic: the last cell carries this class only at the
                # true end of the list. If it is absent, the infinite scroll
                # is stuck on its loading spinner -- nudge it by visiting
                # Home and coming back, then keep scrolling.
                if 'class="css-1dbjc4n r-o52ifk"' not in cells[-1]:
                    stale_rounds = 0
                    home_link = driver.find_element(
                        By.XPATH, '//a[contains(@href,"/home")]')
                    home_link.click()
                    time.sleep(10)
                    driver.back()
                    time.sleep(10)
                    continue
                break

        return usernames
    finally:
        # quit() (unlike close()) also terminates the chromedriver process,
        # and the finally block runs even on the early [] return or errors
        # -- the original leaked the driver on those paths.
        driver.quit()