I've been playing around learning how to create web scrapers using Selenium. One thing I'm struggling with is scraping pages with pagination. I've written a script that i thought would scrape every page.
from selenium import webdriver
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from
selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.options import Options
from
webdriver_manager.chrome import ChromeDriverManager
import getpass
import datetime
import pandas as pd
custom_options = webdriver.ChromeOptions()
custom_options.add_experimental_option('prefs', {'intl.accept_languages': 'en,en_US'})
driver = webdriver.Chrome(ChromeDriverManager().install(), options=custom_options)
driver.get("
https://lr.caa.cz/letecky-rejstrik?lang=en")
WebDriverWait(driver, 15).until(EC.element_to_be_clickable((By.XPATH, "/html/body/app-root/app-cookies-consent/div/div/div[2]/div/button[1]"))).click()
data =[]
while(True):
try:
table_body = WebDriverWait(driver, 20).until(EC.presence_of_element_located((By.TAG_NAME,"tbody")))
table_body_rows = table_body.find_elements_by_tag_name("tr")
button = WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH,"/html/body/app-root/div/main/div/div/app-avreg-list/nav/div/app-pagination/div/a[3]/i")))
for i in table_body_rows:
row_data = []
table_data = i.find_elements_by_tag_name("td")
for j in table_data:
row_data.append(j.text.strip())
data.append(row_data)
button.click()
except:
break
df = pd.DataFrame(data)
print(df)
driver.quit()
It scrapes the first page but then it doesn't seem to go beyond that. Any thoughts on what might be wrong?
I'm running Python 3.8.0.