Selenium web scraping site with pagination

77 views
Skip to first unread message

Mark Kwiatkowski

unread,
Mar 11, 2022, 7:46:27 AM3/11/22
to Selenium Users
Hello,

I've been playing around learning how to create web scrapers using Selenium. One thing I'm struggling with is scraping pages with pagination. I've written a script that i thought would scrape every page. 

from selenium import webdriver
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
import getpass
import datetime
import pandas as pd
custom_options = webdriver.ChromeOptions()
custom_options.add_experimental_option('prefs', {'intl.accept_languages': 'en,en_US'})

driver = webdriver.Chrome(ChromeDriverManager().install(), options=custom_options)
driver.get("https://lr.caa.cz/letecky-rejstrik?lang=en")
WebDriverWait(driver, 15).until(EC.element_to_be_clickable((By.XPATH, "/html/body/app-root/app-cookies-consent/div/div/div[2]/div/button[1]"))).click()

data =[]

while(True):
    try:
        table_body = WebDriverWait(driver, 20).until(EC.presence_of_element_located((By.TAG_NAME,"tbody")))
        table_body_rows = table_body.find_elements_by_tag_name("tr")
        button = WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH,"/html/body/app-root/div/main/div/div/app-avreg-list/nav/div/app-pagination/div/a[3]/i")))
        for i in table_body_rows:
            row_data = []
            table_data = i.find_elements_by_tag_name("td")
            for j in table_data:
                row_data.append(j.text.strip())
            data.append(row_data)
        button.click()
    except:
       
        break
df = pd.DataFrame(data)
print(df)
driver.quit()

It scrapes the first page but then it doesn't seem to go beyond that. Any thoughts on what might be wrong?

I'm running Python 3.8.0.

Mark


Reply all
Reply to author
Forward
0 new messages