Hello,
I'm trying to scrape flyer data from a page where clicking a product button populates the data I want to extract in an aside frame. I am able to extract the data properly after clicking the first button, but after I click the second button I keep getting a `NoSuchElementException` ("no such element").
I think the problem is in how I switch between iframes. I've tried many different approaches and can't figure it out.
# Specify the path to the Edge WebDriver
webdriver_path = r'C:\Users\MikeL\msedgedriver.exe'
edge_service = EdgeService(executable_path=webdriver_path)
# Initialize the Edge WebDriver options
options = webdriver.EdgeOptions()
# NOTE: no headless flag is added here, so the browser window is shown.
# '--disable-gpu' is a separate browser switch; removing it does not
# switch between headless and non-headless mode.
options.add_argument('--disable-gpu')
# Create a directory to save debug information
debug_dir = "debug_info"
os.makedirs(debug_dir, exist_ok=True)
# Read the product names from the CSV file
product_names_df = pd.read_csv('ProductNameFilter.csv')
# Blank cells are read as NaN (a float) and numeric-looking names as numbers;
# either would raise TypeError in a later `name in aria_label` substring test,
# so drop the missing values and make every remaining name a string.
product_names = product_names_df['ProductName'].dropna().astype(str).tolist()
try:
# Launch Edge with the service and options configured earlier in the script
driver = webdriver.Edge(service=edge_service, options=options)
# Open the target webpage (the URL has to be a single-line string literal)
driver.get("https://liquor.sobeys.com/flyer/?store_id=96906")
# Let element lookups keep retrying for up to 10 seconds before failing;
# this call configures the driver and does not itself pause the script
driver.implicitly_wait(10)
time.sleep(5)  # Extra time to ensure the flyer content is loaded
# Dismiss the cookie policy popup when it is present; carry on when it is not
try:
    driver.find_element(By.CSS_SELECTOR, 'a.cookie_policy_btn').click()
    time.sleep(2)  # Short pause after the click
except NoSuchElementException:
    print("No cookie policy popup found")
# Enter the navigation bar iframe, which holds the flyer selector buttons
nav_iframe = driver.find_element(By.CSS_SELECTOR, "iframe.navframe")
driver.switch_to.frame(nav_iframe)
# Pick the first flyer button whose parent markup contains ispreview="false".
# The generator is lazy, so parents are inspected one by one and the search
# stops at the first match.
flyer_buttons = driver.find_elements(By.CSS_SELECTOR, 'flipp-filmstrip-pub button')
live_flyer_button = next(
    (
        candidate
        for candidate in flyer_buttons
        if 'ispreview="false"'
        in candidate.find_element(By.XPATH, '..').get_attribute('outerHTML')
    ),
    None,
)
if live_flyer_button is not None:
    live_flyer_button.click()
    time.sleep(5)  # Allow some time for the flyer to load
# Leave the navigation iframe and return to the top-level document
driver.switch_to.default_content()
# Enter the main iframe containing the flyer
main_iframe = driver.find_element(By.CSS_SELECTOR, "iframe.mainframe")
driver.switch_to.frame(main_iframe)
# Dump the current frame's markup into the debug directory for inspection
iframe_content = driver.page_source
iframe_dump_path = os.path.join(debug_dir, "sobeys_iframe_content.html")
with open(iframe_dump_path, "w", encoding="utf-8") as dump_file:
    dump_file.write(iframe_content)
print("Iframe content saved to sobeys_iframe_content.html")
# Gather every button in the current frame, then keep those whose aria-label
# contains at least one of the wanted product names
buttons = driver.find_elements(By.TAG_NAME, 'button')
labelled_buttons = (
    (candidate, candidate.get_attribute('aria-label')) for candidate in buttons
)
filtered_buttons = [
    candidate
    for candidate, label in labelled_buttons
    if label and any(wanted in label for wanted in product_names)
]
# Extracted product rows are accumulated here
data = []
def extract_product_details(button_index):
    """Read the product details shown in the aside iframe after a button click.

    Switches to ``iframe.asideframe``, saves that frame's page source into
    ``debug_dir`` for debugging, reads the detail fields, and then switches
    back to ``iframe.mainframe`` so the caller can keep working in the flyer.

    Each field is looked up on its own, so a sidebar that lacks one element
    still yields the remaining fields; a missing field is stored as an empty
    string instead of aborting the whole extraction.

    Args:
        button_index: Position of the clicked button; used only to name the
            debug dump file.

    Returns:
        A dict with the keys ``brand``, ``prepricetext``, ``pricevalue``,
        ``postpricetext``, ``originalpricetext``, ``salestory``,
        ``start_date`` and ``end_date``. An empty dict is returned when the
        aside iframe could not be read or when no field had a value, so a
        caller's truthiness check skips the row.
    """
    # (result key, CSS selector) pairs for the plain-text fields
    text_fields = (
        ('brand', 'div.brand-name p.brand'),
        ('prepricetext', 'span.prepricetext'),
        ('pricevalue', 'span.price-value'),
        ('postpricetext', 'span.postpricetext'),
        ('originalpricetext', 'div.originalpricetext'),
        ('salestory', 'div.salestory'),
    )

    def first_text(css_selector):
        # find_elements returns an empty list instead of raising when nothing
        # matches, which lets one absent element leave the other fields intact.
        # NOTE(review): with an implicit wait configured on the driver, a
        # lookup for an absent element may take up to that timeout - confirm
        # this is acceptable.
        matches = driver.find_elements(By.CSS_SELECTOR, css_selector)
        return matches[0].text if matches else ''

    def quoted_value(markup, marker):
        # Return the text between the first `marker` and the next double
        # quote, or '' when the marker does not occur in the markup.
        _, found, remainder = markup.partition(marker)
        return remainder.split('"', 1)[0] if found else ''

    time.sleep(random.uniform(3, 5))  # Randomized wait time for the sidebar to load
    details = {}
    try:
        # The aside iframe is looked up from the top-level document, so leave
        # whichever frame is currently selected first
        driver.switch_to.default_content()
        info_iframe = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "iframe.asideframe"))
        )
        driver.switch_to.frame(info_iframe)
        # Save the iframe content after clicking a button for debugging
        iframe_buttonclick_content = driver.page_source
        dump_path = os.path.join(debug_dir, f"iframe_buttonclick_{button_index}.html")
        with open(dump_path, "w", encoding="utf-8") as dump_file:
            dump_file.write(iframe_buttonclick_content)
        print(f"Iframe content after button click saved to iframe_buttonclick_{button_index}.html")
        # Extract the required details, one independent lookup per field
        for key, css_selector in text_fields:
            details[key] = first_text(css_selector)
        # The validity dates are read out of the element's inner markup as
        # the values following from=" and to="
        validity_nodes = driver.find_elements(By.CSS_SELECTOR, 'flipp-validity-dates')
        validity_html = ''
        if validity_nodes:
            validity_html = validity_nodes[0].get_attribute('innerHTML') or ''
        details['start_date'] = quoted_value(validity_html, 'from="')
        details['end_date'] = quoted_value(validity_html, 'to="')
    except NoSuchElementException as e:
        print(f"Error extracting product details: NoSuchElementException for element {e}")
    except TimeoutException as e:
        print(f"Error extracting product details: TimeoutException for element {e}")
    except Exception as e:
        print(f"Error extracting product details: {e}")
    # Switch back to the flyer iframe
    driver.switch_to.default_content()
    main_iframe = driver.find_element(By.CSS_SELECTOR, "iframe.mainframe")
    driver.switch_to.frame(main_iframe)
    # A result in which every field is empty carries no information
    if not any(details.values()):
        return {}
    return details
def random_scroll():
    """Scroll the current document to a random vertical offset, then pause.

    The offset is an integer drawn from 0 up to and including the value of
    ``document.body.scrollHeight``; the pause lasts between 1 and 3 seconds.
    """
    page_height = driver.execute_script("return document.body.scrollHeight")
    target_offset = random.randint(0, page_height)
    driver.execute_script(f"window.scrollTo(0, {target_offset});")
    pause_seconds = random.uniform(1, 3)  # Randomized wait time after scrolling
    time.sleep(pause_seconds)
# Visit each matching product button in the order it was found. A failure on
# one button is reported and the loop moves on to the next one.
for position, product_button in enumerate(filtered_buttons):
    try:
        random_scroll()  # Perform random scrolling
        product_button.click()
        time.sleep(2)  # Give time for the asideframe to load
        row = extract_product_details(position)
        if row:
            data.append(row)
        driver.back()
        time.sleep(random.uniform(2, 4))  # Randomized wait time after clicking back
    except StaleElementReferenceException as stale_error:
        print(f"Button {position} became stale. Skipping this button. Error: {stale_error}")
    except Exception as error:
        print(f"An error occurred while clicking button {position}: {error}")
# Write the collected rows to a CSV file, leaving out the index column
filename = "filtered_sobeys_flyer_labels.csv"
df = pd.DataFrame(data)
df.to_csv(path_or_buf=filename, index=False)