Issues finding elements with iframe switching

20 views
Skip to first unread message

MLD_92

unread,
Jun 21, 2024, 12:57:48 AM (12 days ago) Jun 21
to Selenium Users
Hello,

I'm trying to scrape some flyer data that requires clicking buttons, each of which then populates an aside frame with the data I want to extract. I am able to extract the data properly after clicking the first button, but when I click the second button I keep getting the "no such element" error.

I think I'm having problems switching between iframes. I've tried many different things and can't figure it out.

# Location of the Edge WebDriver executable (machine-specific path).
webdriver_path = r'C:\Users\MikeL\msedgedriver.exe'
edge_service = EdgeService(executable_path=webdriver_path)

# Browser options. No headless flag is added here, so the browser window is
# shown; --disable-gpu only switches off GPU hardware acceleration.
options = webdriver.EdgeOptions()
options.add_argument('--disable-gpu')

# Create a directory to save debug information
debug_dir = "debug_info"
os.makedirs(debug_dir, exist_ok=True)

# Read the product names used to filter the flyer buttons.
product_names_df = pd.read_csv('ProductNameFilter.csv')
# Blank CSV rows load as NaN (a float) and numeric-looking names load as
# numbers; either one raises TypeError in a `name in some_string` substring
# test, and an empty name would match every string. Keep only non-blank
# names, converted to str.
product_names = [
    name
    for name in product_names_df['ProductName'].dropna().astype(str)
    if name.strip()
]

try:
    driver = webdriver.Edge(service=edge_service, options=options)

    # Open the target webpage
    driver.get("https://liquor.sobeys.com/flyer/?store_id=96906")

    # Allow some time for the page to load
    driver.implicitly_wait(10)
    time.sleep(5)  # Extra time to ensure the flyer content is loaded

    # Close the cookie policy popup if it appears
    try:
        cookie_close_button = driver.find_element(By.CSS_SELECTOR, 'a.cookie_policy_btn')
        cookie_close_button.click()
        time.sleep(2)
    except NoSuchElementException:
        print("No cookie policy popup found")

    # Switch to the navigation bar iframe and select the correct flyer
    nav_iframe = driver.find_element(By.CSS_SELECTOR, "iframe.navframe")
    driver.switch_to.frame(nav_iframe)

    # Find and click the correct flyer (where ispreview is false)
    flyer_buttons = driver.find_elements(By.CSS_SELECTOR, 'flipp-filmstrip-pub button')
    for btn in flyer_buttons:
        parent = btn.find_element(By.XPATH, '..')
        if 'ispreview="false"' in parent.get_attribute('outerHTML'):
            btn.click()
            time.sleep(5)  # Allow some time for the flyer to load
            break

    # Switch back to the main content
    driver.switch_to.default_content()

    # Switch to the main iframe containing the flyer
    main_iframe = driver.find_element(By.CSS_SELECTOR, "iframe.mainframe")
    driver.switch_to.frame(main_iframe)

    # Save iframe content to a file
    iframe_content = driver.page_source
    with open(os.path.join(debug_dir, "sobeys_iframe_content.html"), "w", encoding="utf-8") as file:
        file.write(iframe_content)
    print("Iframe content saved to sobeys_iframe_content.html")

    # Extract product data from buttons
    buttons = driver.find_elements(By.TAG_NAME, 'button')
    filtered_buttons = []
    for button in buttons:
        aria_label = button.get_attribute('aria-label')
        if aria_label:
            # Check if any of the product names are in the aria-label
            if any(product_name in aria_label for product_name in product_names):
                filtered_buttons.append(button)

    data = []

    # Function to extract information from the product details sidebar
    def extract_product_details(button_index):
        time.sleep(random.uniform(3, 5))  # Randomized wait time for the sidebar to load
        details = {}

        try:
            # Switch to the correct iframe for the product details
            driver.switch_to.default_content()  # Switch back to the main content
            info_iframe = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, "iframe.asideframe"))
            )
            driver.switch_to.frame(info_iframe)

            # Save the iframe content after clicking a button for debugging
            iframe_buttonclick_content = driver.page_source
            with open(os.path.join(debug_dir, f"iframe_buttonclick_{button_index}.html"), "w", encoding="utf-8") as file:
                file.write(iframe_buttonclick_content)
            print(f"Iframe content after button click saved to iframe_buttonclick_{button_index}.html")

            # Extract the required details
            details['brand'] = driver.find_element(By.CSS_SELECTOR, 'div.brand-name p.brand').text
            details['prepricetext'] = driver.find_element(By.CSS_SELECTOR, 'span.prepricetext').text
            details['pricevalue'] = driver.find_element(By.CSS_SELECTOR, 'span.price-value').text
            details['postpricetext'] = driver.find_element(By.CSS_SELECTOR, 'span.postpricetext').text
            details['originalpricetext'] = driver.find_element(By.CSS_SELECTOR, 'div.originalpricetext').text
            details['salestory'] = driver.find_element(By.CSS_SELECTOR, 'div.salestory').text
            validity_dates = driver.find_element(By.CSS_SELECTOR, 'flipp-validity-dates').get_attribute('innerHTML')
            details['start_date'] = validity_dates.split('from="')[1].split('"')[0]
            details['end_date'] = validity_dates.split('to="')[1].split('"')[0]

        except NoSuchElementException as e:
            print(f"Error extracting product details: NoSuchElementException for element {e}")
        except TimeoutException as e:
            print(f"Error extracting product details: TimeoutException for element {e}")
        except Exception as e:
            print(f"Error extracting product details: {e}")

        # Switch back to the flyer iframe
        driver.switch_to.default_content()
        main_iframe = driver.find_element(By.CSS_SELECTOR, "iframe.mainframe")
        driver.switch_to.frame(main_iframe)

        return details

    # Function to perform random scrolling
    def random_scroll():
        scroll_height = driver.execute_script("return document.body.scrollHeight")
        random_scroll_position = random.randint(0, scroll_height)
        driver.execute_script(f"window.scrollTo(0, {random_scroll_position});")
        time.sleep(random.uniform(1, 3))  # Randomized wait time after scrolling

    # Click on all the filtered buttons in the order found
    for i, button in enumerate(filtered_buttons):
        try:
            random_scroll()  # Perform random scrolling
            button.click()
            time.sleep(2)  # Give time for the asideframe to load
            product_details = extract_product_details(i)
            if product_details:
                data.append(product_details)
            driver.back()
            time.sleep(random.uniform(2, 4))  # Randomized wait time after clicking back
        except StaleElementReferenceException as e:
            print(f"Button {i} became stale. Skipping this button. Error: {e}")
            continue
        except Exception as e:
            print(f"An error occurred while clicking button {i}: {e}")
            continue

    # Save extracted data to CSV
    df = pd.DataFrame(data)
    filename = "filtered_sobeys_flyer_labels.csv"
    df.to_csv(filename, index=False)
Reply all
Reply to author
Forward
0 new messages