I'm having the following issue with chrome webdriver running on a vm ubuntu 18.04 in headless mode:
I'm running the code belowto save html pages from a list of url's:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
import io
from pathlib import Path
chrome_options = Options()
chrome_options.add_argument("--headless")
chrome_options.add_argument("--disable-gpu")
chrome_options.add_argument("start-maximized")
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument("enable-automation")
chrome_options.add_argument("--disable-dev-shm-usage")
chrome_options.add_argument("--disable-infobars")
chrome_options.add_argument("--disable-browser-side-navigation")
chrome_options.add_argument("--dns-prefetch-disable")
br = webdriver.Chrome("./google_scrapping/chromedriver",options=chrome_options)
# br.set_page_load_timeout(60)
for cat in cat_pp_url:
for domain,pp in cat_pp_url[cat].items():
print(pp)
br.get(pp)
# br.implicitly_wait(10)
html = br.page_source
Path("./crawled_pp_html/"+domain).mkdir(parents=True, exist_ok=True)
with io.open("./crawled_pp_html/"+domain+"/priv.html", "w", encoding="utf-8") as f:
f.write(html)
f.close()
driver.quit()
it starts working and then I have this issue after like 38 iteration , and the problem was with this website :https://www.logistics.dhl/global-en/home/footer/global-privacy-notice.html
---------------------------------------------------------------------------
TimeoutException Traceback (most recent call last)
<ipython-input-671-42cb55a494cd> in <module>
26 for domain,pp in cat_pp_url[cat].items():
27 print(pp)
---> 28 br.get(pp)
29 # br.implicitly_wait(10)
30 html = br.page_source
~/anaconda3/lib/python3.7/site-packages/selenium/webdriver/remote/webdriver.py in get(self, url)
331 Loads a web page in the current browser session.
332 """
--> 333 self.execute(Command.GET, {'url': url})
334
335 @property
~/anaconda3/lib/python3.7/site-packages/selenium/webdriver/remote/webdriver.py in execute(self, driver_command, params)
319 response = self.command_executor.execute(driver_command, params)
320 if response:
--> 321 self.error_handler.check_response(response)
322 response['value'] = self._unwrap_value(
323 response.get('value', None))
~/anaconda3/lib/python3.7/site-packages/selenium/webdriver/remote/errorhandler.py in check_response(self, response)
240 alert_text = value['alert'].get('text')
241 raise exception_class(message, screen, stacktrace, alert_text)
--> 242 raise exception_class(message, screen, stacktrace)
243
244 def _value_or_default(self, obj, key, default):
TimeoutException: Message: timeout: Timed out receiving message from renderer: -0.000
(Session info: headless chrome=81.0.4044.92)
I tried disabling any kind of timeout, added few arguments that people suggested on stackoverflow , but nothing seems to work.
any thoughts?