How can I make Selenium continue running after sleeping and resuming Windows?

43 views

Skip to first unread message

KEVIN SEGUNDO ABANTO MENDEZ

unread,

Jan 7, 2025, 1:43:54 AMJan 7

to Selenium Users

I'm extracting the members of a Facebook group, but I need to have my PC on for 7 days to be able to extract all the profiles.

I tried using ChatGPT and Claude to make the script pause and then resume after I put my PC to sleep and wake it up again, but nothing works.

**CODE:**

import os
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException, WebDriverException
import time
import threading
import sys
from datetime import datetime
import colorama
from colorama import Fore, Back, Style
import keyboard

# Initialise colorama for the Windows console (CMD).
colorama.init()

# Global variables shared between the scroll loop and the keyboard/input handlers.
start_time = None  # set to time.time() by main() before extraction starts
total_members = set()  # profile URLs collected so far; filled by extraer_perfiles()
should_continue = True  # NOTE(review): never referenced in the visible code
extraction_active = True  # set to False by check_for_enter() to stop extraction
is_paused = False  # toggled by handle_space()
chrome_options = Options()
# NOTE(review): the r"" prefix together with "\\" leaves doubled backslashes in
# the path; presumably Windows tolerates repeated separators — confirm.
chrome_options.add_argument(r"--user-data-dir=C:\\Users\\studi\\AppData\\Local\\Google\\Chrome\\User Data")
chrome_options.add_argument("--profile-directory=Default")
chrome_options.add_argument("--start-maximized")
chrome_options.add_argument("--disable-dev-shm-usage")
chrome_options.add_argument("--no-sandbox")
# NOTE(review): "detach" presumably keeps the browser open after the script
# exits — confirm against the ChromeDriver documentation.
chrome_options.add_experimental_option("detach", True)

# Location of the chromedriver executable handed to the Selenium Service.
driver_path = r"C:\\chromedriver\\chromedriver.exe"
service = Service(driver_path)

def handle_space():
    """Flip the global pause flag and print the new state in yellow."""
    global is_paused
    is_paused = not is_paused
    if is_paused:
        estado = 'PAUSADO'
    else:
        estado = 'REANUDADO'
    print(f"\n{Fore.YELLOW}{estado}{Style.RESET_ALL}")

def check_for_enter():
    """Wait for ENTER on stdin, then stop the extraction and report the total.

    Blocks on ``input()``; once it returns, clears the global
    ``extraction_active`` flag, wipes the console and prints how many
    members are currently held in ``total_members``.
    """
    global extraction_active
    input("Presiona ENTER para detener la extracción...\n")
    extraction_active = False
    clear_console()
    resumen = f"Extracción cancelada. Total de miembros extraídos: {len(total_members)}"
    print(f"{Fore.YELLOW}{resumen}{Style.RESET_ALL}")

def clear_console():
    """Clear the terminal: ``cls`` when ``os.name`` is ``'nt'``, else ``clear``."""
    if os.name == 'nt':
        comando = 'cls'
    else:
        comando = 'clear'
    os.system(comando)

def format_time(seconds):
    """Format a duration in seconds as ``HH:MM:SS``.

    Each field is truncated to an integer and zero-padded to at least two
    digits; the hours field is not capped and grows beyond two digits for
    durations of 100 hours or more.
    """
    horas, resto = divmod(seconds, 3600)
    campos = (int(horas), int(resto // 60), int(seconds % 60))
    return ":".join(f"{campo:02d}" for campo in campos)

def print_stats(total_count, new_members, scroll_count):
    """Redraw the status panel on a freshly cleared console.

    Args:
        total_count: Number of members extracted so far.
        new_members: Number of members added by the latest scroll.
        scroll_count: Number of the current scroll attempt.

    The elapsed time is measured from the global ``start_time``. When the
    global ``is_paused`` flag is set, an extra "paused" line is printed
    after the panel.
    """
    clear_console()
    formatted_time = format_time(time.time() - start_time)

    separador = f"{Fore.CYAN}={'='*50}{Style.RESET_ALL}"
    panel = (
        separador,
        f"{Fore.YELLOW}Tiempo transcurrido: {formatted_time}{Style.RESET_ALL}",
        separador,
        f"{Fore.GREEN}Facebook Group Member Extractor - Status{Style.RESET_ALL}",
        separador,
        f"{Fore.WHITE}Total members extracted: {Fore.YELLOW}{total_count}{Style.RESET_ALL}",
        f"{Fore.WHITE}New members in this scroll: {Fore.GREEN}+{new_members}{Style.RESET_ALL}",
        f"{Fore.WHITE}Current scroll attempt: {Fore.CYAN}{scroll_count}{Style.RESET_ALL}",
        separador,
        f"{Fore.WHITE}Press ENTER to stop extraction{Style.RESET_ALL}",
        f"{Fore.WHITE}Press SPACE to pause/resume{Style.RESET_ALL}",
        separador,
    )
    for linea in panel:
        print(linea)

    if is_paused:
        print(f"{Fore.YELLOW}EXTRACCIÓN PAUSADA - Presiona ESPACIO para continuar{Style.RESET_ALL}")

def navegar_a_miembros(driver, url_grupo):
    """Open the members page of a group and try to select its 'People' tab.

    Args:
        driver: Selenium WebDriver used for navigation.
        url_grupo: Group URL, with or without a trailing slash.

    Returns:
        False if loading the members page raised; True otherwise, including
        when the 'People' tab could not be found or clicked (that step is
        best-effort).
    """
    try:
        # Join with exactly one slash: main() accepts URLs with or without a
        # trailing "/", and plain concatenation produced ".../<group>members"
        # for the latter.
        url_miembros = url_grupo.rstrip("/") + "/members"
        driver.get(url_miembros)
        time.sleep(3)
    except Exception as e:
        print(f"{Fore.RED}Error navegando a miembros: {e}{Style.RESET_ALL}")
        return False

    try:
        # Candidate XPaths for the 'People' tab, tried in order until one is
        # clickable.
        people_selectors = [
            "//span[contains(text(), 'People')]",
            "//a[@role='tab']//span[contains(text(), 'People')]",
            "//a[contains(@href, '/members/')]//span[contains(@class, 'x193iq5w') and contains(text(), 'People')]"
        ]

        for selector in people_selectors:
            try:
                boton_people = WebDriverWait(driver, 5).until(
                    EC.element_to_be_clickable((By.XPATH, selector))
                )
                boton_people.click()
                print(f"{Fore.GREEN}✓ Clic en 'People' exitoso{Style.RESET_ALL}")
                time.sleep(2)
                break
            except (TimeoutException, NoSuchElementException):
                continue
        else:
            # No selector matched; carry on with whatever page is loaded.
            print(f"{Fore.YELLOW}No se pudo hacer clic en 'People', continuando...{Style.RESET_ALL}")
    except Exception as e:
        # Deliberately non-fatal: the members page itself did load.
        print(f"{Fore.RED}Error en navegación: {e}{Style.RESET_ALL}")

    return True

def cargar_miembros(driver):
    """Scroll the members page repeatedly, collecting profiles after each scroll.

    The loop runs while the global ``extraction_active`` flag is set and idles
    while ``is_paused`` is set. A scroll that does not change
    ``document.body.scrollHeight`` counts as a failed attempt; every 10th
    consecutive failure scrolls back to the top, and at 50 the user is asked
    whether to run another cycle.

    Args:
        driver: Selenium WebDriver positioned on the group's members page.

    Returns:
        The collection returned by the last ``extraer_perfiles`` call, or the
        global ``total_members`` if no scroll completed.
    """
    intentos_fallidos = 0
    ultima_altura = 0
    scroll_count = 0
    last_member_count = 0
    ciclo_scroll = 0
    # Bind the result up front: if the loop never completes an iteration
    # (extraction already stopped, or the first driver call raises), the
    # final return would otherwise fail with UnboundLocalError.
    current_members = total_members

    while extraction_active:
        try:
            while is_paused and extraction_active:
                time.sleep(0.1)

            altura_actual = driver.execute_script("return document.body.scrollHeight")
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
            time.sleep(1)

            current_members = extraer_perfiles(driver)
            new_members = len(current_members) - last_member_count

            print_stats(len(current_members), new_members, scroll_count + 1)
            last_member_count = len(current_members)

            if altura_actual == ultima_altura:
                intentos_fallidos += 1

                if intentos_fallidos % 10 == 0:
                    print(f"{Fore.YELLOW}Scroll bloqueado - Intento {intentos_fallidos}/50")
                    print(f"Subiendo al inicio y esperando 3 segundos...{Style.RESET_ALL}")
                    driver.execute_script("window.scrollTo(0, 0)")
                    time.sleep(3)
            else:
                intentos_fallidos = 0

            ultima_altura = altura_actual
            scroll_count += 1

        except WebDriverException as e:
            if "disconnected" in str(e):
                print(f"{Fore.YELLOW}Reconectando después de suspensión...{Style.RESET_ALL}")
                try:
                    driver.refresh()
                    time.sleep(5)
                    continue
                except Exception as refresh_error:
                    print(f"{Fore.RED}Error al reconectar: {refresh_error}{Style.RESET_ALL}")
                    break
            else:
                # Other driver errors are retried, but report them and back
                # off briefly so a persistent failure does not spin the loop
                # silently at full speed.
                print(f"{Fore.RED}Error de WebDriver: {e}{Style.RESET_ALL}")
                time.sleep(1)

        if intentos_fallidos == 50:
            ciclo_scroll += 1
            print(f"{Fore.YELLOW}\n{'='*50}")
            print(f"Ciclo de scroll {ciclo_scroll} completado.")
            print(f"Total de miembros hasta ahora: {len(current_members)}")
            print(f"{'='*50}{Style.RESET_ALL}")

            response = input(f"{Fore.WHITE}¿Desea continuar con otro ciclo de 50 intentos? (yes/no): {Style.RESET_ALL}").lower()
            if response == 'yes':
                intentos_fallidos = 0
                print(f"{Fore.GREEN}Iniciando nuevo ciclo de scroll...{Style.RESET_ALL}")
                time.sleep(2)
            else:
                break

    return current_members

def extraer_perfiles(driver):
    """Add the member profile links currently on the page to ``total_members``.

    Waits up to 10 seconds for anchors whose ``href`` contains both
    ``/groups/`` and ``/user/``, then stores each non-empty ``href`` in the
    global set, prefixing ``https://www.facebook.com`` to any link that does
    not start with ``https://``.

    Args:
        driver: Selenium WebDriver showing the members page.

    Returns:
        The global ``total_members`` set. Any exception is printed and the
        set is returned with whatever was added before the failure.
    """
    try:
        localizador = (
            By.XPATH,
            '//a[contains(@href, "/groups/") and contains(@href, "/user/")]',
        )
        enlaces = WebDriverWait(driver, 10).until(
            EC.presence_of_all_elements_located(localizador)
        )
        for enlace in enlaces:
            href = enlace.get_attribute('href')
            if not href:
                continue
            if not href.startswith('https://'):
                href = "https://www.facebook.com" + href
            total_members.add(href)
    except Exception as e:
        print(f"{Fore.RED}Error extrayendo perfiles: {e}{Style.RESET_ALL}")
    return total_members

def guardar_perfiles(perfiles, nombre_grupo):
    """Write the profiles, one per line, to ``<nombre_grupo>.txt``.

    Args:
        perfiles: Iterable of profile URL strings; nothing happens if it is
            empty.
        nombre_grupo: Base name of the output file.

    The file is written as UTF-8 relative to the current working directory.
    Errors are printed rather than raised.
    """
    if not perfiles:
        return

    archivo_salida = f"{nombre_grupo}.txt"
    try:
        with open(archivo_salida, 'w', encoding='utf-8') as archivo:
            archivo.writelines(perfil + "\n" for perfil in perfiles)
        ruta = os.path.abspath(archivo_salida)
        print(f"{Fore.GREEN}Perfiles guardados en: {ruta}{Style.RESET_ALL}")
        print(f"{Fore.GREEN}Total de perfiles guardados: {len(perfiles)}{Style.RESET_ALL}")
    except Exception as e:
        print(f"{Fore.RED}Error guardando archivo: {e}{Style.RESET_ALL}")

def main():
    """Prompt for group URLs and extract the members of each group in turn.

    URLs are read from stdin, one per line, until an empty line. For every
    group the shared ``total_members`` set is cleared, the members page is
    opened and scrolled, and the result is saved to a text file named after
    the last path segment of the URL. SPACE toggles pause through a
    ``keyboard`` hook; a daemon thread waits for ENTER to stop. The browser
    is quit only when extraction was stopped by the user.
    """
    global start_time
    clear_console()
    print(f"{Fore.CYAN}{'='*50}")
    print(f"{Fore.GREEN}Facebook Group Member Extractor{Style.RESET_ALL}")
    print(f"{Fore.CYAN}{'='*50}{Style.RESET_ALL}")
    print(f"{Fore.WHITE}Ingrese las URLs de los grupos (Presione Enter dos veces para finalizar):{Style.RESET_ALL}")

    # Read stripped lines until the first empty one.
    urls = list(iter(lambda: input().strip(), ""))
    if not urls:
        print(f"{Fore.RED}No se ingresaron URLs{Style.RESET_ALL}")
        return

    keyboard.on_press_key("space", lambda _: handle_space())
    start_time = time.time()
    driver = webdriver.Chrome(service=service, options=chrome_options)

    try:
        threading.Thread(target=check_for_enter, daemon=True).start()

        for url in urls:
            if not extraction_active:
                break

            print(f"\n{Fore.CYAN}Procesando grupo: {url}{Style.RESET_ALL}")
            total_members.clear()

            if not navegar_a_miembros(driver, url):
                continue

            miembros_extraidos = cargar_miembros(driver)
            # NOTE(review): the pasted source lost its indentation; this
            # message is assumed to belong to the "no members" case rather
            # than the navigation-failure case — confirm.
            if not miembros_extraidos:
                print(f"{Fore.RED}No se pudieron extraer miembros del grupo{Style.RESET_ALL}")
                continue

            # Last path segment of the URL, ignoring one trailing slash.
            segmentos = url.split('/')
            nombre_grupo = segmentos[-2] if url.endswith('/') else segmentos[-1]
            guardar_perfiles(miembros_extraidos, nombre_grupo)
            print(f"{Fore.GREEN}Extracción completada para el grupo: {nombre_grupo}{Style.RESET_ALL}")
    except Exception as e:
        print(f"{Fore.RED}Error: {e}{Style.RESET_ALL}")
    finally:
        keyboard.unhook_all()
        if not extraction_active:
            driver.quit()

# Run the extractor only when this file is executed as a script.
if __name__ == "__main__":
    main()

Reply all

Reply to author

Forward

0 new messages