Spaces:
Paused
Paused
| from __future__ import annotations | |
| from selenium import webdriver | |
| from selenium.common.exceptions import TimeoutException, WebDriverException | |
| from selenium.webdriver.chrome.service import Service as ChromeService | |
| from selenium.webdriver.remote.webdriver import WebDriver | |
| from selenium.webdriver.support.wait import WebDriverWait | |
# User-agent string of a desktop Chrome 124 build on 64-bit Linux.
# configure_browser() passes it to Chrome through the --user-agent switch.
DEFAULT_USER_AGENT = (
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"
)
def configure_browser(*, chrome_binary: str, chromedriver_path: str, page_timeout: int = 40) -> WebDriver:
    """Start a headless Chrome session and apply the module's standard settings.

    Args:
        chrome_binary: Filesystem path of the Chrome/Chromium executable.
        chromedriver_path: Filesystem path of the chromedriver executable.
        page_timeout: Value handed to ``set_page_load_timeout``; the script
            timeout is the same value capped at 20.

    Returns:
        The started and configured driver. The caller owns it and is
        responsible for calling ``quit()``.

    Raises:
        Exception: Whatever the Selenium calls raise. If the failure happens
            after the browser has started, the browser is shut down before
            the exception propagates.
    """
    options = webdriver.ChromeOptions()
    options.binary_location = chrome_binary
    # "eager" lets driver.get() return before sub-resources finish loading.
    options.page_load_strategy = "eager"

    # Command-line switches, kept in the original order.
    chrome_switches = (
        "--headless=new",
        "--disable-gpu",
        "--no-sandbox",
        "--disable-dev-shm-usage",
        "--disable-blink-features=AutomationControlled",
        "--disable-background-networking",
        "--disable-background-timer-throttling",
        "--disable-backgrounding-occluded-windows",
        "--disable-renderer-backgrounding",
        "--disable-extensions",
        "--disable-default-apps",
        "--no-first-run",
        "--no-default-browser-check",
        "--mute-audio",
        "--window-size=1440,1280",
        "--lang=zh-CN",
        f"--user-agent={DEFAULT_USER_AGENT}",
        "--remote-debugging-pipe",
    )
    for switch in chrome_switches:
        options.add_argument(switch)

    options.add_experimental_option("excludeSwitches", ["enable-automation", "enable-logging"])
    options.add_experimental_option("useAutomationExtension", False)

    service = ChromeService(executable_path=chromedriver_path)
    driver = webdriver.Chrome(service=service, options=options)

    # From here on a browser process exists; if any configuration step fails
    # it must be shut down, otherwise it is leaked because the caller never
    # receives a handle to it.
    try:
        driver.set_page_load_timeout(page_timeout)
        driver.set_script_timeout(min(page_timeout, 20))
        driver.implicitly_wait(6)
        # Injected into every new document so that navigator.webdriver reads
        # as undefined.
        driver.execute_cdp_cmd(
            "Page.addScriptToEvaluateOnNewDocument",
            {
                "source": "Object.defineProperty(navigator, 'webdriver', {get: () => undefined});"
            },
        )
    except Exception:
        try:
            driver.quit()
        except WebDriverException:
            # Best-effort cleanup: the configuration error raised above is the
            # one the caller needs to see.
            pass
        raise
    return driver
def open_with_recovery(driver: WebDriver, url: str) -> bool:
    """Navigate to *url*, tolerating a page-load timeout.

    Args:
        driver: The browser session to navigate.
        url: Address to load.

    Returns:
        ``False`` when ``driver.get`` completed normally, ``True`` when it
        raised ``TimeoutException`` and the load was aborted instead.
    """
    timed_out = False
    try:
        driver.get(url)
    except TimeoutException:
        timed_out = True

    if timed_out:
        # Ask the page to stop loading; this is best-effort, so a driver error
        # here is deliberately ignored.
        try:
            driver.execute_script("window.stop();")
        except WebDriverException:
            pass
    return timed_out
def wait_for_ready(driver_wait: WebDriverWait, *, allow_interactive: bool = True) -> str:
    """Block until ``document.readyState`` reaches an acceptable value.

    Args:
        driver_wait: The wait object whose ``until`` method drives the polling.
        allow_interactive: When true, ``"interactive"`` is accepted as well as
            ``"complete"``; when false only ``"complete"`` is accepted.

    Returns:
        The ready state that satisfied the wait, as a string.

    Raises:
        Whatever ``driver_wait.until`` raises when the condition is never met
        (for Selenium's ``WebDriverWait`` that is ``TimeoutException`` carrying
        the message passed below).
    """
    acceptable_states = {"complete", "interactive"} if allow_interactive else {"complete"}

    def _acceptable_ready_state(web_driver):
        # Return the state itself (truthy) on success so that until() hands it
        # back to us. This avoids a second query that could observe a
        # different state, and avoids touching the wait's private _driver.
        state = web_driver.execute_script("return document.readyState")
        return state if state in acceptable_states else False

    ready_state = driver_wait.until(
        _acceptable_ready_state,
        "The target page did not finish loading in time.",
    )
    return str(ready_state)