"""Browser helpers: configuration lookup, Chrome discovery and webdriver start-up."""
import json
import logging
import os
import re
import shutil
import urllib.parse
import tempfile
import sys
from typing import Optional

from selenium.webdriver.chrome.webdriver import WebDriver
import undetected_chromedriver as uc

# Module-level caches; each one is filled lazily by the function that owns it.
FLARESOLVERR_VERSION = None
PLATFORM_VERSION = None
CHROME_EXE_PATH = None
CHROME_MAJOR_VERSION = None
USER_AGENT = None
XVFB_DISPLAY = None
PATCHED_DRIVER_PATH = None


def get_config_log_html() -> bool:
    """Return True when the LOG_HTML environment variable is 'true' (case-insensitive)."""
    return os.environ.get('LOG_HTML', 'false').lower() == 'true'


def get_config_headless() -> bool:
    """Return True unless the HEADLESS environment variable is set to something other than 'true'."""
    return os.environ.get('HEADLESS', 'true').lower() == 'true'


def get_flaresolverr_version() -> str:
    """Return the 'version' field of package.json, cached after the first read.

    package.json is looked up in the parent directory of this file first and
    next to this file otherwise.
    """
    global FLARESOLVERR_VERSION
    if FLARESOLVERR_VERSION is not None:
        return FLARESOLVERR_VERSION

    here = os.path.dirname(os.path.abspath(__file__))
    package_path = os.path.join(here, os.pardir, 'package.json')
    if not os.path.isfile(package_path):
        package_path = os.path.join(here, 'package.json')
    with open(package_path) as f:
        FLARESOLVERR_VERSION = json.load(f)['version']
    return FLARESOLVERR_VERSION


def get_current_platform() -> str:
    """Return os.name, cached in PLATFORM_VERSION."""
    global PLATFORM_VERSION
    if PLATFORM_VERSION is not None:
        return PLATFORM_VERSION
    PLATFORM_VERSION = os.name
    return PLATFORM_VERSION


def create_proxy_extension(proxy: dict) -> str:
    """Write an unpacked Chrome extension that configures an authenticated proxy.

    :param proxy: dict with the keys 'url', 'username' and 'password'.
    :return: path of a fresh temporary directory holding manifest.json and
        background.js. The caller is responsible for deleting it.
    """
    parsed_url = urllib.parse.urlparse(proxy['url'])

    single_proxy = {"scheme": parsed_url.scheme, "host": parsed_url.hostname}
    # A URL without an explicit port yields None, which cannot be rendered as
    # a number; leave the field out in that case.
    if parsed_url.port is not None:
        single_proxy["port"] = parsed_url.port
    config = {
        "mode": "fixed_servers",
        "rules": {
            "singleProxy": single_proxy,
            "bypassList": ["localhost"],
        },
    }
    credentials = {"username": proxy['username'], "password": proxy['password']}

    # NOTE(review): "<all_urls>" was an empty string in the reviewed text here
    # and in the onAuthRequired filter below. An empty string is not a valid
    # match pattern, so the token is assumed to have been stripped - confirm.
    manifest_json = """
{
    "version": "1.0.0",
    "manifest_version": 2,
    "name": "Chrome Proxy",
    "permissions": [
        "proxy",
        "tabs",
        "unlimitedStorage",
        "storage",
        "<all_urls>",
        "webRequest",
        "webRequestBlocking"
    ],
    "background": {"scripts": ["background.js"]},
    "minimum_chrome_version": "76.0.0"
}
"""

    # json.dumps produces valid JavaScript literals, so quotes or backslashes
    # in the credentials cannot break (or inject into) the generated script.
    background_js = """
var config = %s;

chrome.proxy.settings.set({value: config, scope: "regular"}, function() {});

function callbackFn(details) {
    return {
        authCredentials: %s
    };
}

chrome.webRequest.onAuthRequired.addListener(
    callbackFn,
    { urls: ["<all_urls>"] },
    ['blocking']
);
""" % (json.dumps(config), json.dumps(credentials))

    proxy_extension_dir = tempfile.mkdtemp()
    with open(os.path.join(proxy_extension_dir, "manifest.json"), "w") as f:
        f.write(manifest_json)
    with open(os.path.join(proxy_extension_dir, "background.js"), "w") as f:
        f.write(background_js)
    return proxy_extension_dir


def _build_chrome_options(proxy: Optional[dict]):
    """Build the Chrome options; return (options, proxy_extension_dir or None)."""
    # undetected_chromedriver
    options = uc.ChromeOptions()
    options.add_argument('--no-sandbox')
    options.add_argument('--window-size=1920,1080')
    # todo: this param shows a warning in chrome head-full
    options.add_argument('--disable-setuid-sandbox')
    options.add_argument('--disable-dev-shm-usage')
    # this option removes the zygote sandbox (it seems that the resolution is a bit faster)
    options.add_argument('--no-zygote')
    # attempt to fix Docker ARM32 build
    options.add_argument('--disable-gpu-sandbox')
    options.add_argument('--disable-software-rasterizer')
    options.add_argument('--ignore-certificate-errors')
    options.add_argument('--ignore-ssl-errors')
    # fix GL errors in ASUSTOR NAS
    # https://github.com/FlareSolverr/FlareSolverr/issues/782
    # https://github.com/microsoft/vscode/issues/127800#issuecomment-873342069
    # https://peter.sh/experiments/chromium-command-line-switches/#use-gl
    options.add_argument('--use-gl=swiftshader')

    language = os.environ.get('LANG', None)
    if language is not None:
        options.add_argument('--accept-lang=%s' % language)

    # Fix for Chrome 117 | https://github.com/FlareSolverr/FlareSolverr/issues/910
    if USER_AGENT is not None:
        options.add_argument('--user-agent=%s' % USER_AGENT)

    proxy_extension_dir = None
    if proxy and all(key in proxy for key in ['url', 'username', 'password']):
        # authenticated proxy: credentials are supplied through an extension
        proxy_extension_dir = create_proxy_extension(proxy)
        options.add_argument("--load-extension=%s" % os.path.abspath(proxy_extension_dir))
    elif proxy and 'url' in proxy:
        proxy_url = proxy['url']
        logging.debug("Using webdriver proxy: %s", proxy_url)
        options.add_argument('--proxy-server=%s' % proxy_url)

    return options, proxy_extension_dir


def get_webdriver(proxy: Optional[dict] = None) -> WebDriver:
    """Start Chrome through undetected_chromedriver and return the driver.

    :param proxy: optional dict. With 'url', 'username' and 'password' a proxy
        extension is generated and loaded; with only 'url' the --proxy-server
        switch is used.
    :return: the started driver.
    :raises Exception: whatever the Chrome launch raised; it is logged and
        re-raised instead of being swallowed.
    """
    global PATCHED_DRIVER_PATH
    logging.debug('Launching web browser...')

    options, proxy_extension_dir = _build_chrome_options(proxy)
    try:
        # note: headless mode is detected (headless = True)
        # we launch the browser in head-full mode with the window hidden
        windows_headless = False
        if get_config_headless():
            if os.name == 'nt':
                windows_headless = True
            else:
                start_xvfb_display()
        # For normal headless mode:
        # options.add_argument('--headless')

        options.add_argument("--auto-open-devtools-for-tabs")

        # if we are inside the Docker container, we avoid downloading the driver
        driver_exe_path = None
        version_main = None
        if os.path.exists("/app/chromedriver"):
            # running inside Docker
            driver_exe_path = "/app/chromedriver"
        else:
            version_main = get_chrome_major_version()
            if PATCHED_DRIVER_PATH is not None:
                driver_exe_path = PATCHED_DRIVER_PATH

        # detect chrome path
        browser_executable_path = get_chrome_exe_path()

        # downloads and patches the chromedriver
        # if we don't set driver_executable_path it downloads, patches, and deletes the driver each time
        try:
            driver = uc.Chrome(options=options, browser_executable_path=browser_executable_path,
                               driver_executable_path=driver_exe_path, version_main=version_main,
                               windows_headless=windows_headless, headless=get_config_headless())
        except Exception as e:
            logging.error("Error starting Chrome: %s", e)
            # Without a driver nothing below can work; surface the real cause.
            raise

        # save the patched driver to avoid re-downloads
        if driver_exe_path is None:
            PATCHED_DRIVER_PATH = os.path.join(driver.patcher.data_path, driver.patcher.exe_name)
            if PATCHED_DRIVER_PATH != driver.patcher.executable_path:
                shutil.copy(driver.patcher.executable_path, PATCHED_DRIVER_PATH)
    finally:
        # clean up proxy extension directory, also when start-up failed;
        # best effort so a cleanup problem never hides the original error
        if proxy_extension_dir is not None:
            shutil.rmtree(proxy_extension_dir, ignore_errors=True)

    return driver


def get_chrome_exe_path() -> str:
    """Return the Chrome executable path, cached in CHROME_EXE_PATH.

    Checks a bundled 'chrome/chrome', then a bundled 'chrome/chrome.exe' next
    to this file, and finally asks undetected_chromedriver.

    :raises Exception: if the bundled 'chrome/chrome' exists but is not executable.
    """
    global CHROME_EXE_PATH
    if CHROME_EXE_PATH is not None:
        return CHROME_EXE_PATH

    bundle_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'chrome')

    # linux pyinstaller bundle
    chrome_path = os.path.join(bundle_dir, "chrome")
    if os.path.exists(chrome_path):
        if not os.access(chrome_path, os.X_OK):
            # NOTE(review): the "<file.tar.gz>" placeholder was missing from
            # the reviewed text (assumed stripped) - confirm the wording.
            raise Exception(f'Chrome binary "{chrome_path}" is not executable. '
                            f'Please, extract the archive with "tar xzf <file.tar.gz>".')
        CHROME_EXE_PATH = chrome_path
        return CHROME_EXE_PATH

    # windows pyinstaller bundle
    chrome_path = os.path.join(bundle_dir, "chrome.exe")
    if os.path.exists(chrome_path):
        CHROME_EXE_PATH = chrome_path
        return CHROME_EXE_PATH

    # system
    CHROME_EXE_PATH = uc.find_chrome_executable()
    return CHROME_EXE_PATH


def get_chrome_major_version() -> str:
    """Return the Chrome major version as a string, cached in CHROME_MAJOR_VERSION.

    On Windows the version is read from the executable, then from the
    registry, then from the installation folder. Elsewhere it is parsed from
    the output of '<chrome> --version'.
    """
    global CHROME_MAJOR_VERSION
    if CHROME_MAJOR_VERSION is not None:
        return CHROME_MAJOR_VERSION

    if os.name == 'nt':
        # Example: '104.0.5112.79'
        try:
            complete_version = extract_version_nt_executable(get_chrome_exe_path())
        except Exception:
            try:
                complete_version = extract_version_nt_registry()
            except Exception:
                # Example: '104.0.5112.79'
                complete_version = extract_version_nt_folder()
    else:
        chrome_path = get_chrome_exe_path()
        # Example 1: 'Chromium 104.0.5112.79 Arch Linux\n'
        # Example 2: 'Google Chrome 104.0.5112.79 Arch Linux\n'
        with os.popen(f'"{chrome_path}" --version') as process:
            complete_version = process.read()

    # text before the first dot, last space-separated token: '104'
    CHROME_MAJOR_VERSION = complete_version.split('.')[0].split(' ')[-1]
    return CHROME_MAJOR_VERSION


def extract_version_nt_executable(exe_path: str) -> str:
    """Return the FileVersion string stored in the resources of a Windows executable."""
    import pefile
    pe = pefile.PE(exe_path, fast_load=True)
    pe.parse_data_directories(
        directories=[pefile.DIRECTORY_ENTRY["IMAGE_DIRECTORY_ENTRY_RESOURCE"]]
    )
    return pe.FileInfo[0][0].StringTable[0].entries[b"FileVersion"].decode('utf-8')


def extract_version_nt_registry() -> str:
    """Return Chrome's DisplayVersion from the Windows uninstall registry key.

    :raises ValueError: if the 'reg query' output has no DisplayVersion value,
        so that the caller can fall back to another strategy.
    """
    with os.popen(
        'reg query "HKLM\\SOFTWARE\\Wow6432Node\\Microsoft\\Windows\\CurrentVersion\\Uninstall\\Google Chrome"'
    ) as stream:
        output = stream.read()

    # Tolerate any run of blanks between the columns instead of relying on a
    # fixed character offset; the last occurrence wins.
    matches = re.findall(r'DisplayVersion[ \t]+REG_SZ[ \t]*([^\r\n]*)', output)
    if not matches:
        raise ValueError('DisplayVersion not found in registry query output')
    return matches[-1].strip()


def extract_version_nt_folder() -> str:
    """Return the first version-named sub-folder of a Chrome installation, or ''."""
    version_pattern = re.compile(r'\d+\.\d+\.\d+\.\d+')
    # Check if the Chrome folder exists in the x32 or x64 Program Files folders.
    for program_files in ('C:\\Program Files', 'C:\\Program Files (x86)'):
        app_dir = program_files + '\\Google\\Chrome\\Application'
        if not os.path.isdir(app_dir):
            continue
        for entry in os.scandir(app_dir):
            if not entry.is_dir():
                continue
            match = version_pattern.search(os.path.basename(entry.path))
            if match and match.group():
                # Found a Chrome version.
                return match.group(0)
    return ''


def get_user_agent(driver=None) -> str:
    """Return the browser User-Agent with any 'headless' marker removed, cached in USER_AGENT.

    :param driver: driver to query; when None a new one is started. The driver
        that was used is quit before returning, whether or not it was passed in.
    :raises Exception: if the User-Agent cannot be obtained.
    """
    global USER_AGENT
    if USER_AGENT is not None:
        return USER_AGENT

    try:
        if driver is None:
            driver = get_webdriver()
        USER_AGENT = driver.execute_script("return navigator.userAgent")
        # Fix for Chrome 117 | https://github.com/FlareSolverr/FlareSolverr/issues/910
        USER_AGENT = re.sub('HEADLESS', '', USER_AGENT, flags=re.IGNORECASE)
        return USER_AGENT
    except Exception as e:
        raise Exception("Error getting browser User-Agent. " + str(e)) from e
    finally:
        if driver is not None:
            # get_current_platform() rather than the raw PLATFORM_VERSION
            # global, which is still None if nobody has initialised it yet.
            if get_current_platform() == "nt":
                driver.close()
            driver.quit()


def start_xvfb_display():
    """Start an Xvfb virtual display once and keep it in XVFB_DISPLAY."""
    global XVFB_DISPLAY
    if XVFB_DISPLAY is None:
        from xvfbwrapper import Xvfb
        XVFB_DISPLAY = Xvfb()
        XVFB_DISPLAY.start()


def object_to_dict(_object):
    """Convert an object to a plain dict via a JSON round trip of its __dict__ attributes."""
    json_dict = json.loads(json.dumps(_object, default=lambda o: o.__dict__))
    # remove hidden fields
    return {k: v for k, v in json_dict.items() if not k.startswith('__')}