Spaces:
Runtime error
Runtime error
import json | |
import logging | |
import os | |
import re | |
import shutil | |
import urllib.parse | |
import tempfile | |
import sys | |
from selenium.webdriver.chrome.webdriver import WebDriver | |
import undetected_chromedriver as uc | |
# Module-level caches. Each starts as None and is assigned by the function
# named in its comment.

# Version string read from package.json by get_flaresolverr_version().
FLARESOLVERR_VERSION = None
# Value of os.name, stored by get_current_platform().
PLATFORM_VERSION = None
# Path of the Chrome binary, stored by get_chrome_exe_path().
CHROME_EXE_PATH = None
# Major version of Chrome, stored by get_chrome_major_version().
CHROME_MAJOR_VERSION = None
# Browser User-Agent, stored by get_user_agent() and read by get_webdriver().
USER_AGENT = None
# Xvfb instance created and started by start_xvfb_display().
XVFB_DISPLAY = None
# Location of the patched chromedriver, stored by get_webdriver().
PATCHED_DRIVER_PATH = None
def get_config_log_html() -> bool:
    """Tell whether the ``LOG_HTML`` environment variable is switched on.

    Returns:
        True only when ``LOG_HTML`` equals ``true`` (case-insensitive).
        An unset variable counts as ``false``.
    """
    raw_flag = os.environ.get('LOG_HTML', 'false')
    return raw_flag.lower() == 'true'
def get_config_headless() -> bool:
    """Tell whether the ``HEADLESS`` environment variable is switched on.

    Returns:
        True only when ``HEADLESS`` equals ``true`` (case-insensitive).
        An unset variable counts as ``true``.
    """
    raw_flag = os.environ.get('HEADLESS', 'true')
    return raw_flag.lower() == 'true'
def get_flaresolverr_version() -> str:
    """Return the ``version`` field of ``package.json``.

    The file is looked up in the parent of this module's directory first and,
    when it is not a file there, in this module's own directory. The value is
    read once and kept in the module-level ``FLARESOLVERR_VERSION``; later
    calls return that cached value.
    """
    global FLARESOLVERR_VERSION
    if FLARESOLVERR_VERSION is None:
        module_dir = os.path.dirname(os.path.abspath(__file__))
        manifest_path = os.path.join(module_dir, os.pardir, 'package.json')
        if not os.path.isfile(manifest_path):
            manifest_path = os.path.join(module_dir, 'package.json')
        with open(manifest_path) as handle:
            FLARESOLVERR_VERSION = json.load(handle)['version']
    return FLARESOLVERR_VERSION
def get_current_platform() -> str:
    """Return ``os.name``, caching it in the module-level ``PLATFORM_VERSION``."""
    global PLATFORM_VERSION
    if PLATFORM_VERSION is None:
        PLATFORM_VERSION = os.name
    return PLATFORM_VERSION
def create_proxy_extension(proxy: dict) -> str:
    """Write a temporary Chrome extension that configures an authenticated proxy.

    The extension consists of a ``manifest.json`` and a ``background.js``
    which sets a fixed proxy server and answers proxy authentication
    challenges with the supplied credentials.

    Args:
        proxy: Mapping with the keys ``url`` (for example
            ``http://host:8080``), ``username`` and ``password``.

    Returns:
        Path of the newly created temporary directory holding the extension.
        The caller is responsible for deleting it.

    Raises:
        KeyError: If ``url``, ``username`` or ``password`` is missing.
    """
    parsed_url = urllib.parse.urlparse(proxy['url'])

    single_proxy = {
        'scheme': parsed_url.scheme,
        'host': parsed_url.hostname,
    }
    # The URL may carry no explicit port; in that case the key is left out
    # instead of failing while formatting the script.
    if parsed_url.port is not None:
        single_proxy['port'] = parsed_url.port

    proxy_config = {
        'mode': 'fixed_servers',
        'rules': {
            'singleProxy': single_proxy,
            'bypassList': ['localhost'],
        },
    }
    auth_response = {
        'authCredentials': {
            'username': proxy['username'],
            'password': proxy['password'],
        },
    }

    manifest_json = """
{
    "version": "1.0.0",
    "manifest_version": 2,
    "name": "Chrome Proxy",
    "permissions": [
        "proxy",
        "tabs",
        "unlimitedStorage",
        "storage",
        "<all_urls>",
        "webRequest",
        "webRequestBlocking"
    ],
    "background": {"scripts": ["background.js"]},
    "minimum_chrome_version": "76.0.0"
}
"""

    # The values are serialised with json.dumps so that quotes, backslashes
    # and non-ASCII characters in the host or credentials cannot break out of
    # the generated JavaScript literals.
    background_js = """
var config = %s;

chrome.proxy.settings.set({value: config, scope: "regular"}, function() {});

function callbackFn(details) {
    return %s;
}

chrome.webRequest.onAuthRequired.addListener(
    callbackFn,
    { urls: ["<all_urls>"] },
    ['blocking']
);
""" % (
        json.dumps(proxy_config, indent=4),
        json.dumps(auth_response, indent=4),
    )

    proxy_extension_dir = tempfile.mkdtemp()

    with open(os.path.join(proxy_extension_dir, "manifest.json"), "w", encoding="utf-8") as f:
        f.write(manifest_json)

    with open(os.path.join(proxy_extension_dir, "background.js"), "w", encoding="utf-8") as f:
        f.write(background_js)

    return proxy_extension_dir
def get_webdriver(proxy: dict = None) -> WebDriver:
    """Launch Chrome through undetected_chromedriver and return the driver.

    Args:
        proxy: Optional proxy description. When it contains ``url``,
            ``username`` and ``password`` a temporary proxy extension is
            generated and loaded; when it only contains ``url`` the
            ``--proxy-server`` switch is used instead.

    Returns:
        The started driver instance.

    Raises:
        Exception: Whatever ``uc.Chrome`` raises when the browser cannot be
            started. The error is logged and then propagated.
    """
    global PATCHED_DRIVER_PATH, USER_AGENT
    logging.debug('Launching web browser...')

    # undetected_chromedriver
    options = uc.ChromeOptions()
    options.add_argument('--no-sandbox')
    options.add_argument('--window-size=1920,1080')
    # todo: this param shows a warning in chrome head-full
    options.add_argument('--disable-setuid-sandbox')
    options.add_argument('--disable-dev-shm-usage')
    # this option removes the zygote sandbox (it seems that the resolution is a bit faster)
    options.add_argument('--no-zygote')
    # attempt to fix Docker ARM32 build
    options.add_argument('--disable-gpu-sandbox')
    options.add_argument('--disable-software-rasterizer')
    options.add_argument('--ignore-certificate-errors')
    options.add_argument('--ignore-ssl-errors')
    # fix GL errors in ASUSTOR NAS
    # https://github.com/FlareSolverr/FlareSolverr/issues/782
    # https://github.com/microsoft/vscode/issues/127800#issuecomment-873342069
    # https://peter.sh/experiments/chromium-command-line-switches/#use-gl
    options.add_argument('--use-gl=swiftshader')

    language = os.environ.get('LANG', None)
    if language is not None:
        options.add_argument('--accept-lang=%s' % language)

    # Fix for Chrome 117 | https://github.com/FlareSolverr/FlareSolverr/issues/910
    if USER_AGENT is not None:
        options.add_argument('--user-agent=%s' % USER_AGENT)

    proxy_extension_dir = None
    if proxy and all(key in proxy for key in ['url', 'username', 'password']):
        proxy_extension_dir = create_proxy_extension(proxy)
        options.add_argument("--load-extension=%s" % os.path.abspath(proxy_extension_dir))
    elif proxy and 'url' in proxy:
        proxy_url = proxy['url']
        logging.debug("Using webdriver proxy: %s", proxy_url)
        options.add_argument('--proxy-server=%s' % proxy_url)

    # note: headless mode is detected (headless = True)
    # we launch the browser in head-full mode with the window hidden
    windows_headless = False
    if get_config_headless():
        if os.name == 'nt':
            windows_headless = True
        else:
            start_xvfb_display()
    # For normal headless mode:
    # options.add_argument('--headless')

    options.add_argument("--auto-open-devtools-for-tabs")

    # if we are inside the Docker container, we avoid downloading the driver
    driver_exe_path = None
    version_main = None
    if os.path.exists("/app/chromedriver"):
        # running inside Docker
        driver_exe_path = "/app/chromedriver"
    else:
        version_main = get_chrome_major_version()
        if PATCHED_DRIVER_PATH is not None:
            driver_exe_path = PATCHED_DRIVER_PATH

    # detect chrome path
    browser_executable_path = get_chrome_exe_path()

    # downloads and patches the chromedriver
    # if we don't set driver_executable_path it downloads, patches, and deletes the driver each time
    try:
        driver = uc.Chrome(options=options, browser_executable_path=browser_executable_path,
                           driver_executable_path=driver_exe_path, version_main=version_main,
                           windows_headless=windows_headless, headless=get_config_headless())
    except Exception as e:
        logging.error("Error starting Chrome: %s", e)
        # Propagate the real failure; continuing would only hit an
        # UnboundLocalError on `driver` further down.
        raise
    finally:
        # clean up proxy extension directory, also when the launch failed.
        # Best-effort: a cleanup problem must not mask the launch result.
        if proxy_extension_dir is not None:
            shutil.rmtree(proxy_extension_dir, ignore_errors=True)

    # save the patched driver to avoid re-downloads
    if driver_exe_path is None:
        PATCHED_DRIVER_PATH = os.path.join(driver.patcher.data_path, driver.patcher.exe_name)
        if PATCHED_DRIVER_PATH != driver.patcher.executable_path:
            shutil.copy(driver.patcher.executable_path, PATCHED_DRIVER_PATH)

    # selenium vanilla
    # options = webdriver.ChromeOptions()
    # options.add_argument('--no-sandbox')
    # options.add_argument('--window-size=1920,1080')
    # options.add_argument('--disable-setuid-sandbox')
    # options.add_argument('--disable-dev-shm-usage')
    # driver = webdriver.Chrome(options=options)
    return driver
def get_chrome_exe_path() -> str:
    """Locate the Chrome binary and cache the result in ``CHROME_EXE_PATH``.

    Lookup order: ``chrome/chrome`` next to this module, then
    ``chrome/chrome.exe`` next to this module, then whatever
    ``uc.find_chrome_executable()`` returns.

    Raises:
        Exception: If ``chrome/chrome`` exists but is not executable.
    """
    global CHROME_EXE_PATH
    if CHROME_EXE_PATH is not None:
        return CHROME_EXE_PATH

    bundle_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'chrome')

    # linux pyinstaller bundle
    linux_binary = os.path.join(bundle_dir, "chrome")
    if os.path.exists(linux_binary):
        if os.access(linux_binary, os.X_OK):
            CHROME_EXE_PATH = linux_binary
            return CHROME_EXE_PATH
        raise Exception(f'Chrome binary "{linux_binary}" is not executable. '
                        f'Please, extract the archive with "tar xzf <file.tar.gz>".')

    # windows pyinstaller bundle, otherwise fall back to the system install
    windows_binary = os.path.join(bundle_dir, "chrome.exe")
    if os.path.exists(windows_binary):
        CHROME_EXE_PATH = windows_binary
    else:
        CHROME_EXE_PATH = uc.find_chrome_executable()
    return CHROME_EXE_PATH
def get_chrome_major_version() -> str:
    """Return Chrome's major version, caching it in ``CHROME_MAJOR_VERSION``.

    On Windows (``os.name == 'nt'``) the full version is taken from the
    executable, falling back to the registry and then to the installation
    folder. Elsewhere it is read from the output of ``<chrome> --version``.
    The major version is the last space-separated token before the first dot.
    """
    global CHROME_MAJOR_VERSION
    if CHROME_MAJOR_VERSION is not None:
        return CHROME_MAJOR_VERSION

    if os.name != 'nt':
        binary = get_chrome_exe_path()
        pipe = os.popen(f'"{binary}" --version')
        # Example 1: 'Chromium 104.0.5112.79 Arch Linux\n'
        # Example 2: 'Google Chrome 104.0.5112.79 Arch Linux\n'
        full_version = pipe.read()
        pipe.close()
    else:
        # Example: '104.0.5112.79'
        try:
            full_version = extract_version_nt_executable(get_chrome_exe_path())
        except Exception:
            try:
                full_version = extract_version_nt_registry()
            except Exception:
                full_version = extract_version_nt_folder()

    before_first_dot = full_version.split('.')[0]
    CHROME_MAJOR_VERSION = before_first_dot.split(' ')[-1]
    return CHROME_MAJOR_VERSION
def extract_version_nt_executable(exe_path: str) -> str:
    """Read the ``FileVersion`` string from an executable's resources.

    Args:
        exe_path: Path of the executable to inspect.

    Returns:
        The ``FileVersion`` entry decoded as UTF-8.
    """
    import pefile

    image = pefile.PE(exe_path, fast_load=True)
    # fast_load skips the data directories, so the resource one is parsed explicitly
    resource_directory = pefile.DIRECTORY_ENTRY["IMAGE_DIRECTORY_ENTRY_RESOURCE"]
    image.parse_data_directories(directories=[resource_directory])

    string_table = image.FileInfo[0][0].StringTable[0]
    raw_version = string_table.entries[b"FileVersion"]
    return raw_version.decode('utf-8')
def extract_version_nt_registry() -> str:
    """Read the Chrome version from the Windows uninstall registry key.

    Runs ``reg query`` on the Google Chrome uninstall key and extracts the
    value of the last ``DisplayVersion  REG_SZ`` line in its output.

    Returns:
        The version string with surrounding whitespace removed.

    Raises:
        ValueError: If the output contains no ``DisplayVersion`` entry (for
            example because the key does not exist).
    """
    with os.popen(
            'reg query "HKLM\\SOFTWARE\\Wow6432Node\\Microsoft\\Windows\\CurrentVersion\\Uninstall\\Google Chrome"'
    ) as stream:
        output = stream.read()

    # Name, type and value are separated by a variable amount of whitespace,
    # so match on whitespace runs rather than a fixed character offset.
    matches = re.findall(r'DisplayVersion[ \t]+REG_SZ[ \t]*([^\r\n]*)', output)
    if not matches:
        raise ValueError('DisplayVersion REG_SZ not found in registry query output')
    return matches[-1].strip()
def extract_version_nt_folder() -> str:
    """Find the Chrome version from its installation folder on Windows.

    Looks in the ``Google\\Chrome\\Application`` folder under
    ``C:\\Program Files`` and then ``C:\\Program Files (x86)`` for a
    sub-directory whose name contains a four-part version number.

    Returns:
        The first version found (for example ``'104.0.5112.79'``), or an
        empty string when none is found.
    """
    version_pattern = re.compile(r'\d+\.\d+\.\d+\.\d+')
    # Check if the Chrome folder exists in the x32 or x64 Program Files folders.
    for program_files in ('C:\\Program Files', 'C:\\Program Files (x86)'):
        application_dir = program_files + '\\Google\\Chrome\\Application'
        if not os.path.isdir(application_dir):
            continue
        with os.scandir(application_dir) as entries:
            for entry in entries:
                if not entry.is_dir():
                    continue
                match = version_pattern.search(entry.name)
                if match:
                    # Found a Chrome version.
                    return match.group(0)
    return ''
def get_user_agent(driver=None) -> str:
    """Return the browser's User-Agent, caching it in ``USER_AGENT``.

    Args:
        driver: Optional driver to query. When omitted a new one is created
            with ``get_webdriver()``. NOTE: unless the cached value is
            returned, the driver is quit before this function returns, even
            when it was supplied by the caller.

    Returns:
        ``navigator.userAgent`` with any occurrence of ``HEADLESS``
        (case-insensitive) removed.

    Raises:
        Exception: If the browser cannot be started or queried; the original
            error is attached as the cause.
    """
    global USER_AGENT
    if USER_AGENT is not None:
        return USER_AGENT

    try:
        if driver is None:
            driver = get_webdriver()
        raw_user_agent = driver.execute_script("return navigator.userAgent")
        # Fix for Chrome 117 | https://github.com/FlareSolverr/FlareSolverr/issues/910
        USER_AGENT = re.sub('HEADLESS', '', raw_user_agent, flags=re.IGNORECASE)
        return USER_AGENT
    except Exception as e:
        raise Exception("Error getting browser User-Agent. " + str(e)) from e
    finally:
        if driver is not None:
            # Ask get_current_platform() rather than reading PLATFORM_VERSION
            # directly: the global stays None until that function has run.
            if get_current_platform() == "nt":
                driver.close()
            driver.quit()
def start_xvfb_display():
    """Create and start an Xvfb display once, keeping it in ``XVFB_DISPLAY``.

    Calls made after the display has been created do nothing.
    """
    global XVFB_DISPLAY
    if XVFB_DISPLAY is not None:
        return

    # imported here so the dependency is only loaded when a display is needed
    from xvfbwrapper import Xvfb

    XVFB_DISPLAY = Xvfb()
    XVFB_DISPLAY.start()
def object_to_dict(_object):
    """Convert an object to a plain dict via a JSON round trip.

    Values the JSON encoder cannot handle are replaced by their ``__dict__``.
    Top-level keys starting with ``__`` are left out of the result.
    """
    def _attributes_of(value):
        return value.__dict__

    round_tripped = json.loads(json.dumps(_object, default=_attributes_of))

    # remove hidden fields
    visible = {}
    for key, value in round_tripped.items():
        if key.startswith('__'):
            continue
        visible[key] = value
    return visible