Spaces:
Building
Building
from flask import Flask | |
from flask import request | |
import undetected_chromedriver as uc | |
import re | |
import os | |
from threading import Timer | |
# /start_browser | |
# /stop_browser | |
# /text?url=https://api.investing.com/api/financialdata/8849/historical/chart/?interval=PT1M&pointscount=60 | |
# encoded: https%3A%2F%2Fapi.investing.com%2Fapi%2Ffinancialdata%2F8849%2Fhistorical%2Fchart%2F%3Finterval%3DPT1M%26pointscount%3D60 | |
# /fetch?url=https://api.investing.com/api/financialdata/historical/1?start-date=2024-02-15&end-date=2029-02-15&time-frame=Daily&add-missing-rows=false | |
# encoded: https%3A%2F%2Fapi.investing.com%2Fapi%2Ffinancialdata%2Fhistorical%2F1%3Fstart-date%3D2024-02-15%26end-date%3D2029-02-15%26time-frame%3DDaily%26add-missing-rows%3Dfalse | |
# Process-wide state shared by every request handler in this module.

# Browser instance used to serve requests; None while no browser is running.
driver: "object | None" = None
# Xvfb virtual display, created on first use by _start_xvfb_display().
XVFB_DISPLAY: "object | None" = None
# User agent string read from the browser the first time it is started.
USER_AGENT: "str | None" = None
# Cached result of get_chrome_exe_path().
CHROME_EXE_PATH: "str | None" = None
# Timer that stops the browser after a period without requests.
timer: "object | None" = None

app = Flask(__name__)
def _start_xvfb_display():
    """Create and start the shared Xvfb display the first time it is needed.

    Later calls are no-ops because the display object is kept in the
    module-level ``XVFB_DISPLAY``.
    """
    global XVFB_DISPLAY
    if XVFB_DISPLAY is not None:
        return

    # Imported lazily: xvfbwrapper is only needed once a display is started.
    from xvfbwrapper import Xvfb

    display = Xvfb()
    XVFB_DISPLAY = display
    display.start()
def get_chrome_exe_path() -> str:
    """Return the path of the Chrome executable, caching it after first lookup.

    A ``chrome/chrome`` binary located next to this file (linux pyinstaller
    bundle) takes precedence; otherwise the system installation reported by
    ``uc.find_chrome_executable()`` is used.
    """
    global CHROME_EXE_PATH
    if CHROME_EXE_PATH is None:
        module_dir = os.path.dirname(os.path.abspath(__file__))
        bundled_chrome = os.path.join(module_dir, 'chrome', "chrome")
        if os.path.exists(bundled_chrome):
            # linux pyinstaller bundle
            CHROME_EXE_PATH = bundled_chrome
        else:
            # system
            CHROME_EXE_PATH = uc.find_chrome_executable()
    return CHROME_EXE_PATH
def _build_chrome_options(user_agent=None):
    """Return a new ``uc.ChromeOptions`` with the flags this service uses.

    A fresh object is built for every launch, as the original code did for
    its second launch (presumably because an options object should not be
    shared between ``uc.Chrome`` instances -- TODO confirm).

    Args:
        user_agent: Value for ``--user-agent``; the flag is omitted when None.
    """
    # Flag list taken from FlareSolverr:
    # https://github.com/FlareSolverr/FlareSolverr/blob/043f18b231b4f409080b2b5c4421ce0f4cac7dec/src/utils.py
    base_flags = (
        '--no-sandbox',
        '--window-size=1920,1080',
        # todo: this param shows a warning in chrome head-full
        '--disable-setuid-sandbox',
        '--disable-dev-shm-usage',
        # this option removes the zygote sandbox (it seems that the resolution is a bit faster)
        '--no-zygote',
        # attempt to fix Docker ARM32 build
        '--disable-gpu-sandbox',
        '--disable-software-rasterizer',
        '--ignore-certificate-errors',
        '--ignore-ssl-errors',
        # fix GL errors in ASUSTOR NAS
        # https://github.com/FlareSolverr/FlareSolverr/issues/782
        # https://github.com/microsoft/vscode/issues/127800#issuecomment-873342069
        # https://peter.sh/experiments/chromium-command-line-switches/#use-gl
        '--use-gl=swiftshader',
    )
    options = uc.ChromeOptions()
    for flag in base_flags:
        options.add_argument(flag)
    if user_agent is not None:
        options.add_argument('--user-agent=%s' % user_agent)
    language = os.environ.get('LANG', None)
    if language is not None:
        options.add_argument('--lang=%s' % language)
    # allow cross origin (the original literal had a stray leading space)
    options.add_argument('--disable-web-security')
    return options


def _launch_chrome(options):
    """Start a head-full Chrome through undetected_chromedriver with *options*."""
    return uc.Chrome(
        options=options,
        headless=False,
        version_main=None,
        driver_executable_path="/app/chromedriver",
        browser_executable_path=get_chrome_exe_path(),
    )


def _start_browser():
    """(Re)start the shared browser and arm the idle-stop timer.

    Any running browser is quit first. On the first start the user agent is
    not known yet, so a throw-away browser is launched to read
    ``navigator.userAgent``; the value (with any "HEADLESS" marker removed)
    is stored in ``USER_AGENT`` and the real browser is then launched with it.
    """
    global driver, USER_AGENT

    # Clear the global before quitting so a failing quit() cannot leave a dead
    # browser handle behind for later requests.
    previous, driver = driver, None
    if previous is not None:
        previous.quit()

    _start_xvfb_display()

    if USER_AGENT is None:
        # The probe lives in a local: if the final launch below fails, the
        # global ``driver`` stays None instead of pointing at a quit browser.
        probe = _launch_chrome(_build_chrome_options())
        try:
            detected_agent = probe.execute_script("return navigator.userAgent")
        finally:
            probe.quit()
        USER_AGENT = re.sub('HEADLESS', '', detected_agent, flags=re.IGNORECASE)
        app.logger.info(USER_AGENT)

    driver = _launch_chrome(_build_chrome_options(USER_AGENT))
    _reset_stop_timer()
    app.logger.info("browser started")
def _stop_browser():
    """Quit the shared browser, if one is running, and disarm the idle timer.

    Safe to call when no browser is running. This matters because the function
    is invoked both by the stop endpoint and by the idle timer (from another
    thread), so it can run after the browser has already been stopped.
    """
    global driver, timer

    # A manual stop must not leave a pending timer that fires later.
    # Cancelling from inside the timer's own callback is harmless.
    if timer is not None:
        timer.cancel()
        timer = None

    if driver is None:
        return

    # Clear the global first so a failing quit() does not leave a dead handle.
    running, driver = driver, None
    running.quit()
    app.logger.info("browser stopped")
def _reset_stop_timer():
    """Restart the countdown after which the idle browser is stopped.

    Cancels the pending timer, if any, and schedules ``_stop_browser`` to run
    five minutes from now.
    """
    global timer
    pending = timer
    if pending is not None:
        pending.cancel()
    idle_seconds = 5 * 60
    # _stop_browser executed in another thread...
    timer = Timer(idle_seconds, _stop_browser)
    timer.start()
def hello_world() -> str:
    """Return the static HTML greeting."""
    greeting = "<p>Hello, World!</p>"
    return greeting
# Path taken from the endpoint list in the comment header of this file; no
# route registration for this handler is visible in the source.
@app.route("/start_browser")
def start_browser():
    """Start (or restart) the shared browser and return ``"ok"``."""
    _start_browser()
    return "ok"
# Path taken from the endpoint list in the comment header of this file; no
# route registration for this handler is visible in the source.
@app.route("/stop_browser")
def stop_browser():
    """Stop the shared browser if one is running and return ``"ok"``."""
    # Nothing to stop when no browser is running (e.g. repeated calls).
    if driver is not None:
        _stop_browser()
    return "ok"
# Path taken from the endpoint list in the comment header of this file; no
# route registration for this handler is visible in the source.
@app.route("/text")
def text():
    """Load the page given by the ``url`` query parameter and return its source.

    Starts the browser when none is running, otherwise restarts the idle
    timer. Responds with HTTP 400 when ``url`` is missing or empty.
    """
    url = request.args.get('url', '')
    if not url:
        # Reject early instead of handing an empty URL to the browser.
        return "missing 'url' query parameter", 400

    if driver is None:
        _start_browser()
    else:
        _reset_stop_timer()

    driver.get(url)
    return driver.page_source
def screenshot() -> str:
    """Placeholder; returns the literal string ``"todo"``."""
    placeholder = "todo"
    return placeholder
def evaluate() -> str:
    """Placeholder; returns the literal string ``"todo"``."""
    placeholder = "todo"
    return placeholder
# Path taken from the endpoint list in the comment header of this file; no
# route registration for this handler is visible in the source.
@app.route("/fetch")
def fetch():
    """Fetch the ``url`` query parameter from inside the browser and return the body.

    The browser first navigates to a static investing.com image, then runs a
    JavaScript ``fetch`` with the ``domain-id: www`` header and returns the
    response text. If the in-page fetch throws, the returned string starts
    with ``"error: "``. Responds with HTTP 400 when ``url`` is missing or
    empty.
    """
    url = request.args.get('url', '')
    if not url:
        return "missing 'url' query parameter", 400

    if driver is None:
        _start_browser()
    else:
        _reset_stop_timer()

    # Presumably loaded so the fetch below runs from an investing.com page
    # context -- TODO confirm.
    driver.get('https://i-invdn-com.investing.com/redesign/images/seo/investing_300X300.png')

    # The URL is passed as a script argument rather than interpolated into the
    # JavaScript source, so quotes or script fragments in the request cannot
    # alter the code that runs in the browser.
    script = """
    var url = arguments[0];
    var callback = arguments[arguments.length - 1]; // this is the callback to call when you are done
    (async function(){
        try {
            let res = await fetch(url, {headers:{'domain-id':'www'}});
            let text = await res.text();
            callback(text);
        } catch (e) {
            callback('error: ' + e);
        }
    })()"""
    return driver.execute_async_script(script, url)
if __name__ == '__main__':
    # The server listens on all interfaces, and Flask's debug mode enables an
    # interactive debugger that can execute arbitrary code. Debug mode is
    # therefore off unless explicitly requested through FLASK_DEBUG.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in ('1', 'true', 'yes')
    app.run(host="0.0.0.0", port=7860, debug=debug_enabled)