Spaces:
Building
Building
File size: 6,927 Bytes
1380f39 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 |
from flask import Flask
from flask import request
import undetected_chromedriver as uc
import re
import os
from threading import Timer
# /start_browser
# /close_browser  (stops the browser; this is the route registered for stop_browser)
# /text?url=https://api.investing.com/api/financialdata/8849/historical/chart/?interval=PT1M&pointscount=60
# encoded: https%3A%2F%2Fapi.investing.com%2Fapi%2Ffinancialdata%2F8849%2Fhistorical%2Fchart%2F%3Finterval%3DPT1M%26pointscount%3D60
# /fetch?url=https://api.investing.com/api/financialdata/historical/1?start-date=2024-02-15&end-date=2029-02-15&time-frame=Daily&add-missing-rows=false
# encoded: https%3A%2F%2Fapi.investing.com%2Fapi%2Ffinancialdata%2Fhistorical%2F1%3Fstart-date%3D2024-02-15%26end-date%3D2029-02-15%26time-frame%3DDaily%26add-missing-rows%3Dfalse
# Flask application that serves the HTTP endpoints defined in this file.
app = Flask(__name__)

# Shared, lazily-initialised module state. Everything starts out as None and is
# filled in by the helper functions below.
driver = None           # the running undetected_chromedriver instance, if any
timer = None            # threading.Timer that stops the browser after inactivity
XVFB_DISPLAY = None     # Xvfb virtual display, created on first browser start
USER_AGENT = None       # user agent read from the first launch ('headless' removed)
CHROME_EXE_PATH = None  # cached result of get_chrome_exe_path()
def _start_xvfb_display():
    """Start the shared Xvfb virtual display unless one was already created.

    The display object is stored in the module-level ``XVFB_DISPLAY``; once it
    is set, further calls return immediately.
    """
    global XVFB_DISPLAY
    if XVFB_DISPLAY is not None:
        return

    # Imported lazily so xvfbwrapper is only needed when a display is started.
    from xvfbwrapper import Xvfb

    display = Xvfb()
    XVFB_DISPLAY = display
    display.start()
def get_chrome_exe_path() -> str:
    """Return the path of the Chrome executable, resolving it on first use.

    Lookup order:
      1. the value already cached in ``CHROME_EXE_PATH``;
      2. a bundled binary at ``<directory of this file>/chrome/chrome``;
      3. whatever ``uc.find_chrome_executable()`` reports for the system.

    The resolved value is stored in ``CHROME_EXE_PATH`` for later calls.
    """
    global CHROME_EXE_PATH
    if CHROME_EXE_PATH is None:
        # linux pyinstaller bundle
        module_dir = os.path.dirname(os.path.abspath(__file__))
        bundled_chrome = os.path.join(module_dir, 'chrome', "chrome")
        if os.path.exists(bundled_chrome):
            CHROME_EXE_PATH = bundled_chrome
        else:
            # system
            CHROME_EXE_PATH = uc.find_chrome_executable()
    return CHROME_EXE_PATH
def _build_chrome_options():
    """Return a new ``uc.ChromeOptions`` carrying the flags used for every launch.

    A fresh object is built on each call (the original code also created a new
    options object for every ``uc.Chrome`` it started). The ``--user-agent``
    flag is only added once ``USER_AGENT`` is known, and ``--lang`` only when
    the ``LANG`` environment variable is set.
    """
    # Flag set taken from FlareSolverr:
    # https://github.com/FlareSolverr/FlareSolverr/blob/043f18b231b4f409080b2b5c4421ce0f4cac7dec/src/utils.py
    base_flags = (
        '--no-sandbox',
        '--window-size=1920,1080',
        # todo: this param shows a warning in chrome head-full
        '--disable-setuid-sandbox',
        '--disable-dev-shm-usage',
        # this option removes the zygote sandbox (it seems that the resolution is a bit faster)
        '--no-zygote',
        # attempt to fix Docker ARM32 build
        '--disable-gpu-sandbox',
        '--disable-software-rasterizer',
        '--ignore-certificate-errors',
        '--ignore-ssl-errors',
        # fix GL errors in ASUSTOR NAS
        # https://github.com/FlareSolverr/FlareSolverr/issues/782
        # https://github.com/microsoft/vscode/issues/127800#issuecomment-873342069
        # https://peter.sh/experiments/chromium-command-line-switches/#use-gl
        '--use-gl=swiftshader',
    )
    options = uc.ChromeOptions()
    for flag in base_flags:
        options.add_argument(flag)
    # "--headless=new" is intentionally not passed; the browser runs head-full.

    if USER_AGENT is not None:
        options.add_argument('--user-agent=%s' % USER_AGENT)

    language = os.environ.get('LANG', None)
    if language is not None:
        options.add_argument('--lang=%s' % language)

    # Allow cross-origin requests. The original passed ' --disable-web-security'
    # with a leading space; such an argument does not start with '--', so it is
    # unlikely to have been parsed as this switch.
    options.add_argument('--disable-web-security')
    return options


def _launch_chrome():
    """Start a Chrome instance with the standard options and return its driver."""
    return uc.Chrome(
        options=_build_chrome_options(),
        headless=False,
        version_main=None,
        driver_executable_path="/app/chromedriver",
        browser_executable_path=get_chrome_exe_path(),
    )


def _start_browser():
    """Start (or restart) the shared Chrome instance stored in ``driver``.

    Steps:
      1. quit the current driver, if there is one;
      2. make sure the Xvfb display is running;
      3. launch Chrome;
      4. if no user agent is known yet, read ``navigator.userAgent`` from the
         new browser, remove every case-insensitive occurrence of "HEADLESS",
         log it, then quit and relaunch so ``--user-agent`` is applied;
      5. re-arm the inactivity timer and log that the browser started.
    """
    global driver, USER_AGENT

    if driver is not None:
        driver.quit()
        driver = None

    _start_xvfb_display()
    driver = _launch_chrome()

    # NOTE(review): the relaunch below is tied to first-time user-agent
    # detection; confirm this matches the intended nesting of the original.
    if USER_AGENT is None:
        detected_agent = driver.execute_script("return navigator.userAgent")
        USER_AGENT = re.sub('HEADLESS', '', detected_agent, flags=re.IGNORECASE)
        app.logger.info(USER_AGENT)
        driver.quit()
        # Clear the global before relaunching so a failed relaunch does not
        # leave an already-quit driver behind for the next request.
        driver = None
        # restart with user agent
        driver = _launch_chrome()

    _reset_stop_timer()
    app.logger.info("browser started")
def _stop_browser():
global driver
driver.quit()
driver = None
app.logger.info("browser stopped")
def _reset_stop_timer():
global timer
if timer is not None:
timer.cancel()
timer = Timer(5*60, _stop_browser) # _stop_browser executed in another thread...
timer.start()
@app.route("/")
def hello_world():
    """Root endpoint: reply with a fixed HTML greeting."""
    greeting = "<p>Hello, World!</p>"
    return greeting
@app.route("/start_browser")
def start_browser():
    """HTTP endpoint: (re)start the shared browser, then reply ``ok``."""
    _start_browser()
    reply = "ok"
    return reply
@app.route("/stop_browser")
@app.route("/close_browser")
def stop_browser():
    """HTTP endpoint: stop the shared browser, then reply ``ok``.

    Reachable as ``/close_browser`` (the original route) and as
    ``/stop_browser``, the name used by the endpoint list at the top of the
    file and by this function.
    """
    _stop_browser()
    return "ok"
@app.route("/text")
def text():
    """HTTP endpoint: load ``url`` in the browser and return the page source.

    Query parameters:
        url: address to navigate to (required).

    Returns:
        ``driver.page_source`` after navigation, or a 400 response when the
        ``url`` parameter is missing or empty.
    """
    global driver

    url = request.args.get('url', '')
    if not url:
        # Previously an empty URL was handed straight to driver.get().
        return "missing 'url' query parameter", 400

    if driver is None:
        _start_browser()
    else:
        # The browser is in use: push back the inactivity shutdown.
        _reset_stop_timer()

    driver.get(url)
    return driver.page_source
@app.route("/screenshot")
def screenshot():
    """Placeholder endpoint; not implemented yet, replies with ``todo``."""
    placeholder = "todo"
    return placeholder
@app.route("/evaluate")
def evaluate():
    """Placeholder endpoint; not implemented yet, replies with ``todo``."""
    placeholder = "todo"
    return placeholder
@app.route("/fetch")
def fetch():
    """HTTP endpoint: run a JavaScript ``fetch`` of ``url`` inside the browser.

    The browser first navigates to a fixed investing.com image, then executes
    an async script in that page which fetches ``url`` with the header
    ``domain-id: www`` and hands the response text back.

    Query parameters:
        url: address to fetch from within the page (required).

    Returns:
        The response body as text, the string ``'error: <exception>'`` when
        the in-page fetch throws, or a 400 response when ``url`` is missing.
    """
    global driver

    url = request.args.get('url', '')
    if not url:
        return "missing 'url' query parameter", 400

    if driver is None:
        _start_browser()
    else:
        # The browser is in use: push back the inactivity shutdown.
        _reset_stop_timer()

    # Navigate somewhere first so the script runs inside a loaded page
    # (presumably chosen so the request originates from an investing.com host).
    driver.get('https://i-invdn-com.investing.com/redesign/images/seo/investing_300X300.png')

    # The URL is passed as a script argument instead of being formatted into
    # the JavaScript source: a quote or backslash in the query parameter can
    # no longer break out of the string literal or inject script.
    script = """
var targetUrl = arguments[0];
var callback = arguments[arguments.length - 1]; // this is the callback to call when you are done
(async function(){
    try {
        let res = await fetch(targetUrl, {headers:{'domain-id':'www'}});
        let text = await res.text();
        callback(text);
    } catch (e) {
        callback('error: ' + e);
    }
})()"""
    result = driver.execute_async_script(script, url)
    return result
if __name__ == '__main__':
    # NOTE(review): debug mode turns on the Werkzeug interactive debugger and
    # the reloader while the server listens on every interface (0.0.0.0).
    # It stays enabled by default to keep the previous behaviour; set
    # FLASK_DEBUG=0 in the environment to turn it off.
    debug_enabled = os.environ.get('FLASK_DEBUG', '1') != '0'
    app.run(host="0.0.0.0", port=7860, debug=debug_enabled)