Spaces:
Paused
Paused
Commit
·
a95eca6
1
Parent(s):
941070a
dependencies
Browse files
- requirements.txt +1 -1
- web_search.py +17 -9
requirements.txt
CHANGED
@@ -8,7 +8,7 @@ llama-index-embeddings-openai
|
|
8 |
llama-index-llms-openai
|
9 |
# needed for simpledirectoryreader to work
|
10 |
llama-index-readers-file
|
11 |
-
selenium==4.
|
12 |
unstructured
|
13 |
requests
|
14 |
chromium
|
|
|
8 |
llama-index-llms-openai
|
9 |
# needed for simpledirectoryreader to work
|
10 |
llama-index-readers-file
|
11 |
+
selenium==4.22.0
|
12 |
unstructured
|
13 |
requests
|
14 |
chromium
|
web_search.py
CHANGED
@@ -72,16 +72,24 @@ def process_url(url, timeout):
|
|
72 |
options.add_argument("--headless")
|
73 |
options.add_argument("--no-sandbox")
|
74 |
options.add_argument("--disable-dev-shm-usage")
|
|
|
|
|
|
|
|
|
|
|
|
|
75 |
result = ""
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
|
|
|
|
85 |
except Exception:
|
86 |
traceback.print_exc()
|
87 |
logger.info(f"{site} err")
|
|
|
72 |
options.add_argument("--headless")
|
73 |
options.add_argument("--no-sandbox")
|
74 |
options.add_argument("--disable-dev-shm-usage")
|
75 |
+
|
76 |
+
options.add_argument("start-maximized")
|
77 |
+
options.add_argument("disable-infobars")
|
78 |
+
options.add_argument("--disable-extensions")
|
79 |
+
options.add_argument("--disable-gpu")
|
80 |
+
options.add_argument("--disable-dev-shm-usage")
|
81 |
result = ""
|
82 |
+
|
83 |
+
driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()), options=options)
|
84 |
+
logger.info(f"*****setting page load timeout {timeout}")
|
85 |
+
driver.set_page_load_timeout(timeout)
|
86 |
+
|
87 |
+
try:
|
88 |
+
driver.get(url)
|
89 |
+
response = driver.page_source
|
90 |
+
result = response_text_extract(url=url, response=response)
|
91 |
+
except selenium.common.exceptions.TimeoutException:
|
92 |
+
return "", url
|
93 |
except Exception:
|
94 |
traceback.print_exc()
|
95 |
logger.info(f"{site} err")
|