arabellastrange committed on
Commit
a95eca6
·
1 Parent(s): 941070a

dependencies

Browse files
Files changed (2) hide show
  1. requirements.txt +1 -1
  2. web_search.py +17 -9
requirements.txt CHANGED
@@ -8,7 +8,7 @@ llama-index-embeddings-openai
8
  llama-index-llms-openai
9
  # needed for simpledirectoryreader to work
10
  llama-index-readers-file
11
- selenium==4.10.0
12
  unstructured
13
  requests
14
  chromium
 
8
  llama-index-llms-openai
9
  # needed for simpledirectoryreader to work
10
  llama-index-readers-file
11
+ selenium==4.22.0
12
  unstructured
13
  requests
14
  chromium
web_search.py CHANGED
@@ -72,16 +72,24 @@ def process_url(url, timeout):
72
  options.add_argument("--headless")
73
  options.add_argument("--no-sandbox")
74
  options.add_argument("--disable-dev-shm-usage")
 
 
 
 
 
 
75
  result = ""
76
- with webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()), options=options) as dr:
77
- logger.info(f"*****setting page load timeout {timeout}")
78
- dr.set_page_load_timeout(timeout)
79
- try:
80
- dr.get(url)
81
- response = dr.page_source
82
- result = response_text_extract(url=url, response=response)
83
- except selenium.common.exceptions.TimeoutException:
84
- return "", url
 
 
85
  except Exception:
86
  traceback.print_exc()
87
  logger.info(f"{site} err")
 
72
  options.add_argument("--headless")
73
  options.add_argument("--no-sandbox")
74
  options.add_argument("--disable-dev-shm-usage")
75
+
76
+ options.add_argument("start-maximized")
77
+ options.add_argument("disable-infobars")
78
+ options.add_argument("--disable-extensions")
79
+ options.add_argument("--disable-gpu")
80
+ options.add_argument("--disable-dev-shm-usage")
81
  result = ""
82
+
83
+ driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()), options=options)
84
+ logger.info(f"*****setting page load timeout {timeout}")
85
+ driver.set_page_load_timeout(timeout)
86
+
87
+ try:
88
+ driver.get(url)
89
+ response = driver.page_source
90
+ result = response_text_extract(url=url, response=response)
91
+ except selenium.common.exceptions.TimeoutException:
92
+ return "", url
93
  except Exception:
94
  traceback.print_exc()
95
  logger.info(f"{site} err")