mhsvieira committed on
Commit
7deba93
1 Parent(s): a9e7556

Fix web search

Browse files
Files changed (1) hide show
  1. corpora/sourcer.py +7 -10
corpora/sourcer.py CHANGED
@@ -36,20 +36,17 @@ def search_web(query: str) -> list:
36
 
37
  texts = []
38
  responses = [None] * len(links)
39
- download_threads = [None] * len(links)
40
- processing_threads = [None] * len(links)
41
- # dowload_threads[0] = threading.Thread(target=download_page, args=(links[0], responses, 0))
42
- download_page(links[0], responses, 0)
43
- for i in range(1, len(links), 2):
44
- # new page processing thread
45
- processing_thread = threading.Thread(target=process_page, args=(texts, responses, i-1))
46
- # new download thread
47
- download_thread = threading.Thread(target=download_page, args=(links[i], responses, i))
48
  # start threads
49
  download_thread.start()
50
  processing_thread.start()
 
51
  download_thread.join()
52
  processing_thread.join()
53
 
54
-
55
  return texts
 
36
 
37
  texts = []
38
  responses = [None] * len(links)
39
+ download_page(links[0], responses, 0) # download first page
40
+ for i in range(1, len(links)):
41
+ # process previous page
42
+ processing_thread = threading.Thread(target=process_page, args=(texts, responses, i-1), name='processing'+str(i-1))
43
+ # download new page
44
+ download_thread = threading.Thread(target=download_page, args=(links[i], responses, i), name='download'+str(i))
 
 
 
45
  # start threads
46
  download_thread.start()
47
  processing_thread.start()
48
+ # wait for threads
49
  download_thread.join()
50
  processing_thread.join()
51
 
 
52
  return texts