Spaces:
Runtime error
Runtime error
Fix web search
Browse files- corpora/sourcer.py +7 -10
corpora/sourcer.py
CHANGED
@@ -36,20 +36,17 @@ def search_web(query: str) -> list:
|
|
36 |
|
37 |
texts = []
|
38 |
responses = [None] * len(links)
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
processing_thread = threading.Thread(target=process_page, args=(texts, responses, i-1))
|
46 |
-
# new download thread
|
47 |
-
download_thread = threading.Thread(target=download_page, args=(links[i], responses, i))
|
48 |
# start threads
|
49 |
download_thread.start()
|
50 |
processing_thread.start()
|
|
|
51 |
download_thread.join()
|
52 |
processing_thread.join()
|
53 |
|
54 |
-
|
55 |
return texts
|
|
|
36 |
|
37 |
texts = []
|
38 |
responses = [None] * len(links)
|
39 |
+
download_page(links[0], responses, 0) # download first page
|
40 |
+
for i in range(1, len(links)):
|
41 |
+
# process previous page
|
42 |
+
processing_thread = threading.Thread(target=process_page, args=(texts, responses, i-1), name='processing'+str(i-1))
|
43 |
+
# download new page
|
44 |
+
download_thread = threading.Thread(target=download_page, args=(links[i], responses, i), name='download'+str(i))
|
|
|
|
|
|
|
45 |
# start threads
|
46 |
download_thread.start()
|
47 |
processing_thread.start()
|
48 |
+
# wait for threads
|
49 |
download_thread.join()
|
50 |
processing_thread.join()
|
51 |
|
|
|
52 |
return texts
|