Spaces:
Sleeping
Sleeping
:zap: [Enhance] Loop multiple conditions for extracting abstract
Browse files
documents/query_results_extractor.py
CHANGED
@@ -21,10 +21,18 @@ class QueryResultsExtractor:
|
|
21 |
url = result.find("a")["href"]
|
22 |
title = result.find("h3").text.strip()
|
23 |
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
logger.mesg(
|
29 |
f"{title}\n" f" - {site}\n" f" - {url}\n" f" - {abstract}\n" f"\n"
|
30 |
)
|
|
|
21 |
url = result.find("a")["href"]
|
22 |
title = result.find("h3").text.strip()
|
23 |
|
24 |
+
# Attribute filters for locating the abstract <div>, tried in order; the
# first one that matches wins.
# NOTE(review): assumes `result` is a BeautifulSoup Tag -- confirm against
# the enclosing method. Each dict is passed as the positional `attrs`
# argument of find(), so keys must be the real HTML attribute names.
# "class_" is only understood as a *keyword* argument; as a dict key it
# would look for an attribute literally named "class_" and never match,
# hence "class" below.
abstract_element_conditions = [
    {"data-sncf": "1"},
    {"class": "ITZIwc"},
]
for condition in abstract_element_conditions:
    abstract_element = result.find("div", condition)
    if abstract_element is not None:
        abstract = abstract_element.text.strip()
        break
else:
    # No filter matched: fall back to an empty abstract so the log call
    # that follows still has a value to format.
    abstract = ""
|
35 |
+
|
36 |
logger.mesg(
|
37 |
f"{title}\n" f" - {site}\n" f" - {url}\n" f" - {abstract}\n" f"\n"
|
38 |
)
|