Hansimov committed on
Commit
a315628
1 Parent(s): ea49401

:zap: [Enhance] Loop multiple conditions for extracting abstract

Browse files
documents/query_results_extractor.py CHANGED
@@ -21,10 +21,18 @@ class QueryResultsExtractor:
21
  url = result.find("a")["href"]
22
  title = result.find("h3").text.strip()
23
 
24
- abstract_element = result.find("div", {"data-sncf": "1"})
25
- if abstract_element is None:
26
- abstract_element = result.find("div", class_="ITZIwc")
27
- abstract = abstract_element.text.strip()
 
 
 
 
 
 
 
 
28
  logger.mesg(
29
  f"{title}\n" f" - {site}\n" f" - {url}\n" f" - {abstract}\n" f"\n"
30
  )
 
21
  url = result.find("a")["href"]
22
  title = result.find("h3").text.strip()
23
 
24
+ abstract_element_conditions = [
25
+ {"data-sncf": "1"},
26
+ {"class": "ITZIwc"},
27
+ ]
28
+ for condition in abstract_element_conditions:
29
+ abstract_element = result.find("div", condition)
30
+ if abstract_element is not None:
31
+ abstract = abstract_element.text.strip()
32
+ break
33
+ else:
34
+ abstract = ""
35
+
36
  logger.mesg(
37
  f"{title}\n" f" - {site}\n" f" - {url}\n" f" - {abstract}\n" f"\n"
38
  )