Ashhar committed
Commit 9dfbf64 · 1 Parent(s): 19598a5

fixed bug with scraper

Files changed (1)
  1. tools/webScraper.py +7 -4
tools/webScraper.py CHANGED
@@ -3,7 +3,7 @@ from urllib.parse import parse_qs, urlparse
 from bs4 import BeautifulSoup
 import requests
 from typing import TypedDict
-import utils as U
+# import utils as U
 
 SIMULATE_BROWSER = os.environ.get("SIMULATE_BROWSER_SEARCH") == "true"
 
@@ -36,7 +36,7 @@ else:
 
 
 def scrapeGoogleSearch(query):
-    U.pprint(f"{SIMULATE_BROWSER=}")
+    # U.pprint(f"{SIMULATE_BROWSER=}")
     finalResponse = []
 
     headers = {
@@ -61,7 +61,6 @@ def scrapeGoogleSearch(query):
     with open("soup_dump.html", "w", encoding="utf-8") as file:
         file.write(soup.prettify())
 
-    results = soup.find("body")
     mainDiv = soup.find("div", attrs={"id": "main"})
     answerText = ""
     if SELECTORS.get("answer"):
@@ -80,6 +79,8 @@ def scrapeGoogleSearch(query):
 
     results = mainDiv.select(SELECTORS["search_results"])
     resultsDesc = mainDiv.select(SELECTORS["search_results_desc"])
+    # Ensure resultsDesc has the same length as results
+    resultsDesc += [None] * (len(results) - len(resultsDesc))
 
     if results:
         finalResponse.append("Search Results:\n")
@@ -91,7 +92,7 @@ def scrapeGoogleSearch(query):
             parsedUrl = urlparse(link)
            urlParams = parse_qs(parsedUrl.query)
             link = urlParams.get("q", [None])[0]
-            desc = resultsDesc[i].text
+            desc = resultsDesc[i].text if resultsDesc[i] else ""
             finalResponse.append(f"Title: {title}")
             finalResponse.append(f"Description: {desc}")
             finalResponse.append(f"URL: {link}\n")
@@ -99,3 +100,5 @@ def scrapeGoogleSearch(query):
         print("Failed to retrieve search results.")
 
     return "\n".join(finalResponse)
+
+print(scrapeGoogleSearch("kommuneity"))
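The pattern this commit introduces can be sanity-checked on its own. The snippet below is a minimal sketch with hypothetical sample data (not taken from the repository), assuming the original failure was an index error when Google returned fewer description nodes than result nodes:

results = ["Result A", "Result B", "Result C"]   # pretend the results selector matched 3 nodes
resultsDesc = ["Description for A"]              # but only 1 description node was found

# Pad the shorter list with None so indexing by the results index cannot fail
resultsDesc += [None] * (len(results) - len(resultsDesc))

for i, title in enumerate(results):
    # Fall back to an empty description, mirroring the guarded lookup in the diff
    # (the real code reads resultsDesc[i].text from a BeautifulSoup tag)
    desc = resultsDesc[i] if resultsDesc[i] else ""
    print(f"Title: {title}")
    print(f"Description: {desc}")

With the padding in place, missing descriptions simply print as empty strings instead of aborting the whole scrape.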