abdoh-alkhateeb committed on
Commit
c5f0fb9
1 Parent(s): cb980d4

Improve serper agent

Browse files
agents/google_serper_search_agent.py CHANGED
@@ -1,5 +1,9 @@
 
1
  import pandas as pd
2
  from langchain_community.utilities import GoogleSerperAPIWrapper
 
 
 
3
 
4
 
5
  class GoogleSerperSearchAgent:
@@ -9,12 +13,19 @@ class GoogleSerperSearchAgent:
9
 
10
  self._api = GoogleSerperAPIWrapper()
11
 
 
 
12
  def run(self, query: str, limit_per_source: int = 3) -> pd.DataFrame:
13
  results = []
14
  for site in self._sources:
15
  results.extend(self._search(query, site, limit_per_source))
16
 
 
 
 
 
17
  df = pd.DataFrame(results)
 
18
 
19
  return df
20
 
@@ -28,3 +39,12 @@ class GoogleSerperSearchAgent:
28
  results = [{"title": result["title"], "url": result["link"], "date": result.get("date", "")} for result in results]
29
 
30
  return results
 
 
 
 
 
 
 
 
 
 
1
+ import nltk
2
  import pandas as pd
3
  from langchain_community.utilities import GoogleSerperAPIWrapper
4
+ from newspaper import Article
5
+
6
+ pd.options.mode.use_inf_as_na = True
7
 
8
 
9
  class GoogleSerperSearchAgent:
 
13
 
14
  self._api = GoogleSerperAPIWrapper()
15
 
16
+ nltk.download("punkt")
17
+
18
  def run(self, query: str, limit_per_source: int = 3) -> pd.DataFrame:
19
  results = []
20
  for site in self._sources:
21
  results.extend(self._search(query, site, limit_per_source))
22
 
23
+ for result in results:
24
+ for key, value in self._fetch(result["url"]).items():
25
+ result[key] = value
26
+
27
  df = pd.DataFrame(results)
28
+ df.dropna(subset=["content"], inplace=True)
29
 
30
  return df
31
 
 
39
  results = [{"title": result["title"], "url": result["link"], "date": result.get("date", "")} for result in results]
40
 
41
  return results
42
+
43
def _fetch(self, url: str) -> dict[str, str]:
    """Download and parse the article at ``url``.

    Args:
        url: Address of the page to download.

    Returns:
        A dict with ``title``, ``author`` (the first listed author, or ``""``
        when none is listed) and ``content`` (the article text). If the page
        cannot be downloaded or parsed, only ``author`` and ``content`` are
        returned, both empty, so the caller's existing title is not
        overwritten.
    """
    # Local imports keep this method self-contained; the module already
    # depends on newspaper.
    import logging

    from newspaper import ArticleException

    article = Article(url=url)

    try:
        article.download()
        article.parse()
    except ArticleException as error:
        # Best effort: one unreachable or unparsable page should not abort
        # the whole search run.
        logging.getLogger(__name__).warning("Could not fetch article %s: %s", url, error)
        return {"author": "", "content": ""}

    # article.nlp() is intentionally not called: it only computes keywords
    # and a summary, neither of which is returned here.
    author = article.authors[0] if article.authors else ""

    return {"title": article.title, "author": author, "content": article.text}