Spaces:
Running
Running
abdoh-alkhateeb
committed on
Commit
•
c5f0fb9
1
Parent(s):
cb980d4
Improve serper agent
Browse files
agents/google_serper_search_agent.py
CHANGED
@@ -1,5 +1,9 @@
|
|
|
|
1 |
import pandas as pd
|
2 |
from langchain_community.utilities import GoogleSerperAPIWrapper
|
|
|
|
|
|
|
3 |
|
4 |
|
5 |
class GoogleSerperSearchAgent:
|
@@ -9,12 +13,19 @@ class GoogleSerperSearchAgent:
|
|
9 |
|
10 |
self._api = GoogleSerperAPIWrapper()
|
11 |
|
|
|
|
|
12 |
def run(self, query: str, limit_per_source: int = 3) -> pd.DataFrame:
|
13 |
results = []
|
14 |
for site in self._sources:
|
15 |
results.extend(self._search(query, site, limit_per_source))
|
16 |
|
|
|
|
|
|
|
|
|
17 |
df = pd.DataFrame(results)
|
|
|
18 |
|
19 |
return df
|
20 |
|
@@ -28,3 +39,12 @@ class GoogleSerperSearchAgent:
|
|
28 |
results = [{"title": result["title"], "url": result["link"], "date": result.get("date", "")} for result in results]
|
29 |
|
30 |
return results
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import nltk
|
2 |
import pandas as pd
|
3 |
from langchain_community.utilities import GoogleSerperAPIWrapper
|
4 |
+
from newspaper import Article
|
5 |
+
|
6 |
+
pd.options.mode.use_inf_as_na = True
|
7 |
|
8 |
|
9 |
class GoogleSerperSearchAgent:
|
|
|
13 |
|
14 |
self._api = GoogleSerperAPIWrapper()
|
15 |
|
16 |
+
nltk.download("punkt")
|
17 |
+
|
18 |
def run(self, query: str, limit_per_source: int = 3) -> pd.DataFrame:
    """Search all configured sources and return enriched article rows.

    Runs *query* against every site in ``self._sources`` via the Serper
    API, fetches the full article behind each hit with ``self._fetch``,
    and keeps only rows whose fetched ``content`` is non-null.

    Args:
        query: Free-text search query.
        limit_per_source: Maximum number of hits kept per source site.

    Returns:
        DataFrame with the search-hit columns (``title``, ``url``,
        ``date``) merged with the fields returned by ``self._fetch``
        (e.g. ``content``, ``author``); rows missing ``content`` are
        dropped. Empty when no source produced any hit.
    """
    results = []
    for site in self._sources:
        results.extend(self._search(query, site, limit_per_source))

    # Enrich each search hit in place with the fetched article fields;
    # dict.update replaces the original per-key copy loop.
    for result in results:
        result.update(self._fetch(result["url"]))

    df = pd.DataFrame(results)

    # Guard: with zero hits the DataFrame has no columns at all, and
    # dropna(subset=["content"]) would raise KeyError on the missing
    # column. Only filter when the column actually exists.
    if "content" in df.columns:
        df = df.dropna(subset=["content"])

    return df
|
31 |
|
|
|
39 |
results = [{"title": result["title"], "url": result["link"], "date": result.get("date", "")} for result in results]
|
40 |
|
41 |
return results
|
42 |
+
|
43 |
+
def _fetch(self, url: str) -> dict[str, str]:
    """Download and parse the article at *url*.

    Returns a dict with keys ``title``, ``author`` (first extracted
    author, or the empty string when none were found) and ``content``
    (the article's full text).
    """
    page = Article(url=url)

    # newspaper's required call order: fetch the HTML, extract the
    # article fields, then run the NLP pass.
    page.download()
    page.parse()
    page.nlp()

    authors = page.authors
    return {
        "title": page.title,
        "author": authors[0] if authors else "",
        "content": page.text,
    }
|