WeByT3 committed · verified
Commit a1c4a3e · Parent: 8644d6e

Update tools.py

Files changed (1)
1. tools.py +27 -8
tools.py CHANGED
@@ -2,6 +2,9 @@ from langchain_core.tools import tool
 import wikipediaapi
 import pandas as pd
 import requests
+import fitz  # PyMuPDF
+import io
+from urllib.parse import urlparse
 
 
 @tool
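
A note on the new imports: fitz is the import name of the PyMuPDF package (installed with pip install pymupdf). io and urlparse are added here but never used by any hunk in this commit. Below is a minimal sketch of the in-memory PDF parsing pattern the search_papers hunk further down relies on; pdf_bytes is a placeholder for downloaded content.

import fitz  # PyMuPDF

def first_pages_text(pdf_bytes: bytes, max_pages: int = 3) -> str:
    # Open the PDF from bytes rather than from a file path.
    doc = fitz.open(stream=pdf_bytes, filetype="pdf")
    try:
        n = min(max_pages, doc.page_count)
        return "\n".join(doc[i].get_text() for i in range(n))
    finally:
        doc.close()
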
@@ -80,12 +83,12 @@ def search_wikipedia(page_title: str, language: str) -> str:
 
         for i, table in enumerate(tables):
             if isinstance(table, pd.DataFrame):
-                markdown = table.head(10).to_markdown(index=False)
+                markdown = table.iloc[:10, :5].to_markdown(index=False)
                 markdown_tables.append(f"\n---\n**Table {i + 1}:**\n{markdown}")
 
         table_output = "\n".join(markdown_tables) if markdown_tables else "No tables found on this page."
 
-        return f"Title: {page.title}\n\nText: {page.summary[:100]}\n\n{table_output}"
+        return f"Text: {page.summary[:75]}\n\n{table_output}"
 
     except Exception as e:
         return f"Error retrieving Wikipedia content: {str(e)}"
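
A note on the table truncation above: head(10) kept the first 10 rows but every column, while iloc[:10, :5] caps the markdown at 10 rows and 5 columns, shrinking what gets sent to the model on wide tables. A quick sketch of the difference on a throwaway DataFrame (to_markdown requires the tabulate package):

import pandas as pd

df = pd.DataFrame({f"col{j}": range(20) for j in range(8)})  # 20 rows x 8 columns

old_md = df.head(10).to_markdown(index=False)       # 10 rows, all 8 columns
new_md = df.iloc[:10, :5].to_markdown(index=False)  # 10 rows, first 5 columns
assert len(new_md) < len(old_md)  # the new output is strictly smaller here
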
@@ -127,10 +130,8 @@ def duckduckgo_search(query: str) -> str:
 
 @tool
 def search_papers(query: str) -> str:
-    """Search for academic papers using Semantic Scholar.
-    Args:
-        query: The query to search the papers in Semantic Scholar
-    """
+    """Search for academic papers and retrieve their content when possible."""
+
     url = "https://api.semanticscholar.org/graph/v1/paper/search"
     params = {
         "query": query,
@@ -146,6 +147,7 @@ def search_papers(query: str) -> str:
             return "No papers found."
 
         results = []
+
         for paper in data["data"]:
             title = paper.get("title", "No title")
             authors = ", ".join([a.get("name", "") for a in paper.get("authors", [])])
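
A note on the docstring rewrite two hunks above: LangChain's @tool decorator takes the tool description the model sees from the function docstring, so this change affects tool selection, not just in-code documentation. A small sketch of how the description surfaces, assuming tools.py is importable:

from tools import search_papers

# @tool wraps the function in a tool object whose description
# comes from the docstring.
print(search_papers.name)         # search_papers
print(search_papers.description)  # Search for academic papers and retrieve ...
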
@@ -153,10 +155,27 @@ def search_papers(query: str) -> str:
             abstract = paper.get("abstract", "No abstract available.")
             link = paper.get("url", "")
 
-            result = f"**{title}** ({year}) by {authors}\n{abstract}\nLink: {link}"
+            full_text = "Full text not available."
+
+            # Attempt to download and parse PDF (for arXiv)
+            if "arxiv.org" in link:
+                pdf_url = link.replace("abs", "pdf") + ".pdf"
+                try:
+                    pdf_response = requests.get(pdf_url)
+                    doc = fitz.open(stream=pdf_response.content, filetype="pdf")
+                    full_text = "\n".join(page.get_text() for page in doc[:3])  # Only first 3 pages
+                    doc.close()
+                except Exception as pdf_err:
+                    full_text = f"Failed to retrieve full text: {pdf_err}"
+
+            result = f"""**{title}** ({year}) by {authors}
+Abstract: {abstract}
+Link: {link}
+Full Text (first pages):\n{full_text}"""
+
             results.append(result)
 
-        return "\n\n".join(results)
+        return "\n\n---\n\n".join(results)
 
     except Exception as e:
         return f"Error fetching papers: {e}"
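
A note on the PDF branch in the hunk above: the abs-to-pdf rewrite assumes link is an arXiv abstract URL, but Semantic Scholar's url field typically points at semanticscholar.org, so whether this branch ever fires depends on which fields the (unshown) request params ask for. The plain string replace also rewrites every "abs" occurrence in the URL, not just the path segment. A stricter, hypothetical variant using the urlparse import this commit adds:

from urllib.parse import urlparse

def arxiv_pdf_url(link: str) -> str | None:
    # Hypothetical helper: rewrite only genuine arxiv.org /abs/ links.
    parts = urlparse(link)
    if parts.netloc.endswith("arxiv.org") and parts.path.startswith("/abs/"):
        paper_id = parts.path[len("/abs/"):]
        return f"https://arxiv.org/pdf/{paper_id}.pdf"
    return None

assert arxiv_pdf_url("https://arxiv.org/abs/1706.03762") == "https://arxiv.org/pdf/1706.03762.pdf"
assert arxiv_pdf_url("https://www.semanticscholar.org/paper/xyz") is None
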
 