Spaces:
Runtime error
Runtime error
Update agent.py
Browse files
agent.py
CHANGED
@@ -34,6 +34,10 @@ from youtube_transcript_api import YouTubeTranscriptApi
|
|
34 |
from youtube_transcript_api._errors import TranscriptsDisabled, VideoUnavailable
|
35 |
import re
|
36 |
|
|
|
|
|
|
|
|
|
37 |
|
38 |
load_dotenv()
|
39 |
|
@@ -131,6 +135,46 @@ def arvix_search(query: str) -> str:
|
|
131 |
return {"arvix_results": formatted_search_docs}
|
132 |
|
133 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
134 |
@tool
|
135 |
def get_youtube_transcript(url: str) -> str:
|
136 |
"""
|
@@ -210,6 +254,7 @@ tool_map = {
|
|
210 |
"arvix_search": arvix_search,
|
211 |
"get_youtube_transcript": get_youtube_transcript,
|
212 |
"extract_video_id": extract_video_id,
|
|
|
213 |
|
214 |
}
|
215 |
|
|
|
34 |
from youtube_transcript_api._errors import TranscriptsDisabled, VideoUnavailable
|
35 |
import re
|
36 |
|
37 |
+
from langchain_community.document_loaders import TextLoader, PyMuPDFLoader
|
38 |
+
from docx import Document as DocxDocument
|
39 |
+
import openpyxl
|
40 |
+
|
41 |
|
42 |
load_dotenv()
|
43 |
|
|
|
135 |
return {"arvix_results": formatted_search_docs}
|
136 |
|
137 |
|
138 |
+
|
139 |
+
@tool
def analyze_attachment(file_path: str) -> str:
    """
    Analyzes attachments including PDF, TXT, DOCX, and XLSX files and returns text content.

    The format is selected from the file-name suffix, compared
    case-insensitively.

    Args:
        file_path: Local path to the attachment.

    Returns:
        At most the first 3000 characters of the extracted text. If the path
        does not exist or the suffix is not one of the supported formats, a
        human-readable message describing the problem is returned instead.
    """
    if not os.path.exists(file_path):
        return f"File not found: {file_path}"

    # Lower-case once so every suffix check below is case-insensitive.
    lowered_path = file_path.lower()

    if lowered_path.endswith(".pdf"):
        documents = PyMuPDFLoader(file_path).load()
        # One loader document per entry; separate them with a blank line.
        content = "\n\n".join(doc.page_content for doc in documents)

    elif lowered_path.endswith(".txt"):
        documents = TextLoader(file_path).load()
        content = "\n\n".join(doc.page_content for doc in documents)

    elif lowered_path.endswith(".docx"):
        docx_file = DocxDocument(file_path)
        # Only body paragraphs are read here; one paragraph per output line.
        content = "\n".join(para.text for para in docx_file.paragraphs)

    elif lowered_path.endswith(".xlsx"):
        workbook = openpyxl.load_workbook(file_path, data_only=True)
        # Collect newline-terminated pieces and join once at the end instead
        # of growing a string with += inside the nested loops.
        pieces = []
        for sheet in workbook:
            pieces.append(f"Sheet: {sheet.title}\n")
            for row in sheet.iter_rows(values_only=True):
                # Empty cells become empty strings so columns stay aligned
                # in the tab-separated output.
                cells = ("" if cell is None else str(cell) for cell in row)
                pieces.append("\t".join(cells) + "\n")
        content = "".join(pieces)

    else:
        return "Unsupported file format. Please use PDF, TXT, DOCX, or XLSX."

    return content[:3000]  # Limit size for readability
|
176 |
+
|
177 |
+
|
178 |
@tool
|
179 |
def get_youtube_transcript(url: str) -> str:
|
180 |
"""
|
|
|
254 |
"arvix_search": arvix_search,
|
255 |
"get_youtube_transcript": get_youtube_transcript,
|
256 |
"extract_video_id": extract_video_id,
|
257 |
+
"analyze_attachment": analyze_attachment,
|
258 |
|
259 |
}
|
260 |
|