wt002 committed on
Commit
93a2770
·
verified ·
1 Parent(s): 95010ac

Update agent.py

Browse files
Files changed (1) hide show
  1. agent.py +45 -0
agent.py CHANGED
@@ -34,6 +34,10 @@ from youtube_transcript_api import YouTubeTranscriptApi
34
  from youtube_transcript_api._errors import TranscriptsDisabled, VideoUnavailable
35
  import re
36
 
 
 
 
 
37
 
38
  load_dotenv()
39
 
@@ -131,6 +135,46 @@ def arvix_search(query: str) -> str:
131
  return {"arvix_results": formatted_search_docs}
132
 
133
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
134
  @tool
135
  def get_youtube_transcript(url: str) -> str:
136
  """
@@ -210,6 +254,7 @@ tool_map = {
210
  "arvix_search": arvix_search,
211
  "get_youtube_transcript": get_youtube_transcript,
212
  "extract_video_id": extract_video_id,
 
213
 
214
  }
215
 
 
34
  from youtube_transcript_api._errors import TranscriptsDisabled, VideoUnavailable
35
  import re
36
 
37
+ from langchain_community.document_loaders import TextLoader, PyMuPDFLoader
38
+ from docx import Document as DocxDocument
39
+ import openpyxl
40
+
41
 
42
  load_dotenv()
43
 
 
135
  return {"arvix_results": formatted_search_docs}
136
 
137
 
138
+
139
# Upper bound on the number of characters handed back to the caller, so a
# large attachment cannot flood the tool output.
_MAX_ATTACHMENT_CHARS = 3000


@tool
def analyze_attachment(file_path: str) -> str:
    """
    Analyzes attachments including PDF, TXT, DOCX, and XLSX files and returns text content.

    Args:
        file_path: Local path to the attachment.

    Returns:
        At most the first 3000 characters of the extracted text, or a short
        message when the file is missing, has an unsupported extension, or
        cannot be parsed.
    """
    # isfile (rather than exists) so that a directory is reported as missing
    # instead of being handed to a parser.
    if not os.path.isfile(file_path):
        return f"File not found: {file_path}"

    lowered_path = file_path.lower()

    try:
        if lowered_path.endswith(".pdf"):
            pages = PyMuPDFLoader(file_path).load()
            content = "\n\n".join(page.page_content for page in pages)

        elif lowered_path.endswith(".txt"):
            # Read directly with an explicit encoding: the platform default
            # may not be UTF-8, and undecodable bytes should degrade to
            # replacement characters rather than abort the whole tool call.
            with open(file_path, encoding="utf-8", errors="replace") as handle:
                content = handle.read()

        elif lowered_path.endswith(".docx"):
            document = DocxDocument(file_path)
            content = "\n".join(paragraph.text for paragraph in document.paragraphs)

        elif lowered_path.endswith(".xlsx"):
            # data_only=True yields cached cell values instead of formulas.
            workbook = openpyxl.load_workbook(file_path, data_only=True)
            lines = []
            for sheet in workbook:
                lines.append(f"Sheet: {sheet.title}")
                for row in sheet.iter_rows(values_only=True):
                    # Empty cells become empty strings so columns stay aligned.
                    lines.append(
                        "\t".join("" if cell is None else str(cell) for cell in row)
                    )
            # Every line, including the last, is newline-terminated.
            content = "".join(f"{line}\n" for line in lines)

        else:
            return "Unsupported file format. Please use PDF, TXT, DOCX, or XLSX."

    except Exception as exc:
        # Tool boundary: the parsers raise many library-specific error types
        # for corrupt or mislabeled files. Report the failure as text, the
        # same way the other error paths of this tool do.
        return f"Failed to read {file_path}: {exc}"

    return content[:_MAX_ATTACHMENT_CHARS]  # Limit size for readability
176
+
177
+
178
  @tool
179
  def get_youtube_transcript(url: str) -> str:
180
  """
 
254
  "arvix_search": arvix_search,
255
  "get_youtube_transcript": get_youtube_transcript,
256
  "extract_video_id": extract_video_id,
257
+ "analyze_attachment": analyze_attachment,
258
 
259
  }
260