# Prat_Final_Assignment_Template / agent_file_parser.py
# yushnitp's picture
# Create agent_file_parser.py
# 698c93a verified
import ast
import mimetypes
import os
import tempfile

import fitz  # PyMuPDF for PDF
import openpyxl
import pandas as pd
import requests
# Base URL of the scoring API; task attachments are requested from
# "<DEFAULT_API_URL>/files/<task_id>".
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# Extensions this module knows how to turn into text (or a placeholder).
_SUPPORTED_EXTENSIONS = frozenset({".pdf", ".csv", ".xls", ".xlsx", ".py", ".mp3"})


def _resolve_extension(content_type, file_name):
    """Return the supported file extension for a download, or None.

    The Content-Type header is consulted first; if it does not map to a
    supported extension, the extension of *file_name* is used as a fallback.
    """
    # Drop parameters such as "; charset=utf-8" -- mimetypes only knows bare
    # media types and would otherwise fail to match.
    mime_type = content_type.split(";", 1)[0].strip().lower()
    for candidate in mimetypes.guess_all_extensions(mime_type):
        if candidate in _SUPPORTED_EXTENSIONS:
            return candidate

    if file_name:
        suffix = os.path.splitext(file_name)[1].lower()
        if suffix in _SUPPORTED_EXTENSIONS:
            return suffix
    return None


def fetch_task_file_context(task_id, file_name):
    """Download the file attached to a task and return its content as text.

    Args:
        task_id: Identifier of the task; used to build the download URL
            "<DEFAULT_API_URL>/files/<task_id>".
        file_name: Name of the attached file. Its extension is used to pick a
            parser when the response's Content-Type header is missing or does
            not identify a supported format. May be empty or None.

    Returns:
        The extracted text: concatenated page text for PDFs, the rendered
        DataFrame for CSV/Excel files, or the raw source for Python files.
        One of the bracketed placeholder strings is returned when the file
        cannot be downloaded, is an audio file, or has an unsupported format.

    Raises:
        Exceptions from the underlying parsers (PyMuPDF, pandas) propagate
        if the downloaded content is malformed.
    """
    file_url = f"{DEFAULT_API_URL}/files/{task_id}"
    try:
        response = requests.get(file_url, timeout=15)
    except requests.RequestException:
        # Timeouts / connection errors are reported the same way as a bad
        # status code, keeping the "placeholder string" contract.
        return "[File could not be retrieved]"
    if response.status_code != 200:
        return "[File could not be retrieved]"

    extension = _resolve_extension(
        response.headers.get("Content-Type", ""), file_name
    )
    if extension is None:
        return "[Unsupported file format]"
    if extension == ".mp3":
        return "[Audio file: Speech-to-text not yet implemented]"

    # A temporary directory (rather than an open NamedTemporaryFile) lets the
    # parsers reopen the file by path on every platform, including Windows.
    with tempfile.TemporaryDirectory() as tmp_dir:
        tmp_path = os.path.join(tmp_dir, f"task_file{extension}")
        with open(tmp_path, "wb") as tmp:
            tmp.write(response.content)

        if extension == ".pdf":
            # Close the document explicitly so the file handle is released
            # before the temporary directory is removed.
            with fitz.open(tmp_path) as document:
                return "\n".join(page.get_text() for page in document)
        if extension == ".csv":
            return pd.read_csv(tmp_path).to_string()
        if extension in (".xls", ".xlsx"):
            return pd.read_excel(tmp_path).to_string()

        # Only ".py" remains; decode explicitly instead of relying on the
        # platform's default encoding.
        with open(tmp_path, "r", encoding="utf-8", errors="replace") as source:
            return source.read()