Spaces:
Sleeping
Sleeping
from smolagents import tool | |
import os | |
import requests | |
import pandas as pd | |
from io import BytesIO | |
# Base URL of the API that serves task files. Override it with the
# AGENT_API_URL environment variable; an unset *or empty* variable falls back
# to the default hf.space endpoint. Trailing slashes are stripped so that
# f"{DEFAULT_API_URL}/files/..." never produces a double slash.
DEFAULT_API_URL = (
    os.getenv("AGENT_API_URL") or "https://agents-course-unit4-scoring.hf.space"
).rstrip("/")
def _to_json_safe(value):
    """Convert a single cell value into something that serializes as valid JSON."""
    # Missing markers (NaN, NaT, pd.NA) have no JSON representation -> null.
    if pd.api.types.is_scalar(value) and pd.isna(value):
        return None
    # Date/time cells (pd.Timestamp, datetime.date/time) become ISO-8601 strings.
    if hasattr(value, "isoformat"):
        return value.isoformat()
    return value


def read_excel_as_json(task_id: str, max_rows: int = 20) -> dict:
    """
    Fetch the Excel file attached to a task and return its sheets as JSON-safe records.

    The file is downloaded from ``{DEFAULT_API_URL}/files/{task_id}``. Every sheet
    is parsed, completely empty rows are dropped, and at most ``max_rows`` of the
    remaining rows are returned per sheet. Missing cells are returned as ``None``
    and date/time cells as ISO-8601 strings so the result can be dumped to JSON.

    This function never raises for download or parsing problems: any failure is
    reported through the ``"status"`` field with an empty ``"sheets"`` mapping.

    Args:
        task_id: The task ID to fetch.
        max_rows: Maximum number of non-empty rows to return per sheet
            (defaults to 20, the previously hard-coded limit).

    Returns:
        {
            "task_id": str,
            "sheets": {
                "SheetName1": [ {col1: val1, col2: val2, ...}, ... ],
                ...
            },
            "status": "Success" | "<http status code> - Failed" | "Error: <message>"
        }
    """
    url = f"{DEFAULT_API_URL}/files/{task_id}"
    try:
        response = requests.get(url, timeout=10)
        if response.status_code != 200:
            return {"task_id": task_id, "sheets": {}, "status": f"{response.status_code} - Failed"}

        sheets_json = {}
        # The context manager closes the workbook handle even if parsing fails.
        with pd.ExcelFile(BytesIO(response.content)) as xls:
            for sheet in xls.sheet_names:
                df = xls.parse(sheet)
                df = df.dropna(how="all")  # Remove completely empty rows
                records = df.head(max_rows).to_dict(orient="records")
                sheets_json[sheet] = [
                    {column: _to_json_safe(cell) for column, cell in record.items()}
                    for record in records
                ]

        return {
            "task_id": task_id,
            "sheets": sheets_json,
            "status": "Success",
        }
    except Exception as e:
        # Deliberate best-effort boundary: callers get an error status, not a traceback.
        return {
            "task_id": task_id,
            "sheets": {},
            "status": f"Error: {str(e)}",
        }