import os
from io import BytesIO

import pandas as pd
import requests
from smolagents import tool

DEFAULT_API_URL = os.getenv("AGENT_API_URL", "https://agents-course-unit4-scoring.hf.space")

# Upper bound on rows returned per sheet, to keep the tool output small.
MAX_ROWS_PER_SHEET = 20

# Seconds to wait for the file download before giving up.
REQUEST_TIMEOUT_SECONDS = 10


@tool
def read_excel_as_json(task_id: str) -> dict:
    """
    Fetches and parses an Excel file as structured JSON for a given task_id.

    The file is downloaded from ``{DEFAULT_API_URL}/files/{task_id}``. For every
    sheet, rows that are completely empty are dropped and only the first 20
    remaining rows are returned. Empty cells are reported as None. This function
    does not raise: any failure is reported through the "status" field.

    Args:
        task_id: The task ID to fetch.

    Returns:
        {
            "task_id": str,
            "sheets": {
                "SheetName1": [ {col1: val1, col2: val2, ...}, ... ],
                ...
            },
            "status": "Success" | "<http status code> - Failed" | "Error: <message>"
        }
    """
    url = f"{DEFAULT_API_URL}/files/{task_id}"

    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
        if response.status_code != 200:
            return {
                "task_id": task_id,
                "sheets": {},
                "status": f"{response.status_code} - Failed",
            }

        sheets_json = {}
        # The context manager closes the workbook reader once parsing is done.
        with pd.ExcelFile(BytesIO(response.content)) as xls:
            for sheet in xls.sheet_names:
                df = xls.parse(sheet)
                df = df.dropna(how="all")  # Remove completely empty rows
                df = df.head(MAX_ROWS_PER_SHEET)  # Limit to the first rows
                # NaN is not valid JSON; cast to object so that missing cells
                # can be stored as None instead of being coerced back to NaN.
                df = df.astype(object).where(df.notna(), None)
                sheets_json[sheet] = df.to_dict(orient="records")

        return {
            "task_id": task_id,
            "sheets": sheets_json,
            "status": "Success",
        }
    except Exception as e:
        # Tool boundary: report every failure (network, parsing, ...) to the
        # caller through the status field rather than raising.
        return {
            "task_id": task_id,
            "sheets": {},
            "status": f"Error: {str(e)}",
        }