Sauten committed on
Commit
4325517
·
verified ·
1 Parent(s): 796c04e

Create tool_read_excel_as_json.py

Browse files
Files changed (1) hide show
  1. tool_read_excel_as_json.py +54 -0
tool_read_excel_as_json.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from smolagents import tool
2
+ import os
3
+ import requests
4
+ import pandas as pd
5
+ from io import BytesIO
6
+
7
# Base URL of the file-serving API; override it with the AGENT_API_URL
# environment variable.
DEFAULT_API_URL = os.getenv("AGENT_API_URL", "https://agents-course-unit4-scoring.hf.space")

# Upper bound on rows returned per sheet so the tool output stays small.
_MAX_ROWS_PER_SHEET = 20


def _json_safe(value):
    """Return *value* in a form that serializes to valid JSON."""
    # NaN / NaT / None have no valid JSON spelling other than null.
    if pd.api.types.is_scalar(value) and pd.isna(value):
        return None
    # Timestamps, dates and times become ISO-8601 strings.
    if hasattr(value, "isoformat"):
        return value.isoformat()
    # NumPy scalars become plain Python numbers / booleans.
    if hasattr(value, "item"):
        return value.item()
    return value


def _frame_to_records(frame, limit):
    """Convert a sheet DataFrame into at most *limit* JSON-safe row dicts."""
    # Drop fully empty rows first so they do not count against the limit.
    trimmed = frame.dropna(how="all").head(limit)
    return [
        # JSON object keys must be strings; headers may be numbers or dates.
        {str(column): _json_safe(cell) for column, cell in row.items()}
        for row in trimmed.to_dict(orient="records")
    ]


@tool
def read_excel_as_json(task_id: str) -> dict:
    """
    Fetches and parses an Excel file as structured JSON for a given task_id.

    The file is downloaded from ``{DEFAULT_API_URL}/files/{task_id}``. Every
    sheet is converted to a list of row dictionaries; completely empty rows
    are dropped and only the first 20 remaining rows of each sheet are kept.
    Missing cells are returned as None and date/time cells as ISO-8601
    strings so the result can be serialized as JSON.

    Args:
        task_id: The task ID to fetch.

    Returns:
        {
            "task_id": str,
            "sheets": {
                "SheetName1": [ {col1: val1, col2: val2, ...}, ... ],
                ...
            },
            "status": "Success" | "<http status code> - Failed" | "Error: <message>"
        }
        "sheets" is empty whenever "status" is not "Success".
    """
    url = f"{DEFAULT_API_URL}/files/{task_id}"

    try:
        response = requests.get(url, timeout=10)
        if response.status_code != 200:
            return {"task_id": task_id, "sheets": {}, "status": f"{response.status_code} - Failed"}

        # The context manager closes the workbook handle even if parsing fails.
        with pd.ExcelFile(BytesIO(response.content)) as xls:
            sheets_json = {
                sheet: _frame_to_records(xls.parse(sheet), _MAX_ROWS_PER_SHEET)
                for sheet in xls.sheet_names
            }

        return {
            "task_id": task_id,
            "sheets": sheets_json,
            "status": "Success",
        }

    except Exception as e:
        # Deliberately broad: this is a tool boundary, and the calling agent
        # expects an error payload rather than an exception.
        return {
            "task_id": task_id,
            "sheets": {},
            "status": f"Error: {str(e)}",
        }