Spaces:

jbaselga
/

agentes-unit4

Sleeping

App Files Files Community

jbaselga committed on Jun 19

Commit

0385645

verified ·

1 Parent(s): 401aee4

Create api_server.py

Browse files

Files changed (1) hide show

api_server.py +59 -0

api_server.py ADDED Viewed

	@@ -0,0 +1,59 @@

+from fastapi import FastAPI
+from pydantic import BaseModel
+from typing import List, Dict, Any, Union
+from datasets import load_dataset
+import random
+import os
+app = FastAPI()
+# Carga y filtra nivel 1 GAIA (validation split)
+ds = load_dataset("gaia-benchmark/GAIA", "2023_level1", split="validation", trust_remote_code=True)
+QUESTIONS = []
+GROUND_TRUTH: Dict[str, str] = {}
+for item in ds:
+    task_id = str(item["task_id"])
+    QUESTIONS.append({
+        "task_id": task_id,
+        "question": item["Question"]
+    })
+    GROUND_TRUTH[task_id] = str(item["Final answer"])
+class AnswerItem(BaseModel):
+    # One answer inside a submission.
+    # task_id is the key submit() looks up in GROUND_TRUTH.
+    task_id: str
+    # The answer may arrive as a string or a number; submit() converts it
+    # with str() before comparing it to the expected answer.
+    submitted_answer: Union[str, int, float]
+class Submission(BaseModel):
+    # Request body accepted by POST /submit.
+    # username is echoed back unchanged in the ScoreResponse.
+    username: str
+    # Required by the schema, but not read by submit() in this file.
+    agent_code: str
+    # The answers to grade; an empty list is accepted and scores 0.0.
+    answers: List[AnswerItem]
+class ScoreResponse(BaseModel):
+    # Result payload built and returned by submit().
+    # Copied from the submission.
+    username: str
+    # Percentage of submitted answers that matched (0.0 when none were sent).
+    score: float
+    # Number of submitted answers that matched the ground truth.
+    correct_count: int
+    # Number of answers contained in the submission.
+    total_attempted: int
+    # Human-readable summary of the score.
+    message: str
+@app.get("/questions")
+def get_questions():
+    # Devuelve las 20 preguntas aleatorias de nivel 1 cada vez
+    chosen = random.sample(QUESTIONS, k=min(20, len(QUESTIONS)))
+    return chosen
+@app.post("/submit")
+def submit(sub: Submission):
+    correct = sum(
+        1 for ans in sub.answers
+        if GROUND_TRUTH.get(ans.task_id, "") == str(ans.submitted_answer).strip()
+    )
+    total = len(sub.answers)
+    score = correct / total * 100 if total > 0 else 0.0
+    return ScoreResponse(
+        username=sub.username,
+        score=score,
+        correct_count=correct,
+        total_attempted=total,
+        message=f"Puntuación: {correct}/{total} = {score:.1f}%"
+    )