File size: 1,940 Bytes
10e9b7d
 
3c4371f
03cbe8f
 
 
10e9b7d
03cbe8f
31243f4
03cbe8f
3c4371f
03cbe8f
 
 
 
3c4371f
03cbe8f
 
 
 
 
e80aab9
36ed51a
03cbe8f
e80aab9
03cbe8f
7d65c66
03cbe8f
e80aab9
03cbe8f
 
 
 
 
 
e80aab9
03cbe8f
 
 
e80aab9
 
03cbe8f
 
e80aab9
7e4a06b
31243f4
03cbe8f
 
e80aab9
03cbe8f
e80aab9
 
03cbe8f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import os
import gradio as gr
import pandas as pd
from langchain_core.messages import HumanMessage
from graph_builder import build_graph
from api_client import fetch_questions, submit_answers

class GaiaAgent:
    """Callable wrapper around the graph returned by ``build_graph()``.

    An instance is called with a question and hands back whatever the graph
    stored under the ``"final_answer"`` key of its resulting state.
    """

    def __init__(self):
        """Build the graph once so it can be reused for every question."""
        self.graph = build_graph()

    def __call__(self, question):
        """Invoke the graph for ``question`` and return its final answer.

        The graph receives a state dict whose only key is ``"question"``.
        A ``KeyError`` propagates if the resulting state has no
        ``"final_answer"`` entry.
        """
        outcome = self.graph.invoke({"question": question})
        return outcome["final_answer"]

def run_and_submit_all(profile: "gr.OAuthProfile | None"):
    """Run the agent on every fetched question, submit the answers, and report.

    Args:
        profile: OAuth profile of the logged-in Hugging Face user, or ``None``
            when nobody is logged in. The annotation is what lets Gradio
            inject the profile into an event handler that has no ``inputs``;
            it is written as a string so it is resolved lazily from the
            module globals instead of being evaluated when the function is
            defined.

    Returns:
        A ``(status, results)`` tuple. ``status`` is a human-readable message
        for the status textbox. ``results`` is a ``pandas.DataFrame`` with one
        row per processed question (columns ``Task ID``, ``Question``,
        ``Submitted Answer``), or ``None`` when no question was processed.
    """
    # Check the login first so anonymous clicks cost nothing.
    username = profile.username if profile else None
    if not username:
        return "Please log in to Hugging Face.", None

    space_id = os.getenv("SPACE_ID")
    # Link to this Space's source tree, sent along with the answers.
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    agent = GaiaAgent()

    # This function is the UI boundary: failures of the remote calls are
    # turned into a status message instead of an unhandled exception.
    try:
        questions_data = fetch_questions()
    except Exception as exc:
        return f"Error fetching questions: {exc}", None
    if not questions_data:
        return "No questions were fetched; nothing to submit.", None

    answers_payload = []
    results_log = []
    for item in questions_data:
        task_id = item["task_id"]
        question = item["question"]
        try:
            answer = agent(question)
        except Exception as exc:
            # One failing question must not throw away the answers already
            # computed; record the failure and keep going.
            results_log.append(
                {
                    "Task ID": task_id,
                    "Question": question,
                    "Submitted Answer": f"AGENT ERROR: {exc}",
                }
            )
            continue
        answers_payload.append({"task_id": task_id, "submitted_answer": answer})
        results_log.append(
            {"Task ID": task_id, "Question": question, "Submitted Answer": answer}
        )

    results_df = pd.DataFrame(results_log)
    if not answers_payload:
        return "Agent did not produce any answers to submit.", results_df

    try:
        result = submit_answers(username, agent_code, answers_payload)
    except Exception as exc:
        # Keep the per-question table visible even when the submission fails.
        return f"Submission Failed: {exc}", results_df

    final_status = (
        f"Submission Successful!\n"
        f"User: {result.get('username')}\n"
        f"Score: {result.get('score')}%\n"
        f"Correct: {result.get('correct_count')}/{result.get('total_attempted')}\n"
        f"Message: {result.get('message', '')}"
    )
    return final_status, results_df

# Assemble the page. Components appear in the order they are created below.
with gr.Blocks() as demo:
    # Header and short usage hint.
    gr.Markdown("# GAIA LangGraph Agent")
    gr.Markdown("Log in and run your agent to evaluate on GAIA benchmark.")

    # Login control, the trigger button, and the two output widgets.
    gr.LoginButton()
    run_button = gr.Button("Run Evaluation & Submit All Answers")
    status_output = gr.Textbox(label="Run Status", lines=5)
    results_table = gr.DataFrame(label="Results")

    # No input components are wired; the handler's two return values fill
    # the status box and the results table, in that order.
    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table],
    )

# Start the app only when this file is executed directly.
if __name__ == "__main__":
    demo.launch()