File size: 2,401 Bytes
ab3aca2
 
 
 
 
 
 
 
b540717
ab3aca2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c2b686e
ab3aca2
 
 
 
 
 
 
 
 
 
b540717
ab3aca2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c2b686e
 
 
 
 
 
 
ab3aca2
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import os
import requests
from datasets import load_dataset
import sys
import contextlib
from io import StringIO

# Import one of the agents (toggle as needed)
from task_force import TaskForce
from langgraph_agent import get_agent as get_langgraph_agent

# Base URL of the remote API and its questions endpoint.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
QUESTIONS_API = DEFAULT_API_URL + "/questions"

# GAIA benchmark, "2023_level1" configuration, validation split.
# Loaded once at import time; main() indexes into it.
dataset = load_dataset(
    "gaia-benchmark/GAIA",
    name="2023_level1",
    split="validation",
)


def main(example_index: int = 52) -> None:
    """Run the agent on one GAIA example and report whether its answer matches.

    Picks a single row from the module-level ``dataset``, prints the task id,
    question and expected answer, instantiates the agent, calls it with the
    question text and prints a pass/fail summary. Failures (bad index, agent
    construction, agent execution) are reported on stdout and end the run
    early instead of raising.

    Args:
        example_index: Position of the example inside ``dataset``. Defaults
            to 52, the example this script was originally hard-wired to.
    """
    # Alternative question source: fetch the first question from the API
    # instead of the local dataset (no expected answer is available then).
    # print("🔍 Fetching a single question from API...")
    # try:
    #     response = requests.get(QUESTIONS_API, timeout=10)
    #     response.raise_for_status()
    #     questions = response.json()
    #     if not questions:
    #         print("❌ No questions returned from API.")
    #         return
    #     question = questions[0]
    # except Exception as e:
    #     print(f"❌ Failed to fetch questions from API: {e}")
    #     return
    #
    # task_id = question.get("task_id")
    # question_text = question.get("question")

    # Get one example; an out-of-range index is reported rather than raised.
    try:
        example = dataset[example_index]
    except IndexError as e:
        print(f"❌ No example at index {example_index}: {e}")
        return

    task_id = example["task_id"]
    question = example["Question"]
    expected_answer = example["Final answer"]

    print(f"\n🧠 Task ID: {task_id}")
    print(f"📌 Question:\n{question}\n")
    print(f"Expected Answer: {expected_answer}\n")

    print("⚙️ Loading agent...")
    try:
        agent = TaskForce()
        # Toggle to the LangGraph implementation if needed:
        # print(agent.prompt_templates["managed_agent"])
        # agent = get_langgraph_agent()
    except Exception as e:
        print(f"❌ Failed to initialize agent: {e}")
        return

    print("🤖 Running agent on question...")

    # Only the agent call is guarded, so an error while printing the results
    # is not misreported as an agent failure.
    try:
        # To silence the agent's internal print statements (e.g. from a
        # planning loop), wrap the call instead:
        # with contextlib.redirect_stdout(StringIO()):
        #     answer = agent(question)
        answer = agent(question)
    except Exception as e:
        print(f"❌ Agent failed on task {task_id}: {e}")
        return

    print("\n=====================\nRESULTS\n=====================\n")

    # Compare as stripped strings so a non-string return value (e.g. an int)
    # or stray surrounding whitespace does not count as a wrong answer.
    if str(answer).strip() == str(expected_answer).strip():
        print("Final Answer: ", answer)
        print(f"✅ Agent successfully answered task {task_id}!")
    else:
        print(f"❌ Agent's answer for task {task_id} did not match expected answer.")
        print(f"Expected: {expected_answer}\nGot: {answer}")

# Run the single-question evaluation only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()