Agents_Course_Final_Assignment

Sleeping

File size: 2,401 Bytes

import os
import requests
from datasets import load_dataset
import sys
import contextlib
from io import StringIO

# Import one of the agents (toggle as needed)
from task_force import TaskForce
from langgraph_agent import get_agent as get_langgraph_agent

# Base URL of the course scoring service, and its questions endpoint.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
QUESTIONS_API = DEFAULT_API_URL + "/questions"

# GAIA benchmark, "2023_level1" configuration, validation split.
# Loaded once, when this module is imported.
dataset = load_dataset(
    "gaia-benchmark/GAIA",
    "2023_level1",
    split="validation",
)


def _answers_match(answer, expected_answer):
    """Return True when the agent's answer equals the expected answer.

    Both values are converted to ``str`` and stripped of surrounding
    whitespace before comparing, so an agent that returns the integer 4 or
    the text "4" followed by a newline still matches an expected "4".
    The comparison remains case- and punctuation-sensitive. A ``None``
    answer never matches.
    """
    if answer is None:
        return False
    return str(answer).strip() == str(expected_answer).strip()


def main(example_index=52, *, agent_factory=None, examples=None):
    """Run one agent on one example and print whether its answer matches.

    Args:
        example_index: Index of the example to evaluate within ``examples``.
        agent_factory: Zero-argument callable that builds the agent; the
            agent is then called with the question text. Defaults to
            ``TaskForce``.
        examples: Indexable collection of records providing the keys
            "task_id", "Question" and "Final answer". Defaults to the
            module-level ``dataset``.

    Returns:
        None. The outcome is reported on stdout; failures to load the
        example, build the agent or run it are printed rather than raised.
    """
    # Resolve defaults lazily so callers can swap the agent or the data
    # without editing this function.
    if examples is None:
        examples = dataset
    if agent_factory is None:
        agent_factory = TaskForce

    # NOTE: questions could alternatively be fetched from QUESTIONS_API; the
    # local dataset is used here because each record carries the expected
    # answer needed for the comparison below.
    try:
        example = examples[example_index]
    except IndexError:
        print(f"❌ No example at index {example_index}.")
        return

    task_id = example["task_id"]
    question = example["Question"]
    expected_answer = example["Final answer"]

    print(f"\n🧠 Task ID: {task_id}")
    print(f"📌 Question:\n{question}\n")
    print(f"Expected Answer: {expected_answer}\n")

    print("⚙️ Loading agent...")
    try:
        agent = agent_factory()
    except Exception as e:
        print(f"❌ Failed to initialize agent: {e}")
        return

    print("🤖 Running agent on question...")
    try:
        # To silence the agent's own prints, wrap this call in
        # contextlib.redirect_stdout(StringIO()).
        answer = agent(question)
    except Exception as e:
        print(f"❌ Agent failed on task {task_id}: {e}")
        return

    print("\n=====================\nRESULTS\n=====================\n")
    if _answers_match(answer, expected_answer):
        print("Final Answer: ", answer)
        print(f"✅ Agent successfully answered task {task_id}!")
    else:
        print(f"❌ Agent's answer for task {task_id} did not match expected answer.")
        print(f"Expected: {expected_answer}\nGot: {answer}")

# Run the single-example evaluation only when this file is executed as a
# script, not when it is imported.
if __name__ == "__main__":
    main()