"""Run one GAIA validation question through an agent and check its answer.

The script loads the GAIA ``2023_level1`` validation split at import time,
picks a single example, asks the configured agent to answer it, and prints
whether the agent's answer matches the dataset's expected answer.
"""
import contextlib
import os
import sys
from io import StringIO

import requests
from datasets import load_dataset

# Import one of the agents (toggle as needed)
from task_force import TaskForce
from langgraph_agent import get_agent as get_langgraph_agent

DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
QUESTIONS_API = f"{DEFAULT_API_URL}/questions"

# Index of the validation example that main() evaluates by default.
DEFAULT_EXAMPLE_INDEX = 52

# Load GAIA dataset (validation split)
dataset = load_dataset("gaia-benchmark/GAIA", '2023_level1', split="validation")


def _answers_match(answer, expected_answer) -> bool:
    """Return True when the agent's answer equals the expected answer.

    Both values are converted to ``str`` and stripped of surrounding
    whitespace before comparing, so a numeric answer such as ``3`` matches
    the expected string ``"3"`` and stray leading/trailing whitespace does
    not cause a false mismatch. A ``None`` answer never matches.
    """
    if answer is None:
        return False
    return str(answer).strip() == str(expected_answer).strip()


def main(example_index: int = DEFAULT_EXAMPLE_INDEX) -> None:
    """Run the agent on one dataset example and print the outcome.

    Args:
        example_index: Position of the example inside ``dataset`` to
            evaluate. Defaults to ``DEFAULT_EXAMPLE_INDEX``.

    Agent construction and agent execution failures are reported on stdout
    and end the run early instead of raising.
    """
    # Alternative question source (disabled): fetch one question from the
    # scoring API instead of the local dataset.
    # print("🔍 Fetching a single question from API...")
    # try:
    #     response = requests.get(QUESTIONS_API, timeout=10)
    #     response.raise_for_status()
    #     questions = response.json()
    #     if not questions:
    #         print("❌ No questions returned from API.")
    #         return
    #     question = questions[0]
    # except Exception as e:
    #     print(f"❌ Failed to fetch questions from API: {e}")
    #     return
    # task_id = question.get("task_id")
    # question_text = question.get("question")

    # Get one example
    example = dataset[example_index]
    task_id = example["task_id"]
    question = example["Question"]
    expected_answer = example["Final answer"]

    print(f"\n🧠 Task ID: {task_id}")
    print(f"📌 Question:\n{question}\n")
    print(f"Expected Answer: {expected_answer}\n")

    print("⚙️ Loading agent...")
    try:
        agent = TaskForce()
        # print(agent.prompt_templates["managed_agent"])
        # agent = get_langgraph_agent()
    except Exception as e:
        print(f"❌ Failed to initialize agent: {e}")
        return

    print("🤖 Running agent on question...")
    try:
        # Silence internal print statements (e.g. from planning loop)
        # with contextlib.redirect_stdout(StringIO()):
        #     answer = agent(question)
        answer = agent(question)
    except Exception as e:
        # Only the agent call is guarded, so this message always refers to
        # a failure inside the agent itself.
        print(f"❌ Agent failed on task {task_id}: {e}")
        return

    print("\n=====================\nRESULTS\n=====================\n")
    if _answers_match(answer, expected_answer):
        print("Final Answer: ", answer)
        print(f"✅ Agent successfully answered task {task_id}!")
    else:
        print(f"❌ Agent's answer for task {task_id} did not match expected answer.")
        print(f"Expected: {expected_answer}\nGot: {answer}")


if __name__ == "__main__":
    main()