File size: 2,401 Bytes
ab3aca2 b540717 ab3aca2 c2b686e ab3aca2 b540717 ab3aca2 c2b686e ab3aca2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 |
import os
import requests
from datasets import load_dataset
import sys
import contextlib
from io import StringIO
# Import one of the agents (toggle as needed)
from task_force import TaskForce
from langgraph_agent import get_agent as get_langgraph_agent
# Scoring API base URL and the endpoint that lists the available questions.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
QUESTIONS_API = DEFAULT_API_URL + "/questions"

# GAIA benchmark, "2023_level1" configuration, validation split.
dataset = load_dataset(
    "gaia-benchmark/GAIA",
    "2023_level1",
    split="validation",
)
def _answers_match(answer, expected):
    """Return True when the agent's answer equals the expected answer.

    Both values are converted to ``str`` and stripped of surrounding
    whitespace before comparing, so a non-string answer (e.g. the int ``3``)
    or an answer with a trailing newline is not reported as a mismatch.
    """
    return str(answer).strip() == str(expected).strip()


def main(example_index: int = 52) -> None:
    """Run the agent on one example from ``dataset`` and print the outcome.

    Prints the task id, question and expected answer, builds a ``TaskForce``
    agent, calls it with the question text, and prints whether the returned
    answer matches the expected one. Failures while building or running the
    agent are printed rather than raised.

    Args:
        example_index: Index into the module-level ``dataset`` of the example
            to run. Defaults to 52, the example this script has always used.
    """
    # Alternative question source (disabled): take the first question from the
    # scoring API instead of the local dataset.
    # print("🔍 Fetching a single question from API...")
    # try:
    #     response = requests.get(QUESTIONS_API, timeout=10)
    #     response.raise_for_status()
    #     questions = response.json()
    #     if not questions:
    #         print("❌ No questions returned from API.")
    #         return
    #     question = questions[0]
    # except Exception as e:
    #     print(f"❌ Failed to fetch questions from API: {e}")
    #     return
    # task_id = question.get("task_id")
    # question_text = question.get("question")

    # Get one example
    example = dataset[example_index]
    task_id = example["task_id"]
    question = example["Question"]
    expected_answer = example["Final answer"]

    print(f"\n🧠 Task ID: {task_id}")
    print(f"📝 Question:\n{question}\n")
    print(f"Expected Answer: {expected_answer}\n")

    print("⚙️ Loading agent...")
    try:
        agent = TaskForce()
        # print(agent.prompt_templates["managed_agent"])
        # agent = get_langgraph_agent()
    except Exception as e:
        print(f"❌ Failed to initialize agent: {e}")
        return

    print("🤖 Running agent on question...")
    # Only the agent call is guarded, so an error while reporting the result
    # is never mislabelled as an agent failure.
    try:
        # Silence internal print statements (e.g. from planning loop)
        # with contextlib.redirect_stdout(StringIO()):
        #     answer = agent(question)
        answer = agent(question)
    except Exception as e:
        print(f"❌ Agent failed on task {task_id}: {e}")
        return

    print("\n=====================\nRESULTS\n=====================\n")
    if _answers_match(answer, expected_answer):
        print("Final Answer: ", answer)
        print(f"✅ Agent successfully answered task {task_id}!")
    else:
        print(f"❌ Agent's answer for task {task_id} did not match expected answer.")
        print(f"Expected: {expected_answer}\nGot: {answer}")
# Run the single-question check only when executed as a script, not on import.
if __name__ == "__main__":
    main()