Agents_Course_Final_Assignment

Sleeping

App Files Files Community

Agents_Course_Final_Assignment / test_agent.py

simoncwang

new attempt with better tools

c2b686e 5 months ago

raw

history blame contribute delete

2.4 kB

	import os
	import requests
	from datasets import load_dataset
	import sys
	import contextlib
	from io import StringIO

	# Import one of the agents (toggle as needed)
	from task_force import TaskForce
	from langgraph_agent import get_agent as get_langgraph_agent

	# Base URL of the remote API and the questions endpoint derived from it.
	DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
	QUESTIONS_API = DEFAULT_API_URL + "/questions"

	# GAIA benchmark: "2023_level1" configuration, validation split.
	# NOTE: this runs at import time, so importing the module loads the dataset.
	dataset = load_dataset(
	    "gaia-benchmark/GAIA",
	    "2023_level1",
	    split="validation",
	)


	def main(example_index: int = 52) -> None:
	    """Run the agent on one GAIA validation example and report the outcome.

	    The example is read from the module-level ``dataset`` (it is not fetched
	    from ``QUESTIONS_API``), so its expected answer is available locally and
	    can be compared against what the agent returns.

	    Args:
	        example_index: Position of the example inside ``dataset``. Defaults
	            to 52, the index this script previously had hard-coded.

	    Returns:
	        None. All results are reported on stdout. Failures to build or run
	        the agent are printed and end the function early instead of raising.
	    """
	    example = dataset[example_index]
	    task_id = example["task_id"]
	    question = example["Question"]
	    expected_answer = example["Final answer"]

	    print(f"\n🧠 Task ID: {task_id}")
	    print(f"📌 Question:\n{question}\n")
	    print(f"Expected Answer: {expected_answer}\n")

	    print("⚙️ Loading agent...")
	    try:
	        # To exercise the LangGraph agent instead, build it with
	        # get_langgraph_agent() here.
	        agent = TaskForce()
	    except Exception as e:
	        print(f"❌ Failed to initialize agent: {e}")
	        return

	    print("🤖 Running agent on question...")

	    # Only the agent call is guarded: an error raised while comparing or
	    # printing the result should not be misreported as an agent failure.
	    try:
	        answer = agent(question)
	    except Exception as e:
	        print(f"❌ Agent failed on task {task_id}: {e}")
	        return

	    print("\n=====================\nRESULTS\n=====================\n")
	    # Exact equality on purpose: no trimming or case folding is applied.
	    if answer == expected_answer:
	        print("Final Answer: ", answer)
	        print(f"✅ Agent successfully answered task {task_id}!")
	    else:
	        print(f"❌ Agent's answer for task {task_id} did not match expected answer.")
	        print(f"Expected: {expected_answer}\nGot: {answer}")

	# Script entry point: run the single-example check only when this file is
	# executed directly, not when it is imported.
	if __name__ == "__main__":
	    main()