Final_Assignment_Template

Sleeping

Final_Assignment_Template / smolagents-wip /test_correct_answers.py

diego.sancristobal

feat: Add partially working langgraph agent

4f6b4f2 2 months ago

2.87 kB

	#!/usr/bin/env python3
	"""
	Test script to demonstrate the new correct answer functionality
	"""

	import sys
	import os
	sys.path.append(os.path.dirname(os.path.abspath(__file__)))

	from agent_test_client import load_metadata, get_correct_answer, questions

	def test_correct_answers():
	    """Print a walkthrough of the correct-answer lookup helpers.

	    Calls ``load_metadata()`` and reports how many entries it returned,
	    shows up to the first five items of ``questions`` next to the value
	    returned by ``get_correct_answer()`` for each, then looks up a fixed
	    list of task IDs.

	    This is a demonstration only: every result is printed, nothing is
	    asserted, and the closing success banner is emitted unconditionally.
	    """
	    print("🧪 Testing Correct Answer Functionality")
	    print("=" * 50)

	    # Report the size of whatever the metadata loader produced.
	    answer_metadata = load_metadata()
	    print(f"📊 Loaded {len(answer_metadata)} correct answers from metadata.jsonl")
	    print()

	    print("🔍 Sample Questions with Correct Answers:")
	    print("-" * 40)

	    # Pairing with range(1, 6) caps the preview at five questions and
	    # supplies the 1-based number shown to the user.
	    for number, entry in zip(range(1, 6), questions):
	        task_id = entry.get("task_id", "Unknown")
	        text = entry.get("question", "No question")
	        level = entry.get("Level", "Unknown")
	        expected = get_correct_answer(task_id)

	        # Only mark the question as truncated when text was actually cut.
	        suffix = "..." if len(text) > 100 else ""

	        print(f"Question {number}:")
	        print(f" 📋 Task ID: {task_id}")
	        print(f" 📊 Level: {level}")
	        print(f" ❓ Question: {text[:100]}{suffix}")
	        print(f" ✅ Correct Answer: {expected or 'Not found'}")
	        print()

	    print("🎯 Testing Specific Questions:")
	    print("-" * 30)

	    # Hand-picked task IDs; the trailing notes record the answer the
	    # author expected for each (they are not checked by this script).
	    spot_check_ids = (
	        "8e867cd7-cff9-4e6c-867a-ff5ddc2550be",  # Mercedes Sosa - should be "3"
	        "a1e91b78-d3d8-4675-bb8d-62741b4b68a6",  # YouTube video - should be "3"
	        "2d83110e-a098-4ebb-9987-066c06fa42d0",  # Reversed text - should be "Right"
	        "cca530fc-4052-43b2-b130-b30968d8aa44",  # Chess - should be "Rd5"
	        "6f37996b-2ac7-44b0-8e68-6d28256631b4",  # Math table - should be "b, e"
	    )
	    for task_id in spot_check_ids:
	        found = get_correct_answer(task_id)
	        print(f"📋 {task_id[:8]}...: {found or 'Not found'}")

	    print()
	    print("🎉 Correct answer functionality is working!")
	    print()
	    print("📝 Summary of Changes Made:")
	    change_notes = (
	        " ✅ Added load_metadata() function to parse metadata.jsonl",
	        " ✅ Added get_correct_answer() function to retrieve answers by task_id",
	        " ✅ Modified test_evaluation_question() to return correct answer",
	        " ✅ Modified test_all_evaluation_questions() to include correct answers",
	        " ✅ Updated Gradio UI with additional Correct Answer box",
	        " ✅ Updated DataFrame to show both Agent Answer and Correct Answer columns",
	    )
	    for note in change_notes:
	        print(note)
	    print()
	    print("🚀 The Evaluation Questions now show correct answers alongside agent responses!")

	if __name__ == "__main__":
	    # Run the demonstration only when this file is executed as a script,
	    # not when it is imported.
	    test_correct_answers()