# Final_Assignment_Template/smolagents-wip/test_correct_answers.py
# diego.sancristobal
# feat: Add partially working langgraph agent
# 4f6b4f2
#!/usr/bin/env python3
"""
Test script to demonstrate the new correct answer functionality
"""
import sys
import os
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
from agent_test_client import load_metadata, get_correct_answer, questions
def test_correct_answers():
    """Print a demonstration of the correct-answer lookup helpers.

    Calls ``load_metadata()`` and reports how many entries it returned, shows
    up to the first five entries of ``questions`` together with the result of
    ``get_correct_answer()`` for each, and finally looks up a fixed list of
    task IDs.

    This is a smoke/demo routine rather than an assertion-based test: it only
    writes to stdout, verifies nothing, and returns ``None``. Any falsy lookup
    result is displayed as ``'Not found'``.
    """
    print("🧪 Testing Correct Answer Functionality")
    print("=" * 50)

    # Load metadata
    metadata = load_metadata()
    print(f"📊 Loaded {len(metadata)} correct answers from metadata.jsonl")
    print()

    # Test a few sample questions
    print("🔍 Sample Questions with Correct Answers:")
    print("-" * 40)
    # Slicing caps the sample at five and copes with shorter sequences.
    # NOTE(review): assumes `questions` is a list-like sequence of dicts.
    for number, question_data in enumerate(questions[:5], start=1):
        task_id = question_data.get("task_id", "Unknown")
        question_text = question_data.get("question", "No question")
        level = question_data.get("Level", "Unknown")

        # Get correct answer
        correct_answer = get_correct_answer(task_id)

        # Long questions are truncated to 100 characters for display.
        ellipsis = "..." if len(question_text) > 100 else ""
        print(f"Question {number}:")
        print(f" 📋 Task ID: {task_id}")
        print(f" 📊 Level: {level}")
        print(f" ❓ Question: {question_text[:100]}{ellipsis}")
        print(f" ✅ Correct Answer: {correct_answer or 'Not found'}")
        print()

    # Test specific questions by task_id
    print("🎯 Testing Specific Questions:")
    print("-" * 30)
    test_cases = [
        "8e867cd7-cff9-4e6c-867a-ff5ddc2550be",  # Mercedes Sosa - should be "3"
        "a1e91b78-d3d8-4675-bb8d-62741b4b68a6",  # YouTube video - should be "3"
        "2d83110e-a098-4ebb-9987-066c06fa42d0",  # Reversed text - should be "Right"
        "cca530fc-4052-43b2-b130-b30968d8aa44",  # Chess - should be "Rd5"
        "6f37996b-2ac7-44b0-8e68-6d28256631b4",  # Math table - should be "b, e"
    ]
    for task_id in test_cases:
        answer = get_correct_answer(task_id)
        # Only the first 8 characters of the ID are shown to keep lines short.
        print(f"📋 {task_id[:8]}...: {answer or 'Not found'}")
    print()

    # NOTE: this message is printed unconditionally; nothing above is asserted.
    print("🎉 Correct answer functionality is working!")
    print()
    print("📝 Summary of Changes Made:")
    changes = (
        "Added load_metadata() function to parse metadata.jsonl",
        "Added get_correct_answer() function to retrieve answers by task_id",
        "Modified test_evaluation_question() to return correct answer",
        "Modified test_all_evaluation_questions() to include correct answers",
        "Updated Gradio UI with additional Correct Answer box",
        "Updated DataFrame to show both Agent Answer and Correct Answer columns",
    )
    for change in changes:
        print(f" ✅ {change}")
    print()
    print("🚀 The Evaluation Questions now show correct answers alongside agent responses!")
# Run the demonstration only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    test_correct_answers()