File size: 2,870 Bytes
4f6b4f2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
#!/usr/bin/env python3
"""
Test script to demonstrate the new correct answer functionality
"""

import sys
import os
# Make sure the directory containing this script is importable, so
# agent_test_client resolves regardless of the current working directory.
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

from agent_test_client import load_metadata, get_correct_answer, questions

def test_correct_answers():
    """Exercise the correct-answer lookup helpers and print a demo report.

    Loads the task_id -> answer mapping, shows the first few questions with
    their known answers, spot-checks a handful of specific task ids, and
    finishes with a summary of the feature changes.
    """
    print("πŸ§ͺ Testing Correct Answer Functionality")
    print("=" * 50)

    # Pull the task_id -> correct-answer mapping from metadata.jsonl.
    answer_map = load_metadata()
    print(f"πŸ“Š Loaded {len(answer_map)} correct answers from metadata.jsonl")
    print()

    # Walk the first few questions and show each next to its known answer.
    print("πŸ” Sample Questions with Correct Answers:")
    print("-" * 40)

    for number, entry in enumerate(questions[:5], start=1):
        task_id = entry.get("task_id", "Unknown")
        text = entry.get("question", "No question")
        level = entry.get("Level", "Unknown")

        answer = get_correct_answer(task_id)

        # Truncate long question text to 100 chars for readability.
        suffix = "..." if len(text) > 100 else ""
        print(f"Question {number}:")
        print(f"  πŸ“‹ Task ID: {task_id}")
        print(f"  πŸ“Š Level: {level}")
        print(f"  ❓ Question: {text[:100]}{suffix}")
        print(f"  βœ… Correct Answer: {answer if answer else 'Not found'}")
        print()

    # Spot-check known task ids against their expected answers.
    print("🎯 Testing Specific Questions:")
    print("-" * 30)

    known_tasks = (
        "8e867cd7-cff9-4e6c-867a-ff5ddc2550be",  # Mercedes Sosa - should be "3"
        "a1e91b78-d3d8-4675-bb8d-62741b4b68a6",  # YouTube video - should be "3"
        "2d83110e-a098-4ebb-9987-066c06fa42d0",  # Reversed text - should be "Right"
        "cca530fc-4052-43b2-b130-b30968d8aa44",  # Chess - should be "Rd5"
        "6f37996b-2ac7-44b0-8e68-6d28256631b4",  # Math table - should be "b, e"
    )

    for tid in known_tasks:
        found = get_correct_answer(tid)
        print(f"πŸ“‹ {tid[:8]}...: {found if found else 'Not found'}")

    print()
    print("πŸŽ‰ Correct answer functionality is working!")
    print()
    print("πŸ“ Summary of Changes Made:")
    # Emit the change log line by line from a single tuple of messages.
    for change in (
        "  βœ… Added load_metadata() function to parse metadata.jsonl",
        "  βœ… Added get_correct_answer() function to retrieve answers by task_id",
        "  βœ… Modified test_evaluation_question() to return correct answer",
        "  βœ… Modified test_all_evaluation_questions() to include correct answers",
        "  βœ… Updated Gradio UI with additional Correct Answer box",
        "  βœ… Updated DataFrame to show both Agent Answer and Correct Answer columns",
    ):
        print(change)
    print()
    print("πŸš€ The Evaluation Questions now show correct answers alongside agent responses!")

# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    test_correct_answers()