Spaces:
Sleeping
Sleeping
#!/usr/bin/env python3 | |
""" | |
Test script to demonstrate the new correct answer functionality | |
""" | |
import sys | |
import os | |
sys.path.append(os.path.dirname(os.path.abspath(__file__))) | |
from agent_test_client import load_metadata, get_correct_answer, questions | |
def test_correct_answers():
    """Print a walkthrough of the correct-answer lookup helpers.

    The walkthrough loads the metadata and reports its size, shows up to the
    first five entries of ``questions`` next to whatever
    ``get_correct_answer`` returns for each, looks up a fixed list of task
    IDs, and ends with a summary of the related changes.

    Nothing is asserted and nothing is returned; all output goes to stdout.
    """
    print("π§ͺ Testing Correct Answer Functionality")
    print("=" * 50)

    # Only the number of loaded entries is reported here.
    answer_table = load_metadata()
    print(f"π Loaded {len(answer_table)} correct answers from metadata.jsonl")
    print()

    # Preview at most the first five questions together with their answers.
    print("π Sample Questions with Correct Answers:")
    print("-" * 40)
    for number, entry in enumerate(questions[:5], start=1):
        task_id = entry.get("task_id", "Unknown")
        question_text = entry.get("question", "No question")
        level = entry.get("Level", "Unknown")

        expected = get_correct_answer(task_id)

        print(f"Question {number}:")
        print(f" π Task ID: {task_id}")
        print(f" π Level: {level}")
        # Long questions are cut to 100 characters and marked with an ellipsis.
        preview = question_text[:100]
        ellipsis = "..." if len(question_text) > 100 else ""
        print(f" β Question: {preview}{ellipsis}")
        print(f" β Correct Answer: {expected or 'Not found'}")
        print()

    # Spot-check a handful of known task IDs.
    print("π― Testing Specific Questions:")
    print("-" * 30)
    known_task_ids = (
        "8e867cd7-cff9-4e6c-867a-ff5ddc2550be",  # Mercedes Sosa - should be "3"
        "a1e91b78-d3d8-4675-bb8d-62741b4b68a6",  # YouTube video - should be "3"
        "2d83110e-a098-4ebb-9987-066c06fa42d0",  # Reversed text - should be "Right"
        "cca530fc-4052-43b2-b130-b30968d8aa44",  # Chess - should be "Rd5"
        "6f37996b-2ac7-44b0-8e68-6d28256631b4",  # Math table - should be "b, e"
    )
    for task_id in known_task_ids:
        looked_up = get_correct_answer(task_id)
        print(f"π {task_id[:8]}...: {looked_up or 'Not found'}")
    print()

    print("π Correct answer functionality is working!")
    print()

    print("π Summary of Changes Made:")
    change_notes = (
        " β Added load_metadata() function to parse metadata.jsonl",
        " β Added get_correct_answer() function to retrieve answers by task_id",
        " β Modified test_evaluation_question() to return correct answer",
        " β Modified test_all_evaluation_questions() to include correct answers",
        " β Updated Gradio UI with additional Correct Answer box",
        " β Updated DataFrame to show both Agent Answer and Correct Answer columns",
    )
    for note in change_notes:
        print(note)
    print()
    print("π The Evaluation Questions now show correct answers alongside agent responses!")
# Run the demonstration only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    test_correct_answers()