Final_Assignment / tests /validate_rd5_consensus.py
GAIA Developer
🧪 Add comprehensive test infrastructure and async testing system
c262d1a
#!/usr/bin/env python3
"""
Quick validation: Are all tools now finding Rd5 with universal corrections?
"""
import sys
sys.path.append('.')
from gaia_tools import (
analyze_chess_position_manual,
analyze_chess_with_gemini_agent,
analyze_chess_with_checkmate_solver
)
def check_tool_for_rd5(tool_func, tool_name):
    """Run one analysis tool on the fixed test position and report on 'Rd5'.

    The tool is called with a hard-coded PNG path and a hard-coded prompt
    string. Its return value is searched for the substring ``'Rd5'`` and for
    anything that looks like an algebraic chess move; both findings are
    printed.

    Args:
        tool_func: Callable taking two positional string arguments (the image
            path and the prompt). Its result must support ``in`` and regex
            search, i.e. it is expected to be a string.
        tool_name: Label used in the printed report.

    Returns:
        True if ``'Rd5'`` occurs in the tool's result; False if it does not,
        or if calling the tool or inspecting its result raised an exception.
    """
    import re

    print(f"\nπŸ”§ Testing {tool_name}...")
    try:
        result = tool_func(
            'downloads/cca530fc-4052-43b2-b130-b30968d8aa44.png',
            'black to move find winning move'
        )
        has_rd5 = 'Rd5' in result
        print(f" Contains 'Rd5': {'βœ…' if has_rd5 else '❌'}")

        # Show what moves were found.
        moves = re.findall(r'\b[NBRQK]?[a-h]?[1-8]?x?[a-h][1-8][+#]?\b', result)
        # dict.fromkeys de-duplicates while keeping first-occurrence order, so
        # the preview below is stable across runs (a set would reorder the
        # moves depending on the process's string-hash seed).
        unique_moves = list(dict.fromkeys(moves))
        print(f" Moves found: {unique_moves[:5]}")  # Show first 5
        return has_rd5
    except Exception as e:
        # Best-effort harness: a failing tool counts as "did not find Rd5"
        # instead of aborting the whole validation run.
        print(f" ❌ Error: {e}")
        return False
def main():
    """Run every chess tool against the test position and summarise agreement.

    Each tool is passed through ``check_tool_for_rd5``; the number of tools
    whose output contains Rd5 is printed together with the consensus rate.

    Returns:
        True when all tools, or at least two of them, report Rd5; False
        otherwise.
    """
    print("🎯 VALIDATING Rd5 CONSENSUS WITH UNIVERSAL CORRECTIONS")
    print("=" * 70)

    labelled_tools = (
        (analyze_chess_position_manual, "Manual Tool"),
        (analyze_chess_with_gemini_agent, "Gemini Agent"),
        (analyze_chess_with_checkmate_solver, "Checkmate Solver"),
    )
    total_tools = len(labelled_tools)

    # Tools are checked one after another, in declaration order; each hit
    # contributes 1 to the tally.
    rd5_count = sum(
        1
        for func, label in labelled_tools
        if check_tool_for_rd5(func, label)
    )

    print("\nπŸ“Š CONSENSUS SUMMARY")
    print("-" * 30)
    print(f"Tools finding Rd5: {rd5_count}/{total_tools}")
    print(f"Consensus rate: {rd5_count/total_tools:.1%}")

    # Verdict: unanimous first, then the two-tool majority, else failure.
    if rd5_count == total_tools:
        print("πŸŽ‰ PERFECT CONSENSUS - All tools find Rd5!")
        return True
    if rd5_count >= 2:
        print("βœ… MAJORITY CONSENSUS - Most tools find Rd5")
        return True
    print("❌ NO CONSENSUS - Universal corrections need refinement")
    return False
if __name__ == "__main__":
    # Script entry point: the process exit status is 0 when main() reports
    # consensus and 1 otherwise.
    success = main()
    # sys.exit is used instead of the bare exit() helper: exit() is injected
    # by the site module for interactive sessions and is not available when
    # site is disabled (e.g. `python -S`).
    sys.exit(0 if success else 1)