Final_Assignment

Running

Final_Assignment / tests /validate_rd5_consensus.py

GAIA Developer

🧪 Add comprehensive test infrastructure and async testing system

c262d1a about 1 month ago

2.1 kB

	#!/usr/bin/env python3
	"""
	Quick validation: Are all tools now finding Rd5 with universal corrections?
	"""

	import sys
	sys.path.append('.')
	from gaia_tools import (
	analyze_chess_position_manual,
	analyze_chess_with_gemini_agent,
	analyze_chess_with_checkmate_solver
	)

	def check_tool_for_rd5(
	    tool_func,
	    tool_name,
	    *,
	    image_path='downloads/cca530fc-4052-43b2-b130-b30968d8aa44.png',
	    question='black to move find winning move',
	):
	    """Run one chess tool and report whether its answer mentions 'Rd5'.

	    The tool is invoked as ``tool_func(image_path, question)``. Its result is
	    converted to text, checked for the substring ``'Rd5'``, and scanned for
	    move-like tokens, which are printed for diagnostic purposes.

	    Args:
	        tool_func: Callable taking ``(image_path, question)`` positionally.
	        tool_name: Human-readable label used in the printed report.
	        image_path: Path of the position image handed to the tool.
	        question: Prompt text handed to the tool.

	    Returns:
	        True if the tool's textual result contains 'Rd5'; False otherwise,
	        including when the tool raises.
	    """
	    import re  # local import: not provided by the file's import block

	    print(f"\n🔧 Testing {tool_name}...")
	    try:
	        result = tool_func(image_path, question)
	        # Tools may hand back None or a non-string object; normalise to text
	        # so the membership test and the regex below behave predictably.
	        text = "" if result is None else str(result)
	    except Exception as e:
	        # Deliberately broad: one failing tool must not abort the whole
	        # consensus run, it simply counts as "did not find Rd5".
	        print(f" ❌ Error: {e}")
	        return False

	    has_rd5 = 'Rd5' in text
	    print(f" Contains 'Rd5': {'✅' if has_rd5 else '❌'}")

	    # Show what moves were found. The word boundary sits *before* the
	    # optional check/mate suffix: '+' and '#' are non-word characters, so a
	    # boundary placed after them would fail whenever whitespace or end of
	    # text follows, and the suffix would be dropped from the match.
	    moves = re.findall(r'\b[NBRQK]?[a-h]?[1-8]?x?[a-h][1-8]\b[+#]?', text)
	    # dict.fromkeys de-duplicates while keeping first-seen order, so the
	    # "first 5" shown below is stable from run to run (a set is not).
	    unique_moves = list(dict.fromkeys(moves))
	    print(f" Moves found: {unique_moves[:5]}")  # Show first 5

	    return has_rd5

	def main():
	    """Run every chess tool on the test position and summarise Rd5 agreement.

	    Each tool is passed through ``check_tool_for_rd5``; the number of tools
	    whose answer contains Rd5 is printed together with the consensus rate.

	    Returns:
	        True when at least two tools (or all of them) find Rd5, else False.
	    """
	    print("🎯 VALIDATING Rd5 CONSENSUS WITH UNIVERSAL CORRECTIONS")
	    print("=" * 70)

	    # (callable, display label) pairs, evaluated in this order.
	    candidates = [
	        (analyze_chess_position_manual, "Manual Tool"),
	        (analyze_chess_with_gemini_agent, "Gemini Agent"),
	        (analyze_chess_with_checkmate_solver, "Checkmate Solver"),
	    ]
	    n_tools = len(candidates)

	    # Count the tools whose output mentions Rd5.
	    hits = sum(
	        1 for func, label in candidates if check_tool_for_rd5(func, label)
	    )

	    print("\n📊 CONSENSUS SUMMARY")
	    print("-" * 30)
	    print(f"Tools finding Rd5: {hits}/{n_tools}")
	    print(f"Consensus rate: {hits/n_tools:.1%}")

	    # Guard-style verdict: unanimous, then majority, then failure.
	    if hits == n_tools:
	        print("🎉 PERFECT CONSENSUS - All tools find Rd5!")
	        return True
	    if hits >= 2:
	        print("✅ MAJORITY CONSENSUS - Most tools find Rd5")
	        return True
	    print("❌ NO CONSENSUS - Universal corrections need refinement")
	    return False

	if __name__ == "__main__":
	    # Script entry point: translate main()'s boolean verdict into a process
	    # exit status (0 = consensus reached, 1 = no consensus).
	    success = main()
	    # Use sys.exit rather than the bare exit() helper: exit() is injected by
	    # the site module and is unavailable under `python -S` or in
	    # frozen/embedded interpreters.
	    sys.exit(0 if success else 1)