#!/usr/bin/env python3
"""
Monitor GAIA test progress and provide real-time status updates
"""
import os
import time
import json
from pathlib import Path
from datetime import datetime
import argparse


def get_latest_log_file():
    """Find the most recent classification test log file"""
    log_dir = Path("logs")
    if not log_dir.exists():
        return None
    log_files = list(log_dir.glob("classification_test_*.log"))
    if not log_files:
        return None
    return max(log_files, key=lambda x: x.stat().st_mtime)


def parse_log_progress(log_file):
    """Parse log file to extract current progress"""
    if not log_file or not log_file.exists():
        return None
    try:
        with open(log_file, 'r') as f:
            lines = f.readlines()

        # Classification summary state
        classification_summary = {}
        in_summary = False

        # Testing progress state
        current_agent = None
        questions_processed = 0
        total_questions = 0
        current_question = None

        for line in lines:
            line = line.strip()

            # Classification summary section
            if "CLASSIFICATION SUMMARY:" in line:
                in_summary = True
                continue
            elif in_summary and ":" in line and "questions" in line:
                parts = line.split(":")
                if len(parts) == 2:
                    agent = parts[0].strip()
                    count_part = parts[1].strip()
                    if "(" in count_part:
                        count = int(count_part.split()[0])
                        classification_summary[agent] = count
            elif in_summary and "Testing agent types:" in line:
                in_summary = False

            # Current testing progress
            if "TESTING" in line and "AGENT" in line:
                current_agent = line.split("TESTING")[1].split("AGENT")[0].strip()
            elif "Questions to test:" in line:
                total_questions = int(line.split(":")[-1].strip())
            elif "Testing" in line and "/" in line and "]" in line:
                # Extract the current question number from the [X/Y] marker
                bracket_part = line.split("[")[1].split("]")[0]
                current_num = int(bracket_part.split("/")[0])
                questions_processed = current_num - 1  # the current question is still in progress
                current_question = line.split("Testing")[1].split("...")[0].strip()

        return {
            'log_file': str(log_file),
            'last_modified': datetime.fromtimestamp(log_file.stat().st_mtime),
            'classification_summary': classification_summary,
            'current_agent': current_agent,
            'questions_processed': questions_processed,
            'total_questions': total_questions,
            'current_question': current_question,
            'progress_percentage': (questions_processed / total_questions * 100) if total_questions > 0 else 0
        }
    except Exception as e:
        return {'error': str(e)}
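

# Illustrative only: the exact wording of the test-runner log is not shown in this
# file. The line shapes below are inferred from the parsing in parse_log_progress();
# the agent names, counts, and question ID are made-up placeholders.
#
#   CLASSIFICATION SUMMARY:
#     web: 53 questions (32.1%)
#     code: 41 questions (24.8%)
#   Testing agent types: web, code
#   ======== TESTING WEB AGENT ========
#   Questions to test: 53
#   [3/53] Testing a1b2c3d4...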


def get_latest_results():
    """Get the latest test results file"""
    result_files = list(Path(".").glob("gaia_classification_test_results_*.json"))
    if not result_files:
        return None
    latest_file = max(result_files, key=lambda x: x.stat().st_mtime)
    try:
        with open(latest_file, 'r') as f:
            data = json.load(f)
        return {
            'file': str(latest_file),
            'metadata': data.get('test_metadata', {}),
            'overall_stats': data.get('overall_stats', {}),
            'agent_performance': data.get('agent_performance', {})
        }
    except (OSError, json.JSONDecodeError):
        # Treat unreadable or malformed results files as "no results"
        return None
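

# Illustrative only: the results JSON is produced by the test script, not by this
# monitor. Based on the keys read here and in display_status(), its shape is roughly
# as sketched below; all values are made-up placeholders.
#
#   {
#     "test_metadata": {"...": "..."},
#     "overall_stats": {"success_rate": 85.0, "total_questions": 20,
#                       "successful": 17, "errors": 3},
#     "agent_performance": {"web": {"success_rate": 90.0, "successful": 9,
#                                    "total_questions": 10}}
#   }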


def display_status(progress, results, watch_mode=False):
    """Display current test status"""
    if watch_mode:
        # Clear screen in watch mode
        os.system('clear' if os.name == 'posix' else 'cls')

    print("GAIA TEST MONITORING DASHBOARD")
    print("=" * 60)
    print(f"Last Updated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

    if progress and 'error' not in progress:
        print("\nCURRENT PROGRESS:")
        print(f"  Log File: {Path(progress['log_file']).name}")
        print(f"  Last Modified: {progress['last_modified'].strftime('%H:%M:%S')}")

        if progress['current_agent']:
            print(f"\nCurrently Testing: {progress['current_agent'].upper()} AGENT")
            print(f"  Progress: {progress['questions_processed']}/{progress['total_questions']} ({progress['progress_percentage']:.1f}%)")

            # Progress bar
            bar_length = 30
            filled_length = int(bar_length * progress['progress_percentage'] / 100)
            bar = "█" * filled_length + "░" * (bar_length - filled_length)
            print(f"  Progress: [{bar}] {progress['progress_percentage']:.1f}%")

            if progress['current_question']:
                print(f"  Current Question: {progress['current_question']}...")

        if progress['classification_summary']:
            print("\nCLASSIFICATION BREAKDOWN:")
            total_questions = sum(progress['classification_summary'].values())
            for agent, count in sorted(progress['classification_summary'].items()):
                percentage = (count / total_questions) * 100 if total_questions > 0 else 0
                print(f"  {agent}: {count} questions ({percentage:.1f}%)")
    elif progress and 'error' in progress:
        print(f"\nERROR reading log file: {progress['error']}")
    else:
        print("\nNo active test logs found")

    if results:
        print("\nLATEST COMPLETED RESULTS:")
        print(f"  Results File: {Path(results['file']).name}")
        overall = results.get('overall_stats', {})
        if overall:
            print(f"  Success Rate: {overall.get('success_rate', 0):.1f}%")
            print(f"  Total Questions: {overall.get('total_questions', 0)}")
            print(f"  Successful: {overall.get('successful', 0)}")
            print(f"  Errors: {overall.get('errors', 0)}")

        agent_perf = results.get('agent_performance', {})
        if agent_perf:
            print("\nAGENT PERFORMANCE:")
            for agent, stats in sorted(agent_perf.items(), key=lambda x: x[1]['success_rate'], reverse=True):
                success_rate = stats['success_rate']
                status_emoji = "🟢" if success_rate >= 90 else "🟡" if success_rate >= 70 else "🔴"
                print(f"  {status_emoji} {agent}: {success_rate:.1f}% ({stats['successful']}/{stats['total_questions']})")

    print("\nMONITORING OPTIONS:")
    print("  Watch mode:      python tests/monitor_tests.py --watch")
    print("  Analyze results: python tests/analyze_test_results.py <results_file>")
    print("  Run new test:    python tests/test_by_classification.py --agent-types <type>")


def main():
    """Main monitoring interface"""
    parser = argparse.ArgumentParser(description="Monitor GAIA test progress")
    parser.add_argument('--watch', action='store_true', help='Watch mode (auto-refresh every 10s)')
    parser.add_argument('--interval', type=int, default=10, help='Refresh interval in seconds for watch mode')
    args = parser.parse_args()

    if args.watch:
        print("Starting watch mode... (Press Ctrl+C to stop)")
        try:
            while True:
                progress = parse_log_progress(get_latest_log_file())
                results = get_latest_results()
                display_status(progress, results, watch_mode=True)
                print(f"\nRefreshing in {args.interval}s... (Ctrl+C to stop)")
                time.sleep(args.interval)
        except KeyboardInterrupt:
            print("\nMonitoring stopped.")
    else:
        progress = parse_log_progress(get_latest_log_file())
        results = get_latest_results()
        display_status(progress, results, watch_mode=False)


if __name__ == "__main__":
    main()