#!/usr/bin/env python3
"""
Diagnostic script for Trackio Space issues.

Helps debug dataset loading and API client issues. Run it directly; the
process exit status is 0 when both the dataset-manager check and the
TrackioSpace check pass, and 1 otherwise.
"""

import os
import sys
import logging

# Add src directory to path so that `dataset_utils` and `app` can be imported
# by the checks below.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'src'))
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'templates', 'spaces', 'trackio'))

# Setup logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)


def _mask_value(name, value):
    """Return *value* in a form that is safe to write to the log.

    Values of variables whose name contains ``TOKEN`` are treated as secrets:
    long values are reduced to their first 8 characters plus ``...``; values
    of 8 characters or fewer are replaced by ``***`` entirely, because a
    prefix of a short secret would reveal all of it. Other values are
    returned unchanged.
    """
    if 'TOKEN' not in name:
        return value
    if len(value) > 8:
        return value[:8] + '...'
    return '***'


def test_dataset_manager():
    """Test dataset manager functionality.

    Imports ``TrackioDatasetManager`` from ``dataset_utils``, loads the
    existing experiments and upserts one sample experiment.

    NOTE: this writes a ``test_diagnostic_*`` record through
    ``upsert_experiment`` to the repo named by ``TRACKIO_DATASET_REPO``
    (default ``tonic/trackio-experiments``); the record is not removed
    afterwards.

    Returns:
        True when the upsert reports success; False when ``HF_TOKEN`` is
        missing, the import fails, the upsert reports failure, or any other
        exception is raised.
    """
    try:
        from dataset_utils import TrackioDatasetManager

        # Test with environment variables
        hf_token = os.environ.get('HF_TOKEN')
        dataset_repo = os.environ.get('TRACKIO_DATASET_REPO', 'tonic/trackio-experiments')

        if not hf_token:
            logger.warning("⚠️ HF_TOKEN not found in environment")
            return False

        logger.info(f"🔧 Testing dataset manager with repo: {dataset_repo}")

        # Initialize dataset manager
        manager = TrackioDatasetManager(dataset_repo, hf_token)

        # Test loading experiments
        experiments = manager.load_existing_experiments()
        logger.info(f"📊 Loaded {len(experiments)} experiments from dataset")

        # Test creating a sample experiment; the random suffix keeps repeated
        # runs from reusing the same experiment_id.
        random_suffix = int.from_bytes(os.urandom(4), 'big')
        sample_experiment = {
            'experiment_id': f'test_diagnostic_{random_suffix}',
            'name': 'Diagnostic Test Experiment',
            'description': 'Test experiment created by diagnostic script',
            'created_at': '2025-01-27T12:00:00',
            'status': 'completed',
            'metrics': '[]',
            'parameters': '{"test": true}',
            'artifacts': '[]',
            'logs': '[]',
            'last_updated': '2025-01-27T12:00:00',
        }

        # Test upsert functionality
        logger.info("🧪 Testing experiment upsert...")
        success = manager.upsert_experiment(sample_experiment)

        if not success:
            logger.error("❌ Failed to upsert test experiment")
            return False

        logger.info("✅ Dataset manager working correctly")

        # Verify the experiment was saved
        experiments_after = manager.load_existing_experiments()
        logger.info(f"📊 After upsert: {len(experiments_after)} experiments")
        return True

    except ImportError as e:
        logger.error(f"❌ Failed to import dataset_utils: {e}")
        return False
    except Exception as e:
        # Top-level boundary of a diagnostic: report the failure, don't crash.
        logger.error(f"❌ Dataset manager test failed: {e}")
        return False


def test_trackio_space():
    """Test TrackioSpace initialization.

    Imports ``TrackioSpace`` from ``app``, instantiates it with no arguments
    and logs its ``experiments`` count, ``dataset_manager``, ``hf_token``
    (presence only) and ``dataset_repo`` attributes.

    Returns:
        True when the instance was created and inspected; False when the
        import fails or any other exception is raised.
    """
    try:
        # Import the TrackioSpace class
        from app import TrackioSpace

        logger.info("🧪 Testing TrackioSpace initialization...")

        # Initialize TrackioSpace
        space = TrackioSpace()

        logger.info(f"📊 TrackioSpace initialized with {len(space.experiments)} experiments")
        logger.info(f"🛡️ Dataset manager available: {'Yes' if space.dataset_manager else 'No'}")
        logger.info(f"🔑 HF Token available: {'Yes' if space.hf_token else 'No'}")
        logger.info(f"📂 Dataset repo: {space.dataset_repo}")

        return True

    except ImportError as e:
        logger.error(f"❌ Failed to import TrackioSpace: {e}")
        return False
    except Exception as e:
        # Top-level boundary of a diagnostic: report the failure, don't crash.
        logger.error(f"❌ TrackioSpace test failed: {e}")
        return False


def test_environment():
    """Test environment configuration.

    Logs whether ``HF_TOKEN``, ``TRACKIO_DATASET_REPO``, ``TRACKIO_URL`` and
    ``SPACE_ID`` are set (token values are masked), and whether ``SPACE_ID``
    is present, which this script takes to mean "running on HF Spaces".

    Returns:
        Always True; this check is informational only.
    """
    logger.info("🔍 Checking environment configuration...")

    # Check required environment variables
    env_vars = {
        'HF_TOKEN': os.environ.get('HF_TOKEN'),
        'TRACKIO_DATASET_REPO': os.environ.get('TRACKIO_DATASET_REPO'),
        'TRACKIO_URL': os.environ.get('TRACKIO_URL'),
        'SPACE_ID': os.environ.get('SPACE_ID'),
    }

    for var, value in env_vars.items():
        if value:
            logger.info(f"✅ {var}: {_mask_value(var, value)}")
        else:
            logger.warning(f"⚠️ {var}: Not set")

    # Check if running on HF Spaces
    is_hf_spaces = bool(os.environ.get('SPACE_ID'))
    logger.info(f"🚀 Running on HF Spaces: {'Yes' if is_hf_spaces else 'No'}")

    return True


# These are diagnostic checks, not unit tests: they return booleans and
# test_dataset_manager() writes to the remote dataset. Opt them out of pytest
# collection in case this module is ever picked up by a test run.
test_dataset_manager.__test__ = False
test_trackio_space.__test__ = False
test_environment.__test__ = False


def fix_common_issues():
    """Suggest fixes for common issues.

    Logs a warning when ``TRACKIO_DATASET_REPO`` contains no ``/`` and when
    ``TRACKIO_URL`` starts with a doubled scheme (``https://https://`` or
    ``http://http://``), together with the de-duplicated URL. Nothing is
    modified; the function only reports. Returns None.
    """
    logger.info("💡 Common issue fixes:")

    # Check dataset repository format
    dataset_repo = os.environ.get('TRACKIO_DATASET_REPO', 'tonic/trackio-experiments')
    if '/' not in dataset_repo:
        logger.warning(f"⚠️ Dataset repo format issue: {dataset_repo} should be 'username/dataset-name'")
    else:
        logger.info(f"✅ Dataset repo format looks good: {dataset_repo}")

    # Check for URL issues
    trackio_url = os.environ.get('TRACKIO_URL', 'https://tonic-test-trackio-test.hf.space')
    if trackio_url.startswith(('https://https://', 'http://http://')):
        logger.warning(f"⚠️ URL format issue detected: {trackio_url}")
        fixed_url = trackio_url.replace('https://https://', 'https://').replace('http://http://', 'http://')
        logger.info(f"💡 Fixed URL should be: {fixed_url}")
    else:
        logger.info(f"✅ Trackio URL format looks good: {trackio_url}")


def main():
    """Run all diagnostic tests.

    Runs the environment, dataset-manager and TrackioSpace checks, prints the
    common-issue hints and logs a summary.

    Returns:
        True when both the dataset-manager and TrackioSpace checks passed;
        False when either reported issues or the script itself raised.
    """
    logger.info("🔧 Starting Trackio Space diagnostics...")
    logger.info("=" * 60)

    try:
        # Test environment
        test_environment()
        logger.info("-" * 40)

        # Test dataset manager
        dataset_manager_ok = test_dataset_manager()
        logger.info("-" * 40)

        # Test TrackioSpace
        trackio_space_ok = test_trackio_space()
        logger.info("-" * 40)

        # Suggest fixes
        fix_common_issues()
        logger.info("-" * 40)

        # Summary
        logger.info("📋 DIAGNOSTIC SUMMARY:")
        logger.info(f"Dataset Manager: {'✅ OK' if dataset_manager_ok else '❌ Issues'}")
        logger.info(f"TrackioSpace: {'✅ OK' if trackio_space_ok else '❌ Issues'}")

        all_ok = bool(dataset_manager_ok and trackio_space_ok)
        if all_ok:
            logger.info("🎉 All systems appear to be working correctly!")
            logger.info("💡 The issues in the logs might be related to:")
            logger.info("   - Empty dataset (expected for new setup)")
            logger.info("   - API client URL formatting (being auto-fixed)")
            logger.info("   - Remote data access (falling back to local data)")
        else:
            logger.warning("⚠️ Some issues detected. Check the logs above for details.")

    except Exception as e:
        logger.error(f"❌ Diagnostic script failed: {e}")
        return False

    return all_ok


if __name__ == "__main__":
    # Propagate the result so shells/CI can detect a failed diagnostic run.
    sys.exit(0 if main() else 1)