#!/usr/bin/env python3
"""
Test script to verify that the Trackio Space can read from the real Hugging Face dataset
This test requires an HF_TOKEN environment variable to access the dataset
"""

import sys
import os
import json
import logging

# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def test_direct_dataset_access():
    """Test direct access to the Hugging Face dataset"""
    try:
        hf_token = os.environ.get('HF_TOKEN')
        
        if not hf_token:
            logger.warning("⚠️ No HF_TOKEN found. Skipping real dataset test.")
            logger.info("πŸ’‘ Set HF_TOKEN environment variable to test with real dataset")
            return False
        
        from datasets import load_dataset
        
        dataset_repo = "Tonic/trackio-experiments"
        logger.info(f"πŸ”§ Testing direct access to {dataset_repo}")
        
        # Load the dataset (token= is supported by recent `datasets` releases;
        # older versions used use_auth_token= instead)
        dataset = load_dataset(dataset_repo, token=hf_token)
        
        # Check structure
        experiment_count = len(dataset['train']) if 'train' in dataset else 0
        logger.info(f"πŸ“Š Dataset contains {experiment_count} experiments")
        
        if experiment_count == 0:
            logger.warning("⚠️ No experiments found in dataset")
            return False
        
        # Check columns
        columns = list(dataset['train'].column_names) if 'train' in dataset else []
        logger.info(f"πŸ“‹ Dataset columns: {columns}")
        
        expected_columns = ['experiment_id', 'name', 'description', 'created_at', 'status', 'metrics', 'parameters', 'artifacts', 'logs', 'last_updated']
        missing_columns = [col for col in expected_columns if col not in columns]
        
        if missing_columns:
            logger.warning(f"⚠️ Missing expected columns: {missing_columns}")
        else:
            logger.info("βœ… All expected columns present")
        
        # Test parsing a few experiments
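        # Assumed serialized shapes, inferred from the isinstance/json checks
        # below (the authoritative schema is whatever the Trackio Space writes):
        #   metrics    -> JSON string encoding a list, e.g. [{"step": 0, "metrics": {"loss": 1.2}}, ...]
        #   parameters -> JSON string encoding a dict, e.g. {"learning_rate": 5e-5, ...}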
        successful_parses = 0
        for i, row in enumerate(dataset['train']):
            if i >= 3:  # Test first 3 experiments
                break
                
            exp_id = row.get('experiment_id', 'unknown')
            logger.info(f"\nπŸ”¬ Testing experiment: {exp_id}")
            
            # Test metrics parsing
            metrics_raw = row.get('metrics', '[]')
            try:
                if isinstance(metrics_raw, str):
                    metrics = json.loads(metrics_raw)
                    if isinstance(metrics, list):
                        logger.info(f"   βœ… Metrics parsed: {len(metrics)} entries")
                        if metrics:
                            first_metric = metrics[0]
                            if 'metrics' in first_metric:
                                metric_keys = list(first_metric['metrics'].keys())
                                logger.info(f"   πŸ“Š Sample metrics: {metric_keys[:5]}...")
                        successful_parses += 1
                    else:
                        logger.warning(f"   ⚠️ Metrics is not a list: {type(metrics)}")
                else:
                    logger.warning(f"   ⚠️ Metrics is not a string: {type(metrics_raw)}")
            except json.JSONDecodeError as e:
                logger.warning(f"   ❌ Failed to parse metrics JSON: {e}")
            
            # Test parameters parsing
            parameters_raw = row.get('parameters', '{}')
            try:
                if isinstance(parameters_raw, str):
                    parameters = json.loads(parameters_raw)
                    if isinstance(parameters, dict):
                        logger.info(f"   βœ… Parameters parsed: {len(parameters)} entries")
                    else:
                        logger.warning(f"   ⚠️ Parameters is not a dict: {type(parameters)}")
                else:
                    logger.warning(f"   ⚠️ Parameters is not a string: {type(parameters_raw)}")
            except json.JSONDecodeError as e:
                logger.warning(f"   ❌ Failed to parse parameters JSON: {e}")
        
        logger.info(f"\nπŸ“‹ Successfully parsed {successful_parses} out of {min(3, experiment_count)} test experiments")
        
        return successful_parses > 0
        
    except Exception as e:
        logger.error(f"❌ Error testing direct dataset access: {e}")
        import traceback
        traceback.print_exc()
        return False

def test_trackio_space_with_real_dataset():
    """Test TrackioSpace class with real dataset"""
    try:
        hf_token = os.environ.get('HF_TOKEN')
        
        if not hf_token:
            logger.warning("⚠️ No HF_TOKEN found. Skipping TrackioSpace test with real dataset.")
            return False
        
        # Add the templates/spaces/trackio directory to the path
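        # (assumes this script lives one directory below the repo root,
        # e.g. in tests/; adjust the relative path if the layout differs)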
        sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'templates', 'spaces', 'trackio'))
        
        from app import TrackioSpace
        
        dataset_repo = "Tonic/trackio-experiments"
        logger.info(f"πŸ”§ Testing TrackioSpace with {dataset_repo}")
        
        # Create TrackioSpace instance with real credentials
        trackio_space = TrackioSpace(hf_token=hf_token, dataset_repo=dataset_repo)
        
        # Check if it loaded experiments from the dataset (not backup)
        experiments_count = len(trackio_space.experiments)
        logger.info(f"πŸ“Š TrackioSpace loaded {experiments_count} experiments")
        
        if experiments_count == 0:
            logger.warning("⚠️ TrackioSpace loaded no experiments")
            return False
        
        # Check if the dataset manager is available
        if trackio_space.dataset_manager:
            logger.info("βœ… Dataset manager is available - data preservation enabled")
        else:
            logger.warning("⚠️ Dataset manager not available - using legacy mode")
        
        # Test loading a specific experiment
        experiment_ids = list(trackio_space.experiments.keys())
        if experiment_ids:
            test_exp_id = experiment_ids[0]
            logger.info(f"πŸ”¬ Testing metrics loading for {test_exp_id}")
            
            from app import get_metrics_dataframe
            df = get_metrics_dataframe(test_exp_id)
            
            if not df.empty:
                logger.info(f"βœ… Metrics DataFrame created: {len(df)} rows, {len(df.columns)} columns")
                logger.info(f"πŸ“Š Available metrics: {list(df.columns)}")
                return True
            else:
                logger.warning(f"⚠️ Metrics DataFrame is empty for {test_exp_id}")
                return False
        else:
            logger.warning("⚠️ No experiments available for testing")
            return False
        
    except Exception as e:
        logger.error(f"❌ Error testing TrackioSpace with real dataset: {e}")
        import traceback
        traceback.print_exc()
        return False

if __name__ == "__main__":
    logger.info("πŸš€ Starting real dataset access test")
    
    # Test direct dataset access
    logger.info("\n" + "="*60)
    logger.info("TEST 1: Direct Dataset Access")
    logger.info("="*60)
    
    direct_test_passed = test_direct_dataset_access()
    
    # Test TrackioSpace with real dataset
    logger.info("\n" + "="*60)
    logger.info("TEST 2: TrackioSpace with Real Dataset")
    logger.info("="*60)
    
    trackio_test_passed = test_trackio_space_with_real_dataset()
    
    # Summary
    logger.info("\n" + "="*60)
    logger.info("TEST SUMMARY")
    logger.info("="*60)
    
    logger.info(f"Direct Dataset Access: {'βœ… PASSED' if direct_test_passed else '❌ FAILED/SKIPPED'}")
    logger.info(f"TrackioSpace Integration: {'βœ… PASSED' if trackio_test_passed else '❌ FAILED/SKIPPED'}")
    
    if direct_test_passed and trackio_test_passed:
        logger.info("πŸŽ‰ All tests passed! The dataset integration is working correctly.")
        sys.exit(0)
    elif not os.environ.get('HF_TOKEN'):
        logger.info("ℹ️ Tests skipped due to missing HF_TOKEN. Set the token to test with real dataset.")
        sys.exit(0)
    else:
        logger.error("❌ Some tests failed. Please check the implementation.")
        sys.exit(1)