#!/usr/bin/env python3
"""
Test script to verify that the Trackio Space can properly read from the actual dataset
"""
import sys
import os
import json
import logging
from typing import Dict, Any
# Add the templates/spaces/trackio directory to the path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'templates', 'spaces', 'trackio'))
# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def test_dataset_loading():
    """Test loading experiments from the actual HF dataset via TrackioSpace.

    Returns:
        bool: True when at least one of the known test experiment IDs is
        found in the loaded dataset; False otherwise (including on any
        error, e.g. the ``app`` module being unavailable).
    """
    try:
        # Import inside the try-block so a missing/broken app module is
        # reported as a test failure instead of crashing the whole script.
        from app import TrackioSpace

        # Point at the real dataset rather than a local fixture.
        dataset_repo = "Tonic/trackio-experiments"
        hf_token = os.environ.get('HF_TOKEN')
        if not hf_token:
            logger.warning("⚠️ No HF_TOKEN found in environment. Testing with public access.")

        logger.info(f"🔧 Testing dataset loading from {dataset_repo}")
        trackio_space = TrackioSpace(hf_token=hf_token, dataset_repo=dataset_repo)

        # Sanity check: anything loaded at all?
        experiments_count = len(trackio_space.experiments)
        logger.info(f"📊 Loaded {experiments_count} experiments")
        if experiments_count == 0:
            logger.warning("⚠️ No experiments loaded - this might indicate a problem")
            return False

        # Specific experiment IDs taken from production logs.
        test_experiment_ids = [
            'exp_20250720_130853',
            'exp_20250720_134319',
            'exp_20250727_172507',
            'exp_20250727_172526'
        ]
        found_experiments = []
        for exp_id in test_experiment_ids:
            if exp_id in trackio_space.experiments:
                found_experiments.append(exp_id)
                experiment = trackio_space.experiments[exp_id]
                logger.info(f"✅ Found experiment: {exp_id}")
                logger.info(f"   Name: {experiment.get('name', 'N/A')}")
                logger.info(f"   Status: {experiment.get('status', 'N/A')}")
                logger.info(f"   Metrics count: {len(experiment.get('metrics', []))}")
                logger.info(f"   Parameters count: {len(experiment.get('parameters', {}))}")

                # Verify the stored metrics can be turned into a DataFrame.
                metrics = experiment.get('metrics', [])
                if metrics:
                    logger.info(f"   First metric entry: {metrics[0]}")
                    from app import get_metrics_dataframe
                    df = get_metrics_dataframe(exp_id)
                    if not df.empty:
                        logger.info(f"   ✅ DataFrame created successfully: {len(df)} rows, {len(df.columns)} columns")
                        logger.info(f"   Available metrics: {list(df.columns)}")
                    else:
                        logger.warning(f"   ⚠️ DataFrame is empty for {exp_id}")
                else:
                    logger.warning(f"   ⚠️ No metrics found for {exp_id}")

        logger.info(f"📊 Found {len(found_experiments)} out of {len(test_experiment_ids)} test experiments")
        if found_experiments:
            logger.info("✅ Dataset loading appears to be working correctly!")
            return True
        logger.warning("⚠️ No test experiments found - dataset loading may have issues")
        return False
    except Exception as e:
        # Best-effort test harness: report the failure but never raise.
        logger.error(f"❌ Error testing dataset loading: {e}")
        import traceback
        traceback.print_exc()
        return False
def test_metrics_parsing():
    """Test parsing metrics from the actual dataset JSON format.

    Uses a hard-coded two-entry sample copied from the real dataset and
    mirrors the DataFrame-flattening logic the Space app performs.

    Returns:
        bool: True when the sample payload parses into a non-empty
        DataFrame; False otherwise (including on any error).
    """
    try:
        # Two real metric entries from the dataset: one dict per logged step,
        # with the scalar metrics nested under the 'metrics' key.
        sample_metrics_json = '''[{"timestamp": "2025-07-20T11:20:01.780908", "step": 25, "metrics": {"loss": 1.1659, "grad_norm": 10.3125, "learning_rate": 7e-08, "num_tokens": 1642080.0, "mean_token_accuracy": 0.75923578992486, "epoch": 0.004851130919895701}}, {"timestamp": "2025-07-20T11:26:39.042155", "step": 50, "metrics": {"loss": 1.165, "grad_norm": 10.75, "learning_rate": 1.4291666666666667e-07, "num_tokens": 3324682.0, "mean_token_accuracy": 0.7577659255266189, "epoch": 0.009702261839791402}}]'''

        logger.info("🔧 Testing metrics parsing")
        metrics_list = json.loads(sample_metrics_json)
        logger.info(f"📊 Parsed {len(metrics_list)} metric entries")

        # Flatten each entry into one row: step + timestamp + the nested
        # metric scalars, exactly like the app does before plotting.
        import pandas as pd
        df_data = []
        for metric_entry in metrics_list:
            if isinstance(metric_entry, dict):
                row = {
                    'step': metric_entry.get('step', 0),
                    'timestamp': metric_entry.get('timestamp', ''),
                }
                row.update(metric_entry.get('metrics', {}))
                df_data.append(row)

        if df_data:
            df = pd.DataFrame(df_data)
            logger.info(f"✅ DataFrame created: {len(df)} rows, {len(df.columns)} columns")
            logger.info(f"📊 Columns: {list(df.columns)}")
            logger.info(f"📊 Sample data:\n{df.head()}")
            return True
        logger.warning("⚠️ No data converted to DataFrame format")
        return False
    except Exception as e:
        # Best-effort test harness: report the failure but never raise.
        logger.error(f"❌ Error testing metrics parsing: {e}")
        import traceback
        traceback.print_exc()
        return False
if __name__ == "__main__":
    logger.info("🚀 Starting Trackio dataset fix verification")

    # Run the pure-local parsing test first: it needs no network access.
    logger.info("\n" + "=" * 50)
    logger.info("TEST 1: Metrics Parsing")
    logger.info("=" * 50)
    metrics_test_passed = test_metrics_parsing()

    # Then the dataset loading test, which talks to the HF Hub.
    logger.info("\n" + "=" * 50)
    logger.info("TEST 2: Dataset Loading")
    logger.info("=" * 50)
    dataset_test_passed = test_dataset_loading()

    # Summary
    logger.info("\n" + "=" * 50)
    logger.info("TEST SUMMARY")
    logger.info("=" * 50)
    logger.info(f"Metrics Parsing: {'✅ PASSED' if metrics_test_passed else '❌ FAILED'}")
    logger.info(f"Dataset Loading: {'✅ PASSED' if dataset_test_passed else '❌ FAILED'}")

    # Exit status mirrors the overall result so CI can consume this script.
    if metrics_test_passed and dataset_test_passed:
        logger.info("🎉 All tests passed! The dataset fix should work correctly.")
        sys.exit(0)
    else:
        logger.error("❌ Some tests failed. Please check the implementation.")
        sys.exit(1)