# SmolFactory / tests / test_trackio_interface.py
# Tonic's picture
# adds formatting fix
# ebe598e verified
#!/usr/bin/env python3
"""
Test script for Trackio interface
Demonstrates how to use the enhanced monitoring interface
"""
import requests
import json
import time
from datetime import datetime
def _simulate_metrics(step, index, total_steps=1000, base_loss=2.5, base_lr=3.5e-6):
    """Build one synthetic training-metrics record for the demo run.

    Args:
        step: Training step the record belongs to.
        index: Position of the record in the logged sequence. It drives the
            small periodic "noise" terms, so the output is deterministic.
        total_steps: Step count treated as 100% progress.
        base_loss: Loss scale at progress 0, before the noise term.
        base_lr: Learning rate before any decay is applied.

    Returns:
        A dict with rounded ``loss``, ``accuracy``, ``learning_rate``,
        ``gpu_memory_gb``, ``gpu_utilization_percent`` and
        ``training_time_per_step`` values, plus the raw ``step``.
    """
    progress = step / total_steps
    remaining = 1 - progress

    # Loss falls linearly from base_loss to 10% of it, with a wobble of at
    # most +/-0.1 that shrinks as the run progresses.
    loss = base_loss * (0.1 + 0.9 * remaining) + 0.1 * remaining * (index % 3 - 1)
    # Accuracy rises with progress, alternating a +0.05 bump every other record.
    accuracy = 0.2 + 0.7 * progress + 0.05 * (index % 2)
    # Stepwise decay: the rate is multiplied by 0.9 every 200 steps.
    learning_rate = base_lr * (0.9 ** (step // 200))
    gpu_memory = 20 + 5 * (0.8 + 0.2 * (index % 4) / 4)
    gpu_utilization = 85 + 10 * (index % 3 - 1)
    training_time = 0.4 + 0.2 * (index % 2)

    return {
        "loss": round(loss, 4),
        "accuracy": round(accuracy, 4),
        "learning_rate": round(learning_rate, 8),
        "gpu_memory_gb": round(gpu_memory, 2),
        "gpu_utilization_percent": round(gpu_utilization, 1),
        "training_time_per_step": round(training_time, 3),
        "step": step,
    }


def test_trackio_interface(
    trackio_url="https://tonic-test-trackio-test.hf.space",
    step_delay=0.1,
):
    """Walk through a simulated SmolLM3 training run as Trackio would see it.

    Nothing is sent over the network: every "API call" is simulated and the
    values are only printed to stdout. The walkthrough creates an experiment
    ID, prints the training parameters, prints 20 synthetic metric records
    (steps 0..950, every 50 steps), prints final results and a completion
    status, and ends with instructions for viewing the run.

    Both parameters have defaults, so calling the function without arguments
    behaves as before. (Because the name starts with ``test_`` pytest will
    collect it; parameters with defaults are not resolved as fixtures.)

    Args:
        trackio_url: URL of the Trackio Space shown in the closing message.
        step_delay: Seconds to pause after each metric record to mimic
            processing time. Values <= 0 skip the pause entirely.
    """
    total_steps = 1000
    log_interval = 50
    rule = "=" * 50

    print("🚀 Testing Trackio Interface")
    print(rule)

    # Step 1: Create an experiment.
    # For demonstration the API calls are simulated; in reality these would
    # be HTTP requests to the Trackio Space, carrying a description such as
    # "SmolLM3 fine-tuning on OpenHermes-FR dataset with balanced A100
    # configuration".
    print("\n1. Creating experiment...")
    experiment_name = "smollm3_openhermes_fr_balanced_test"
    print(f"✅ Created experiment: {experiment_name}")
    experiment_id = f"exp_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
    print(f" Experiment ID: {experiment_id}")

    # Step 2: Log parameters.
    print("\n2. Logging experiment parameters...")
    parameters = {
        "model_name": "HuggingFaceTB/SmolLM3-3B",
        "dataset_name": "legmlai/openhermes-fr",
        "batch_size": 8,
        "gradient_accumulation_steps": 16,
        "effective_batch_size": 128,
        "learning_rate": 3.5e-6,
        "max_iters": 18000,
        "max_seq_length": 12288,
        "mixed_precision": "bf16",
        "use_flash_attention": True,
        "use_gradient_checkpointing": False,
        "optimizer": "adamw_torch",
        "scheduler": "cosine",
        "warmup_steps": 1200,
        "save_steps": 2000,
        "eval_steps": 1000,
        "logging_steps": 25,
        "no_think_system_message": True,
    }
    print("✅ Logged parameters:")
    for key, value in parameters.items():
        print(f" {key}: {value}")

    # Step 3: Simulate training metrics, one record every `log_interval` steps.
    print("\n3. Simulating training metrics...")
    for index, step in enumerate(range(0, total_steps, log_interval)):
        metrics = _simulate_metrics(
            step,
            index,
            total_steps=total_steps,
            base_lr=parameters["learning_rate"],
        )
        print(
            f" Step {step}: Loss={metrics['loss']:.4f}, "
            f"Accuracy={metrics['accuracy']:.4f}, "
            f"LR={metrics['learning_rate']:.2e}"
        )
        # In reality, this would be an HTTP POST to your Trackio Space:
        #   requests.post(f"{trackio_url}/log_metrics", json={
        #       "experiment_id": experiment_id,
        #       "metrics": metrics,
        #       "step": step,
        #   })
        if step_delay > 0:
            time.sleep(step_delay)  # Simulate processing time

    # Step 4: Log final results.
    print("\n4. Logging final results...")
    final_results = {
        "final_loss": 0.234,
        "final_accuracy": 0.892,
        "total_training_time_hours": 4.5,
        "total_steps": total_steps,
        "model_size_gb": 6.2,
        "training_completed": True,
        "checkpoint_path": f"./outputs/balanced/checkpoint-{total_steps}",
    }
    print("✅ Final results:")
    for key, value in final_results.items():
        print(f" {key}: {value}")

    # Step 5: Update experiment status.
    print("\n5. Updating experiment status...")
    status = "completed"
    print(f"✅ Experiment status updated to: {status}")

    print("\n" + rule)
    print("🎉 Test completed successfully!")
    print(f"📊 View your experiment at: {trackio_url}")
    print(f"🔍 Experiment ID: {experiment_id}")
    print("\nNext steps:")
    print("1. Visit your Trackio Space")
    print("2. Go to 'View Experiments' tab")
    print("3. Enter the experiment ID to see details")
    print("4. Go to 'Visualizations' tab to see plots")
    print("5. Use 'Demo Data' tab to generate more test data")
def show_interface_features():
    """Print the feature list of the enhanced Trackio interface.

    Writes a titled, check-marked list of features to stdout, followed by a
    five-step guide for using the interface during SmolLM3 training.
    Takes no arguments and returns ``None``.
    """
    features = (
        "Create experiments with detailed descriptions",
        "Log comprehensive training parameters",
        "Real-time metrics visualization with Plotly",
        "Multiple metric types: loss, accuracy, learning rate, GPU metrics",
        "Experiment comparison across multiple runs",
        "Demo data generation for testing",
        "Formatted experiment details with emojis and structure",
        "Status tracking (running, completed, failed, paused)",
        "Interactive plots with hover information",
        "Comprehensive experiment overview with statistics",
    )
    usage_steps = (
        "Start your training with the monitoring enabled",
        "Visit your Trackio Space during training",
        "Watch real-time loss curves and metrics",
        "Compare different training runs",
        "Track GPU utilization and system metrics",
    )

    print("\n📊 Enhanced Trackio Interface Features")
    print("=" * 50)
    for feature in features:
        print(f"✅ {feature}")

    print("\n🎯 How to use with your SmolLM3 training:")
    for number, instruction in enumerate(usage_steps, start=1):
        print(f"{number}. {instruction}")
def main():
    """Run the simulated Trackio walkthrough, then print the feature summary."""
    test_trackio_interface()
    show_interface_features()


if __name__ == "__main__":
    main()