Spaces:
Running
Running
#!/usr/bin/env python3
"""
Test script for Trackio interface
Demonstrates how to use the enhanced monitoring interface
"""
import requests | |
import json | |
import time | |
from datetime import datetime | |
def _simulate_step_metrics(
    index: int,
    step: int,
    total_steps: int,
    base_loss: float = 2.5,
    base_lr: float = 3.5e-6,
) -> dict:
    """Return the synthetic metrics dict for one logged training step.

    Args:
        index: Zero-based position of this step among the logged steps; it
            drives the small periodic "noise" terms.
        step: Training step number being logged.
        total_steps: Step count used to turn ``step`` into a 0..1 progress
            fraction.
        base_loss: Loss value at progress 0 before the noise term is added.
        base_lr: Learning rate before any decay is applied.

    Returns:
        A dict with rounded ``loss``, ``accuracy``, ``learning_rate``,
        ``gpu_memory_gb``, ``gpu_utilization_percent``,
        ``training_time_per_step`` and the raw ``step``.
    """
    progress = step / total_steps
    remaining = 1 - progress

    # Loss falls linearly towards 10% of base_loss; the noise term cycles
    # through -1, 0, +1 and shrinks as training progresses.
    loss = base_loss * (0.1 + 0.9 * remaining) + 0.1 * remaining * (index % 3 - 1)
    # Accuracy rises linearly with a small alternating bump.
    accuracy = 0.2 + 0.7 * progress + 0.05 * (index % 2)
    # Stepwise decay: multiply by 0.9 once every 200 steps.
    lr = base_lr * (0.9 ** (step // 200))
    gpu_memory = 20 + 5 * (0.8 + 0.2 * (index % 4) / 4)
    gpu_utilization = 85 + 10 * (index % 3 - 1)
    training_time = 0.4 + 0.2 * (index % 2)

    return {
        "loss": round(loss, 4),
        "accuracy": round(accuracy, 4),
        "learning_rate": round(lr, 8),
        "gpu_memory_gb": round(gpu_memory, 2),
        "gpu_utilization_percent": round(gpu_utilization, 1),
        "training_time_per_step": round(training_time, 3),
        "step": step,
    }


def test_trackio_interface(
    *,
    trackio_url: str = "https://tonic-test-trackio-test.hf.space",
    total_steps: int = 1000,
    log_interval: int = 50,
    step_delay: float = 0.1,
) -> None:
    """Test the Trackio interface with realistic SmolLM3 training data.

    This is a dry run: no HTTP request is sent. The function prints what
    would be logged (experiment creation, parameters, simulated per-step
    metrics, final results and status) to stdout.

    Args:
        trackio_url: Trackio Space URL shown in the closing summary
            (replace with your actual URL).
        total_steps: Number of training steps to simulate; also reported in
            the final results and the checkpoint path.
        log_interval: Metrics are emitted every ``log_interval`` steps,
            starting at step 0.
        step_delay: Seconds to sleep after each logged step to simulate
            processing time; values <= 0 disable the pause.

    Raises:
        ValueError: If ``total_steps`` or ``log_interval`` is not positive.
    """
    if total_steps <= 0 or log_interval <= 0:
        raise ValueError("total_steps and log_interval must be positive")

    # NOTE(review): the leading "π" / "β" glyphs in the messages below look
    # like mis-decoded emoji; they are reproduced as found - confirm the
    # intended characters and the file encoding.
    print("π Testing Trackio Interface")
    print("=" * 50)

    # Step 1: Create an experiment
    print("\n1. Creating experiment...")
    experiment_name = "smollm3_openhermes_fr_balanced_test"
    # For demonstration the API calls are simulated. A real client would send
    # an HTTP request to the Trackio Space carrying the name and a
    # description such as "SmolLM3 fine-tuning on OpenHermes-FR dataset with
    # balanced A100 configuration".
    print(f"β Created experiment: {experiment_name}")
    experiment_id = f"exp_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
    print(f" Experiment ID: {experiment_id}")

    # Step 2: Log parameters
    print("\n2. Logging experiment parameters...")
    parameters = {
        "model_name": "HuggingFaceTB/SmolLM3-3B",
        "dataset_name": "legmlai/openhermes-fr",
        "batch_size": 8,
        "gradient_accumulation_steps": 16,
        "effective_batch_size": 128,
        "learning_rate": 3.5e-6,
        "max_iters": 18000,
        "max_seq_length": 12288,
        "mixed_precision": "bf16",
        "use_flash_attention": True,
        "use_gradient_checkpointing": False,
        "optimizer": "adamw_torch",
        "scheduler": "cosine",
        "warmup_steps": 1200,
        "save_steps": 2000,
        "eval_steps": 1000,
        "logging_steps": 25,
        "no_think_system_message": True,
    }
    print("β Logged parameters:")
    for key, value in parameters.items():
        print(f" {key}: {value}")

    # Step 3: Simulate training metrics
    print("\n3. Simulating training metrics...")
    for index, step in enumerate(range(0, total_steps, log_interval)):
        metrics = _simulate_step_metrics(
            index,
            step,
            total_steps,
            base_lr=parameters["learning_rate"],
        )
        print(
            f" Step {step}: Loss={metrics['loss']:.4f}, "
            f"Accuracy={metrics['accuracy']:.4f}, "
            f"LR={metrics['learning_rate']:.2e}"
        )
        # In reality this would be an HTTP POST to f"{trackio_url}/log_metrics"
        # with a JSON body of {"experiment_id", "metrics", "step"}.
        if step_delay > 0:
            time.sleep(step_delay)  # Simulate processing time

    # Step 4: Log final results
    print("\n4. Logging final results...")
    final_results = {
        "final_loss": 0.234,
        "final_accuracy": 0.892,
        "total_training_time_hours": 4.5,
        "total_steps": total_steps,
        "model_size_gb": 6.2,
        "training_completed": True,
        "checkpoint_path": f"./outputs/balanced/checkpoint-{total_steps}",
    }
    print("β Final results:")
    for key, value in final_results.items():
        print(f" {key}: {value}")

    # Step 5: Update experiment status
    print("\n5. Updating experiment status...")
    status = "completed"
    print(f"β Experiment status updated to: {status}")

    print("\n" + "=" * 50)
    print("π Test completed successfully!")
    print(f"π View your experiment at: {trackio_url}")
    print(f"π Experiment ID: {experiment_id}")
    print("\nNext steps:")
    print("1. Visit your Trackio Space")
    print("2. Go to 'View Experiments' tab")
    print("3. Enter the experiment ID to see details")
    print("4. Go to 'Visualizations' tab to see plots")
    print("5. Use 'Demo Data' tab to generate more test data")
def show_interface_features() -> None:
    """Show what features are available in the enhanced interface.

    Prints a fixed feature checklist followed by a short numbered guide for
    using the interface with SmolLM3 training. Takes no input and returns
    nothing; the only effect is text written to stdout.
    """
    # NOTE(review): the leading "π" / "β" glyphs look like mis-decoded emoji;
    # they are reproduced as found - confirm the intended characters.
    print("\nπ Enhanced Trackio Interface Features")
    print("=" * 50)

    # Fixed checklist, so an immutable tuple is enough.
    features = (
        "β Create experiments with detailed descriptions",
        "β Log comprehensive training parameters",
        "β Real-time metrics visualization with Plotly",
        "β Multiple metric types: loss, accuracy, learning rate, GPU metrics",
        "β Experiment comparison across multiple runs",
        "β Demo data generation for testing",
        "β Formatted experiment details with emojis and structure",
        "β Status tracking (running, completed, failed, paused)",
        "β Interactive plots with hover information",
        "β Comprehensive experiment overview with statistics",
    )
    for feature in features:
        print(feature)

    print("\nπ― How to use with your SmolLM3 training:")
    print("1. Start your training with the monitoring enabled")
    print("2. Visit your Trackio Space during training")
    print("3. Watch real-time loss curves and metrics")
    print("4. Compare different training runs")
    print("5. Track GPU utilization and system metrics")
if __name__ == "__main__":
    # Script entry point: run the simulated logging walkthrough first, then
    # print the feature summary.
    test_trackio_interface()
    show_interface_features()