tinystories / training_metadata.json
jacksuuuu's picture
Update to checkpoint 20000 (best quality, loss 0.758)
582d1f9 verified
raw
history blame contribute delete
586 Bytes
{
"model_name": "nanoGPT-MLX-53M-FineWebEdu",
"framework": "MLX",
"architecture": "Pre-LN Transformer (GPT-2 style)",
"training": {
"dataset": "FineWebEdu-10M",
"iterations": 20000,
"final_loss": 0.7583,
"optimizer": "AdamW",
"learning_rate": 0.0006,
"batch_size": 16,
"context_length": 512
},
"model_config": {
"vocab_size": 50257,
"d_model": 384,
"n_layers": 8,
"n_heads": 8,
"d_ff": 1536,
"dropout": 0.1
},
"parameters": "52.99M",
"converted_from": "MLX checkpoint_20000.npz",
"conversion_date": "2025-11-14"
}