{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.0,
  "global_step": 17910,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.06,
      "learning_rate": 1.944165270798437e-05,
      "loss": 3.7464,
      "step": 500
    },
    {
      "epoch": 0.11,
      "learning_rate": 1.8883305415968732e-05,
      "loss": 3.0711,
      "step": 1000
    },
    {
      "epoch": 0.17,
      "learning_rate": 1.83249581239531e-05,
      "loss": 2.8204,
      "step": 1500
    },
    {
      "epoch": 0.22,
      "learning_rate": 1.7766610831937466e-05,
      "loss": 2.6566,
      "step": 2000
    },
    {
      "epoch": 0.28,
      "learning_rate": 1.720826353992183e-05,
      "loss": 2.5476,
      "step": 2500
    },
    {
      "epoch": 0.34,
      "learning_rate": 1.66499162479062e-05,
      "loss": 2.4642,
      "step": 3000
    },
    {
      "epoch": 0.39,
      "learning_rate": 1.6091568955890565e-05,
      "loss": 2.3983,
      "step": 3500
    },
    {
      "epoch": 0.45,
      "learning_rate": 1.553322166387493e-05,
      "loss": 2.3347,
      "step": 4000
    },
    {
      "epoch": 0.5,
      "learning_rate": 1.4974874371859299e-05,
      "loss": 2.2982,
      "step": 4500
    },
    {
      "epoch": 0.56,
      "learning_rate": 1.4416527079843662e-05,
      "loss": 2.2627,
      "step": 5000
    },
    {
      "epoch": 0.61,
      "learning_rate": 1.385817978782803e-05,
      "loss": 2.2366,
      "step": 5500
    },
    {
      "epoch": 0.67,
      "learning_rate": 1.3299832495812398e-05,
      "loss": 2.1983,
      "step": 6000
    },
    {
      "epoch": 0.73,
      "learning_rate": 1.2741485203796761e-05,
      "loss": 2.1904,
      "step": 6500
    },
    {
      "epoch": 0.78,
      "learning_rate": 1.2183137911781128e-05,
      "loss": 2.1333,
      "step": 7000
    },
    {
      "epoch": 0.84,
      "learning_rate": 1.1624790619765495e-05,
      "loss": 2.1207,
      "step": 7500
    },
    {
      "epoch": 0.89,
      "learning_rate": 1.106644332774986e-05,
      "loss": 2.1035,
      "step": 8000
    },
    {
      "epoch": 0.95,
      "learning_rate": 1.0508096035734227e-05,
      "loss": 2.0814,
      "step": 8500
    },
    {
      "epoch": 1.0,
      "eval_loss": 1.9788250923156738,
      "eval_runtime": 99.7037,
      "eval_samples_per_second": 302.537,
      "eval_steps_per_second": 4.734,
      "step": 8955
    },
    {
      "epoch": 1.01,
      "learning_rate": 9.949748743718594e-06,
      "loss": 2.0605,
      "step": 9000
    },
    {
      "epoch": 1.06,
      "learning_rate": 9.39140145170296e-06,
      "loss": 2.0316,
      "step": 9500
    },
    {
      "epoch": 1.12,
      "learning_rate": 8.833054159687326e-06,
      "loss": 2.0237,
      "step": 10000
    },
    {
      "epoch": 1.17,
      "learning_rate": 8.274706867671693e-06,
      "loss": 2.0168,
      "step": 10500
    },
    {
      "epoch": 1.23,
      "learning_rate": 7.716359575656058e-06,
      "loss": 1.9817,
      "step": 11000
    },
    {
      "epoch": 1.28,
      "learning_rate": 7.158012283640425e-06,
      "loss": 1.9987,
      "step": 11500
    },
    {
      "epoch": 1.34,
      "learning_rate": 6.599664991624791e-06,
      "loss": 1.9769,
      "step": 12000
    },
    {
      "epoch": 1.4,
      "learning_rate": 6.041317699609157e-06,
      "loss": 1.9657,
      "step": 12500
    },
    {
      "epoch": 1.45,
      "learning_rate": 5.482970407593524e-06,
      "loss": 1.9676,
      "step": 13000
    },
    {
      "epoch": 1.51,
      "learning_rate": 4.92462311557789e-06,
      "loss": 1.9505,
      "step": 13500
    },
    {
      "epoch": 1.56,
      "learning_rate": 4.366275823562256e-06,
      "loss": 1.9629,
      "step": 14000
    },
    {
      "epoch": 1.62,
      "learning_rate": 3.8079285315466224e-06,
      "loss": 1.9709,
      "step": 14500
    },
    {
      "epoch": 1.68,
      "learning_rate": 3.2495812395309884e-06,
      "loss": 1.9377,
      "step": 15000
    },
    {
      "epoch": 1.73,
      "learning_rate": 2.691233947515355e-06,
      "loss": 1.9388,
      "step": 15500
    },
    {
      "epoch": 1.79,
      "learning_rate": 2.132886655499721e-06,
      "loss": 1.9248,
      "step": 16000
    },
    {
      "epoch": 1.84,
      "learning_rate": 1.5745393634840873e-06,
      "loss": 1.9169,
      "step": 16500
    },
    {
      "epoch": 1.9,
      "learning_rate": 1.0161920714684535e-06,
      "loss": 1.9248,
      "step": 17000
    },
    {
      "epoch": 1.95,
      "learning_rate": 4.5784477945281974e-07,
      "loss": 1.9237,
      "step": 17500
    },
    {
      "epoch": 2.0,
      "eval_loss": 1.8554816246032715,
      "eval_runtime": 89.1269,
      "eval_samples_per_second": 338.439,
      "eval_steps_per_second": 5.296,
      "step": 17910
    },
    {
      "epoch": 2.0,
      "step": 17910,
      "total_flos": 3.772502476406784e+16,
      "train_loss": 2.197303864837425,
      "train_runtime": 7444.9411,
      "train_samples_per_second": 153.96,
      "train_steps_per_second": 2.406
    }
  ],
  "max_steps": 17910,
  "num_train_epochs": 2,
  "total_flos": 3.772502476406784e+16,
  "trial_name": null,
  "trial_params": null
}