{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 5.0,
  "global_step": 2260,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.11,
      "learning_rate": 4.8893805309734514e-05,
      "loss": 5.1703,
      "step": 50
    },
    {
      "epoch": 0.22,
      "learning_rate": 4.778761061946903e-05,
      "loss": 4.528,
      "step": 100
    },
    {
      "epoch": 0.33,
      "learning_rate": 4.668141592920354e-05,
      "loss": 4.2274,
      "step": 150
    },
    {
      "epoch": 0.44,
      "learning_rate": 4.5575221238938055e-05,
      "loss": 4.183,
      "step": 200
    },
    {
      "epoch": 0.55,
      "learning_rate": 4.446902654867257e-05,
      "loss": 4.0013,
      "step": 250
    },
    {
      "epoch": 0.66,
      "learning_rate": 4.3362831858407084e-05,
      "loss": 4.0381,
      "step": 300
    },
    {
      "epoch": 0.77,
      "learning_rate": 4.2256637168141596e-05,
      "loss": 3.9149,
      "step": 350
    },
    {
      "epoch": 0.88,
      "learning_rate": 4.115044247787611e-05,
      "loss": 3.9236,
      "step": 400
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.0044247787610625e-05,
      "loss": 3.9675,
      "step": 450
    },
    {
      "epoch": 1.11,
      "learning_rate": 3.893805309734514e-05,
      "loss": 3.8204,
      "step": 500
    },
    {
      "epoch": 1.22,
      "learning_rate": 3.783185840707965e-05,
      "loss": 3.833,
      "step": 550
    },
    {
      "epoch": 1.33,
      "learning_rate": 3.672566371681416e-05,
      "loss": 3.8205,
      "step": 600
    },
    {
      "epoch": 1.44,
      "learning_rate": 3.561946902654867e-05,
      "loss": 3.8247,
      "step": 650
    },
    {
      "epoch": 1.55,
      "learning_rate": 3.451327433628319e-05,
      "loss": 3.8613,
      "step": 700
    },
    {
      "epoch": 1.66,
      "learning_rate": 3.34070796460177e-05,
      "loss": 3.7906,
      "step": 750
    },
    {
      "epoch": 1.77,
      "learning_rate": 3.230088495575221e-05,
      "loss": 3.829,
      "step": 800
    },
    {
      "epoch": 1.88,
      "learning_rate": 3.119469026548672e-05,
      "loss": 3.7752,
      "step": 850
    },
    {
      "epoch": 1.99,
      "learning_rate": 3.008849557522124e-05,
      "loss": 3.7212,
      "step": 900
    },
    {
      "epoch": 2.1,
      "learning_rate": 2.8982300884955753e-05,
      "loss": 3.8136,
      "step": 950
    },
    {
      "epoch": 2.21,
      "learning_rate": 2.7876106194690264e-05,
      "loss": 3.7791,
      "step": 1000
    },
    {
      "epoch": 2.32,
      "learning_rate": 2.6769911504424782e-05,
      "loss": 3.8232,
      "step": 1050
    },
    {
      "epoch": 2.43,
      "learning_rate": 2.5663716814159294e-05,
      "loss": 3.6982,
      "step": 1100
    },
    {
      "epoch": 2.54,
      "learning_rate": 2.4557522123893805e-05,
      "loss": 3.7689,
      "step": 1150
    },
    {
      "epoch": 2.65,
      "learning_rate": 2.345132743362832e-05,
      "loss": 3.7717,
      "step": 1200
    },
    {
      "epoch": 2.77,
      "learning_rate": 2.234513274336283e-05,
      "loss": 3.6647,
      "step": 1250
    },
    {
      "epoch": 2.88,
      "learning_rate": 2.1238938053097346e-05,
      "loss": 3.6956,
      "step": 1300
    },
    {
      "epoch": 2.99,
      "learning_rate": 2.013274336283186e-05,
      "loss": 3.705,
      "step": 1350
    },
    {
      "epoch": 3.1,
      "learning_rate": 1.9026548672566372e-05,
      "loss": 3.5994,
      "step": 1400
    },
    {
      "epoch": 3.21,
      "learning_rate": 1.7920353982300887e-05,
      "loss": 3.8063,
      "step": 1450
    },
    {
      "epoch": 3.32,
      "learning_rate": 1.6814159292035402e-05,
      "loss": 3.6585,
      "step": 1500
    },
    {
      "epoch": 3.43,
      "learning_rate": 1.5707964601769913e-05,
      "loss": 3.705,
      "step": 1550
    },
    {
      "epoch": 3.54,
      "learning_rate": 1.4601769911504426e-05,
      "loss": 3.6719,
      "step": 1600
    },
    {
      "epoch": 3.65,
      "learning_rate": 1.3495575221238938e-05,
      "loss": 3.7247,
      "step": 1650
    },
    {
      "epoch": 3.76,
      "learning_rate": 1.2389380530973452e-05,
      "loss": 3.7643,
      "step": 1700
    },
    {
      "epoch": 3.87,
      "learning_rate": 1.1283185840707964e-05,
      "loss": 3.6094,
      "step": 1750
    },
    {
      "epoch": 3.98,
      "learning_rate": 1.0176991150442479e-05,
      "loss": 3.6419,
      "step": 1800
    },
    {
      "epoch": 4.09,
      "learning_rate": 9.070796460176992e-06,
      "loss": 3.6619,
      "step": 1850
    },
    {
      "epoch": 4.2,
      "learning_rate": 7.964601769911505e-06,
      "loss": 3.6914,
      "step": 1900
    },
    {
      "epoch": 4.31,
      "learning_rate": 6.858407079646018e-06,
      "loss": 3.6443,
      "step": 1950
    },
    {
      "epoch": 4.42,
      "learning_rate": 5.752212389380531e-06,
      "loss": 3.5983,
      "step": 2000
    },
    {
      "epoch": 4.54,
      "learning_rate": 4.646017699115045e-06,
      "loss": 3.7606,
      "step": 2050
    },
    {
      "epoch": 4.65,
      "learning_rate": 3.5398230088495575e-06,
      "loss": 3.6427,
      "step": 2100
    },
    {
      "epoch": 4.76,
      "learning_rate": 2.433628318584071e-06,
      "loss": 3.7245,
      "step": 2150
    },
    {
      "epoch": 4.87,
      "learning_rate": 1.3274336283185841e-06,
      "loss": 3.6501,
      "step": 2200
    },
    {
      "epoch": 4.98,
      "learning_rate": 2.2123893805309735e-07,
      "loss": 3.679,
      "step": 2250
    },
    {
      "epoch": 5.0,
      "step": 2260,
      "train_runtime": 938.0621,
      "train_samples_per_second": 2.409
    }
  ],
  "max_steps": 2260,
  "num_train_epochs": 5,
  "total_flos": 2729754170757120,
  "trial_name": null,
  "trial_params": null
}