|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"global_step": 12897, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.612312940994031e-06, |
|
"loss": 0.5637, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 9.22462588198806e-06, |
|
"loss": 0.4, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 8.83693882298209e-06, |
|
"loss": 0.3797, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 8.44925176397612e-06, |
|
"loss": 0.3645, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 8.061564704970149e-06, |
|
"loss": 0.3484, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 7.673877645964178e-06, |
|
"loss": 0.3472, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 7.286190586958209e-06, |
|
"loss": 0.3356, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 6.898503527952237e-06, |
|
"loss": 0.3341, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 6.5108164689462675e-06, |
|
"loss": 0.3156, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 6.123129409940297e-06, |
|
"loss": 0.3061, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 5.735442350934326e-06, |
|
"loss": 0.3022, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 5.347755291928356e-06, |
|
"loss": 0.2965, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 4.960068232922385e-06, |
|
"loss": 0.2934, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 4.572381173916415e-06, |
|
"loss": 0.2979, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 4.184694114910445e-06, |
|
"loss": 0.2915, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 3.797007055904474e-06, |
|
"loss": 0.2914, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 3.4093199968985036e-06, |
|
"loss": 0.2909, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 3.0216329378925334e-06, |
|
"loss": 0.2711, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 2.6339458788865628e-06, |
|
"loss": 0.2674, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 2.2462588198805926e-06, |
|
"loss": 0.2683, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 1.858571760874622e-06, |
|
"loss": 0.2631, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 1.4708847018686518e-06, |
|
"loss": 0.2637, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 1.0831976428626812e-06, |
|
"loss": 0.2643, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 6.955105838567109e-07, |
|
"loss": 0.2689, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 3.078235248507405e-07, |
|
"loss": 0.2695, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 12897, |
|
"total_flos": 3.159950420124426e+17, |
|
"train_runtime": 16177.7165, |
|
"train_samples_per_second": 0.797 |
|
} |
|
], |
|
"max_steps": 12897, |
|
"num_train_epochs": 3, |
|
"total_flos": 3.159950420124426e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|