|
{ |
|
"best_metric": 0.0727909728884697, |
|
"best_model_checkpoint": "/data/wheld3/mt5-base-pointer-adv-cstop_artificial/checkpoint-400", |
|
"epoch": 187.49805447470817, |
|
"global_step": 3000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 12.5, |
|
"learning_rate": 0.0009333333333333333, |
|
"loss": 1.7423, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 12.5, |
|
"eval_exact_match": 0.23971377459749552, |
|
"eval_loss": 0.11730749905109406, |
|
"eval_runtime": 27.8131, |
|
"eval_samples_per_second": 20.098, |
|
"eval_steps_per_second": 2.517, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"learning_rate": 0.0008666666666666667, |
|
"loss": 0.3678, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_exact_match": 0.3363148479427549, |
|
"eval_loss": 0.0727909728884697, |
|
"eval_runtime": 29.0466, |
|
"eval_samples_per_second": 19.245, |
|
"eval_steps_per_second": 2.41, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 37.5, |
|
"learning_rate": 0.0008, |
|
"loss": 0.3202, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 37.5, |
|
"eval_exact_match": 0.33810375670840787, |
|
"eval_loss": 0.08794570714235306, |
|
"eval_runtime": 29.1974, |
|
"eval_samples_per_second": 19.146, |
|
"eval_steps_per_second": 2.397, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"learning_rate": 0.0007333333333333333, |
|
"loss": 0.3452, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_exact_match": 0.3363148479427549, |
|
"eval_loss": 0.09075574576854706, |
|
"eval_runtime": 28.6647, |
|
"eval_samples_per_second": 19.501, |
|
"eval_steps_per_second": 2.442, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 62.5, |
|
"learning_rate": 0.0006666666666666666, |
|
"loss": 0.3099, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 62.5, |
|
"eval_exact_match": 0.3434704830053667, |
|
"eval_loss": 0.10556001961231232, |
|
"eval_runtime": 28.9715, |
|
"eval_samples_per_second": 19.295, |
|
"eval_steps_per_second": 2.416, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"learning_rate": 0.0006, |
|
"loss": 0.3057, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_exact_match": 0.3470483005366726, |
|
"eval_loss": 0.11086518317461014, |
|
"eval_runtime": 29.2047, |
|
"eval_samples_per_second": 19.141, |
|
"eval_steps_per_second": 2.397, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 87.5, |
|
"learning_rate": 0.0005333333333333334, |
|
"loss": 0.3045, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 87.5, |
|
"eval_exact_match": 0.34525939177101966, |
|
"eval_loss": 0.1273432970046997, |
|
"eval_runtime": 29.0031, |
|
"eval_samples_per_second": 19.274, |
|
"eval_steps_per_second": 2.414, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"learning_rate": 0.00046666666666666666, |
|
"loss": 0.3052, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_exact_match": 0.3416815742397138, |
|
"eval_loss": 0.10654404759407043, |
|
"eval_runtime": 28.7998, |
|
"eval_samples_per_second": 19.41, |
|
"eval_steps_per_second": 2.431, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 112.5, |
|
"learning_rate": 0.0004, |
|
"loss": 0.3037, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 112.5, |
|
"eval_exact_match": 0.33810375670840787, |
|
"eval_loss": 0.13873372972011566, |
|
"eval_runtime": 29.2765, |
|
"eval_samples_per_second": 19.094, |
|
"eval_steps_per_second": 2.391, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 125.0, |
|
"learning_rate": 0.0003333333333333333, |
|
"loss": 0.3036, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 125.0, |
|
"eval_exact_match": 0.34525939177101966, |
|
"eval_loss": 0.1421414017677307, |
|
"eval_runtime": 28.8117, |
|
"eval_samples_per_second": 19.402, |
|
"eval_steps_per_second": 2.43, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 137.5, |
|
"learning_rate": 0.0002666666666666667, |
|
"loss": 0.3023, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 137.5, |
|
"eval_exact_match": 0.33989266547406083, |
|
"eval_loss": 0.16489343345165253, |
|
"eval_runtime": 30.9662, |
|
"eval_samples_per_second": 18.052, |
|
"eval_steps_per_second": 2.261, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 150.0, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3028, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 150.0, |
|
"eval_exact_match": 0.33989266547406083, |
|
"eval_loss": 0.1573849618434906, |
|
"eval_runtime": 29.0042, |
|
"eval_samples_per_second": 19.273, |
|
"eval_steps_per_second": 2.413, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 162.5, |
|
"learning_rate": 0.00013333333333333334, |
|
"loss": 0.3025, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 162.5, |
|
"eval_exact_match": 0.33989266547406083, |
|
"eval_loss": 0.15625949203968048, |
|
"eval_runtime": 28.8424, |
|
"eval_samples_per_second": 19.381, |
|
"eval_steps_per_second": 2.427, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 175.0, |
|
"learning_rate": 6.666666666666667e-05, |
|
"loss": 0.3017, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 175.0, |
|
"eval_exact_match": 0.33989266547406083, |
|
"eval_loss": 0.1589040458202362, |
|
"eval_runtime": 28.7706, |
|
"eval_samples_per_second": 19.43, |
|
"eval_steps_per_second": 2.433, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 187.5, |
|
"learning_rate": 0.0, |
|
"loss": 0.302, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 187.5, |
|
"eval_exact_match": 0.3416815742397138, |
|
"eval_loss": 0.15874968469142914, |
|
"eval_runtime": 33.2641, |
|
"eval_samples_per_second": 16.805, |
|
"eval_steps_per_second": 2.104, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 187.5, |
|
"step": 3000, |
|
"total_flos": 6.474719775139762e+16, |
|
"train_loss": 0.4079610900878906, |
|
"train_runtime": 34544.7098, |
|
"train_samples_per_second": 44.464, |
|
"train_steps_per_second": 0.087 |
|
} |
|
], |
|
"max_steps": 3000, |
|
"num_train_epochs": 188, |
|
"total_flos": 6.474719775139762e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|