|
{ |
|
"best_metric": 0.02550993673503399, |
|
"best_model_checkpoint": "/data/wheld3/mt5-base-pointer-adv-top_v2/checkpoint-3000", |
|
"epoch": 6.160056430151656, |
|
"global_step": 3000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0009333333333333333, |
|
"loss": 2.2938, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_exact_match": 0.0011655011655011655, |
|
"eval_loss": 0.5532176494598389, |
|
"eval_runtime": 914.7564, |
|
"eval_samples_per_second": 18.759, |
|
"eval_steps_per_second": 2.345, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.0008666666666666667, |
|
"loss": 0.671, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_exact_match": 0.16095571095571096, |
|
"eval_loss": 0.16243064403533936, |
|
"eval_runtime": 902.0916, |
|
"eval_samples_per_second": 19.022, |
|
"eval_steps_per_second": 2.378, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 0.0008, |
|
"loss": 0.5276, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"eval_exact_match": 0.21567599067599066, |
|
"eval_loss": 0.06916385143995285, |
|
"eval_runtime": 882.9446, |
|
"eval_samples_per_second": 19.435, |
|
"eval_steps_per_second": 2.429, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 0.0007333333333333333, |
|
"loss": 0.4196, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"eval_exact_match": 0.22587412587412586, |
|
"eval_loss": 0.04908030480146408, |
|
"eval_runtime": 882.5419, |
|
"eval_samples_per_second": 19.444, |
|
"eval_steps_per_second": 2.43, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 0.0006666666666666666, |
|
"loss": 0.3593, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"eval_exact_match": 0.22913752913752913, |
|
"eval_loss": 0.03997402638196945, |
|
"eval_runtime": 888.3883, |
|
"eval_samples_per_second": 19.316, |
|
"eval_steps_per_second": 2.414, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 0.0006, |
|
"loss": 0.3471, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"eval_exact_match": 0.22966200466200465, |
|
"eval_loss": 0.03349088877439499, |
|
"eval_runtime": 876.9206, |
|
"eval_samples_per_second": 19.568, |
|
"eval_steps_per_second": 2.446, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 0.0005333333333333334, |
|
"loss": 0.3416, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"eval_exact_match": 0.23175990675990676, |
|
"eval_loss": 0.03065803460776806, |
|
"eval_runtime": 880.241, |
|
"eval_samples_per_second": 19.495, |
|
"eval_steps_per_second": 2.437, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"learning_rate": 0.00046666666666666666, |
|
"loss": 0.3351, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"eval_exact_match": 0.23344988344988346, |
|
"eval_loss": 0.030744880437850952, |
|
"eval_runtime": 884.2271, |
|
"eval_samples_per_second": 19.407, |
|
"eval_steps_per_second": 2.426, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 0.0004, |
|
"loss": 0.3316, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"eval_exact_match": 0.23432400932400932, |
|
"eval_loss": 0.029669322073459625, |
|
"eval_runtime": 876.7173, |
|
"eval_samples_per_second": 19.573, |
|
"eval_steps_per_second": 2.447, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 0.0003333333333333333, |
|
"loss": 0.3312, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"eval_exact_match": 0.23444055944055944, |
|
"eval_loss": 0.02815121039748192, |
|
"eval_runtime": 885.0693, |
|
"eval_samples_per_second": 19.388, |
|
"eval_steps_per_second": 2.424, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"learning_rate": 0.0002666666666666667, |
|
"loss": 0.3271, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"eval_exact_match": 0.23648018648018648, |
|
"eval_loss": 0.02621879242360592, |
|
"eval_runtime": 890.2789, |
|
"eval_samples_per_second": 19.275, |
|
"eval_steps_per_second": 2.409, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 4.93, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3241, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 4.93, |
|
"eval_exact_match": 0.23653846153846153, |
|
"eval_loss": 0.02629098668694496, |
|
"eval_runtime": 876.3961, |
|
"eval_samples_per_second": 19.58, |
|
"eval_steps_per_second": 2.448, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 5.34, |
|
"learning_rate": 0.00013333333333333334, |
|
"loss": 0.3227, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 5.34, |
|
"eval_exact_match": 0.23677156177156178, |
|
"eval_loss": 0.02586781419813633, |
|
"eval_runtime": 890.1947, |
|
"eval_samples_per_second": 19.277, |
|
"eval_steps_per_second": 2.41, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 5.75, |
|
"learning_rate": 6.666666666666667e-05, |
|
"loss": 0.3201, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 5.75, |
|
"eval_exact_match": 0.23653846153846153, |
|
"eval_loss": 0.025654641911387444, |
|
"eval_runtime": 876.5182, |
|
"eval_samples_per_second": 19.577, |
|
"eval_steps_per_second": 2.447, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 6.16, |
|
"learning_rate": 0.0, |
|
"loss": 0.3227, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 6.16, |
|
"eval_exact_match": 0.23653846153846153, |
|
"eval_loss": 0.02550993673503399, |
|
"eval_runtime": 881.7574, |
|
"eval_samples_per_second": 19.461, |
|
"eval_steps_per_second": 2.433, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 6.16, |
|
"step": 3000, |
|
"total_flos": 8.2913691900475e+16, |
|
"train_loss": 0.5049722137451171, |
|
"train_runtime": 47265.062, |
|
"train_samples_per_second": 32.498, |
|
"train_steps_per_second": 0.063 |
|
} |
|
], |
|
"max_steps": 3000, |
|
"num_train_epochs": 7, |
|
"total_flos": 8.2913691900475e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|