|
{ |
|
"best_metric": 0.04605380445718765, |
|
"best_model_checkpoint": "/data/wheld3/byt5-base-cstop_artificial/checkpoint-200", |
|
"epoch": 428.5019607843137, |
|
"global_step": 3000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 28.5, |
|
"learning_rate": 0.0009333333333333333, |
|
"loss": 0.2563, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 28.5, |
|
"eval_exact_match": 0.03756708407871199, |
|
"eval_loss": 0.04605380445718765, |
|
"eval_runtime": 27.8305, |
|
"eval_samples_per_second": 20.086, |
|
"eval_steps_per_second": 2.515, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 57.13, |
|
"learning_rate": 0.0008666666666666667, |
|
"loss": 0.0065, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 57.13, |
|
"eval_exact_match": 0.03756708407871199, |
|
"eval_loss": 0.05634024366736412, |
|
"eval_runtime": 28.5438, |
|
"eval_samples_per_second": 19.584, |
|
"eval_steps_per_second": 2.452, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 85.63, |
|
"learning_rate": 0.0008, |
|
"loss": 0.0021, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 85.63, |
|
"eval_exact_match": 0.03577817531305903, |
|
"eval_loss": 0.05924277380108833, |
|
"eval_runtime": 28.509, |
|
"eval_samples_per_second": 19.608, |
|
"eval_steps_per_second": 2.455, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 114.25, |
|
"learning_rate": 0.0007333333333333333, |
|
"loss": 0.0013, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 114.25, |
|
"eval_exact_match": 0.03756708407871199, |
|
"eval_loss": 0.05689763277769089, |
|
"eval_runtime": 28.1931, |
|
"eval_samples_per_second": 19.828, |
|
"eval_steps_per_second": 2.483, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 142.75, |
|
"learning_rate": 0.0006666666666666666, |
|
"loss": 0.0008, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 142.75, |
|
"eval_exact_match": 0.03577817531305903, |
|
"eval_loss": 0.06747602671384811, |
|
"eval_runtime": 28.4722, |
|
"eval_samples_per_second": 19.633, |
|
"eval_steps_per_second": 2.459, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 171.38, |
|
"learning_rate": 0.0006, |
|
"loss": 0.0007, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 171.38, |
|
"eval_exact_match": 0.03935599284436494, |
|
"eval_loss": 0.06273317337036133, |
|
"eval_runtime": 28.026, |
|
"eval_samples_per_second": 19.946, |
|
"eval_steps_per_second": 2.498, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 199.88, |
|
"learning_rate": 0.0005333333333333334, |
|
"loss": 0.0004, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 199.88, |
|
"eval_exact_match": 0.03577817531305903, |
|
"eval_loss": 0.06774432212114334, |
|
"eval_runtime": 29.2197, |
|
"eval_samples_per_second": 19.131, |
|
"eval_steps_per_second": 2.396, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 228.5, |
|
"learning_rate": 0.00046666666666666666, |
|
"loss": 0.0003, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 228.5, |
|
"eval_exact_match": 0.03756708407871199, |
|
"eval_loss": 0.06500059366226196, |
|
"eval_runtime": 27.5272, |
|
"eval_samples_per_second": 20.307, |
|
"eval_steps_per_second": 2.543, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 257.13, |
|
"learning_rate": 0.0004, |
|
"loss": 0.0002, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 257.13, |
|
"eval_exact_match": 0.03935599284436494, |
|
"eval_loss": 0.06925792992115021, |
|
"eval_runtime": 27.2422, |
|
"eval_samples_per_second": 20.52, |
|
"eval_steps_per_second": 2.57, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 285.63, |
|
"learning_rate": 0.0003333333333333333, |
|
"loss": 0.0002, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 285.63, |
|
"eval_exact_match": 0.03935599284436494, |
|
"eval_loss": 0.07205212116241455, |
|
"eval_runtime": 27.1898, |
|
"eval_samples_per_second": 20.559, |
|
"eval_steps_per_second": 2.574, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 314.25, |
|
"learning_rate": 0.0002666666666666667, |
|
"loss": 0.0002, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 314.25, |
|
"eval_exact_match": 0.03756708407871199, |
|
"eval_loss": 0.0713699460029602, |
|
"eval_runtime": 27.2794, |
|
"eval_samples_per_second": 20.492, |
|
"eval_steps_per_second": 2.566, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 342.75, |
|
"learning_rate": 0.0002, |
|
"loss": 0.0002, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 342.75, |
|
"eval_exact_match": 0.03935599284436494, |
|
"eval_loss": 0.07009705901145935, |
|
"eval_runtime": 27.286, |
|
"eval_samples_per_second": 20.487, |
|
"eval_steps_per_second": 2.565, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 371.38, |
|
"learning_rate": 0.00013333333333333334, |
|
"loss": 0.0002, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 371.38, |
|
"eval_exact_match": 0.03935599284436494, |
|
"eval_loss": 0.07500004023313522, |
|
"eval_runtime": 27.1876, |
|
"eval_samples_per_second": 20.561, |
|
"eval_steps_per_second": 2.575, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 399.88, |
|
"learning_rate": 6.666666666666667e-05, |
|
"loss": 0.0001, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 399.88, |
|
"eval_exact_match": 0.03935599284436494, |
|
"eval_loss": 0.07391420006752014, |
|
"eval_runtime": 27.4245, |
|
"eval_samples_per_second": 20.383, |
|
"eval_steps_per_second": 2.552, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 428.5, |
|
"learning_rate": 0.0, |
|
"loss": 0.0001, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 428.5, |
|
"eval_exact_match": 0.03935599284436494, |
|
"eval_loss": 0.07452824711799622, |
|
"eval_runtime": 28.2025, |
|
"eval_samples_per_second": 19.821, |
|
"eval_steps_per_second": 2.482, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 428.5, |
|
"step": 3000, |
|
"total_flos": 3.592904346944225e+17, |
|
"train_loss": 0.017975078212097286, |
|
"train_runtime": 47263.2401, |
|
"train_samples_per_second": 32.499, |
|
"train_steps_per_second": 0.063 |
|
} |
|
], |
|
"max_steps": 3000, |
|
"num_train_epochs": 429, |
|
"total_flos": 3.592904346944225e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|