|
{ |
|
"best_metric": 6387.95458984375, |
|
"best_model_checkpoint": "./coco_outputs/checkpoint-80", |
|
"epoch": 20.0, |
|
"global_step": 80, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 6394.3193359375, |
|
"eval_runtime": 0.6785, |
|
"eval_samples_per_second": 11.791, |
|
"eval_steps_per_second": 1.474, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 6393.3564453125, |
|
"eval_runtime": 0.625, |
|
"eval_samples_per_second": 12.799, |
|
"eval_steps_per_second": 1.6, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 0.000175, |
|
"loss": 6173.6062, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 6390.9482421875, |
|
"eval_runtime": 0.6116, |
|
"eval_samples_per_second": 13.08, |
|
"eval_steps_per_second": 1.635, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 6389.5869140625, |
|
"eval_runtime": 0.5503, |
|
"eval_samples_per_second": 14.538, |
|
"eval_steps_per_second": 1.817, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 0.00015000000000000001, |
|
"loss": 6287.2859, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 6388.72802734375, |
|
"eval_runtime": 0.5904, |
|
"eval_samples_per_second": 13.55, |
|
"eval_steps_per_second": 1.694, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 6388.6220703125, |
|
"eval_runtime": 0.5392, |
|
"eval_samples_per_second": 14.837, |
|
"eval_steps_per_second": 1.855, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 6388.43994140625, |
|
"eval_runtime": 0.5365, |
|
"eval_samples_per_second": 14.91, |
|
"eval_steps_per_second": 1.864, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"learning_rate": 0.000125, |
|
"loss": 6224.3344, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 6388.2119140625, |
|
"eval_runtime": 0.5405, |
|
"eval_samples_per_second": 14.802, |
|
"eval_steps_per_second": 1.85, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 6388.544921875, |
|
"eval_runtime": 0.5344, |
|
"eval_samples_per_second": 14.971, |
|
"eval_steps_per_second": 1.871, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 0.0001, |
|
"loss": 6083.1363, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 6388.326171875, |
|
"eval_runtime": 0.5236, |
|
"eval_samples_per_second": 15.28, |
|
"eval_steps_per_second": 1.91, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_loss": 6390.20556640625, |
|
"eval_runtime": 0.5417, |
|
"eval_samples_per_second": 14.767, |
|
"eval_steps_per_second": 1.846, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": 6388.66064453125, |
|
"eval_runtime": 0.5437, |
|
"eval_samples_per_second": 14.713, |
|
"eval_steps_per_second": 1.839, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 12.5, |
|
"learning_rate": 7.500000000000001e-05, |
|
"loss": 6260.7426, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_loss": 6388.1484375, |
|
"eval_runtime": 0.5303, |
|
"eval_samples_per_second": 15.086, |
|
"eval_steps_per_second": 1.886, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_loss": 6388.0478515625, |
|
"eval_runtime": 0.5365, |
|
"eval_samples_per_second": 14.913, |
|
"eval_steps_per_second": 1.864, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 5e-05, |
|
"loss": 6377.543, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_loss": 6388.0302734375, |
|
"eval_runtime": 0.5355, |
|
"eval_samples_per_second": 14.94, |
|
"eval_steps_per_second": 1.867, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_loss": 6388.1337890625, |
|
"eval_runtime": 0.5423, |
|
"eval_samples_per_second": 14.751, |
|
"eval_steps_per_second": 1.844, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_loss": 6388.0595703125, |
|
"eval_runtime": 0.5448, |
|
"eval_samples_per_second": 14.684, |
|
"eval_steps_per_second": 1.835, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 17.5, |
|
"learning_rate": 2.5e-05, |
|
"loss": 6318.3168, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_loss": 6388.01953125, |
|
"eval_runtime": 0.5448, |
|
"eval_samples_per_second": 14.685, |
|
"eval_steps_per_second": 1.836, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_loss": 6387.9638671875, |
|
"eval_runtime": 0.5362, |
|
"eval_samples_per_second": 14.918, |
|
"eval_steps_per_second": 1.865, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 0.0, |
|
"loss": 6258.2133, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_loss": 6387.95458984375, |
|
"eval_runtime": 0.544, |
|
"eval_samples_per_second": 14.705, |
|
"eval_steps_per_second": 1.838, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 80, |
|
"total_flos": 3.060008460288e+17, |
|
"train_loss": 6247.897314453125, |
|
"train_runtime": 365.3078, |
|
"train_samples_per_second": 1.752, |
|
"train_steps_per_second": 0.219 |
|
} |
|
], |
|
"max_steps": 80, |
|
"num_train_epochs": 20, |
|
"total_flos": 3.060008460288e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|