|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 9.997356828193833, |
|
"global_step": 2830, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 9.307420494699646e-05, |
|
"loss": 0.5254, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_accuracy": 0.8732394576072693, |
|
"eval_loss": 0.30324122309684753, |
|
"eval_runtime": 39.6845, |
|
"eval_samples_per_second": 14.313, |
|
"eval_steps_per_second": 7.156, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 8.604240282685513e-05, |
|
"loss": 0.5153, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"eval_accuracy": 0.9049295783042908, |
|
"eval_loss": 0.28999415040016174, |
|
"eval_runtime": 40.176, |
|
"eval_samples_per_second": 14.138, |
|
"eval_steps_per_second": 7.069, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 7.897526501766784e-05, |
|
"loss": 0.2832, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"eval_accuracy": 0.9419013857841492, |
|
"eval_loss": 0.24485430121421814, |
|
"eval_runtime": 43.1522, |
|
"eval_samples_per_second": 13.163, |
|
"eval_steps_per_second": 6.581, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 7.190812720848057e-05, |
|
"loss": 0.2435, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"eval_accuracy": 0.9436619877815247, |
|
"eval_loss": 0.18418200314044952, |
|
"eval_runtime": 40.2247, |
|
"eval_samples_per_second": 14.121, |
|
"eval_steps_per_second": 7.06, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 6.48409893992933e-05, |
|
"loss": 0.2056, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"eval_accuracy": 0.952464759349823, |
|
"eval_loss": 0.17370758950710297, |
|
"eval_runtime": 43.6775, |
|
"eval_samples_per_second": 13.004, |
|
"eval_steps_per_second": 6.502, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"learning_rate": 5.7773851590106006e-05, |
|
"loss": 0.1485, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"eval_accuracy": 0.966549277305603, |
|
"eval_loss": 0.13336466252803802, |
|
"eval_runtime": 38.8727, |
|
"eval_samples_per_second": 14.612, |
|
"eval_steps_per_second": 7.306, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"learning_rate": 5.0706713780918735e-05, |
|
"loss": 0.1717, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"eval_accuracy": 0.9700704216957092, |
|
"eval_loss": 0.12312730401754379, |
|
"eval_runtime": 43.1241, |
|
"eval_samples_per_second": 13.171, |
|
"eval_steps_per_second": 6.586, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 5.65, |
|
"learning_rate": 4.367491166077738e-05, |
|
"loss": 0.1274, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 5.65, |
|
"eval_accuracy": 0.9700704216957092, |
|
"eval_loss": 0.12308631092309952, |
|
"eval_runtime": 39.3033, |
|
"eval_samples_per_second": 14.452, |
|
"eval_steps_per_second": 7.226, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 6.36, |
|
"learning_rate": 3.660777385159011e-05, |
|
"loss": 0.1268, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 6.36, |
|
"eval_accuracy": 0.9718309640884399, |
|
"eval_loss": 0.114546038210392, |
|
"eval_runtime": 38.8418, |
|
"eval_samples_per_second": 14.623, |
|
"eval_steps_per_second": 7.312, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"learning_rate": 2.954063604240283e-05, |
|
"loss": 0.1395, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"eval_accuracy": 0.9771126508712769, |
|
"eval_loss": 0.09564723074436188, |
|
"eval_runtime": 41.3738, |
|
"eval_samples_per_second": 13.728, |
|
"eval_steps_per_second": 6.864, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 7.77, |
|
"learning_rate": 2.247349823321555e-05, |
|
"loss": 0.0993, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 7.77, |
|
"eval_accuracy": 0.98591548204422, |
|
"eval_loss": 0.05968547984957695, |
|
"eval_runtime": 41.7972, |
|
"eval_samples_per_second": 13.589, |
|
"eval_steps_per_second": 6.795, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 8.48, |
|
"learning_rate": 1.5406360424028267e-05, |
|
"loss": 0.0785, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 8.48, |
|
"eval_accuracy": 0.9788732528686523, |
|
"eval_loss": 0.10253579914569855, |
|
"eval_runtime": 40.093, |
|
"eval_samples_per_second": 14.167, |
|
"eval_steps_per_second": 7.084, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 9.19, |
|
"learning_rate": 8.33922261484099e-06, |
|
"loss": 0.089, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 9.19, |
|
"eval_accuracy": 0.9788732528686523, |
|
"eval_loss": 0.10597831010818481, |
|
"eval_runtime": 37.5593, |
|
"eval_samples_per_second": 15.123, |
|
"eval_steps_per_second": 7.561, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 9.89, |
|
"learning_rate": 1.2720848056537103e-06, |
|
"loss": 0.0796, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 9.89, |
|
"eval_accuracy": 0.9806337952613831, |
|
"eval_loss": 0.09218709170818329, |
|
"eval_runtime": 40.4161, |
|
"eval_samples_per_second": 14.054, |
|
"eval_steps_per_second": 7.027, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 2830, |
|
"total_flos": 1.2045174996823788e+18, |
|
"train_loss": 0.20077389284915723, |
|
"train_runtime": 3673.7626, |
|
"train_samples_per_second": 6.179, |
|
"train_steps_per_second": 0.77 |
|
} |
|
], |
|
"max_steps": 2830, |
|
"num_train_epochs": 10, |
|
"total_flos": 1.2045174996823788e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|