|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 6.991735537190083, |
|
"global_step": 560, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 0.00016428571428571428, |
|
"loss": 1.4242, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"eval_accuracy": 0.6776859760284424, |
|
"eval_loss": 0.9407395124435425, |
|
"eval_runtime": 15.4142, |
|
"eval_samples_per_second": 7.85, |
|
"eval_steps_per_second": 3.957, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 0.00012857142857142858, |
|
"loss": 0.884, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"eval_accuracy": 0.6528925895690918, |
|
"eval_loss": 0.8700841665267944, |
|
"eval_runtime": 23.5759, |
|
"eval_samples_per_second": 5.132, |
|
"eval_steps_per_second": 2.587, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 9.357142857142858e-05, |
|
"loss": 0.5967, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"eval_accuracy": 0.8181818127632141, |
|
"eval_loss": 0.7293241620063782, |
|
"eval_runtime": 15.1537, |
|
"eval_samples_per_second": 7.985, |
|
"eval_steps_per_second": 4.025, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"learning_rate": 5.785714285714287e-05, |
|
"loss": 0.3024, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"eval_accuracy": 0.8595041036605835, |
|
"eval_loss": 0.5951272249221802, |
|
"eval_runtime": 23.2801, |
|
"eval_samples_per_second": 5.198, |
|
"eval_steps_per_second": 2.62, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 6.25, |
|
"learning_rate": 2.214285714285714e-05, |
|
"loss": 0.1382, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 6.25, |
|
"eval_accuracy": 0.9090909361839294, |
|
"eval_loss": 0.3326501250267029, |
|
"eval_runtime": 23.5413, |
|
"eval_samples_per_second": 5.14, |
|
"eval_steps_per_second": 2.591, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"step": 560, |
|
"total_flos": 5.1637585435657395e+17, |
|
"train_loss": 0.6040960873876299, |
|
"train_runtime": 1484.0947, |
|
"train_samples_per_second": 2.278, |
|
"train_steps_per_second": 0.377 |
|
} |
|
], |
|
"max_steps": 560, |
|
"num_train_epochs": 7, |
|
"total_flos": 5.1637585435657395e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|