|
{ |
|
"best_metric": 0.7785388127853882, |
|
"best_model_checkpoint": "tmp/tst-translation355/checkpoint-840", |
|
"epoch": 10.0, |
|
"global_step": 1050, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.2054794520547945, |
|
"eval_loss": 0.2730105221271515, |
|
"eval_runtime": 13.9745, |
|
"eval_samples_per_second": 31.343, |
|
"eval_steps_per_second": 2.648, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.4132420091324201, |
|
"eval_loss": 0.2166765183210373, |
|
"eval_runtime": 13.9525, |
|
"eval_samples_per_second": 31.392, |
|
"eval_steps_per_second": 2.652, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.6598173515981736, |
|
"eval_loss": 0.16653937101364136, |
|
"eval_runtime": 16.3239, |
|
"eval_samples_per_second": 26.832, |
|
"eval_steps_per_second": 2.267, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.6940639269406392, |
|
"eval_loss": 0.1442675143480301, |
|
"eval_runtime": 13.707, |
|
"eval_samples_per_second": 31.954, |
|
"eval_steps_per_second": 2.699, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"learning_rate": 2.6190476190476192e-05, |
|
"loss": 0.3779, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.7146118721461188, |
|
"eval_loss": 0.1320880502462387, |
|
"eval_runtime": 14.2394, |
|
"eval_samples_per_second": 30.76, |
|
"eval_steps_per_second": 2.598, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.7579908675799086, |
|
"eval_loss": 0.11835604161024094, |
|
"eval_runtime": 14.2589, |
|
"eval_samples_per_second": 30.718, |
|
"eval_steps_per_second": 2.595, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.7579908675799086, |
|
"eval_loss": 0.119329072535038, |
|
"eval_runtime": 14.1651, |
|
"eval_samples_per_second": 30.921, |
|
"eval_steps_per_second": 2.612, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.7785388127853882, |
|
"eval_loss": 0.14183764159679413, |
|
"eval_runtime": 14.017, |
|
"eval_samples_per_second": 31.248, |
|
"eval_steps_per_second": 2.64, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.7602739726027398, |
|
"eval_loss": 0.1450866013765335, |
|
"eval_runtime": 14.0858, |
|
"eval_samples_per_second": 31.095, |
|
"eval_steps_per_second": 2.627, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 9.52, |
|
"learning_rate": 2.3809523809523808e-06, |
|
"loss": 0.0787, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.7625570776255708, |
|
"eval_loss": 0.148654505610466, |
|
"eval_runtime": 15.3383, |
|
"eval_samples_per_second": 28.556, |
|
"eval_steps_per_second": 2.412, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 1050, |
|
"total_flos": 8482350845952000.0, |
|
"train_loss": 0.2194767295746576, |
|
"train_runtime": 810.0029, |
|
"train_samples_per_second": 15.481, |
|
"train_steps_per_second": 1.296 |
|
} |
|
], |
|
"max_steps": 1050, |
|
"num_train_epochs": 10, |
|
"total_flos": 8482350845952000.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|