|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"global_step": 75, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 6e-06, |
|
"loss": 2.718, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 6e-06, |
|
"loss": 2.5793, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_accuracy": 0.27131470139518427, |
|
"eval_loss": 2.572265625, |
|
"eval_runtime": 75.3129, |
|
"eval_samples_per_second": 4.833, |
|
"eval_steps_per_second": 0.611, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 6e-06, |
|
"loss": 2.5612, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_accuracy": 0.27501824131602803, |
|
"eval_loss": 2.5, |
|
"eval_runtime": 75.9396, |
|
"eval_samples_per_second": 4.793, |
|
"eval_steps_per_second": 0.606, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 6e-06, |
|
"loss": 2.5235, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_accuracy": 0.27841775930307117, |
|
"eval_loss": 2.447265625, |
|
"eval_runtime": 75.8369, |
|
"eval_samples_per_second": 4.8, |
|
"eval_steps_per_second": 0.607, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 6e-06, |
|
"loss": 2.4961, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_accuracy": 0.2818393879762089, |
|
"eval_loss": 2.41015625, |
|
"eval_runtime": 75.7164, |
|
"eval_samples_per_second": 4.807, |
|
"eval_steps_per_second": 0.608, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 6e-06, |
|
"loss": 2.4488, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_accuracy": 0.2848740796426913, |
|
"eval_loss": 2.3671875, |
|
"eval_runtime": 75.8548, |
|
"eval_samples_per_second": 4.799, |
|
"eval_steps_per_second": 0.606, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 6e-06, |
|
"loss": 2.4121, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_accuracy": 0.2877761071926062, |
|
"eval_loss": 2.33203125, |
|
"eval_runtime": 75.8441, |
|
"eval_samples_per_second": 4.799, |
|
"eval_steps_per_second": 0.607, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 6e-06, |
|
"loss": 2.3901, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_accuracy": 0.2902580317067239, |
|
"eval_loss": 2.302734375, |
|
"eval_runtime": 75.8507, |
|
"eval_samples_per_second": 4.799, |
|
"eval_steps_per_second": 0.606, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 6e-06, |
|
"loss": 2.2845, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_accuracy": 0.29269573484865236, |
|
"eval_loss": 2.271484375, |
|
"eval_runtime": 75.7801, |
|
"eval_samples_per_second": 4.803, |
|
"eval_steps_per_second": 0.607, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 6e-06, |
|
"loss": 2.3032, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_accuracy": 0.29554248568333075, |
|
"eval_loss": 2.2421875, |
|
"eval_runtime": 75.8262, |
|
"eval_samples_per_second": 4.8, |
|
"eval_steps_per_second": 0.607, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 6e-06, |
|
"loss": 2.2954, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_accuracy": 0.2984555685762929, |
|
"eval_loss": 2.208984375, |
|
"eval_runtime": 75.8901, |
|
"eval_samples_per_second": 4.796, |
|
"eval_steps_per_second": 0.606, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6e-06, |
|
"loss": 2.3908, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_accuracy": 0.30088774404669777, |
|
"eval_loss": 2.18359375, |
|
"eval_runtime": 76.1084, |
|
"eval_samples_per_second": 4.783, |
|
"eval_steps_per_second": 0.604, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 6e-06, |
|
"loss": 2.2676, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_accuracy": 0.3035631370641431, |
|
"eval_loss": 2.150390625, |
|
"eval_runtime": 75.8379, |
|
"eval_samples_per_second": 4.8, |
|
"eval_steps_per_second": 0.607, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 75, |
|
"total_flos": 4974649540608.0, |
|
"train_loss": 2.4108121744791666, |
|
"train_runtime": 14351.1542, |
|
"train_samples_per_second": 0.042, |
|
"train_steps_per_second": 0.005 |
|
} |
|
], |
|
"max_steps": 75, |
|
"num_train_epochs": 1, |
|
"total_flos": 4974649540608.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|