|
{ |
|
"best_metric": 0.7351907898564811, |
|
"best_model_checkpoint": "training_dir/checkpoint-10000", |
|
"epoch": 1.5659254619480114, |
|
"eval_steps": 1000, |
|
"global_step": 10000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.15659254619480112, |
|
"grad_norm": 4.369892120361328, |
|
"learning_rate": 9.92108585858586e-06, |
|
"loss": 0.726, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.15659254619480112, |
|
"eval_accuracy": 0.652534965034965, |
|
"eval_f1": 0.6156117064442015, |
|
"eval_loss": 0.9217989444732666, |
|
"eval_precision": 0.647833453696251, |
|
"eval_recall": 0.652534965034965, |
|
"eval_runtime": 30.8162, |
|
"eval_samples_per_second": 74.247, |
|
"eval_steps_per_second": 9.281, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.31318509238960224, |
|
"grad_norm": 10.090314865112305, |
|
"learning_rate": 9.763257575757577e-06, |
|
"loss": 0.5078, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.31318509238960224, |
|
"eval_accuracy": 0.6984265734265734, |
|
"eval_f1": 0.6840354165768472, |
|
"eval_loss": 0.8569145798683167, |
|
"eval_precision": 0.7035176110711845, |
|
"eval_recall": 0.6984265734265734, |
|
"eval_runtime": 29.8277, |
|
"eval_samples_per_second": 76.707, |
|
"eval_steps_per_second": 9.588, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.46977763858440336, |
|
"grad_norm": 2.5105936527252197, |
|
"learning_rate": 9.605429292929293e-06, |
|
"loss": 0.4747, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.46977763858440336, |
|
"eval_accuracy": 0.7076048951048951, |
|
"eval_f1": 0.6892053083671648, |
|
"eval_loss": 0.8822671175003052, |
|
"eval_precision": 0.7144457563342712, |
|
"eval_recall": 0.7076048951048951, |
|
"eval_runtime": 29.6497, |
|
"eval_samples_per_second": 77.168, |
|
"eval_steps_per_second": 9.646, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.6263701847792045, |
|
"grad_norm": 28.96643829345703, |
|
"learning_rate": 9.44760101010101e-06, |
|
"loss": 0.4593, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.6263701847792045, |
|
"eval_accuracy": 0.7338286713286714, |
|
"eval_f1": 0.727562941019853, |
|
"eval_loss": 0.7618293762207031, |
|
"eval_precision": 0.7330094463964066, |
|
"eval_recall": 0.7338286713286714, |
|
"eval_runtime": 29.7317, |
|
"eval_samples_per_second": 76.955, |
|
"eval_steps_per_second": 9.619, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.7829627309740056, |
|
"grad_norm": 5.809261798858643, |
|
"learning_rate": 9.289772727272728e-06, |
|
"loss": 0.4486, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.7829627309740056, |
|
"eval_accuracy": 0.7351398601398601, |
|
"eval_f1": 0.7293047058493298, |
|
"eval_loss": 0.8104275465011597, |
|
"eval_precision": 0.7355000090280587, |
|
"eval_recall": 0.7351398601398601, |
|
"eval_runtime": 29.7412, |
|
"eval_samples_per_second": 76.93, |
|
"eval_steps_per_second": 9.616, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.9395552771688067, |
|
"grad_norm": 16.398216247558594, |
|
"learning_rate": 9.131944444444445e-06, |
|
"loss": 0.4562, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.9395552771688067, |
|
"eval_accuracy": 0.728583916083916, |
|
"eval_f1": 0.722523941260205, |
|
"eval_loss": 0.764935314655304, |
|
"eval_precision": 0.7390044648352858, |
|
"eval_recall": 0.728583916083916, |
|
"eval_runtime": 29.8199, |
|
"eval_samples_per_second": 76.727, |
|
"eval_steps_per_second": 9.591, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.096147823363608, |
|
"grad_norm": 4.596695899963379, |
|
"learning_rate": 8.974116161616161e-06, |
|
"loss": 0.3993, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.096147823363608, |
|
"eval_accuracy": 0.7198426573426573, |
|
"eval_f1": 0.7041969146725503, |
|
"eval_loss": 1.0100624561309814, |
|
"eval_precision": 0.7315013162404153, |
|
"eval_recall": 0.7198426573426573, |
|
"eval_runtime": 29.6299, |
|
"eval_samples_per_second": 77.219, |
|
"eval_steps_per_second": 9.652, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.252740369558409, |
|
"grad_norm": 2.7841145992279053, |
|
"learning_rate": 8.816287878787879e-06, |
|
"loss": 0.3901, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.252740369558409, |
|
"eval_accuracy": 0.7185314685314685, |
|
"eval_f1": 0.6970213656627479, |
|
"eval_loss": 1.013152003288269, |
|
"eval_precision": 0.7291871168086037, |
|
"eval_recall": 0.7185314685314685, |
|
"eval_runtime": 29.6033, |
|
"eval_samples_per_second": 77.289, |
|
"eval_steps_per_second": 9.661, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.4093329157532102, |
|
"grad_norm": 5.9810404777526855, |
|
"learning_rate": 8.658459595959596e-06, |
|
"loss": 0.3809, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.4093329157532102, |
|
"eval_accuracy": 0.7211538461538461, |
|
"eval_f1": 0.7055943588116754, |
|
"eval_loss": 0.9052737355232239, |
|
"eval_precision": 0.7224815370590899, |
|
"eval_recall": 0.7211538461538461, |
|
"eval_runtime": 30.2216, |
|
"eval_samples_per_second": 75.707, |
|
"eval_steps_per_second": 9.463, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.5659254619480114, |
|
"grad_norm": 9.159900665283203, |
|
"learning_rate": 8.500631313131314e-06, |
|
"loss": 0.3932, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.5659254619480114, |
|
"eval_accuracy": 0.7443181818181818, |
|
"eval_f1": 0.7351907898564811, |
|
"eval_loss": 0.9013388156890869, |
|
"eval_precision": 0.7458339781734145, |
|
"eval_recall": 0.7443181818181818, |
|
"eval_runtime": 29.7576, |
|
"eval_samples_per_second": 76.888, |
|
"eval_steps_per_second": 9.611, |
|
"step": 10000 |
|
} |
|
], |
|
"logging_steps": 1000, |
|
"max_steps": 63860, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.0072339831839852e+16, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|