Francesco0101's picture
Training in progress, step 10000, checkpoint
80576b8 verified
raw
history blame
No virus
6.35 kB
{
"best_metric": 0.7351907898564811,
"best_model_checkpoint": "training_dir/checkpoint-10000",
"epoch": 1.5659254619480114,
"eval_steps": 1000,
"global_step": 10000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.15659254619480112,
"grad_norm": 4.369892120361328,
"learning_rate": 9.92108585858586e-06,
"loss": 0.726,
"step": 1000
},
{
"epoch": 0.15659254619480112,
"eval_accuracy": 0.652534965034965,
"eval_f1": 0.6156117064442015,
"eval_loss": 0.9217989444732666,
"eval_precision": 0.647833453696251,
"eval_recall": 0.652534965034965,
"eval_runtime": 30.8162,
"eval_samples_per_second": 74.247,
"eval_steps_per_second": 9.281,
"step": 1000
},
{
"epoch": 0.31318509238960224,
"grad_norm": 10.090314865112305,
"learning_rate": 9.763257575757577e-06,
"loss": 0.5078,
"step": 2000
},
{
"epoch": 0.31318509238960224,
"eval_accuracy": 0.6984265734265734,
"eval_f1": 0.6840354165768472,
"eval_loss": 0.8569145798683167,
"eval_precision": 0.7035176110711845,
"eval_recall": 0.6984265734265734,
"eval_runtime": 29.8277,
"eval_samples_per_second": 76.707,
"eval_steps_per_second": 9.588,
"step": 2000
},
{
"epoch": 0.46977763858440336,
"grad_norm": 2.5105936527252197,
"learning_rate": 9.605429292929293e-06,
"loss": 0.4747,
"step": 3000
},
{
"epoch": 0.46977763858440336,
"eval_accuracy": 0.7076048951048951,
"eval_f1": 0.6892053083671648,
"eval_loss": 0.8822671175003052,
"eval_precision": 0.7144457563342712,
"eval_recall": 0.7076048951048951,
"eval_runtime": 29.6497,
"eval_samples_per_second": 77.168,
"eval_steps_per_second": 9.646,
"step": 3000
},
{
"epoch": 0.6263701847792045,
"grad_norm": 28.96643829345703,
"learning_rate": 9.44760101010101e-06,
"loss": 0.4593,
"step": 4000
},
{
"epoch": 0.6263701847792045,
"eval_accuracy": 0.7338286713286714,
"eval_f1": 0.727562941019853,
"eval_loss": 0.7618293762207031,
"eval_precision": 0.7330094463964066,
"eval_recall": 0.7338286713286714,
"eval_runtime": 29.7317,
"eval_samples_per_second": 76.955,
"eval_steps_per_second": 9.619,
"step": 4000
},
{
"epoch": 0.7829627309740056,
"grad_norm": 5.809261798858643,
"learning_rate": 9.289772727272728e-06,
"loss": 0.4486,
"step": 5000
},
{
"epoch": 0.7829627309740056,
"eval_accuracy": 0.7351398601398601,
"eval_f1": 0.7293047058493298,
"eval_loss": 0.8104275465011597,
"eval_precision": 0.7355000090280587,
"eval_recall": 0.7351398601398601,
"eval_runtime": 29.7412,
"eval_samples_per_second": 76.93,
"eval_steps_per_second": 9.616,
"step": 5000
},
{
"epoch": 0.9395552771688067,
"grad_norm": 16.398216247558594,
"learning_rate": 9.131944444444445e-06,
"loss": 0.4562,
"step": 6000
},
{
"epoch": 0.9395552771688067,
"eval_accuracy": 0.728583916083916,
"eval_f1": 0.722523941260205,
"eval_loss": 0.764935314655304,
"eval_precision": 0.7390044648352858,
"eval_recall": 0.728583916083916,
"eval_runtime": 29.8199,
"eval_samples_per_second": 76.727,
"eval_steps_per_second": 9.591,
"step": 6000
},
{
"epoch": 1.096147823363608,
"grad_norm": 4.596695899963379,
"learning_rate": 8.974116161616161e-06,
"loss": 0.3993,
"step": 7000
},
{
"epoch": 1.096147823363608,
"eval_accuracy": 0.7198426573426573,
"eval_f1": 0.7041969146725503,
"eval_loss": 1.0100624561309814,
"eval_precision": 0.7315013162404153,
"eval_recall": 0.7198426573426573,
"eval_runtime": 29.6299,
"eval_samples_per_second": 77.219,
"eval_steps_per_second": 9.652,
"step": 7000
},
{
"epoch": 1.252740369558409,
"grad_norm": 2.7841145992279053,
"learning_rate": 8.816287878787879e-06,
"loss": 0.3901,
"step": 8000
},
{
"epoch": 1.252740369558409,
"eval_accuracy": 0.7185314685314685,
"eval_f1": 0.6970213656627479,
"eval_loss": 1.013152003288269,
"eval_precision": 0.7291871168086037,
"eval_recall": 0.7185314685314685,
"eval_runtime": 29.6033,
"eval_samples_per_second": 77.289,
"eval_steps_per_second": 9.661,
"step": 8000
},
{
"epoch": 1.4093329157532102,
"grad_norm": 5.9810404777526855,
"learning_rate": 8.658459595959596e-06,
"loss": 0.3809,
"step": 9000
},
{
"epoch": 1.4093329157532102,
"eval_accuracy": 0.7211538461538461,
"eval_f1": 0.7055943588116754,
"eval_loss": 0.9052737355232239,
"eval_precision": 0.7224815370590899,
"eval_recall": 0.7211538461538461,
"eval_runtime": 30.2216,
"eval_samples_per_second": 75.707,
"eval_steps_per_second": 9.463,
"step": 9000
},
{
"epoch": 1.5659254619480114,
"grad_norm": 9.159900665283203,
"learning_rate": 8.500631313131314e-06,
"loss": 0.3932,
"step": 10000
},
{
"epoch": 1.5659254619480114,
"eval_accuracy": 0.7443181818181818,
"eval_f1": 0.7351907898564811,
"eval_loss": 0.9013388156890869,
"eval_precision": 0.7458339781734145,
"eval_recall": 0.7443181818181818,
"eval_runtime": 29.7576,
"eval_samples_per_second": 76.888,
"eval_steps_per_second": 9.611,
"step": 10000
}
],
"logging_steps": 1000,
"max_steps": 63860,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.0072339831839852e+16,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}