Francesco0101's picture
Training in progress, step 11000, checkpoint
612a16b verified
raw
history blame
No virus
6.9 kB
{
"best_metric": 0.7624731472235634,
"best_model_checkpoint": "training_dir/checkpoint-6000",
"epoch": 1.7225180081428124,
"eval_steps": 1000,
"global_step": 11000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.15659254619480112,
"grad_norm": 67.27922058105469,
"learning_rate": 9.92108585858586e-06,
"loss": 0.7405,
"step": 1000
},
{
"epoch": 0.15659254619480112,
"eval_accuracy": 0.6914335664335665,
"eval_f1": 0.6593208421129121,
"eval_loss": 1.1309396028518677,
"eval_precision": 0.6956839140215929,
"eval_recall": 0.6914335664335665,
"eval_runtime": 47.9571,
"eval_samples_per_second": 47.709,
"eval_steps_per_second": 5.964,
"step": 1000
},
{
"epoch": 0.31318509238960224,
"grad_norm": 14.41289234161377,
"learning_rate": 9.763257575757577e-06,
"loss": 0.4907,
"step": 2000
},
{
"epoch": 0.31318509238960224,
"eval_accuracy": 0.743006993006993,
"eval_f1": 0.733830578366595,
"eval_loss": 0.8223879337310791,
"eval_precision": 0.7463673332921382,
"eval_recall": 0.743006993006993,
"eval_runtime": 47.4569,
"eval_samples_per_second": 48.212,
"eval_steps_per_second": 6.027,
"step": 2000
},
{
"epoch": 0.46977763858440336,
"grad_norm": 1.8675850629806519,
"learning_rate": 9.605429292929293e-06,
"loss": 0.4543,
"step": 3000
},
{
"epoch": 0.46977763858440336,
"eval_accuracy": 0.7312062937062938,
"eval_f1": 0.7152230962884388,
"eval_loss": 0.9456853270530701,
"eval_precision": 0.7333745521313426,
"eval_recall": 0.7312062937062938,
"eval_runtime": 47.3928,
"eval_samples_per_second": 48.277,
"eval_steps_per_second": 6.035,
"step": 3000
},
{
"epoch": 0.6263701847792045,
"grad_norm": 36.809181213378906,
"learning_rate": 9.44760101010101e-06,
"loss": 0.4431,
"step": 4000
},
{
"epoch": 0.6263701847792045,
"eval_accuracy": 0.7456293706293706,
"eval_f1": 0.7418425557235627,
"eval_loss": 0.7822393774986267,
"eval_precision": 0.7420175571942784,
"eval_recall": 0.7456293706293706,
"eval_runtime": 47.4108,
"eval_samples_per_second": 48.259,
"eval_steps_per_second": 6.032,
"step": 4000
},
{
"epoch": 0.7829627309740056,
"grad_norm": 1.452012300491333,
"learning_rate": 9.289772727272728e-06,
"loss": 0.4423,
"step": 5000
},
{
"epoch": 0.7829627309740056,
"eval_accuracy": 0.7539335664335665,
"eval_f1": 0.7466934407427619,
"eval_loss": 0.8257411122322083,
"eval_precision": 0.7528625679945705,
"eval_recall": 0.7539335664335665,
"eval_runtime": 47.5443,
"eval_samples_per_second": 48.123,
"eval_steps_per_second": 6.015,
"step": 5000
},
{
"epoch": 0.9395552771688067,
"grad_norm": 16.726686477661133,
"learning_rate": 9.131944444444445e-06,
"loss": 0.4505,
"step": 6000
},
{
"epoch": 0.9395552771688067,
"eval_accuracy": 0.7670454545454546,
"eval_f1": 0.7624731472235634,
"eval_loss": 0.7416993379592896,
"eval_precision": 0.7669396954578515,
"eval_recall": 0.7670454545454546,
"eval_runtime": 47.5056,
"eval_samples_per_second": 48.163,
"eval_steps_per_second": 6.02,
"step": 6000
},
{
"epoch": 1.096147823363608,
"grad_norm": 47.86187744140625,
"learning_rate": 8.974116161616161e-06,
"loss": 0.4028,
"step": 7000
},
{
"epoch": 1.096147823363608,
"eval_accuracy": 0.7399475524475524,
"eval_f1": 0.7251034362957096,
"eval_loss": 1.0867348909378052,
"eval_precision": 0.7436508232814505,
"eval_recall": 0.7399475524475524,
"eval_runtime": 47.6192,
"eval_samples_per_second": 48.048,
"eval_steps_per_second": 6.006,
"step": 7000
},
{
"epoch": 1.252740369558409,
"grad_norm": 1.5419590473175049,
"learning_rate": 8.816287878787879e-06,
"loss": 0.3891,
"step": 8000
},
{
"epoch": 1.252740369558409,
"eval_accuracy": 0.7504370629370629,
"eval_f1": 0.7360022991843806,
"eval_loss": 1.1146304607391357,
"eval_precision": 0.7541770480812687,
"eval_recall": 0.7504370629370629,
"eval_runtime": 47.6132,
"eval_samples_per_second": 48.054,
"eval_steps_per_second": 6.007,
"step": 8000
},
{
"epoch": 1.4093329157532102,
"grad_norm": 48.14420700073242,
"learning_rate": 8.658459595959596e-06,
"loss": 0.3957,
"step": 9000
},
{
"epoch": 1.4093329157532102,
"eval_accuracy": 0.7613636363636364,
"eval_f1": 0.7534759031290063,
"eval_loss": 0.9045655727386475,
"eval_precision": 0.7581869302928095,
"eval_recall": 0.7613636363636364,
"eval_runtime": 47.5148,
"eval_samples_per_second": 48.153,
"eval_steps_per_second": 6.019,
"step": 9000
},
{
"epoch": 1.5659254619480114,
"grad_norm": 5.539546489715576,
"learning_rate": 8.500631313131314e-06,
"loss": 0.401,
"step": 10000
},
{
"epoch": 1.5659254619480114,
"eval_accuracy": 0.7622377622377622,
"eval_f1": 0.7562280560821415,
"eval_loss": 0.951400876045227,
"eval_precision": 0.760121427774359,
"eval_recall": 0.7622377622377622,
"eval_runtime": 47.6294,
"eval_samples_per_second": 48.038,
"eval_steps_per_second": 6.005,
"step": 10000
},
{
"epoch": 1.7225180081428124,
"grad_norm": 44.26694107055664,
"learning_rate": 8.342803030303031e-06,
"loss": 0.3903,
"step": 11000
},
{
"epoch": 1.7225180081428124,
"eval_accuracy": 0.7552447552447552,
"eval_f1": 0.7447143825904207,
"eval_loss": 0.9456614255905151,
"eval_precision": 0.7531489883387709,
"eval_recall": 0.7552447552447552,
"eval_runtime": 47.5072,
"eval_samples_per_second": 48.161,
"eval_steps_per_second": 6.02,
"step": 11000
}
],
"logging_steps": 1000,
"max_steps": 63860,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 2.2525788225157308e+16,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}