|
{ |
|
"best_metric": 0.7624731472235634, |
|
"best_model_checkpoint": "training_dir/checkpoint-6000", |
|
"epoch": 1.7225180081428124, |
|
"eval_steps": 1000, |
|
"global_step": 11000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.15659254619480112, |
|
"grad_norm": 67.27922058105469, |
|
"learning_rate": 9.92108585858586e-06, |
|
"loss": 0.7405, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.15659254619480112, |
|
"eval_accuracy": 0.6914335664335665, |
|
"eval_f1": 0.6593208421129121, |
|
"eval_loss": 1.1309396028518677, |
|
"eval_precision": 0.6956839140215929, |
|
"eval_recall": 0.6914335664335665, |
|
"eval_runtime": 47.9571, |
|
"eval_samples_per_second": 47.709, |
|
"eval_steps_per_second": 5.964, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.31318509238960224, |
|
"grad_norm": 14.41289234161377, |
|
"learning_rate": 9.763257575757577e-06, |
|
"loss": 0.4907, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.31318509238960224, |
|
"eval_accuracy": 0.743006993006993, |
|
"eval_f1": 0.733830578366595, |
|
"eval_loss": 0.8223879337310791, |
|
"eval_precision": 0.7463673332921382, |
|
"eval_recall": 0.743006993006993, |
|
"eval_runtime": 47.4569, |
|
"eval_samples_per_second": 48.212, |
|
"eval_steps_per_second": 6.027, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.46977763858440336, |
|
"grad_norm": 1.8675850629806519, |
|
"learning_rate": 9.605429292929293e-06, |
|
"loss": 0.4543, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.46977763858440336, |
|
"eval_accuracy": 0.7312062937062938, |
|
"eval_f1": 0.7152230962884388, |
|
"eval_loss": 0.9456853270530701, |
|
"eval_precision": 0.7333745521313426, |
|
"eval_recall": 0.7312062937062938, |
|
"eval_runtime": 47.3928, |
|
"eval_samples_per_second": 48.277, |
|
"eval_steps_per_second": 6.035, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.6263701847792045, |
|
"grad_norm": 36.809181213378906, |
|
"learning_rate": 9.44760101010101e-06, |
|
"loss": 0.4431, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.6263701847792045, |
|
"eval_accuracy": 0.7456293706293706, |
|
"eval_f1": 0.7418425557235627, |
|
"eval_loss": 0.7822393774986267, |
|
"eval_precision": 0.7420175571942784, |
|
"eval_recall": 0.7456293706293706, |
|
"eval_runtime": 47.4108, |
|
"eval_samples_per_second": 48.259, |
|
"eval_steps_per_second": 6.032, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.7829627309740056, |
|
"grad_norm": 1.452012300491333, |
|
"learning_rate": 9.289772727272728e-06, |
|
"loss": 0.4423, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.7829627309740056, |
|
"eval_accuracy": 0.7539335664335665, |
|
"eval_f1": 0.7466934407427619, |
|
"eval_loss": 0.8257411122322083, |
|
"eval_precision": 0.7528625679945705, |
|
"eval_recall": 0.7539335664335665, |
|
"eval_runtime": 47.5443, |
|
"eval_samples_per_second": 48.123, |
|
"eval_steps_per_second": 6.015, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.9395552771688067, |
|
"grad_norm": 16.726686477661133, |
|
"learning_rate": 9.131944444444445e-06, |
|
"loss": 0.4505, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.9395552771688067, |
|
"eval_accuracy": 0.7670454545454546, |
|
"eval_f1": 0.7624731472235634, |
|
"eval_loss": 0.7416993379592896, |
|
"eval_precision": 0.7669396954578515, |
|
"eval_recall": 0.7670454545454546, |
|
"eval_runtime": 47.5056, |
|
"eval_samples_per_second": 48.163, |
|
"eval_steps_per_second": 6.02, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.096147823363608, |
|
"grad_norm": 47.86187744140625, |
|
"learning_rate": 8.974116161616161e-06, |
|
"loss": 0.4028, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.096147823363608, |
|
"eval_accuracy": 0.7399475524475524, |
|
"eval_f1": 0.7251034362957096, |
|
"eval_loss": 1.0867348909378052, |
|
"eval_precision": 0.7436508232814505, |
|
"eval_recall": 0.7399475524475524, |
|
"eval_runtime": 47.6192, |
|
"eval_samples_per_second": 48.048, |
|
"eval_steps_per_second": 6.006, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.252740369558409, |
|
"grad_norm": 1.5419590473175049, |
|
"learning_rate": 8.816287878787879e-06, |
|
"loss": 0.3891, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.252740369558409, |
|
"eval_accuracy": 0.7504370629370629, |
|
"eval_f1": 0.7360022991843806, |
|
"eval_loss": 1.1146304607391357, |
|
"eval_precision": 0.7541770480812687, |
|
"eval_recall": 0.7504370629370629, |
|
"eval_runtime": 47.6132, |
|
"eval_samples_per_second": 48.054, |
|
"eval_steps_per_second": 6.007, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.4093329157532102, |
|
"grad_norm": 48.14420700073242, |
|
"learning_rate": 8.658459595959596e-06, |
|
"loss": 0.3957, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.4093329157532102, |
|
"eval_accuracy": 0.7613636363636364, |
|
"eval_f1": 0.7534759031290063, |
|
"eval_loss": 0.9045655727386475, |
|
"eval_precision": 0.7581869302928095, |
|
"eval_recall": 0.7613636363636364, |
|
"eval_runtime": 47.5148, |
|
"eval_samples_per_second": 48.153, |
|
"eval_steps_per_second": 6.019, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.5659254619480114, |
|
"grad_norm": 5.539546489715576, |
|
"learning_rate": 8.500631313131314e-06, |
|
"loss": 0.401, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.5659254619480114, |
|
"eval_accuracy": 0.7622377622377622, |
|
"eval_f1": 0.7562280560821415, |
|
"eval_loss": 0.951400876045227, |
|
"eval_precision": 0.760121427774359, |
|
"eval_recall": 0.7622377622377622, |
|
"eval_runtime": 47.6294, |
|
"eval_samples_per_second": 48.038, |
|
"eval_steps_per_second": 6.005, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.7225180081428124, |
|
"grad_norm": 44.26694107055664, |
|
"learning_rate": 8.342803030303031e-06, |
|
"loss": 0.3903, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.7225180081428124, |
|
"eval_accuracy": 0.7552447552447552, |
|
"eval_f1": 0.7447143825904207, |
|
"eval_loss": 0.9456614255905151, |
|
"eval_precision": 0.7531489883387709, |
|
"eval_recall": 0.7552447552447552, |
|
"eval_runtime": 47.5072, |
|
"eval_samples_per_second": 48.161, |
|
"eval_steps_per_second": 6.02, |
|
"step": 11000 |
|
} |
|
], |
|
"logging_steps": 1000, |
|
"max_steps": 63860, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.2525788225157308e+16, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|