{ "best_metric": 0.7624731472235634, "best_model_checkpoint": "training_dir/checkpoint-6000", "epoch": 1.7225180081428124, "eval_steps": 1000, "global_step": 11000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.15659254619480112, "grad_norm": 67.27922058105469, "learning_rate": 9.92108585858586e-06, "loss": 0.7405, "step": 1000 }, { "epoch": 0.15659254619480112, "eval_accuracy": 0.6914335664335665, "eval_f1": 0.6593208421129121, "eval_loss": 1.1309396028518677, "eval_precision": 0.6956839140215929, "eval_recall": 0.6914335664335665, "eval_runtime": 47.9571, "eval_samples_per_second": 47.709, "eval_steps_per_second": 5.964, "step": 1000 }, { "epoch": 0.31318509238960224, "grad_norm": 14.41289234161377, "learning_rate": 9.763257575757577e-06, "loss": 0.4907, "step": 2000 }, { "epoch": 0.31318509238960224, "eval_accuracy": 0.743006993006993, "eval_f1": 0.733830578366595, "eval_loss": 0.8223879337310791, "eval_precision": 0.7463673332921382, "eval_recall": 0.743006993006993, "eval_runtime": 47.4569, "eval_samples_per_second": 48.212, "eval_steps_per_second": 6.027, "step": 2000 }, { "epoch": 0.46977763858440336, "grad_norm": 1.8675850629806519, "learning_rate": 9.605429292929293e-06, "loss": 0.4543, "step": 3000 }, { "epoch": 0.46977763858440336, "eval_accuracy": 0.7312062937062938, "eval_f1": 0.7152230962884388, "eval_loss": 0.9456853270530701, "eval_precision": 0.7333745521313426, "eval_recall": 0.7312062937062938, "eval_runtime": 47.3928, "eval_samples_per_second": 48.277, "eval_steps_per_second": 6.035, "step": 3000 }, { "epoch": 0.6263701847792045, "grad_norm": 36.809181213378906, "learning_rate": 9.44760101010101e-06, "loss": 0.4431, "step": 4000 }, { "epoch": 0.6263701847792045, "eval_accuracy": 0.7456293706293706, "eval_f1": 0.7418425557235627, "eval_loss": 0.7822393774986267, "eval_precision": 0.7420175571942784, "eval_recall": 0.7456293706293706, "eval_runtime": 47.4108, "eval_samples_per_second": 48.259, "eval_steps_per_second": 6.032, "step": 4000 }, { "epoch": 0.7829627309740056, "grad_norm": 1.452012300491333, "learning_rate": 9.289772727272728e-06, "loss": 0.4423, "step": 5000 }, { "epoch": 0.7829627309740056, "eval_accuracy": 0.7539335664335665, "eval_f1": 0.7466934407427619, "eval_loss": 0.8257411122322083, "eval_precision": 0.7528625679945705, "eval_recall": 0.7539335664335665, "eval_runtime": 47.5443, "eval_samples_per_second": 48.123, "eval_steps_per_second": 6.015, "step": 5000 }, { "epoch": 0.9395552771688067, "grad_norm": 16.726686477661133, "learning_rate": 9.131944444444445e-06, "loss": 0.4505, "step": 6000 }, { "epoch": 0.9395552771688067, "eval_accuracy": 0.7670454545454546, "eval_f1": 0.7624731472235634, "eval_loss": 0.7416993379592896, "eval_precision": 0.7669396954578515, "eval_recall": 0.7670454545454546, "eval_runtime": 47.5056, "eval_samples_per_second": 48.163, "eval_steps_per_second": 6.02, "step": 6000 }, { "epoch": 1.096147823363608, "grad_norm": 47.86187744140625, "learning_rate": 8.974116161616161e-06, "loss": 0.4028, "step": 7000 }, { "epoch": 1.096147823363608, "eval_accuracy": 0.7399475524475524, "eval_f1": 0.7251034362957096, "eval_loss": 1.0867348909378052, "eval_precision": 0.7436508232814505, "eval_recall": 0.7399475524475524, "eval_runtime": 47.6192, "eval_samples_per_second": 48.048, "eval_steps_per_second": 6.006, "step": 7000 }, { "epoch": 1.252740369558409, "grad_norm": 1.5419590473175049, "learning_rate": 8.816287878787879e-06, "loss": 0.3891, "step": 8000 }, { "epoch": 1.252740369558409, "eval_accuracy": 0.7504370629370629, "eval_f1": 0.7360022991843806, "eval_loss": 1.1146304607391357, "eval_precision": 0.7541770480812687, "eval_recall": 0.7504370629370629, "eval_runtime": 47.6132, "eval_samples_per_second": 48.054, "eval_steps_per_second": 6.007, "step": 8000 }, { "epoch": 1.4093329157532102, "grad_norm": 48.14420700073242, "learning_rate": 8.658459595959596e-06, "loss": 0.3957, "step": 9000 }, { "epoch": 1.4093329157532102, "eval_accuracy": 0.7613636363636364, "eval_f1": 0.7534759031290063, "eval_loss": 0.9045655727386475, "eval_precision": 0.7581869302928095, "eval_recall": 0.7613636363636364, "eval_runtime": 47.5148, "eval_samples_per_second": 48.153, "eval_steps_per_second": 6.019, "step": 9000 }, { "epoch": 1.5659254619480114, "grad_norm": 5.539546489715576, "learning_rate": 8.500631313131314e-06, "loss": 0.401, "step": 10000 }, { "epoch": 1.5659254619480114, "eval_accuracy": 0.7622377622377622, "eval_f1": 0.7562280560821415, "eval_loss": 0.951400876045227, "eval_precision": 0.760121427774359, "eval_recall": 0.7622377622377622, "eval_runtime": 47.6294, "eval_samples_per_second": 48.038, "eval_steps_per_second": 6.005, "step": 10000 }, { "epoch": 1.7225180081428124, "grad_norm": 44.26694107055664, "learning_rate": 8.342803030303031e-06, "loss": 0.3903, "step": 11000 }, { "epoch": 1.7225180081428124, "eval_accuracy": 0.7552447552447552, "eval_f1": 0.7447143825904207, "eval_loss": 0.9456614255905151, "eval_precision": 0.7531489883387709, "eval_recall": 0.7552447552447552, "eval_runtime": 47.5072, "eval_samples_per_second": 48.161, "eval_steps_per_second": 6.02, "step": 11000 } ], "logging_steps": 1000, "max_steps": 63860, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.2525788225157308e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }