{ "best_metric": 0.4823857545852661, "best_model_checkpoint": "ft-roberta-toxicity/checkpoint-200", "epoch": 2.0, "eval_steps": 500, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "grad_norm": 3.1541271209716797, "learning_rate": 5e-06, "loss": 0.7051, "step": 5 }, { "epoch": 0.1, "grad_norm": 4.209589958190918, "learning_rate": 1e-05, "loss": 0.6954, "step": 10 }, { "epoch": 0.15, "grad_norm": 5.086088180541992, "learning_rate": 1.5e-05, "loss": 0.719, "step": 15 }, { "epoch": 0.2, "grad_norm": 4.385767936706543, "learning_rate": 2e-05, "loss": 0.6785, "step": 20 }, { "epoch": 0.25, "grad_norm": 3.3053433895111084, "learning_rate": 2.5e-05, "loss": 0.6874, "step": 25 }, { "epoch": 0.3, "grad_norm": 2.8885927200317383, "learning_rate": 3e-05, "loss": 0.6988, "step": 30 }, { "epoch": 0.35, "grad_norm": 2.523589611053467, "learning_rate": 3.5e-05, "loss": 0.7674, "step": 35 }, { "epoch": 0.4, "grad_norm": 8.986541748046875, "learning_rate": 4e-05, "loss": 0.6891, "step": 40 }, { "epoch": 0.45, "grad_norm": 6.452502727508545, "learning_rate": 4.5e-05, "loss": 0.8348, "step": 45 }, { "epoch": 0.5, "grad_norm": 6.318837642669678, "learning_rate": 5e-05, "loss": 0.7019, "step": 50 }, { "epoch": 0.55, "grad_norm": 3.7034225463867188, "learning_rate": 4.9444444444444446e-05, "loss": 0.65, "step": 55 }, { "epoch": 0.6, "grad_norm": 3.1009528636932373, "learning_rate": 4.888888888888889e-05, "loss": 0.6593, "step": 60 }, { "epoch": 0.65, "grad_norm": 7.615400791168213, "learning_rate": 4.8333333333333334e-05, "loss": 0.6755, "step": 65 }, { "epoch": 0.7, "grad_norm": 3.228475332260132, "learning_rate": 4.7777777777777784e-05, "loss": 0.6034, "step": 70 }, { "epoch": 0.75, "grad_norm": 4.151826858520508, "learning_rate": 4.722222222222222e-05, "loss": 0.7218, "step": 75 }, { "epoch": 0.8, "grad_norm": 11.06347370147705, "learning_rate": 4.666666666666667e-05, "loss": 0.7539, "step": 80 }, { "epoch": 0.85, "grad_norm": 24.23040771484375, "learning_rate": 4.6111111111111115e-05, "loss": 0.6296, "step": 85 }, { "epoch": 0.9, "grad_norm": 6.828371047973633, "learning_rate": 4.555555555555556e-05, "loss": 0.559, "step": 90 }, { "epoch": 0.95, "grad_norm": 7.590404510498047, "learning_rate": 4.5e-05, "loss": 0.4996, "step": 95 }, { "epoch": 1.0, "grad_norm": 51.719970703125, "learning_rate": 4.4444444444444447e-05, "loss": 0.6665, "step": 100 }, { "epoch": 1.0, "eval_accuracy": 0.715, "eval_auc": 0.795187969924812, "eval_f1": 0.7219512195121951, "eval_loss": 0.5786986351013184, "eval_precision": 0.74, "eval_recall": 0.7047619047619048, "eval_runtime": 32.3877, "eval_samples_per_second": 6.175, "eval_steps_per_second": 0.401, "step": 100 }, { "epoch": 1.05, "grad_norm": 13.197641372680664, "learning_rate": 4.388888888888889e-05, "loss": 0.6974, "step": 105 }, { "epoch": 1.1, "grad_norm": 4.645239353179932, "learning_rate": 4.3333333333333334e-05, "loss": 0.4852, "step": 110 }, { "epoch": 1.15, "grad_norm": 4.36188268661499, "learning_rate": 4.277777777777778e-05, "loss": 0.7566, "step": 115 }, { "epoch": 1.2, "grad_norm": 6.4677300453186035, "learning_rate": 4.222222222222222e-05, "loss": 0.4989, "step": 120 }, { "epoch": 1.25, "grad_norm": 6.412233829498291, "learning_rate": 4.166666666666667e-05, "loss": 0.5496, "step": 125 }, { "epoch": 1.3, "grad_norm": 12.167023658752441, "learning_rate": 4.111111111111111e-05, "loss": 0.5181, "step": 130 }, { "epoch": 1.35, "grad_norm": 4.28169584274292, 
"learning_rate": 4.055555555555556e-05, "loss": 0.4636, "step": 135 }, { "epoch": 1.4, "grad_norm": 20.04325294494629, "learning_rate": 4e-05, "loss": 0.4278, "step": 140 }, { "epoch": 1.45, "grad_norm": 18.524545669555664, "learning_rate": 3.944444444444445e-05, "loss": 0.574, "step": 145 }, { "epoch": 1.5, "grad_norm": 12.304378509521484, "learning_rate": 3.888888888888889e-05, "loss": 0.4926, "step": 150 }, { "epoch": 1.55, "grad_norm": 16.889781951904297, "learning_rate": 3.8333333333333334e-05, "loss": 0.2429, "step": 155 }, { "epoch": 1.6, "grad_norm": 18.878406524658203, "learning_rate": 3.777777777777778e-05, "loss": 0.4433, "step": 160 }, { "epoch": 1.65, "grad_norm": 21.254526138305664, "learning_rate": 3.722222222222222e-05, "loss": 0.644, "step": 165 }, { "epoch": 1.7, "grad_norm": 21.575471878051758, "learning_rate": 3.6666666666666666e-05, "loss": 0.6373, "step": 170 }, { "epoch": 1.75, "grad_norm": 8.538455963134766, "learning_rate": 3.611111111111111e-05, "loss": 0.5708, "step": 175 }, { "epoch": 1.8, "grad_norm": 7.5060577392578125, "learning_rate": 3.555555555555556e-05, "loss": 0.7443, "step": 180 }, { "epoch": 1.85, "grad_norm": 7.51224946975708, "learning_rate": 3.5e-05, "loss": 0.4896, "step": 185 }, { "epoch": 1.9, "grad_norm": 5.721360683441162, "learning_rate": 3.444444444444445e-05, "loss": 0.544, "step": 190 }, { "epoch": 1.95, "grad_norm": 6.373270511627197, "learning_rate": 3.388888888888889e-05, "loss": 0.4813, "step": 195 }, { "epoch": 2.0, "grad_norm": 5.219586372375488, "learning_rate": 3.3333333333333335e-05, "loss": 0.3724, "step": 200 }, { "epoch": 2.0, "eval_accuracy": 0.79, "eval_auc": 0.8741854636591478, "eval_f1": 0.7666666666666667, "eval_loss": 0.4823857545852661, "eval_precision": 0.92, "eval_recall": 0.6571428571428571, "eval_runtime": 32.4699, "eval_samples_per_second": 6.16, "eval_steps_per_second": 0.4, "step": 200 } ], "logging_steps": 5, "max_steps": 500, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.01 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 105244422144000.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }