{ "best_metric": 0.2875193953514099, "best_model_checkpoint": "autotrain-rcira-jogfv/checkpoint-280", "epoch": 2.0, "eval_steps": 500, "global_step": 280, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "grad_norm": 3.117601156234741, "learning_rate": 8.333333333333334e-06, "loss": 0.6004, "step": 7 }, { "epoch": 0.1, "grad_norm": 2.980803966522217, "learning_rate": 1.6666666666666667e-05, "loss": 0.5799, "step": 14 }, { "epoch": 0.15, "grad_norm": 6.8337836265563965, "learning_rate": 2.5e-05, "loss": 0.5843, "step": 21 }, { "epoch": 0.2, "grad_norm": 7.110068321228027, "learning_rate": 3.3333333333333335e-05, "loss": 0.6206, "step": 28 }, { "epoch": 0.25, "grad_norm": 5.731410503387451, "learning_rate": 4.166666666666667e-05, "loss": 0.449, "step": 35 }, { "epoch": 0.3, "grad_norm": 4.437646865844727, "learning_rate": 5e-05, "loss": 0.4225, "step": 42 }, { "epoch": 0.35, "grad_norm": 16.640886306762695, "learning_rate": 4.9074074074074075e-05, "loss": 0.4816, "step": 49 }, { "epoch": 0.4, "grad_norm": 5.661762714385986, "learning_rate": 4.814814814814815e-05, "loss": 0.3799, "step": 56 }, { "epoch": 0.45, "grad_norm": 3.9557294845581055, "learning_rate": 4.722222222222222e-05, "loss": 0.3908, "step": 63 }, { "epoch": 0.5, "grad_norm": 14.93912124633789, "learning_rate": 4.62962962962963e-05, "loss": 0.4501, "step": 70 }, { "epoch": 0.55, "grad_norm": 4.729761600494385, "learning_rate": 4.5370370370370374e-05, "loss": 0.4502, "step": 77 }, { "epoch": 0.6, "grad_norm": 8.4749755859375, "learning_rate": 4.4444444444444447e-05, "loss": 0.4013, "step": 84 }, { "epoch": 0.65, "grad_norm": 3.363345146179199, "learning_rate": 4.351851851851852e-05, "loss": 0.2332, "step": 91 }, { "epoch": 0.7, "grad_norm": 1.7409418821334839, "learning_rate": 4.259259259259259e-05, "loss": 0.2461, "step": 98 }, { "epoch": 0.75, "grad_norm": 1.162623643875122, "learning_rate": 4.166666666666667e-05, "loss": 0.3435, "step": 105 }, { "epoch": 0.8, "grad_norm": 24.589733123779297, "learning_rate": 4.074074074074074e-05, "loss": 0.2542, "step": 112 }, { "epoch": 0.85, "grad_norm": 6.879342555999756, "learning_rate": 3.981481481481482e-05, "loss": 0.4404, "step": 119 }, { "epoch": 0.9, "grad_norm": 9.209638595581055, "learning_rate": 3.888888888888889e-05, "loss": 0.2772, "step": 126 }, { "epoch": 0.95, "grad_norm": 1.787611961364746, "learning_rate": 3.7962962962962964e-05, "loss": 0.2977, "step": 133 }, { "epoch": 1.0, "grad_norm": 9.668951988220215, "learning_rate": 3.7037037037037037e-05, "loss": 0.4281, "step": 140 }, { "epoch": 1.0, "eval_accuracy": 0.8714285714285714, "eval_auc": 0.9277579351708335, "eval_f1": 0.660377358490566, "eval_loss": 0.3361506164073944, "eval_precision": 1.0, "eval_recall": 0.49295774647887325, "eval_runtime": 60.7868, "eval_samples_per_second": 4.606, "eval_steps_per_second": 0.296, "step": 140 }, { "epoch": 1.05, "grad_norm": 1.7222918272018433, "learning_rate": 3.611111111111111e-05, "loss": 0.2177, "step": 147 }, { "epoch": 1.1, "grad_norm": 1.8709276914596558, "learning_rate": 3.518518518518519e-05, "loss": 0.2144, "step": 154 }, { "epoch": 1.15, "grad_norm": 7.57355260848999, "learning_rate": 3.425925925925926e-05, "loss": 0.2451, "step": 161 }, { "epoch": 1.2, "grad_norm": 9.764376640319824, "learning_rate": 3.3333333333333335e-05, "loss": 0.1751, "step": 168 }, { "epoch": 1.25, "grad_norm": 0.35536810755729675, "learning_rate": 3.240740740740741e-05, "loss": 0.3158, "step": 175 }, { "epoch": 1.3, "grad_norm": 28.518468856811523, "learning_rate": 3.148148148148148e-05, "loss": 0.4364, "step": 182 }, { "epoch": 1.35, "grad_norm": 0.5735307335853577, "learning_rate": 3.055555555555556e-05, "loss": 0.2745, "step": 189 }, { "epoch": 1.4, "grad_norm": 1.0257889032363892, "learning_rate": 2.962962962962963e-05, "loss": 0.2171, "step": 196 }, { "epoch": 1.45, "grad_norm": 10.6473388671875, "learning_rate": 2.8703703703703706e-05, "loss": 0.6241, "step": 203 }, { "epoch": 1.5, "grad_norm": 5.9080681800842285, "learning_rate": 2.777777777777778e-05, "loss": 0.2061, "step": 210 }, { "epoch": 1.55, "grad_norm": 16.31466293334961, "learning_rate": 2.6851851851851855e-05, "loss": 0.3655, "step": 217 }, { "epoch": 1.6, "grad_norm": 1.7148101329803467, "learning_rate": 2.5925925925925925e-05, "loss": 0.2594, "step": 224 }, { "epoch": 1.65, "grad_norm": 3.448263168334961, "learning_rate": 2.5e-05, "loss": 0.2242, "step": 231 }, { "epoch": 1.7, "grad_norm": 3.1968586444854736, "learning_rate": 2.4074074074074074e-05, "loss": 0.3384, "step": 238 }, { "epoch": 1.75, "grad_norm": 0.8760737776756287, "learning_rate": 2.314814814814815e-05, "loss": 0.4047, "step": 245 }, { "epoch": 1.8, "grad_norm": 6.7152099609375, "learning_rate": 2.2222222222222223e-05, "loss": 0.2784, "step": 252 }, { "epoch": 1.85, "grad_norm": 2.6230883598327637, "learning_rate": 2.1296296296296296e-05, "loss": 0.2764, "step": 259 }, { "epoch": 1.9, "grad_norm": 8.257742881774902, "learning_rate": 2.037037037037037e-05, "loss": 0.2538, "step": 266 }, { "epoch": 1.95, "grad_norm": 11.801633834838867, "learning_rate": 1.9444444444444445e-05, "loss": 0.3419, "step": 273 }, { "epoch": 2.0, "grad_norm": 3.414065361022949, "learning_rate": 1.8518518518518518e-05, "loss": 0.2464, "step": 280 }, { "epoch": 2.0, "eval_accuracy": 0.8464285714285714, "eval_auc": 0.933283914010378, "eval_f1": 0.7261146496815286, "eval_loss": 0.2875193953514099, "eval_precision": 0.6627906976744186, "eval_recall": 0.8028169014084507, "eval_runtime": 55.1352, "eval_samples_per_second": 5.078, "eval_steps_per_second": 0.326, "step": 280 } ], "logging_steps": 7, "max_steps": 420, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.01 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 146947524418560.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }