{ "best_metric": 0.6774867177009583, "best_model_checkpoint": "autotrain-xt6nb-pf6ri/checkpoint-1000", "epoch": 2.0, "eval_steps": 500, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.048, "grad_norm": 4.167473793029785, "learning_rate": 8.000000000000001e-06, "loss": 1.0876, "step": 24 }, { "epoch": 0.096, "grad_norm": 5.14194917678833, "learning_rate": 1.5666666666666667e-05, "loss": 1.1048, "step": 48 }, { "epoch": 0.144, "grad_norm": 8.47073745727539, "learning_rate": 2.3666666666666668e-05, "loss": 1.085, "step": 72 }, { "epoch": 0.192, "grad_norm": 6.285531997680664, "learning_rate": 3.1e-05, "loss": 1.0327, "step": 96 }, { "epoch": 0.24, "grad_norm": 10.787246704101562, "learning_rate": 3.9000000000000006e-05, "loss": 1.0035, "step": 120 }, { "epoch": 0.288, "grad_norm": 11.439258575439453, "learning_rate": 4.7e-05, "loss": 1.0177, "step": 144 }, { "epoch": 0.336, "grad_norm": 6.169108867645264, "learning_rate": 4.9444444444444446e-05, "loss": 0.8407, "step": 168 }, { "epoch": 0.384, "grad_norm": 7.220673084259033, "learning_rate": 4.8592592592592596e-05, "loss": 1.0002, "step": 192 }, { "epoch": 0.432, "grad_norm": 8.805166244506836, "learning_rate": 4.770370370370371e-05, "loss": 0.8377, "step": 216 }, { "epoch": 0.48, "grad_norm": 15.92953872680664, "learning_rate": 4.681481481481482e-05, "loss": 0.975, "step": 240 }, { "epoch": 0.528, "grad_norm": 12.280447006225586, "learning_rate": 4.592592592592593e-05, "loss": 0.8324, "step": 264 }, { "epoch": 0.576, "grad_norm": 7.390661716461182, "learning_rate": 4.503703703703704e-05, "loss": 0.8676, "step": 288 }, { "epoch": 0.624, "grad_norm": 3.326416492462158, "learning_rate": 4.414814814814815e-05, "loss": 1.0311, "step": 312 }, { "epoch": 0.672, "grad_norm": 1.4128365516662598, "learning_rate": 4.325925925925926e-05, "loss": 0.9408, "step": 336 }, { "epoch": 0.72, "grad_norm": 1.4254629611968994, "learning_rate": 4.237037037037037e-05, "loss": 0.9534, "step": 360 }, { "epoch": 0.768, "grad_norm": 5.265137195587158, "learning_rate": 4.148148148148148e-05, "loss": 0.8517, "step": 384 }, { "epoch": 0.816, "grad_norm": 139.75430297851562, "learning_rate": 4.059259259259259e-05, "loss": 0.9692, "step": 408 }, { "epoch": 0.864, "grad_norm": 172.7858123779297, "learning_rate": 3.97037037037037e-05, "loss": 0.9143, "step": 432 }, { "epoch": 0.912, "grad_norm": 42.714324951171875, "learning_rate": 3.885185185185186e-05, "loss": 0.8104, "step": 456 }, { "epoch": 0.96, "grad_norm": 7.165309906005859, "learning_rate": 3.7962962962962964e-05, "loss": 0.9425, "step": 480 }, { "epoch": 1.0, "eval_accuracy": 0.6906906906906907, "eval_f1_macro": 0.6632603879089224, "eval_f1_micro": 0.6906906906906907, "eval_f1_weighted": 0.6878712561273783, "eval_loss": 0.8078358769416809, "eval_precision_macro": 0.6746375251220212, "eval_precision_micro": 0.6906906906906907, "eval_precision_weighted": 0.6963016741978844, "eval_recall_macro": 0.6637361051449268, "eval_recall_micro": 0.6906906906906907, "eval_recall_weighted": 0.6906906906906907, "eval_runtime": 2.1598, "eval_samples_per_second": 462.551, "eval_steps_per_second": 29.17, "step": 500 }, { "epoch": 1.008, "grad_norm": 40.330535888671875, "learning_rate": 3.7074074074074075e-05, "loss": 0.8571, "step": 504 }, { "epoch": 1.056, "grad_norm": 18.710975646972656, "learning_rate": 3.6185185185185186e-05, "loss": 0.8107, "step": 528 }, { "epoch": 1.104, "grad_norm": 7.848007678985596, "learning_rate": 3.52962962962963e-05, "loss": 0.8219, "step": 552 }, { "epoch": 1.152, "grad_norm": 12.77737808227539, "learning_rate": 3.440740740740741e-05, "loss": 0.6249, "step": 576 }, { "epoch": 1.2, "grad_norm": 4.067225456237793, "learning_rate": 3.351851851851852e-05, "loss": 0.7372, "step": 600 }, { "epoch": 1.248, "grad_norm": 22.771604537963867, "learning_rate": 3.262962962962963e-05, "loss": 0.6791, "step": 624 }, { "epoch": 1.296, "grad_norm": 32.429100036621094, "learning_rate": 3.174074074074074e-05, "loss": 0.7434, "step": 648 }, { "epoch": 1.3439999999999999, "grad_norm": 7.296426773071289, "learning_rate": 3.0851851851851854e-05, "loss": 0.7742, "step": 672 }, { "epoch": 1.392, "grad_norm": 52.18477249145508, "learning_rate": 2.9962962962962966e-05, "loss": 0.7379, "step": 696 }, { "epoch": 1.44, "grad_norm": 47.735477447509766, "learning_rate": 2.9074074074074077e-05, "loss": 0.7652, "step": 720 }, { "epoch": 1.488, "grad_norm": 214.20318603515625, "learning_rate": 2.8185185185185185e-05, "loss": 0.6941, "step": 744 }, { "epoch": 1.536, "grad_norm": 20.468931198120117, "learning_rate": 2.7296296296296296e-05, "loss": 0.7389, "step": 768 }, { "epoch": 1.584, "grad_norm": 4.2643866539001465, "learning_rate": 2.6407407407407408e-05, "loss": 0.7372, "step": 792 }, { "epoch": 1.6320000000000001, "grad_norm": 18.783554077148438, "learning_rate": 2.551851851851852e-05, "loss": 0.711, "step": 816 }, { "epoch": 1.6800000000000002, "grad_norm": 28.915599822998047, "learning_rate": 2.462962962962963e-05, "loss": 0.7423, "step": 840 }, { "epoch": 1.728, "grad_norm": 4.792373180389404, "learning_rate": 2.3740740740740742e-05, "loss": 0.6692, "step": 864 }, { "epoch": 1.776, "grad_norm": 19.33057975769043, "learning_rate": 2.2851851851851853e-05, "loss": 0.631, "step": 888 }, { "epoch": 1.8239999999999998, "grad_norm": 18.55846405029297, "learning_rate": 2.1962962962962964e-05, "loss": 0.6367, "step": 912 }, { "epoch": 1.8719999999999999, "grad_norm": 5.190149784088135, "learning_rate": 2.1074074074074072e-05, "loss": 0.6872, "step": 936 }, { "epoch": 1.92, "grad_norm": 38.714290618896484, "learning_rate": 2.0185185185185187e-05, "loss": 0.6252, "step": 960 }, { "epoch": 1.968, "grad_norm": 20.541481018066406, "learning_rate": 1.92962962962963e-05, "loss": 0.6303, "step": 984 }, { "epoch": 2.0, "eval_accuracy": 0.7177177177177178, "eval_f1_macro": 0.6839774417313933, "eval_f1_micro": 0.7177177177177178, "eval_f1_weighted": 0.7107988255501857, "eval_loss": 0.6774867177009583, "eval_precision_macro": 0.7089199316136637, "eval_precision_micro": 0.7177177177177178, "eval_precision_weighted": 0.7240729310198633, "eval_recall_macro": 0.6773815101662369, "eval_recall_micro": 0.7177177177177178, "eval_recall_weighted": 0.7177177177177178, "eval_runtime": 2.1078, "eval_samples_per_second": 473.965, "eval_steps_per_second": 29.89, "step": 1000 } ], "logging_steps": 24, "max_steps": 1500, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.01 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 525305938493952.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }