{ "best_metric": 0.588494598865509, "best_model_checkpoint": "autotrain-f8u3m-1w0uc/checkpoint-129", "epoch": 3.0, "eval_steps": 500, "global_step": 129, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "grad_norm": 17.909360885620117, "learning_rate": 3.846153846153847e-06, "loss": 1.7009, "step": 2 }, { "epoch": 0.09, "grad_norm": 13.120195388793945, "learning_rate": 7.692307692307694e-06, "loss": 1.4648, "step": 4 }, { "epoch": 0.14, "grad_norm": 14.559846878051758, "learning_rate": 1.5384615384615387e-05, "loss": 1.4937, "step": 6 }, { "epoch": 0.19, "grad_norm": 8.80319881439209, "learning_rate": 2.307692307692308e-05, "loss": 1.1978, "step": 8 }, { "epoch": 0.23, "grad_norm": 10.07514476776123, "learning_rate": 3.0769230769230774e-05, "loss": 1.2436, "step": 10 }, { "epoch": 0.28, "grad_norm": 5.79322624206543, "learning_rate": 3.846153846153846e-05, "loss": 0.8964, "step": 12 }, { "epoch": 0.33, "grad_norm": 15.918188095092773, "learning_rate": 4.615384615384616e-05, "loss": 1.5669, "step": 14 }, { "epoch": 0.37, "grad_norm": 8.671540260314941, "learning_rate": 4.9568965517241384e-05, "loss": 0.9671, "step": 16 }, { "epoch": 0.42, "grad_norm": 6.892113208770752, "learning_rate": 4.870689655172414e-05, "loss": 0.9687, "step": 18 }, { "epoch": 0.47, "grad_norm": 6.245675563812256, "learning_rate": 4.78448275862069e-05, "loss": 0.5675, "step": 20 }, { "epoch": 0.51, "grad_norm": 11.2545804977417, "learning_rate": 4.698275862068966e-05, "loss": 1.2117, "step": 22 }, { "epoch": 0.56, "grad_norm": 5.111425399780273, "learning_rate": 4.612068965517242e-05, "loss": 0.9048, "step": 24 }, { "epoch": 0.6, "grad_norm": 5.0768232345581055, "learning_rate": 4.5258620689655176e-05, "loss": 0.7902, "step": 26 }, { "epoch": 0.65, "grad_norm": 8.407731056213379, "learning_rate": 4.4396551724137933e-05, "loss": 0.941, "step": 28 }, { "epoch": 0.7, "grad_norm": 12.011809349060059, "learning_rate": 4.353448275862069e-05, "loss": 1.1812, "step": 30 }, { "epoch": 0.74, "grad_norm": 6.191880702972412, "learning_rate": 4.267241379310345e-05, "loss": 0.7905, "step": 32 }, { "epoch": 0.79, "grad_norm": 5.270814418792725, "learning_rate": 4.1810344827586205e-05, "loss": 0.7904, "step": 34 }, { "epoch": 0.84, "grad_norm": 5.346973896026611, "learning_rate": 4.094827586206897e-05, "loss": 0.6126, "step": 36 }, { "epoch": 0.88, "grad_norm": 6.431284427642822, "learning_rate": 4.0086206896551726e-05, "loss": 0.9431, "step": 38 }, { "epoch": 0.93, "grad_norm": 5.295109748840332, "learning_rate": 3.922413793103448e-05, "loss": 0.8013, "step": 40 }, { "epoch": 0.98, "grad_norm": 7.394813060760498, "learning_rate": 3.8362068965517246e-05, "loss": 0.5561, "step": 42 }, { "epoch": 1.0, "eval_accuracy": 0.6511627906976745, "eval_f1_macro": 0.5219298245614035, "eval_f1_micro": 0.6511627906976745, "eval_f1_weighted": 0.5661464708282333, "eval_loss": 0.8391300439834595, "eval_precision_macro": 0.5958528951486698, "eval_precision_micro": 0.6511627906976745, "eval_precision_weighted": 0.5547548859045747, "eval_recall_macro": 0.5166666666666667, "eval_recall_micro": 0.6511627906976745, "eval_recall_weighted": 0.6511627906976745, "eval_runtime": 0.5055, "eval_samples_per_second": 170.128, "eval_steps_per_second": 11.869, "step": 43 }, { "epoch": 1.02, "grad_norm": 12.433929443359375, "learning_rate": 3.7500000000000003e-05, "loss": 0.8422, "step": 44 }, { "epoch": 1.07, "grad_norm": 9.662508010864258, "learning_rate": 3.663793103448276e-05, "loss": 0.8562, "step": 46 }, { "epoch": 1.12, "grad_norm": 6.504687309265137, "learning_rate": 3.5775862068965524e-05, "loss": 0.6381, "step": 48 }, { "epoch": 1.16, "grad_norm": 12.690322875976562, "learning_rate": 3.4913793103448275e-05, "loss": 0.5752, "step": 50 }, { "epoch": 1.21, "grad_norm": 5.9632792472839355, "learning_rate": 3.405172413793103e-05, "loss": 0.6849, "step": 52 }, { "epoch": 1.26, "grad_norm": 8.237900733947754, "learning_rate": 3.3189655172413796e-05, "loss": 0.6422, "step": 54 }, { "epoch": 1.3, "grad_norm": 5.977579593658447, "learning_rate": 3.232758620689655e-05, "loss": 0.6344, "step": 56 }, { "epoch": 1.35, "grad_norm": 6.388967037200928, "learning_rate": 3.146551724137931e-05, "loss": 0.6051, "step": 58 }, { "epoch": 1.4, "grad_norm": 6.785774230957031, "learning_rate": 3.060344827586207e-05, "loss": 0.5379, "step": 60 }, { "epoch": 1.44, "grad_norm": 6.693228244781494, "learning_rate": 2.974137931034483e-05, "loss": 0.4483, "step": 62 }, { "epoch": 1.49, "grad_norm": 4.33690071105957, "learning_rate": 2.8879310344827588e-05, "loss": 0.5161, "step": 64 }, { "epoch": 1.53, "grad_norm": 7.381749629974365, "learning_rate": 2.8017241379310345e-05, "loss": 0.7829, "step": 66 }, { "epoch": 1.58, "grad_norm": 11.129793167114258, "learning_rate": 2.7155172413793105e-05, "loss": 0.7715, "step": 68 }, { "epoch": 1.63, "grad_norm": 11.77314281463623, "learning_rate": 2.6293103448275862e-05, "loss": 1.114, "step": 70 }, { "epoch": 1.67, "grad_norm": 7.2926130294799805, "learning_rate": 2.543103448275862e-05, "loss": 0.938, "step": 72 }, { "epoch": 1.72, "grad_norm": 5.774272441864014, "learning_rate": 2.456896551724138e-05, "loss": 0.7925, "step": 74 }, { "epoch": 1.77, "grad_norm": 9.42758560180664, "learning_rate": 2.370689655172414e-05, "loss": 0.6834, "step": 76 }, { "epoch": 1.81, "grad_norm": 7.223062038421631, "learning_rate": 2.2844827586206897e-05, "loss": 0.8257, "step": 78 }, { "epoch": 1.86, "grad_norm": 9.974703788757324, "learning_rate": 2.1982758620689654e-05, "loss": 0.5291, "step": 80 }, { "epoch": 1.91, "grad_norm": 11.898812294006348, "learning_rate": 2.1120689655172415e-05, "loss": 0.8295, "step": 82 }, { "epoch": 1.95, "grad_norm": 5.932551383972168, "learning_rate": 2.0258620689655172e-05, "loss": 0.5594, "step": 84 }, { "epoch": 2.0, "grad_norm": 8.01093864440918, "learning_rate": 1.9396551724137932e-05, "loss": 0.7166, "step": 86 }, { "epoch": 2.0, "eval_accuracy": 0.7790697674418605, "eval_f1_macro": 0.7326664355062412, "eval_f1_micro": 0.7790697674418605, "eval_f1_weighted": 0.7589375221752733, "eval_loss": 0.6218534708023071, "eval_precision_macro": 0.8907051282051283, "eval_precision_micro": 0.7790697674418605, "eval_precision_weighted": 0.831499701848539, "eval_recall_macro": 0.6805555555555556, "eval_recall_micro": 0.7790697674418605, "eval_recall_weighted": 0.7790697674418605, "eval_runtime": 0.4535, "eval_samples_per_second": 189.622, "eval_steps_per_second": 13.229, "step": 86 }, { "epoch": 2.05, "grad_norm": 4.2975664138793945, "learning_rate": 1.8534482758620693e-05, "loss": 0.5073, "step": 88 }, { "epoch": 2.09, "grad_norm": 8.290116310119629, "learning_rate": 1.767241379310345e-05, "loss": 0.606, "step": 90 }, { "epoch": 2.14, "grad_norm": 5.366226673126221, "learning_rate": 1.6810344827586207e-05, "loss": 0.4448, "step": 92 }, { "epoch": 2.19, "grad_norm": 6.743834495544434, "learning_rate": 1.5948275862068967e-05, "loss": 0.7529, "step": 94 }, { "epoch": 2.23, "grad_norm": 6.312230587005615, "learning_rate": 1.5086206896551724e-05, "loss": 0.662, "step": 96 }, { "epoch": 2.28, "grad_norm": 9.200089454650879, "learning_rate": 1.4224137931034485e-05, "loss": 0.9112, "step": 98 }, { "epoch": 2.33, "grad_norm": 3.5302746295928955, "learning_rate": 1.336206896551724e-05, "loss": 0.2405, "step": 100 }, { "epoch": 2.37, "grad_norm": 13.742111206054688, "learning_rate": 1.25e-05, "loss": 0.5303, "step": 102 }, { "epoch": 2.42, "grad_norm": 8.292695045471191, "learning_rate": 1.163793103448276e-05, "loss": 0.5544, "step": 104 }, { "epoch": 2.47, "grad_norm": 9.335094451904297, "learning_rate": 1.0775862068965516e-05, "loss": 0.7992, "step": 106 }, { "epoch": 2.51, "grad_norm": 4.947837829589844, "learning_rate": 9.913793103448277e-06, "loss": 0.4896, "step": 108 }, { "epoch": 2.56, "grad_norm": 11.157235145568848, "learning_rate": 9.051724137931036e-06, "loss": 0.6746, "step": 110 }, { "epoch": 2.6, "grad_norm": 10.61878776550293, "learning_rate": 8.189655172413793e-06, "loss": 0.6722, "step": 112 }, { "epoch": 2.65, "grad_norm": 4.7017621994018555, "learning_rate": 7.3275862068965514e-06, "loss": 0.5214, "step": 114 }, { "epoch": 2.7, "grad_norm": 3.746359348297119, "learning_rate": 6.465517241379311e-06, "loss": 0.592, "step": 116 }, { "epoch": 2.74, "grad_norm": 5.942258358001709, "learning_rate": 5.603448275862069e-06, "loss": 0.5589, "step": 118 }, { "epoch": 2.79, "grad_norm": 13.716927528381348, "learning_rate": 4.741379310344828e-06, "loss": 1.0046, "step": 120 }, { "epoch": 2.84, "grad_norm": 3.577460765838623, "learning_rate": 3.8793103448275865e-06, "loss": 0.4807, "step": 122 }, { "epoch": 2.88, "grad_norm": 6.835740566253662, "learning_rate": 3.017241379310345e-06, "loss": 0.5735, "step": 124 }, { "epoch": 2.93, "grad_norm": 7.47273588180542, "learning_rate": 2.1551724137931035e-06, "loss": 0.569, "step": 126 }, { "epoch": 2.98, "grad_norm": 6.011229991912842, "learning_rate": 1.293103448275862e-06, "loss": 0.3485, "step": 128 }, { "epoch": 3.0, "eval_accuracy": 0.7674418604651163, "eval_f1_macro": 0.726419878296146, "eval_f1_micro": 0.7674418604651162, "eval_f1_weighted": 0.7444926647483373, "eval_loss": 0.588494598865509, "eval_precision_macro": 0.8743946731234866, "eval_precision_micro": 0.7674418604651163, "eval_precision_weighted": 0.8185567881074384, "eval_recall_macro": 0.6858527131782945, "eval_recall_micro": 0.7674418604651163, "eval_recall_weighted": 0.7674418604651163, "eval_runtime": 0.4559, "eval_samples_per_second": 188.618, "eval_steps_per_second": 13.159, "step": 129 } ], "logging_steps": 2, "max_steps": 129, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 7.97406866688983e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }