|
{ |
|
"best_metric": 0.588494598865509, |
|
"best_model_checkpoint": "autotrain-f8u3m-1w0uc/checkpoint-129", |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 129, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 17.909360885620117, |
|
"learning_rate": 3.846153846153847e-06, |
|
"loss": 1.7009, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 13.120195388793945, |
|
"learning_rate": 7.692307692307694e-06, |
|
"loss": 1.4648, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 14.559846878051758, |
|
"learning_rate": 1.5384615384615387e-05, |
|
"loss": 1.4937, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 8.80319881439209, |
|
"learning_rate": 2.307692307692308e-05, |
|
"loss": 1.1978, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 10.07514476776123, |
|
"learning_rate": 3.0769230769230774e-05, |
|
"loss": 1.2436, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 5.79322624206543, |
|
"learning_rate": 3.846153846153846e-05, |
|
"loss": 0.8964, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 15.918188095092773, |
|
"learning_rate": 4.615384615384616e-05, |
|
"loss": 1.5669, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 8.671540260314941, |
|
"learning_rate": 4.9568965517241384e-05, |
|
"loss": 0.9671, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 6.892113208770752, |
|
"learning_rate": 4.870689655172414e-05, |
|
"loss": 0.9687, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 6.245675563812256, |
|
"learning_rate": 4.78448275862069e-05, |
|
"loss": 0.5675, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 11.2545804977417, |
|
"learning_rate": 4.698275862068966e-05, |
|
"loss": 1.2117, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 5.111425399780273, |
|
"learning_rate": 4.612068965517242e-05, |
|
"loss": 0.9048, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 5.0768232345581055, |
|
"learning_rate": 4.5258620689655176e-05, |
|
"loss": 0.7902, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 8.407731056213379, |
|
"learning_rate": 4.4396551724137933e-05, |
|
"loss": 0.941, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 12.011809349060059, |
|
"learning_rate": 4.353448275862069e-05, |
|
"loss": 1.1812, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 6.191880702972412, |
|
"learning_rate": 4.267241379310345e-05, |
|
"loss": 0.7905, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 5.270814418792725, |
|
"learning_rate": 4.1810344827586205e-05, |
|
"loss": 0.7904, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 5.346973896026611, |
|
"learning_rate": 4.094827586206897e-05, |
|
"loss": 0.6126, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 6.431284427642822, |
|
"learning_rate": 4.0086206896551726e-05, |
|
"loss": 0.9431, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 5.295109748840332, |
|
"learning_rate": 3.922413793103448e-05, |
|
"loss": 0.8013, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 7.394813060760498, |
|
"learning_rate": 3.8362068965517246e-05, |
|
"loss": 0.5561, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.6511627906976745, |
|
"eval_f1_macro": 0.5219298245614035, |
|
"eval_f1_micro": 0.6511627906976745, |
|
"eval_f1_weighted": 0.5661464708282333, |
|
"eval_loss": 0.8391300439834595, |
|
"eval_precision_macro": 0.5958528951486698, |
|
"eval_precision_micro": 0.6511627906976745, |
|
"eval_precision_weighted": 0.5547548859045747, |
|
"eval_recall_macro": 0.5166666666666667, |
|
"eval_recall_micro": 0.6511627906976745, |
|
"eval_recall_weighted": 0.6511627906976745, |
|
"eval_runtime": 0.5055, |
|
"eval_samples_per_second": 170.128, |
|
"eval_steps_per_second": 11.869, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 12.433929443359375, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.8422, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 9.662508010864258, |
|
"learning_rate": 3.663793103448276e-05, |
|
"loss": 0.8562, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"grad_norm": 6.504687309265137, |
|
"learning_rate": 3.5775862068965524e-05, |
|
"loss": 0.6381, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"grad_norm": 12.690322875976562, |
|
"learning_rate": 3.4913793103448275e-05, |
|
"loss": 0.5752, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"grad_norm": 5.9632792472839355, |
|
"learning_rate": 3.405172413793103e-05, |
|
"loss": 0.6849, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"grad_norm": 8.237900733947754, |
|
"learning_rate": 3.3189655172413796e-05, |
|
"loss": 0.6422, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"grad_norm": 5.977579593658447, |
|
"learning_rate": 3.232758620689655e-05, |
|
"loss": 0.6344, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"grad_norm": 6.388967037200928, |
|
"learning_rate": 3.146551724137931e-05, |
|
"loss": 0.6051, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 6.785774230957031, |
|
"learning_rate": 3.060344827586207e-05, |
|
"loss": 0.5379, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"grad_norm": 6.693228244781494, |
|
"learning_rate": 2.974137931034483e-05, |
|
"loss": 0.4483, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"grad_norm": 4.33690071105957, |
|
"learning_rate": 2.8879310344827588e-05, |
|
"loss": 0.5161, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"grad_norm": 7.381749629974365, |
|
"learning_rate": 2.8017241379310345e-05, |
|
"loss": 0.7829, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"grad_norm": 11.129793167114258, |
|
"learning_rate": 2.7155172413793105e-05, |
|
"loss": 0.7715, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"grad_norm": 11.77314281463623, |
|
"learning_rate": 2.6293103448275862e-05, |
|
"loss": 1.114, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"grad_norm": 7.2926130294799805, |
|
"learning_rate": 2.543103448275862e-05, |
|
"loss": 0.938, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"grad_norm": 5.774272441864014, |
|
"learning_rate": 2.456896551724138e-05, |
|
"loss": 0.7925, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"grad_norm": 9.42758560180664, |
|
"learning_rate": 2.370689655172414e-05, |
|
"loss": 0.6834, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"grad_norm": 7.223062038421631, |
|
"learning_rate": 2.2844827586206897e-05, |
|
"loss": 0.8257, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"grad_norm": 9.974703788757324, |
|
"learning_rate": 2.1982758620689654e-05, |
|
"loss": 0.5291, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"grad_norm": 11.898812294006348, |
|
"learning_rate": 2.1120689655172415e-05, |
|
"loss": 0.8295, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"grad_norm": 5.932551383972168, |
|
"learning_rate": 2.0258620689655172e-05, |
|
"loss": 0.5594, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 8.01093864440918, |
|
"learning_rate": 1.9396551724137932e-05, |
|
"loss": 0.7166, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7790697674418605, |
|
"eval_f1_macro": 0.7326664355062412, |
|
"eval_f1_micro": 0.7790697674418605, |
|
"eval_f1_weighted": 0.7589375221752733, |
|
"eval_loss": 0.6218534708023071, |
|
"eval_precision_macro": 0.8907051282051283, |
|
"eval_precision_micro": 0.7790697674418605, |
|
"eval_precision_weighted": 0.831499701848539, |
|
"eval_recall_macro": 0.6805555555555556, |
|
"eval_recall_micro": 0.7790697674418605, |
|
"eval_recall_weighted": 0.7790697674418605, |
|
"eval_runtime": 0.4535, |
|
"eval_samples_per_second": 189.622, |
|
"eval_steps_per_second": 13.229, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"grad_norm": 4.2975664138793945, |
|
"learning_rate": 1.8534482758620693e-05, |
|
"loss": 0.5073, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"grad_norm": 8.290116310119629, |
|
"learning_rate": 1.767241379310345e-05, |
|
"loss": 0.606, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"grad_norm": 5.366226673126221, |
|
"learning_rate": 1.6810344827586207e-05, |
|
"loss": 0.4448, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"grad_norm": 6.743834495544434, |
|
"learning_rate": 1.5948275862068967e-05, |
|
"loss": 0.7529, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"grad_norm": 6.312230587005615, |
|
"learning_rate": 1.5086206896551724e-05, |
|
"loss": 0.662, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"grad_norm": 9.200089454650879, |
|
"learning_rate": 1.4224137931034485e-05, |
|
"loss": 0.9112, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"grad_norm": 3.5302746295928955, |
|
"learning_rate": 1.336206896551724e-05, |
|
"loss": 0.2405, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"grad_norm": 13.742111206054688, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.5303, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"grad_norm": 8.292695045471191, |
|
"learning_rate": 1.163793103448276e-05, |
|
"loss": 0.5544, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"grad_norm": 9.335094451904297, |
|
"learning_rate": 1.0775862068965516e-05, |
|
"loss": 0.7992, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"grad_norm": 4.947837829589844, |
|
"learning_rate": 9.913793103448277e-06, |
|
"loss": 0.4896, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"grad_norm": 11.157235145568848, |
|
"learning_rate": 9.051724137931036e-06, |
|
"loss": 0.6746, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"grad_norm": 10.61878776550293, |
|
"learning_rate": 8.189655172413793e-06, |
|
"loss": 0.6722, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"grad_norm": 4.7017621994018555, |
|
"learning_rate": 7.3275862068965514e-06, |
|
"loss": 0.5214, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"grad_norm": 3.746359348297119, |
|
"learning_rate": 6.465517241379311e-06, |
|
"loss": 0.592, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"grad_norm": 5.942258358001709, |
|
"learning_rate": 5.603448275862069e-06, |
|
"loss": 0.5589, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"grad_norm": 13.716927528381348, |
|
"learning_rate": 4.741379310344828e-06, |
|
"loss": 1.0046, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"grad_norm": 3.577460765838623, |
|
"learning_rate": 3.8793103448275865e-06, |
|
"loss": 0.4807, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"grad_norm": 6.835740566253662, |
|
"learning_rate": 3.017241379310345e-06, |
|
"loss": 0.5735, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"grad_norm": 7.47273588180542, |
|
"learning_rate": 2.1551724137931035e-06, |
|
"loss": 0.569, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"grad_norm": 6.011229991912842, |
|
"learning_rate": 1.293103448275862e-06, |
|
"loss": 0.3485, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.7674418604651163, |
|
"eval_f1_macro": 0.726419878296146, |
|
"eval_f1_micro": 0.7674418604651162, |
|
"eval_f1_weighted": 0.7444926647483373, |
|
"eval_loss": 0.588494598865509, |
|
"eval_precision_macro": 0.8743946731234866, |
|
"eval_precision_micro": 0.7674418604651163, |
|
"eval_precision_weighted": 0.8185567881074384, |
|
"eval_recall_macro": 0.6858527131782945, |
|
"eval_recall_micro": 0.7674418604651163, |
|
"eval_recall_weighted": 0.7674418604651163, |
|
"eval_runtime": 0.4559, |
|
"eval_samples_per_second": 188.618, |
|
"eval_steps_per_second": 13.159, |
|
"step": 129 |
|
} |
|
], |
|
"logging_steps": 2, |
|
"max_steps": 129, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 7.97406866688983e+16, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|