|
{ |
|
"best_metric": 0.3891792595386505, |
|
"best_model_checkpoint": "autotrain-xblock-twitter-1/checkpoint-210", |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 210, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 13.318061828613281, |
|
"learning_rate": 4.7619047619047615e-06, |
|
"loss": 0.8613, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 10.021459579467773, |
|
"learning_rate": 1.1904761904761905e-05, |
|
"loss": 0.8653, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 9.035711288452148, |
|
"learning_rate": 1.9047619047619046e-05, |
|
"loss": 0.6439, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 6.433827877044678, |
|
"learning_rate": 2.6190476190476192e-05, |
|
"loss": 0.6497, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": NaN, |
|
"learning_rate": 3.095238095238095e-05, |
|
"loss": 0.6613, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 15.247386932373047, |
|
"learning_rate": 3.809523809523809e-05, |
|
"loss": 1.1368, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 13.3652982711792, |
|
"learning_rate": 4.523809523809524e-05, |
|
"loss": 0.7526, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 5.259400367736816, |
|
"learning_rate": 4.973544973544973e-05, |
|
"loss": 0.6684, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 9.16520881652832, |
|
"learning_rate": 4.894179894179895e-05, |
|
"loss": 0.6681, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 6.597314357757568, |
|
"learning_rate": 4.814814814814815e-05, |
|
"loss": 0.4601, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 5.8065385818481445, |
|
"learning_rate": 4.7354497354497356e-05, |
|
"loss": 0.6782, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 5.5024566650390625, |
|
"learning_rate": 4.656084656084656e-05, |
|
"loss": 0.5565, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 6.190059661865234, |
|
"learning_rate": 4.576719576719577e-05, |
|
"loss": 0.52, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 15.302659034729004, |
|
"learning_rate": 4.4973544973544974e-05, |
|
"loss": 0.7737, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 4.845982074737549, |
|
"learning_rate": 4.417989417989418e-05, |
|
"loss": 0.549, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 5.293482780456543, |
|
"learning_rate": 4.3386243386243384e-05, |
|
"loss": 0.7392, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 7.307839870452881, |
|
"learning_rate": 4.259259259259259e-05, |
|
"loss": 0.5303, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 6.7569661140441895, |
|
"learning_rate": 4.17989417989418e-05, |
|
"loss": 0.4638, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 6.70114803314209, |
|
"learning_rate": 4.100529100529101e-05, |
|
"loss": 0.4203, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 11.536452293395996, |
|
"learning_rate": 4.021164021164021e-05, |
|
"loss": 0.6686, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 7.2152934074401855, |
|
"learning_rate": 3.941798941798942e-05, |
|
"loss": 0.596, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 4.001399040222168, |
|
"learning_rate": 3.862433862433863e-05, |
|
"loss": 0.3098, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 7.049214839935303, |
|
"learning_rate": 3.7830687830687835e-05, |
|
"loss": 0.5719, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7050359712230215, |
|
"eval_auc": 0.6505034446210917, |
|
"eval_f1": 0.8144796380090497, |
|
"eval_loss": 0.5788741111755371, |
|
"eval_precision": 0.7563025210084033, |
|
"eval_recall": 0.8823529411764706, |
|
"eval_runtime": 5.1352, |
|
"eval_samples_per_second": 27.068, |
|
"eval_steps_per_second": 1.753, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 8.485621452331543, |
|
"learning_rate": 3.7037037037037037e-05, |
|
"loss": 0.8083, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 8.083047866821289, |
|
"learning_rate": 3.6243386243386245e-05, |
|
"loss": 0.5378, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"grad_norm": 4.286181926727295, |
|
"learning_rate": 3.5449735449735446e-05, |
|
"loss": 0.4745, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"grad_norm": 6.330202579498291, |
|
"learning_rate": 3.465608465608466e-05, |
|
"loss": 0.4858, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 5.901049613952637, |
|
"learning_rate": 3.386243386243386e-05, |
|
"loss": 0.4534, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"grad_norm": 4.458903789520264, |
|
"learning_rate": 3.306878306878307e-05, |
|
"loss": 0.4565, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"grad_norm": 8.628113746643066, |
|
"learning_rate": 3.227513227513227e-05, |
|
"loss": 0.5021, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"grad_norm": 12.450860023498535, |
|
"learning_rate": 3.148148148148148e-05, |
|
"loss": 0.6712, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"grad_norm": 6.316427230834961, |
|
"learning_rate": 3.068783068783069e-05, |
|
"loss": 0.3929, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"grad_norm": 7.734628200531006, |
|
"learning_rate": 2.9894179894179897e-05, |
|
"loss": 0.5408, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"grad_norm": 6.100373268127441, |
|
"learning_rate": 2.91005291005291e-05, |
|
"loss": 0.6207, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 6.035886764526367, |
|
"learning_rate": 2.830687830687831e-05, |
|
"loss": 0.6811, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"grad_norm": 5.461982250213623, |
|
"learning_rate": 2.7513227513227512e-05, |
|
"loss": 0.5054, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"grad_norm": 8.910785675048828, |
|
"learning_rate": 2.6719576719576723e-05, |
|
"loss": 0.6739, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"grad_norm": 4.59702205657959, |
|
"learning_rate": 2.5925925925925925e-05, |
|
"loss": 0.4551, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"grad_norm": 2.666771173477173, |
|
"learning_rate": 2.5132275132275137e-05, |
|
"loss": 0.3917, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"grad_norm": 3.653053045272827, |
|
"learning_rate": 2.4338624338624338e-05, |
|
"loss": 0.4881, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"grad_norm": 1.8620728254318237, |
|
"learning_rate": 2.3544973544973546e-05, |
|
"loss": 0.2256, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 15.068528175354004, |
|
"learning_rate": 2.275132275132275e-05, |
|
"loss": 0.8185, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"grad_norm": 2.865483045578003, |
|
"learning_rate": 2.1957671957671956e-05, |
|
"loss": 0.6451, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"grad_norm": 1.9436465501785278, |
|
"learning_rate": 2.1164021164021164e-05, |
|
"loss": 0.294, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"grad_norm": 2.666163921356201, |
|
"learning_rate": 2.037037037037037e-05, |
|
"loss": 0.4637, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"grad_norm": 3.914135217666626, |
|
"learning_rate": 1.9576719576719577e-05, |
|
"loss": 0.4303, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7841726618705036, |
|
"eval_auc": 0.8285638579756226, |
|
"eval_f1": 0.8684210526315789, |
|
"eval_loss": 0.4182623028755188, |
|
"eval_precision": 0.7857142857142857, |
|
"eval_recall": 0.9705882352941176, |
|
"eval_runtime": 5.2653, |
|
"eval_samples_per_second": 26.399, |
|
"eval_steps_per_second": 1.709, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"grad_norm": 6.129709243774414, |
|
"learning_rate": 1.8783068783068782e-05, |
|
"loss": 0.5329, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"grad_norm": 6.085241317749023, |
|
"learning_rate": 1.798941798941799e-05, |
|
"loss": 0.4188, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"grad_norm": 7.526946067810059, |
|
"learning_rate": 1.7195767195767195e-05, |
|
"loss": 0.4073, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"grad_norm": 3.7981059551239014, |
|
"learning_rate": 1.6402116402116404e-05, |
|
"loss": 0.3207, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"grad_norm": 5.229352951049805, |
|
"learning_rate": 1.560846560846561e-05, |
|
"loss": 0.5247, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"grad_norm": 3.7582530975341797, |
|
"learning_rate": 1.4814814814814815e-05, |
|
"loss": 0.2936, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"grad_norm": 6.107592582702637, |
|
"learning_rate": 1.4021164021164022e-05, |
|
"loss": 0.4513, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"grad_norm": 8.501120567321777, |
|
"learning_rate": 1.3227513227513228e-05, |
|
"loss": 0.4808, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"grad_norm": 3.4457428455352783, |
|
"learning_rate": 1.2433862433862433e-05, |
|
"loss": 0.27, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"grad_norm": 5.215019702911377, |
|
"learning_rate": 1.164021164021164e-05, |
|
"loss": 0.3364, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"grad_norm": 6.2070794105529785, |
|
"learning_rate": 1.0846560846560846e-05, |
|
"loss": 0.4075, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"grad_norm": 5.491907119750977, |
|
"learning_rate": 1.0052910052910053e-05, |
|
"loss": 0.5218, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"grad_norm": 7.795518398284912, |
|
"learning_rate": 9.259259259259259e-06, |
|
"loss": 0.329, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"grad_norm": 3.6145973205566406, |
|
"learning_rate": 8.465608465608466e-06, |
|
"loss": 0.3696, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"grad_norm": 5.689661502838135, |
|
"learning_rate": 7.671957671957672e-06, |
|
"loss": 0.6274, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"grad_norm": 4.555980682373047, |
|
"learning_rate": 6.878306878306878e-06, |
|
"loss": 0.3477, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"grad_norm": 6.758476257324219, |
|
"learning_rate": 6.0846560846560845e-06, |
|
"loss": 0.3809, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"grad_norm": 3.1286351680755615, |
|
"learning_rate": 5.291005291005291e-06, |
|
"loss": 0.35, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"grad_norm": 8.514357566833496, |
|
"learning_rate": 4.497354497354498e-06, |
|
"loss": 0.5799, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"grad_norm": 4.899698734283447, |
|
"learning_rate": 3.7037037037037037e-06, |
|
"loss": 0.5754, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"grad_norm": 9.083395004272461, |
|
"learning_rate": 2.91005291005291e-06, |
|
"loss": 0.3895, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"grad_norm": 2.1063151359558105, |
|
"learning_rate": 2.1164021164021164e-06, |
|
"loss": 0.2194, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"grad_norm": 5.722929954528809, |
|
"learning_rate": 1.3227513227513228e-06, |
|
"loss": 0.2762, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 1.7745004892349243, |
|
"learning_rate": 5.291005291005291e-07, |
|
"loss": 0.1982, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8201438848920863, |
|
"eval_auc": 0.8559883412824589, |
|
"eval_f1": 0.8878923766816144, |
|
"eval_loss": 0.3891792595386505, |
|
"eval_precision": 0.8181818181818182, |
|
"eval_recall": 0.9705882352941176, |
|
"eval_runtime": 5.0466, |
|
"eval_samples_per_second": 27.543, |
|
"eval_steps_per_second": 1.783, |
|
"step": 210 |
|
} |
|
], |
|
"logging_steps": 3, |
|
"max_steps": 210, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 1.2925663867713946e+17, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|