{ "best_metric": 0.3891792595386505, "best_model_checkpoint": "autotrain-xblock-twitter-1/checkpoint-210", "epoch": 3.0, "eval_steps": 500, "global_step": 210, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "grad_norm": 13.318061828613281, "learning_rate": 4.7619047619047615e-06, "loss": 0.8613, "step": 3 }, { "epoch": 0.09, "grad_norm": 10.021459579467773, "learning_rate": 1.1904761904761905e-05, "loss": 0.8653, "step": 6 }, { "epoch": 0.13, "grad_norm": 9.035711288452148, "learning_rate": 1.9047619047619046e-05, "loss": 0.6439, "step": 9 }, { "epoch": 0.17, "grad_norm": 6.433827877044678, "learning_rate": 2.6190476190476192e-05, "loss": 0.6497, "step": 12 }, { "epoch": 0.21, "grad_norm": null, "learning_rate": 3.095238095238095e-05, "loss": 0.6613, "step": 15 }, { "epoch": 0.26, "grad_norm": 15.247386932373047, "learning_rate": 3.809523809523809e-05, "loss": 1.1368, "step": 18 }, { "epoch": 0.3, "grad_norm": 13.3652982711792, "learning_rate": 4.523809523809524e-05, "loss": 0.7526, "step": 21 }, { "epoch": 0.34, "grad_norm": 5.259400367736816, "learning_rate": 4.973544973544973e-05, "loss": 0.6684, "step": 24 }, { "epoch": 0.39, "grad_norm": 9.16520881652832, "learning_rate": 4.894179894179895e-05, "loss": 0.6681, "step": 27 }, { "epoch": 0.43, "grad_norm": 6.597314357757568, "learning_rate": 4.814814814814815e-05, "loss": 0.4601, "step": 30 }, { "epoch": 0.47, "grad_norm": 5.8065385818481445, "learning_rate": 4.7354497354497356e-05, "loss": 0.6782, "step": 33 }, { "epoch": 0.51, "grad_norm": 5.5024566650390625, "learning_rate": 4.656084656084656e-05, "loss": 0.5565, "step": 36 }, { "epoch": 0.56, "grad_norm": 6.190059661865234, "learning_rate": 4.576719576719577e-05, "loss": 0.52, "step": 39 }, { "epoch": 0.6, "grad_norm": 15.302659034729004, "learning_rate": 4.4973544973544974e-05, "loss": 0.7737, "step": 42 }, { "epoch": 0.64, "grad_norm": 4.845982074737549, "learning_rate": 
4.417989417989418e-05, "loss": 0.549, "step": 45 }, { "epoch": 0.69, "grad_norm": 5.293482780456543, "learning_rate": 4.3386243386243384e-05, "loss": 0.7392, "step": 48 }, { "epoch": 0.73, "grad_norm": 7.307839870452881, "learning_rate": 4.259259259259259e-05, "loss": 0.5303, "step": 51 }, { "epoch": 0.77, "grad_norm": 6.7569661140441895, "learning_rate": 4.17989417989418e-05, "loss": 0.4638, "step": 54 }, { "epoch": 0.81, "grad_norm": 6.70114803314209, "learning_rate": 4.100529100529101e-05, "loss": 0.4203, "step": 57 }, { "epoch": 0.86, "grad_norm": 11.536452293395996, "learning_rate": 4.021164021164021e-05, "loss": 0.6686, "step": 60 }, { "epoch": 0.9, "grad_norm": 7.2152934074401855, "learning_rate": 3.941798941798942e-05, "loss": 0.596, "step": 63 }, { "epoch": 0.94, "grad_norm": 4.001399040222168, "learning_rate": 3.862433862433863e-05, "loss": 0.3098, "step": 66 }, { "epoch": 0.99, "grad_norm": 7.049214839935303, "learning_rate": 3.7830687830687835e-05, "loss": 0.5719, "step": 69 }, { "epoch": 1.0, "eval_accuracy": 0.7050359712230215, "eval_auc": 0.6505034446210917, "eval_f1": 0.8144796380090497, "eval_loss": 0.5788741111755371, "eval_precision": 0.7563025210084033, "eval_recall": 0.8823529411764706, "eval_runtime": 5.1352, "eval_samples_per_second": 27.068, "eval_steps_per_second": 1.753, "step": 70 }, { "epoch": 1.03, "grad_norm": 8.485621452331543, "learning_rate": 3.7037037037037037e-05, "loss": 0.8083, "step": 72 }, { "epoch": 1.07, "grad_norm": 8.083047866821289, "learning_rate": 3.6243386243386245e-05, "loss": 0.5378, "step": 75 }, { "epoch": 1.11, "grad_norm": 4.286181926727295, "learning_rate": 3.5449735449735446e-05, "loss": 0.4745, "step": 78 }, { "epoch": 1.16, "grad_norm": 6.330202579498291, "learning_rate": 3.465608465608466e-05, "loss": 0.4858, "step": 81 }, { "epoch": 1.2, "grad_norm": 5.901049613952637, "learning_rate": 3.386243386243386e-05, "loss": 0.4534, "step": 84 }, { "epoch": 1.24, "grad_norm": 4.458903789520264, "learning_rate": 
3.306878306878307e-05, "loss": 0.4565, "step": 87 }, { "epoch": 1.29, "grad_norm": 8.628113746643066, "learning_rate": 3.227513227513227e-05, "loss": 0.5021, "step": 90 }, { "epoch": 1.33, "grad_norm": 12.450860023498535, "learning_rate": 3.148148148148148e-05, "loss": 0.6712, "step": 93 }, { "epoch": 1.37, "grad_norm": 6.316427230834961, "learning_rate": 3.068783068783069e-05, "loss": 0.3929, "step": 96 }, { "epoch": 1.41, "grad_norm": 7.734628200531006, "learning_rate": 2.9894179894179897e-05, "loss": 0.5408, "step": 99 }, { "epoch": 1.46, "grad_norm": 6.100373268127441, "learning_rate": 2.91005291005291e-05, "loss": 0.6207, "step": 102 }, { "epoch": 1.5, "grad_norm": 6.035886764526367, "learning_rate": 2.830687830687831e-05, "loss": 0.6811, "step": 105 }, { "epoch": 1.54, "grad_norm": 5.461982250213623, "learning_rate": 2.7513227513227512e-05, "loss": 0.5054, "step": 108 }, { "epoch": 1.59, "grad_norm": 8.910785675048828, "learning_rate": 2.6719576719576723e-05, "loss": 0.6739, "step": 111 }, { "epoch": 1.63, "grad_norm": 4.59702205657959, "learning_rate": 2.5925925925925925e-05, "loss": 0.4551, "step": 114 }, { "epoch": 1.67, "grad_norm": 2.666771173477173, "learning_rate": 2.5132275132275137e-05, "loss": 0.3917, "step": 117 }, { "epoch": 1.71, "grad_norm": 3.653053045272827, "learning_rate": 2.4338624338624338e-05, "loss": 0.4881, "step": 120 }, { "epoch": 1.76, "grad_norm": 1.8620728254318237, "learning_rate": 2.3544973544973546e-05, "loss": 0.2256, "step": 123 }, { "epoch": 1.8, "grad_norm": 15.068528175354004, "learning_rate": 2.275132275132275e-05, "loss": 0.8185, "step": 126 }, { "epoch": 1.84, "grad_norm": 2.865483045578003, "learning_rate": 2.1957671957671956e-05, "loss": 0.6451, "step": 129 }, { "epoch": 1.89, "grad_norm": 1.9436465501785278, "learning_rate": 2.1164021164021164e-05, "loss": 0.294, "step": 132 }, { "epoch": 1.93, "grad_norm": 2.666163921356201, "learning_rate": 2.037037037037037e-05, "loss": 0.4637, "step": 135 }, { "epoch": 1.97, 
"grad_norm": 3.914135217666626, "learning_rate": 1.9576719576719577e-05, "loss": 0.4303, "step": 138 }, { "epoch": 2.0, "eval_accuracy": 0.7841726618705036, "eval_auc": 0.8285638579756226, "eval_f1": 0.8684210526315789, "eval_loss": 0.4182623028755188, "eval_precision": 0.7857142857142857, "eval_recall": 0.9705882352941176, "eval_runtime": 5.2653, "eval_samples_per_second": 26.399, "eval_steps_per_second": 1.709, "step": 140 }, { "epoch": 2.01, "grad_norm": 6.129709243774414, "learning_rate": 1.8783068783068782e-05, "loss": 0.5329, "step": 141 }, { "epoch": 2.06, "grad_norm": 6.085241317749023, "learning_rate": 1.798941798941799e-05, "loss": 0.4188, "step": 144 }, { "epoch": 2.1, "grad_norm": 7.526946067810059, "learning_rate": 1.7195767195767195e-05, "loss": 0.4073, "step": 147 }, { "epoch": 2.14, "grad_norm": 3.7981059551239014, "learning_rate": 1.6402116402116404e-05, "loss": 0.3207, "step": 150 }, { "epoch": 2.19, "grad_norm": 5.229352951049805, "learning_rate": 1.560846560846561e-05, "loss": 0.5247, "step": 153 }, { "epoch": 2.23, "grad_norm": 3.7582530975341797, "learning_rate": 1.4814814814814815e-05, "loss": 0.2936, "step": 156 }, { "epoch": 2.27, "grad_norm": 6.107592582702637, "learning_rate": 1.4021164021164022e-05, "loss": 0.4513, "step": 159 }, { "epoch": 2.31, "grad_norm": 8.501120567321777, "learning_rate": 1.3227513227513228e-05, "loss": 0.4808, "step": 162 }, { "epoch": 2.36, "grad_norm": 3.4457428455352783, "learning_rate": 1.2433862433862433e-05, "loss": 0.27, "step": 165 }, { "epoch": 2.4, "grad_norm": 5.215019702911377, "learning_rate": 1.164021164021164e-05, "loss": 0.3364, "step": 168 }, { "epoch": 2.44, "grad_norm": 6.2070794105529785, "learning_rate": 1.0846560846560846e-05, "loss": 0.4075, "step": 171 }, { "epoch": 2.49, "grad_norm": 5.491907119750977, "learning_rate": 1.0052910052910053e-05, "loss": 0.5218, "step": 174 }, { "epoch": 2.53, "grad_norm": 7.795518398284912, "learning_rate": 9.259259259259259e-06, "loss": 0.329, "step": 177 }, 
{ "epoch": 2.57, "grad_norm": 3.6145973205566406, "learning_rate": 8.465608465608466e-06, "loss": 0.3696, "step": 180 }, { "epoch": 2.61, "grad_norm": 5.689661502838135, "learning_rate": 7.671957671957672e-06, "loss": 0.6274, "step": 183 }, { "epoch": 2.66, "grad_norm": 4.555980682373047, "learning_rate": 6.878306878306878e-06, "loss": 0.3477, "step": 186 }, { "epoch": 2.7, "grad_norm": 6.758476257324219, "learning_rate": 6.0846560846560845e-06, "loss": 0.3809, "step": 189 }, { "epoch": 2.74, "grad_norm": 3.1286351680755615, "learning_rate": 5.291005291005291e-06, "loss": 0.35, "step": 192 }, { "epoch": 2.79, "grad_norm": 8.514357566833496, "learning_rate": 4.497354497354498e-06, "loss": 0.5799, "step": 195 }, { "epoch": 2.83, "grad_norm": 4.899698734283447, "learning_rate": 3.7037037037037037e-06, "loss": 0.5754, "step": 198 }, { "epoch": 2.87, "grad_norm": 9.083395004272461, "learning_rate": 2.91005291005291e-06, "loss": 0.3895, "step": 201 }, { "epoch": 2.91, "grad_norm": 2.1063151359558105, "learning_rate": 2.1164021164021164e-06, "loss": 0.2194, "step": 204 }, { "epoch": 2.96, "grad_norm": 5.722929954528809, "learning_rate": 1.3227513227513228e-06, "loss": 0.2762, "step": 207 }, { "epoch": 3.0, "grad_norm": 1.7745004892349243, "learning_rate": 5.291005291005291e-07, "loss": 0.1982, "step": 210 }, { "epoch": 3.0, "eval_accuracy": 0.8201438848920863, "eval_auc": 0.8559883412824589, "eval_f1": 0.8878923766816144, "eval_loss": 0.3891792595386505, "eval_precision": 0.8181818181818182, "eval_recall": 0.9705882352941176, "eval_runtime": 5.0466, "eval_samples_per_second": 27.543, "eval_steps_per_second": 1.783, "step": 210 } ], "logging_steps": 3, "max_steps": 210, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 1.2925663867713946e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }