{
"best_metric": 0.3891792595386505,
"best_model_checkpoint": "autotrain-xblock-twitter-1/checkpoint-210",
"epoch": 3.0,
"eval_steps": 500,
"global_step": 210,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.04,
"grad_norm": 13.318061828613281,
"learning_rate": 4.7619047619047615e-06,
"loss": 0.8613,
"step": 3
},
{
"epoch": 0.09,
"grad_norm": 10.021459579467773,
"learning_rate": 1.1904761904761905e-05,
"loss": 0.8653,
"step": 6
},
{
"epoch": 0.13,
"grad_norm": 9.035711288452148,
"learning_rate": 1.9047619047619046e-05,
"loss": 0.6439,
"step": 9
},
{
"epoch": 0.17,
"grad_norm": 6.433827877044678,
"learning_rate": 2.6190476190476192e-05,
"loss": 0.6497,
"step": 12
},
{
"epoch": 0.21,
"grad_norm": NaN,
"learning_rate": 3.095238095238095e-05,
"loss": 0.6613,
"step": 15
},
{
"epoch": 0.26,
"grad_norm": 15.247386932373047,
"learning_rate": 3.809523809523809e-05,
"loss": 1.1368,
"step": 18
},
{
"epoch": 0.3,
"grad_norm": 13.3652982711792,
"learning_rate": 4.523809523809524e-05,
"loss": 0.7526,
"step": 21
},
{
"epoch": 0.34,
"grad_norm": 5.259400367736816,
"learning_rate": 4.973544973544973e-05,
"loss": 0.6684,
"step": 24
},
{
"epoch": 0.39,
"grad_norm": 9.16520881652832,
"learning_rate": 4.894179894179895e-05,
"loss": 0.6681,
"step": 27
},
{
"epoch": 0.43,
"grad_norm": 6.597314357757568,
"learning_rate": 4.814814814814815e-05,
"loss": 0.4601,
"step": 30
},
{
"epoch": 0.47,
"grad_norm": 5.8065385818481445,
"learning_rate": 4.7354497354497356e-05,
"loss": 0.6782,
"step": 33
},
{
"epoch": 0.51,
"grad_norm": 5.5024566650390625,
"learning_rate": 4.656084656084656e-05,
"loss": 0.5565,
"step": 36
},
{
"epoch": 0.56,
"grad_norm": 6.190059661865234,
"learning_rate": 4.576719576719577e-05,
"loss": 0.52,
"step": 39
},
{
"epoch": 0.6,
"grad_norm": 15.302659034729004,
"learning_rate": 4.4973544973544974e-05,
"loss": 0.7737,
"step": 42
},
{
"epoch": 0.64,
"grad_norm": 4.845982074737549,
"learning_rate": 4.417989417989418e-05,
"loss": 0.549,
"step": 45
},
{
"epoch": 0.69,
"grad_norm": 5.293482780456543,
"learning_rate": 4.3386243386243384e-05,
"loss": 0.7392,
"step": 48
},
{
"epoch": 0.73,
"grad_norm": 7.307839870452881,
"learning_rate": 4.259259259259259e-05,
"loss": 0.5303,
"step": 51
},
{
"epoch": 0.77,
"grad_norm": 6.7569661140441895,
"learning_rate": 4.17989417989418e-05,
"loss": 0.4638,
"step": 54
},
{
"epoch": 0.81,
"grad_norm": 6.70114803314209,
"learning_rate": 4.100529100529101e-05,
"loss": 0.4203,
"step": 57
},
{
"epoch": 0.86,
"grad_norm": 11.536452293395996,
"learning_rate": 4.021164021164021e-05,
"loss": 0.6686,
"step": 60
},
{
"epoch": 0.9,
"grad_norm": 7.2152934074401855,
"learning_rate": 3.941798941798942e-05,
"loss": 0.596,
"step": 63
},
{
"epoch": 0.94,
"grad_norm": 4.001399040222168,
"learning_rate": 3.862433862433863e-05,
"loss": 0.3098,
"step": 66
},
{
"epoch": 0.99,
"grad_norm": 7.049214839935303,
"learning_rate": 3.7830687830687835e-05,
"loss": 0.5719,
"step": 69
},
{
"epoch": 1.0,
"eval_accuracy": 0.7050359712230215,
"eval_auc": 0.6505034446210917,
"eval_f1": 0.8144796380090497,
"eval_loss": 0.5788741111755371,
"eval_precision": 0.7563025210084033,
"eval_recall": 0.8823529411764706,
"eval_runtime": 5.1352,
"eval_samples_per_second": 27.068,
"eval_steps_per_second": 1.753,
"step": 70
},
{
"epoch": 1.03,
"grad_norm": 8.485621452331543,
"learning_rate": 3.7037037037037037e-05,
"loss": 0.8083,
"step": 72
},
{
"epoch": 1.07,
"grad_norm": 8.083047866821289,
"learning_rate": 3.6243386243386245e-05,
"loss": 0.5378,
"step": 75
},
{
"epoch": 1.11,
"grad_norm": 4.286181926727295,
"learning_rate": 3.5449735449735446e-05,
"loss": 0.4745,
"step": 78
},
{
"epoch": 1.16,
"grad_norm": 6.330202579498291,
"learning_rate": 3.465608465608466e-05,
"loss": 0.4858,
"step": 81
},
{
"epoch": 1.2,
"grad_norm": 5.901049613952637,
"learning_rate": 3.386243386243386e-05,
"loss": 0.4534,
"step": 84
},
{
"epoch": 1.24,
"grad_norm": 4.458903789520264,
"learning_rate": 3.306878306878307e-05,
"loss": 0.4565,
"step": 87
},
{
"epoch": 1.29,
"grad_norm": 8.628113746643066,
"learning_rate": 3.227513227513227e-05,
"loss": 0.5021,
"step": 90
},
{
"epoch": 1.33,
"grad_norm": 12.450860023498535,
"learning_rate": 3.148148148148148e-05,
"loss": 0.6712,
"step": 93
},
{
"epoch": 1.37,
"grad_norm": 6.316427230834961,
"learning_rate": 3.068783068783069e-05,
"loss": 0.3929,
"step": 96
},
{
"epoch": 1.41,
"grad_norm": 7.734628200531006,
"learning_rate": 2.9894179894179897e-05,
"loss": 0.5408,
"step": 99
},
{
"epoch": 1.46,
"grad_norm": 6.100373268127441,
"learning_rate": 2.91005291005291e-05,
"loss": 0.6207,
"step": 102
},
{
"epoch": 1.5,
"grad_norm": 6.035886764526367,
"learning_rate": 2.830687830687831e-05,
"loss": 0.6811,
"step": 105
},
{
"epoch": 1.54,
"grad_norm": 5.461982250213623,
"learning_rate": 2.7513227513227512e-05,
"loss": 0.5054,
"step": 108
},
{
"epoch": 1.59,
"grad_norm": 8.910785675048828,
"learning_rate": 2.6719576719576723e-05,
"loss": 0.6739,
"step": 111
},
{
"epoch": 1.63,
"grad_norm": 4.59702205657959,
"learning_rate": 2.5925925925925925e-05,
"loss": 0.4551,
"step": 114
},
{
"epoch": 1.67,
"grad_norm": 2.666771173477173,
"learning_rate": 2.5132275132275137e-05,
"loss": 0.3917,
"step": 117
},
{
"epoch": 1.71,
"grad_norm": 3.653053045272827,
"learning_rate": 2.4338624338624338e-05,
"loss": 0.4881,
"step": 120
},
{
"epoch": 1.76,
"grad_norm": 1.8620728254318237,
"learning_rate": 2.3544973544973546e-05,
"loss": 0.2256,
"step": 123
},
{
"epoch": 1.8,
"grad_norm": 15.068528175354004,
"learning_rate": 2.275132275132275e-05,
"loss": 0.8185,
"step": 126
},
{
"epoch": 1.84,
"grad_norm": 2.865483045578003,
"learning_rate": 2.1957671957671956e-05,
"loss": 0.6451,
"step": 129
},
{
"epoch": 1.89,
"grad_norm": 1.9436465501785278,
"learning_rate": 2.1164021164021164e-05,
"loss": 0.294,
"step": 132
},
{
"epoch": 1.93,
"grad_norm": 2.666163921356201,
"learning_rate": 2.037037037037037e-05,
"loss": 0.4637,
"step": 135
},
{
"epoch": 1.97,
"grad_norm": 3.914135217666626,
"learning_rate": 1.9576719576719577e-05,
"loss": 0.4303,
"step": 138
},
{
"epoch": 2.0,
"eval_accuracy": 0.7841726618705036,
"eval_auc": 0.8285638579756226,
"eval_f1": 0.8684210526315789,
"eval_loss": 0.4182623028755188,
"eval_precision": 0.7857142857142857,
"eval_recall": 0.9705882352941176,
"eval_runtime": 5.2653,
"eval_samples_per_second": 26.399,
"eval_steps_per_second": 1.709,
"step": 140
},
{
"epoch": 2.01,
"grad_norm": 6.129709243774414,
"learning_rate": 1.8783068783068782e-05,
"loss": 0.5329,
"step": 141
},
{
"epoch": 2.06,
"grad_norm": 6.085241317749023,
"learning_rate": 1.798941798941799e-05,
"loss": 0.4188,
"step": 144
},
{
"epoch": 2.1,
"grad_norm": 7.526946067810059,
"learning_rate": 1.7195767195767195e-05,
"loss": 0.4073,
"step": 147
},
{
"epoch": 2.14,
"grad_norm": 3.7981059551239014,
"learning_rate": 1.6402116402116404e-05,
"loss": 0.3207,
"step": 150
},
{
"epoch": 2.19,
"grad_norm": 5.229352951049805,
"learning_rate": 1.560846560846561e-05,
"loss": 0.5247,
"step": 153
},
{
"epoch": 2.23,
"grad_norm": 3.7582530975341797,
"learning_rate": 1.4814814814814815e-05,
"loss": 0.2936,
"step": 156
},
{
"epoch": 2.27,
"grad_norm": 6.107592582702637,
"learning_rate": 1.4021164021164022e-05,
"loss": 0.4513,
"step": 159
},
{
"epoch": 2.31,
"grad_norm": 8.501120567321777,
"learning_rate": 1.3227513227513228e-05,
"loss": 0.4808,
"step": 162
},
{
"epoch": 2.36,
"grad_norm": 3.4457428455352783,
"learning_rate": 1.2433862433862433e-05,
"loss": 0.27,
"step": 165
},
{
"epoch": 2.4,
"grad_norm": 5.215019702911377,
"learning_rate": 1.164021164021164e-05,
"loss": 0.3364,
"step": 168
},
{
"epoch": 2.44,
"grad_norm": 6.2070794105529785,
"learning_rate": 1.0846560846560846e-05,
"loss": 0.4075,
"step": 171
},
{
"epoch": 2.49,
"grad_norm": 5.491907119750977,
"learning_rate": 1.0052910052910053e-05,
"loss": 0.5218,
"step": 174
},
{
"epoch": 2.53,
"grad_norm": 7.795518398284912,
"learning_rate": 9.259259259259259e-06,
"loss": 0.329,
"step": 177
},
{
"epoch": 2.57,
"grad_norm": 3.6145973205566406,
"learning_rate": 8.465608465608466e-06,
"loss": 0.3696,
"step": 180
},
{
"epoch": 2.61,
"grad_norm": 5.689661502838135,
"learning_rate": 7.671957671957672e-06,
"loss": 0.6274,
"step": 183
},
{
"epoch": 2.66,
"grad_norm": 4.555980682373047,
"learning_rate": 6.878306878306878e-06,
"loss": 0.3477,
"step": 186
},
{
"epoch": 2.7,
"grad_norm": 6.758476257324219,
"learning_rate": 6.0846560846560845e-06,
"loss": 0.3809,
"step": 189
},
{
"epoch": 2.74,
"grad_norm": 3.1286351680755615,
"learning_rate": 5.291005291005291e-06,
"loss": 0.35,
"step": 192
},
{
"epoch": 2.79,
"grad_norm": 8.514357566833496,
"learning_rate": 4.497354497354498e-06,
"loss": 0.5799,
"step": 195
},
{
"epoch": 2.83,
"grad_norm": 4.899698734283447,
"learning_rate": 3.7037037037037037e-06,
"loss": 0.5754,
"step": 198
},
{
"epoch": 2.87,
"grad_norm": 9.083395004272461,
"learning_rate": 2.91005291005291e-06,
"loss": 0.3895,
"step": 201
},
{
"epoch": 2.91,
"grad_norm": 2.1063151359558105,
"learning_rate": 2.1164021164021164e-06,
"loss": 0.2194,
"step": 204
},
{
"epoch": 2.96,
"grad_norm": 5.722929954528809,
"learning_rate": 1.3227513227513228e-06,
"loss": 0.2762,
"step": 207
},
{
"epoch": 3.0,
"grad_norm": 1.7745004892349243,
"learning_rate": 5.291005291005291e-07,
"loss": 0.1982,
"step": 210
},
{
"epoch": 3.0,
"eval_accuracy": 0.8201438848920863,
"eval_auc": 0.8559883412824589,
"eval_f1": 0.8878923766816144,
"eval_loss": 0.3891792595386505,
"eval_precision": 0.8181818181818182,
"eval_recall": 0.9705882352941176,
"eval_runtime": 5.0466,
"eval_samples_per_second": 27.543,
"eval_steps_per_second": 1.783,
"step": 210
}
],
"logging_steps": 3,
"max_steps": 210,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"total_flos": 1.2925663867713946e+17,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}