twitter-roberta-base-2022-154m / trainer_state.json
danlou's picture
Upload 14 files
ac520b1
raw
history blame
20.1 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.5714025899946678,
"global_step": 54000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03,
"learning_rate": 5e-06,
"loss": 2.5739,
"step": 500
},
{
"epoch": 0.07,
"learning_rate": 1e-05,
"loss": 2.3712,
"step": 1000
},
{
"epoch": 0.1,
"learning_rate": 9.999669290296979e-06,
"loss": 2.3057,
"step": 1500
},
{
"epoch": 0.13,
"learning_rate": 9.999338580593956e-06,
"loss": 2.2676,
"step": 2000
},
{
"epoch": 0.13,
"eval_accuracy": 0.624435573588116,
"eval_loss": 2.00943922996521,
"eval_runtime": 4.2994,
"eval_samples_per_second": 930.363,
"eval_steps_per_second": 1.628,
"step": 2000
},
{
"epoch": 0.17,
"learning_rate": 9.999007870890934e-06,
"loss": 2.2392,
"step": 2500
},
{
"epoch": 0.2,
"learning_rate": 9.99867716118791e-06,
"loss": 2.2181,
"step": 3000
},
{
"epoch": 0.23,
"learning_rate": 9.998346451484888e-06,
"loss": 2.2015,
"step": 3500
},
{
"epoch": 0.26,
"learning_rate": 9.998016403201271e-06,
"loss": 2.1881,
"step": 4000
},
{
"epoch": 0.26,
"eval_accuracy": 0.6230075777371309,
"eval_loss": 1.980251431465149,
"eval_runtime": 4.1905,
"eval_samples_per_second": 954.545,
"eval_steps_per_second": 1.67,
"step": 4000
},
{
"epoch": 0.3,
"learning_rate": 9.997685693498247e-06,
"loss": 2.1754,
"step": 4500
},
{
"epoch": 0.33,
"learning_rate": 9.997354983795225e-06,
"loss": 2.1646,
"step": 5000
},
{
"epoch": 0.36,
"learning_rate": 9.997024274092203e-06,
"loss": 2.155,
"step": 5500
},
{
"epoch": 0.4,
"learning_rate": 9.99669356438918e-06,
"loss": 2.1462,
"step": 6000
},
{
"epoch": 0.4,
"eval_accuracy": 0.6319083064356762,
"eval_loss": 1.9461042881011963,
"eval_runtime": 4.2092,
"eval_samples_per_second": 950.295,
"eval_steps_per_second": 1.663,
"step": 6000
},
{
"epoch": 0.43,
"learning_rate": 9.996362854686157e-06,
"loss": 2.1369,
"step": 6500
},
{
"epoch": 0.46,
"learning_rate": 9.996032144983134e-06,
"loss": 2.129,
"step": 7000
},
{
"epoch": 0.5,
"learning_rate": 9.995701435280112e-06,
"loss": 2.1228,
"step": 7500
},
{
"epoch": 0.53,
"learning_rate": 9.99537072557709e-06,
"loss": 2.1163,
"step": 8000
},
{
"epoch": 0.53,
"eval_accuracy": 0.630312540865699,
"eval_loss": 1.9446306228637695,
"eval_runtime": 4.1812,
"eval_samples_per_second": 956.66,
"eval_steps_per_second": 1.674,
"step": 8000
},
{
"epoch": 0.56,
"learning_rate": 9.995040677293473e-06,
"loss": 2.1093,
"step": 8500
},
{
"epoch": 0.6,
"learning_rate": 9.994709967590451e-06,
"loss": 2.1049,
"step": 9000
},
{
"epoch": 0.63,
"learning_rate": 9.994379257887427e-06,
"loss": 2.0986,
"step": 9500
},
{
"epoch": 0.66,
"learning_rate": 9.994048548184405e-06,
"loss": 2.0949,
"step": 10000
},
{
"epoch": 0.66,
"eval_accuracy": 0.6310234052317577,
"eval_loss": 1.9502341747283936,
"eval_runtime": 4.3076,
"eval_samples_per_second": 928.598,
"eval_steps_per_second": 1.625,
"step": 10000
},
{
"epoch": 0.69,
"learning_rate": 9.993717838481381e-06,
"loss": 2.09,
"step": 10500
},
{
"epoch": 0.73,
"learning_rate": 9.993387790197764e-06,
"loss": 2.0861,
"step": 11000
},
{
"epoch": 0.76,
"learning_rate": 9.993057080494742e-06,
"loss": 2.08,
"step": 11500
},
{
"epoch": 0.79,
"learning_rate": 9.99272637079172e-06,
"loss": 2.0752,
"step": 12000
},
{
"epoch": 0.79,
"eval_accuracy": 0.6285732890538517,
"eval_loss": 1.9644430875778198,
"eval_runtime": 4.1884,
"eval_samples_per_second": 955.011,
"eval_steps_per_second": 1.671,
"step": 12000
},
{
"epoch": 0.83,
"learning_rate": 9.992395661088698e-06,
"loss": 2.0717,
"step": 12500
},
{
"epoch": 0.86,
"learning_rate": 9.992065612805081e-06,
"loss": 2.0677,
"step": 13000
},
{
"epoch": 0.89,
"learning_rate": 9.991734903102059e-06,
"loss": 2.0643,
"step": 13500
},
{
"epoch": 0.93,
"learning_rate": 9.991404193399035e-06,
"loss": 2.0609,
"step": 14000
},
{
"epoch": 0.93,
"eval_accuracy": 0.6438428177158628,
"eval_loss": 1.8919312953948975,
"eval_runtime": 5.6046,
"eval_samples_per_second": 713.696,
"eval_steps_per_second": 1.249,
"step": 14000
},
{
"epoch": 0.96,
"learning_rate": 9.991073483696012e-06,
"loss": 2.055,
"step": 14500
},
{
"epoch": 0.99,
"learning_rate": 9.99074277399299e-06,
"loss": 2.052,
"step": 15000
},
{
"epoch": 1.03,
"learning_rate": 9.990412725709374e-06,
"loss": 2.0516,
"step": 15500
},
{
"epoch": 1.06,
"learning_rate": 9.990082016006351e-06,
"loss": 2.0461,
"step": 16000
},
{
"epoch": 1.06,
"eval_accuracy": 0.6411957950065703,
"eval_loss": 1.8524950742721558,
"eval_runtime": 4.1951,
"eval_samples_per_second": 953.502,
"eval_steps_per_second": 1.669,
"step": 16000
},
{
"epoch": 1.09,
"learning_rate": 9.989751306303327e-06,
"loss": 2.0429,
"step": 16500
},
{
"epoch": 1.12,
"learning_rate": 9.989420596600305e-06,
"loss": 2.0391,
"step": 17000
},
{
"epoch": 1.16,
"learning_rate": 9.989089886897281e-06,
"loss": 2.0364,
"step": 17500
},
{
"epoch": 1.19,
"learning_rate": 9.988759177194259e-06,
"loss": 2.0347,
"step": 18000
},
{
"epoch": 1.19,
"eval_accuracy": 0.646603611349957,
"eval_loss": 1.842495322227478,
"eval_runtime": 4.1943,
"eval_samples_per_second": 953.666,
"eval_steps_per_second": 1.669,
"step": 18000
},
{
"epoch": 1.22,
"learning_rate": 9.988428467491237e-06,
"loss": 2.0311,
"step": 18500
},
{
"epoch": 1.26,
"learning_rate": 9.98809841920762e-06,
"loss": 2.0298,
"step": 19000
},
{
"epoch": 1.29,
"learning_rate": 9.987767709504598e-06,
"loss": 2.0263,
"step": 19500
},
{
"epoch": 1.32,
"learning_rate": 9.987436999801576e-06,
"loss": 2.0227,
"step": 20000
},
{
"epoch": 1.32,
"eval_accuracy": 0.6428384906645777,
"eval_loss": 1.8519748449325562,
"eval_runtime": 4.4809,
"eval_samples_per_second": 892.668,
"eval_steps_per_second": 1.562,
"step": 20000
},
{
"epoch": 1.36,
"learning_rate": 9.987106290098553e-06,
"loss": 2.0213,
"step": 20500
},
{
"epoch": 1.39,
"learning_rate": 9.98677558039553e-06,
"loss": 2.0202,
"step": 21000
},
{
"epoch": 1.42,
"learning_rate": 9.986444870692507e-06,
"loss": 2.0176,
"step": 21500
},
{
"epoch": 1.46,
"learning_rate": 9.986114160989483e-06,
"loss": 2.0156,
"step": 22000
},
{
"epoch": 1.46,
"eval_accuracy": 0.6465079984393289,
"eval_loss": 1.838840126991272,
"eval_runtime": 5.6638,
"eval_samples_per_second": 706.234,
"eval_steps_per_second": 1.236,
"step": 22000
},
{
"epoch": 1.49,
"learning_rate": 9.985783451286461e-06,
"loss": 2.0135,
"step": 22500
},
{
"epoch": 1.52,
"learning_rate": 9.985452741583439e-06,
"loss": 2.0108,
"step": 23000
},
{
"epoch": 1.55,
"learning_rate": 9.985122031880417e-06,
"loss": 2.0083,
"step": 23500
},
{
"epoch": 1.59,
"learning_rate": 9.984792645016205e-06,
"loss": 2.0061,
"step": 24000
},
{
"epoch": 1.59,
"eval_accuracy": 0.6455588887439676,
"eval_loss": 1.8356744050979614,
"eval_runtime": 4.1905,
"eval_samples_per_second": 954.546,
"eval_steps_per_second": 1.67,
"step": 24000
},
{
"epoch": 1.62,
"learning_rate": 9.984461935313183e-06,
"loss": 2.0035,
"step": 24500
},
{
"epoch": 1.65,
"learning_rate": 9.98413122561016e-06,
"loss": 2.0021,
"step": 25000
},
{
"epoch": 1.69,
"learning_rate": 9.983800515907137e-06,
"loss": 2.001,
"step": 25500
},
{
"epoch": 1.72,
"learning_rate": 9.983469806204115e-06,
"loss": 1.9985,
"step": 26000
},
{
"epoch": 1.72,
"eval_accuracy": 0.6489613463055482,
"eval_loss": 1.7983965873718262,
"eval_runtime": 4.2683,
"eval_samples_per_second": 937.141,
"eval_steps_per_second": 1.64,
"step": 26000
},
{
"epoch": 1.75,
"learning_rate": 9.983139096501093e-06,
"loss": 1.9962,
"step": 26500
},
{
"epoch": 1.79,
"learning_rate": 9.98280838679807e-06,
"loss": 1.9948,
"step": 27000
},
{
"epoch": 1.82,
"learning_rate": 9.982477677095046e-06,
"loss": 1.9947,
"step": 27500
},
{
"epoch": 1.85,
"learning_rate": 9.982146967392024e-06,
"loss": 1.9909,
"step": 28000
},
{
"epoch": 1.85,
"eval_accuracy": 0.6503962176200306,
"eval_loss": 1.7985965013504028,
"eval_runtime": 4.4068,
"eval_samples_per_second": 907.681,
"eval_steps_per_second": 1.588,
"step": 28000
},
{
"epoch": 1.88,
"learning_rate": 9.981816919108407e-06,
"loss": 1.9888,
"step": 28500
},
{
"epoch": 1.92,
"learning_rate": 9.98148687082479e-06,
"loss": 1.9877,
"step": 29000
},
{
"epoch": 1.95,
"learning_rate": 9.981156161121769e-06,
"loss": 1.9862,
"step": 29500
},
{
"epoch": 1.98,
"learning_rate": 9.980825451418745e-06,
"loss": 1.9856,
"step": 30000
},
{
"epoch": 1.98,
"eval_accuracy": 0.647755693036725,
"eval_loss": 1.8175112009048462,
"eval_runtime": 5.8254,
"eval_samples_per_second": 686.652,
"eval_steps_per_second": 1.202,
"step": 30000
},
{
"epoch": 2.02,
"learning_rate": 9.980494741715722e-06,
"loss": 1.9859,
"step": 30500
},
{
"epoch": 2.05,
"learning_rate": 9.9801640320127e-06,
"loss": 1.982,
"step": 31000
},
{
"epoch": 2.08,
"learning_rate": 9.979833322309678e-06,
"loss": 1.979,
"step": 31500
},
{
"epoch": 2.12,
"learning_rate": 9.979502612606656e-06,
"loss": 1.9805,
"step": 32000
},
{
"epoch": 2.12,
"eval_accuracy": 0.6516655780535597,
"eval_loss": 1.8021340370178223,
"eval_runtime": 4.2089,
"eval_samples_per_second": 950.37,
"eval_steps_per_second": 1.663,
"step": 32000
},
{
"epoch": 2.15,
"learning_rate": 9.979171902903632e-06,
"loss": 1.9771,
"step": 32500
},
{
"epoch": 2.18,
"learning_rate": 9.978841854620015e-06,
"loss": 1.9767,
"step": 33000
},
{
"epoch": 2.22,
"learning_rate": 9.978511144916993e-06,
"loss": 1.9738,
"step": 33500
},
{
"epoch": 2.25,
"learning_rate": 9.978181096633376e-06,
"loss": 1.9732,
"step": 34000
},
{
"epoch": 2.25,
"eval_accuracy": 0.6534042413498785,
"eval_loss": 1.7741553783416748,
"eval_runtime": 5.6645,
"eval_samples_per_second": 706.155,
"eval_steps_per_second": 1.236,
"step": 34000
},
{
"epoch": 2.28,
"learning_rate": 9.977850386930354e-06,
"loss": 1.9711,
"step": 34500
},
{
"epoch": 2.31,
"learning_rate": 9.977519677227332e-06,
"loss": 1.9697,
"step": 35000
},
{
"epoch": 2.35,
"learning_rate": 9.977188967524308e-06,
"loss": 1.969,
"step": 35500
},
{
"epoch": 2.38,
"learning_rate": 9.976858257821285e-06,
"loss": 1.968,
"step": 36000
},
{
"epoch": 2.38,
"eval_accuracy": 0.6549699887870193,
"eval_loss": 1.7642747163772583,
"eval_runtime": 5.7171,
"eval_samples_per_second": 699.657,
"eval_steps_per_second": 1.224,
"step": 36000
},
{
"epoch": 2.41,
"learning_rate": 9.976527548118262e-06,
"loss": 1.9671,
"step": 36500
},
{
"epoch": 2.45,
"learning_rate": 9.97619683841524e-06,
"loss": 1.9664,
"step": 37000
},
{
"epoch": 2.48,
"learning_rate": 9.975866128712217e-06,
"loss": 1.9634,
"step": 37500
},
{
"epoch": 2.51,
"learning_rate": 9.975535419009195e-06,
"loss": 1.9618,
"step": 38000
},
{
"epoch": 2.51,
"eval_accuracy": 0.6490381480273884,
"eval_loss": 1.8036186695098877,
"eval_runtime": 4.2126,
"eval_samples_per_second": 949.526,
"eval_steps_per_second": 1.662,
"step": 38000
},
{
"epoch": 2.55,
"learning_rate": 9.975205370725578e-06,
"loss": 1.9605,
"step": 38500
},
{
"epoch": 2.58,
"learning_rate": 9.974874661022556e-06,
"loss": 1.9593,
"step": 39000
},
{
"epoch": 2.61,
"learning_rate": 9.974543951319534e-06,
"loss": 1.9598,
"step": 39500
},
{
"epoch": 2.65,
"learning_rate": 9.97421324161651e-06,
"loss": 1.9582,
"step": 40000
},
{
"epoch": 2.65,
"eval_accuracy": 0.6469189617594611,
"eval_loss": 1.80427885055542,
"eval_runtime": 4.2765,
"eval_samples_per_second": 935.338,
"eval_steps_per_second": 1.637,
"step": 40000
},
{
"epoch": 2.68,
"learning_rate": 9.973883193332893e-06,
"loss": 1.9555,
"step": 40500
},
{
"epoch": 2.71,
"learning_rate": 9.973552483629871e-06,
"loss": 1.9546,
"step": 41000
},
{
"epoch": 2.74,
"learning_rate": 9.973221773926847e-06,
"loss": 1.9543,
"step": 41500
},
{
"epoch": 2.78,
"learning_rate": 9.972891064223825e-06,
"loss": 1.9533,
"step": 42000
},
{
"epoch": 2.78,
"eval_accuracy": 0.6523191823899371,
"eval_loss": 1.8008100986480713,
"eval_runtime": 4.4543,
"eval_samples_per_second": 898.013,
"eval_steps_per_second": 1.572,
"step": 42000
},
{
"epoch": 2.81,
"learning_rate": 9.972560354520802e-06,
"loss": 1.9528,
"step": 42500
},
{
"epoch": 2.84,
"learning_rate": 9.972230306237186e-06,
"loss": 1.9514,
"step": 43000
},
{
"epoch": 2.88,
"learning_rate": 9.971900257953569e-06,
"loss": 1.9504,
"step": 43500
},
{
"epoch": 2.91,
"learning_rate": 9.971569548250547e-06,
"loss": 1.9472,
"step": 44000
},
{
"epoch": 2.91,
"eval_accuracy": 0.6498401704848162,
"eval_loss": 1.7917245626449585,
"eval_runtime": 4.1847,
"eval_samples_per_second": 955.859,
"eval_steps_per_second": 1.673,
"step": 44000
},
{
"epoch": 2.94,
"learning_rate": 9.971238838547523e-06,
"loss": 1.9484,
"step": 44500
},
{
"epoch": 2.98,
"learning_rate": 9.9709081288445e-06,
"loss": 1.9464,
"step": 45000
},
{
"epoch": 3.01,
"learning_rate": 9.970577419141478e-06,
"loss": 1.9474,
"step": 45500
},
{
"epoch": 3.04,
"learning_rate": 9.970246709438456e-06,
"loss": 1.9443,
"step": 46000
},
{
"epoch": 3.04,
"eval_accuracy": 0.6572224802601022,
"eval_loss": 1.7718559503555298,
"eval_runtime": 4.1961,
"eval_samples_per_second": 953.273,
"eval_steps_per_second": 1.668,
"step": 46000
},
{
"epoch": 3.08,
"learning_rate": 9.969915999735434e-06,
"loss": 1.9433,
"step": 46500
},
{
"epoch": 3.11,
"learning_rate": 9.96958529003241e-06,
"loss": 1.9424,
"step": 47000
},
{
"epoch": 3.14,
"learning_rate": 9.969254580329388e-06,
"loss": 1.9409,
"step": 47500
},
{
"epoch": 3.17,
"learning_rate": 9.968923870626364e-06,
"loss": 1.9394,
"step": 48000
},
{
"epoch": 3.17,
"eval_accuracy": 0.6642529728293364,
"eval_loss": 1.721533179283142,
"eval_runtime": 4.2728,
"eval_samples_per_second": 936.147,
"eval_steps_per_second": 1.638,
"step": 48000
},
{
"epoch": 3.21,
"learning_rate": 9.968593822342749e-06,
"loss": 1.9394,
"step": 48500
},
{
"epoch": 3.24,
"learning_rate": 9.968263112639725e-06,
"loss": 1.9388,
"step": 49000
},
{
"epoch": 3.27,
"learning_rate": 9.967932402936703e-06,
"loss": 1.9376,
"step": 49500
},
{
"epoch": 3.31,
"learning_rate": 9.96760169323368e-06,
"loss": 1.9372,
"step": 50000
},
{
"epoch": 3.31,
"eval_accuracy": 0.6554556120453656,
"eval_loss": 1.7481720447540283,
"eval_runtime": 4.2868,
"eval_samples_per_second": 933.107,
"eval_steps_per_second": 1.633,
"step": 50000
},
{
"epoch": 3.34,
"learning_rate": 9.967270983530658e-06,
"loss": 1.936,
"step": 50500
},
{
"epoch": 3.37,
"learning_rate": 9.966940273827634e-06,
"loss": 1.9339,
"step": 51000
},
{
"epoch": 3.41,
"learning_rate": 9.966609564124612e-06,
"loss": 1.9333,
"step": 51500
},
{
"epoch": 3.44,
"learning_rate": 9.966279515840995e-06,
"loss": 1.9324,
"step": 52000
},
{
"epoch": 3.44,
"eval_accuracy": 0.6522052559145423,
"eval_loss": 1.7548049688339233,
"eval_runtime": 5.7821,
"eval_samples_per_second": 691.794,
"eval_steps_per_second": 1.211,
"step": 52000
},
{
"epoch": 3.47,
"learning_rate": 9.965948806137973e-06,
"loss": 1.9317,
"step": 52500
},
{
"epoch": 3.51,
"learning_rate": 9.965618096434951e-06,
"loss": 1.9323,
"step": 53000
},
{
"epoch": 3.54,
"learning_rate": 9.965287386731927e-06,
"loss": 1.9303,
"step": 53500
},
{
"epoch": 3.57,
"learning_rate": 9.964956677028905e-06,
"loss": 1.9297,
"step": 54000
},
{
"epoch": 3.57,
"eval_accuracy": 0.6580935017580414,
"eval_loss": 1.7288295030593872,
"eval_runtime": 4.2876,
"eval_samples_per_second": 932.929,
"eval_steps_per_second": 1.633,
"step": 54000
}
],
"max_steps": 15120000,
"num_train_epochs": 1000,
"total_flos": 1.8197541890074767e+19,
"trial_name": null,
"trial_params": null
}