t5-small-e2e-qa-full / trainer_state.json
longcld's picture
loss 1.0
2e2b19c
raw
history blame
No virus
19.7 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.7377881560726602,
"global_step": 16000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 0.0006992397089171283,
"loss": 0.9904,
"step": 100
},
{
"epoch": 0.02,
"learning_rate": 0.0006984794178342565,
"loss": 1.0331,
"step": 200
},
{
"epoch": 0.03,
"learning_rate": 0.0006977191267513849,
"loss": 1.0506,
"step": 300
},
{
"epoch": 0.04,
"learning_rate": 0.0006969588356685131,
"loss": 1.0698,
"step": 400
},
{
"epoch": 0.05,
"learning_rate": 0.0006961985445856413,
"loss": 1.0191,
"step": 500
},
{
"epoch": 0.07,
"learning_rate": 0.0006954382535027696,
"loss": 1.0418,
"step": 600
},
{
"epoch": 0.08,
"learning_rate": 0.0006946779624198979,
"loss": 1.1059,
"step": 700
},
{
"epoch": 0.09,
"learning_rate": 0.0006939176713370262,
"loss": 1.0536,
"step": 800
},
{
"epoch": 0.1,
"learning_rate": 0.0006931573802541545,
"loss": 1.0631,
"step": 900
},
{
"epoch": 0.11,
"learning_rate": 0.0006923970891712827,
"loss": 1.0861,
"step": 1000
},
{
"epoch": 0.12,
"learning_rate": 0.0006916367980884109,
"loss": 1.1102,
"step": 1100
},
{
"epoch": 0.13,
"learning_rate": 0.0006908765070055393,
"loss": 1.0543,
"step": 1200
},
{
"epoch": 0.14,
"learning_rate": 0.0006901162159226675,
"loss": 1.0827,
"step": 1300
},
{
"epoch": 0.15,
"learning_rate": 0.0006893559248397958,
"loss": 1.0794,
"step": 1400
},
{
"epoch": 0.16,
"learning_rate": 0.0006885956337569241,
"loss": 1.0768,
"step": 1500
},
{
"epoch": 0.17,
"learning_rate": 0.0006878353426740524,
"loss": 1.1544,
"step": 1600
},
{
"epoch": 0.18,
"learning_rate": 0.0006870750515911806,
"loss": 1.1412,
"step": 1700
},
{
"epoch": 0.2,
"learning_rate": 0.0006863147605083089,
"loss": 1.0918,
"step": 1800
},
{
"epoch": 0.21,
"learning_rate": 0.0006855544694254371,
"loss": 1.0784,
"step": 1900
},
{
"epoch": 0.22,
"learning_rate": 0.0006847941783425654,
"loss": 1.085,
"step": 2000
},
{
"epoch": 0.23,
"learning_rate": 0.0006840338872596937,
"loss": 1.1224,
"step": 2100
},
{
"epoch": 0.24,
"learning_rate": 0.000683273596176822,
"loss": 1.1052,
"step": 2200
},
{
"epoch": 0.25,
"learning_rate": 0.0006825133050939503,
"loss": 1.0949,
"step": 2300
},
{
"epoch": 0.26,
"learning_rate": 0.0006817530140110785,
"loss": 1.0888,
"step": 2400
},
{
"epoch": 0.27,
"learning_rate": 0.0006809927229282067,
"loss": 1.0424,
"step": 2500
},
{
"epoch": 0.28,
"learning_rate": 0.0006802324318453351,
"loss": 1.0191,
"step": 2600
},
{
"epoch": 0.29,
"learning_rate": 0.0006794721407624634,
"loss": 1.1374,
"step": 2700
},
{
"epoch": 0.3,
"learning_rate": 0.0006787118496795916,
"loss": 1.0963,
"step": 2800
},
{
"epoch": 0.31,
"learning_rate": 0.0006779515585967199,
"loss": 1.1217,
"step": 2900
},
{
"epoch": 0.33,
"learning_rate": 0.0006771912675138482,
"loss": 1.1168,
"step": 3000
},
{
"epoch": 0.34,
"learning_rate": 0.0006764309764309764,
"loss": 1.0919,
"step": 3100
},
{
"epoch": 0.35,
"learning_rate": 0.0006756706853481047,
"loss": 1.0552,
"step": 3200
},
{
"epoch": 0.36,
"learning_rate": 0.000674910394265233,
"loss": 1.1408,
"step": 3300
},
{
"epoch": 0.37,
"learning_rate": 0.0006741501031823612,
"loss": 1.1443,
"step": 3400
},
{
"epoch": 0.38,
"learning_rate": 0.0006733898120994895,
"loss": 1.137,
"step": 3500
},
{
"epoch": 0.39,
"learning_rate": 0.0006726295210166178,
"loss": 1.1577,
"step": 3600
},
{
"epoch": 0.4,
"learning_rate": 0.000671869229933746,
"loss": 1.1505,
"step": 3700
},
{
"epoch": 0.41,
"learning_rate": 0.0006711089388508743,
"loss": 1.1479,
"step": 3800
},
{
"epoch": 0.42,
"learning_rate": 0.0006703486477680026,
"loss": 1.0887,
"step": 3900
},
{
"epoch": 0.43,
"learning_rate": 0.0006695883566851309,
"loss": 1.1219,
"step": 4000
},
{
"epoch": 0.45,
"learning_rate": 0.0006688280656022592,
"loss": 1.1188,
"step": 4100
},
{
"epoch": 0.46,
"learning_rate": 0.0006680677745193874,
"loss": 1.0956,
"step": 4200
},
{
"epoch": 0.47,
"learning_rate": 0.0006673074834365157,
"loss": 1.1544,
"step": 4300
},
{
"epoch": 0.48,
"learning_rate": 0.0006665471923536439,
"loss": 1.1014,
"step": 4400
},
{
"epoch": 0.49,
"learning_rate": 0.0006657869012707722,
"loss": 1.0761,
"step": 4500
},
{
"epoch": 0.5,
"learning_rate": 0.0006650266101879005,
"loss": 1.1168,
"step": 4600
},
{
"epoch": 0.51,
"learning_rate": 0.0006642663191050288,
"loss": 1.0792,
"step": 4700
},
{
"epoch": 0.52,
"learning_rate": 0.000663506028022157,
"loss": 1.085,
"step": 4800
},
{
"epoch": 0.53,
"learning_rate": 0.0006627457369392853,
"loss": 1.1649,
"step": 4900
},
{
"epoch": 0.54,
"learning_rate": 0.0006619854458564137,
"loss": 1.1575,
"step": 5000
},
{
"epoch": 0.55,
"learning_rate": 0.0006612251547735418,
"loss": 1.098,
"step": 5100
},
{
"epoch": 0.56,
"learning_rate": 0.0006604648636906701,
"loss": 1.1442,
"step": 5200
},
{
"epoch": 0.58,
"learning_rate": 0.0006597045726077984,
"loss": 1.1135,
"step": 5300
},
{
"epoch": 0.59,
"learning_rate": 0.0006589442815249267,
"loss": 1.1428,
"step": 5400
},
{
"epoch": 0.6,
"learning_rate": 0.000658183990442055,
"loss": 1.0998,
"step": 5500
},
{
"epoch": 0.61,
"learning_rate": 0.0006574236993591833,
"loss": 1.0847,
"step": 5600
},
{
"epoch": 0.62,
"learning_rate": 0.0006566634082763115,
"loss": 1.0957,
"step": 5700
},
{
"epoch": 0.63,
"learning_rate": 0.0006559031171934397,
"loss": 1.0979,
"step": 5800
},
{
"epoch": 0.64,
"learning_rate": 0.000655142826110568,
"loss": 1.1163,
"step": 5900
},
{
"epoch": 0.65,
"learning_rate": 0.0006543825350276963,
"loss": 1.1216,
"step": 6000
},
{
"epoch": 0.66,
"learning_rate": 0.0006536222439448246,
"loss": 1.0892,
"step": 6100
},
{
"epoch": 0.67,
"learning_rate": 0.0006528619528619529,
"loss": 1.1051,
"step": 6200
},
{
"epoch": 0.68,
"learning_rate": 0.0006521016617790811,
"loss": 1.1076,
"step": 6300
},
{
"epoch": 0.7,
"learning_rate": 0.0006513413706962095,
"loss": 1.1235,
"step": 6400
},
{
"epoch": 0.71,
"learning_rate": 0.0006505810796133376,
"loss": 1.0842,
"step": 6500
},
{
"epoch": 0.72,
"learning_rate": 0.0006498207885304659,
"loss": 1.1257,
"step": 6600
},
{
"epoch": 0.73,
"learning_rate": 0.0006490604974475942,
"loss": 1.1014,
"step": 6700
},
{
"epoch": 0.74,
"learning_rate": 0.0006483002063647225,
"loss": 1.0675,
"step": 6800
},
{
"epoch": 0.75,
"learning_rate": 0.0006475399152818508,
"loss": 1.0937,
"step": 6900
},
{
"epoch": 0.76,
"learning_rate": 0.0006467796241989791,
"loss": 1.1009,
"step": 7000
},
{
"epoch": 0.77,
"learning_rate": 0.0006460193331161072,
"loss": 1.0831,
"step": 7100
},
{
"epoch": 0.78,
"learning_rate": 0.0006452590420332355,
"loss": 1.0571,
"step": 7200
},
{
"epoch": 0.79,
"learning_rate": 0.0006444987509503639,
"loss": 1.046,
"step": 7300
},
{
"epoch": 0.8,
"learning_rate": 0.0006437384598674921,
"loss": 1.1214,
"step": 7400
},
{
"epoch": 0.81,
"learning_rate": 0.0006429781687846204,
"loss": 1.0686,
"step": 7500
},
{
"epoch": 0.83,
"learning_rate": 0.0006422178777017487,
"loss": 1.1346,
"step": 7600
},
{
"epoch": 0.84,
"learning_rate": 0.0006414575866188769,
"loss": 1.16,
"step": 7700
},
{
"epoch": 0.85,
"learning_rate": 0.0006406972955360052,
"loss": 1.1717,
"step": 7800
},
{
"epoch": 0.86,
"learning_rate": 0.0006399370044531335,
"loss": 1.1601,
"step": 7900
},
{
"epoch": 0.87,
"learning_rate": 0.0006391767133702617,
"loss": 1.1241,
"step": 8000
},
{
"epoch": 0.88,
"learning_rate": 0.00063841642228739,
"loss": 1.0692,
"step": 8100
},
{
"epoch": 0.89,
"learning_rate": 0.0006376561312045183,
"loss": 1.0775,
"step": 8200
},
{
"epoch": 0.9,
"learning_rate": 0.0006368958401216466,
"loss": 1.126,
"step": 8300
},
{
"epoch": 0.91,
"learning_rate": 0.0006361355490387749,
"loss": 1.0801,
"step": 8400
},
{
"epoch": 0.92,
"learning_rate": 0.0006353752579559031,
"loss": 1.1176,
"step": 8500
},
{
"epoch": 0.93,
"learning_rate": 0.0006346149668730313,
"loss": 1.1101,
"step": 8600
},
{
"epoch": 0.94,
"learning_rate": 0.0006338546757901597,
"loss": 1.0881,
"step": 8700
},
{
"epoch": 0.96,
"learning_rate": 0.0006330943847072879,
"loss": 1.1009,
"step": 8800
},
{
"epoch": 0.97,
"learning_rate": 0.0006323340936244162,
"loss": 1.1158,
"step": 8900
},
{
"epoch": 0.98,
"learning_rate": 0.0006315738025415445,
"loss": 1.0991,
"step": 9000
},
{
"epoch": 0.99,
"learning_rate": 0.0006308135114586728,
"loss": 1.1478,
"step": 9100
},
{
"epoch": 1.0,
"learning_rate": 0.000630053220375801,
"loss": 1.1408,
"step": 9200
},
{
"epoch": 1.01,
"learning_rate": 0.0006292929292929293,
"loss": 1.0484,
"step": 9300
},
{
"epoch": 1.02,
"learning_rate": 0.0006285326382100575,
"loss": 1.103,
"step": 9400
},
{
"epoch": 1.03,
"learning_rate": 0.0006277723471271858,
"loss": 0.9948,
"step": 9500
},
{
"epoch": 1.04,
"learning_rate": 0.0006270120560443141,
"loss": 1.0212,
"step": 9600
},
{
"epoch": 1.05,
"learning_rate": 0.0006262517649614424,
"loss": 1.1029,
"step": 9700
},
{
"epoch": 1.06,
"learning_rate": 0.0006254914738785707,
"loss": 1.064,
"step": 9800
},
{
"epoch": 1.08,
"learning_rate": 0.0006247311827956989,
"loss": 1.0066,
"step": 9900
},
{
"epoch": 1.09,
"learning_rate": 0.0006239708917128271,
"loss": 1.0117,
"step": 10000
},
{
"epoch": 1.1,
"learning_rate": 0.0006232106006299555,
"loss": 1.0196,
"step": 10100
},
{
"epoch": 1.11,
"learning_rate": 0.0006224503095470838,
"loss": 1.0576,
"step": 10200
},
{
"epoch": 1.12,
"learning_rate": 0.000621690018464212,
"loss": 1.0636,
"step": 10300
},
{
"epoch": 1.13,
"learning_rate": 0.0006209297273813403,
"loss": 1.0454,
"step": 10400
},
{
"epoch": 1.14,
"learning_rate": 0.0006201694362984685,
"loss": 1.0798,
"step": 10500
},
{
"epoch": 1.15,
"learning_rate": 0.0006194091452155968,
"loss": 1.007,
"step": 10600
},
{
"epoch": 1.16,
"learning_rate": 0.0006186488541327251,
"loss": 1.0612,
"step": 10700
},
{
"epoch": 1.17,
"learning_rate": 0.0006178885630498534,
"loss": 1.0791,
"step": 10800
},
{
"epoch": 1.18,
"learning_rate": 0.0006171282719669816,
"loss": 1.0961,
"step": 10900
},
{
"epoch": 1.19,
"learning_rate": 0.0006163679808841099,
"loss": 1.0745,
"step": 11000
},
{
"epoch": 1.21,
"learning_rate": 0.0006156076898012382,
"loss": 0.9832,
"step": 11100
},
{
"epoch": 1.22,
"learning_rate": 0.0006148473987183664,
"loss": 1.0083,
"step": 11200
},
{
"epoch": 1.23,
"learning_rate": 0.0006140871076354947,
"loss": 1.0772,
"step": 11300
},
{
"epoch": 1.24,
"learning_rate": 0.000613326816552623,
"loss": 1.0673,
"step": 11400
},
{
"epoch": 1.25,
"learning_rate": 0.0006125665254697513,
"loss": 1.1005,
"step": 11500
},
{
"epoch": 1.26,
"learning_rate": 0.0006118062343868796,
"loss": 1.0219,
"step": 11600
},
{
"epoch": 1.27,
"learning_rate": 0.0006110459433040078,
"loss": 1.0878,
"step": 11700
},
{
"epoch": 1.28,
"learning_rate": 0.0006102856522211361,
"loss": 1.047,
"step": 11800
},
{
"epoch": 1.29,
"learning_rate": 0.0006095253611382643,
"loss": 1.0663,
"step": 11900
},
{
"epoch": 1.3,
"learning_rate": 0.0006087650700553926,
"loss": 1.0473,
"step": 12000
},
{
"epoch": 1.31,
"learning_rate": 0.0006080047789725209,
"loss": 1.0146,
"step": 12100
},
{
"epoch": 1.33,
"learning_rate": 0.0006072444878896492,
"loss": 1.0381,
"step": 12200
},
{
"epoch": 1.34,
"learning_rate": 0.0006064841968067774,
"loss": 1.0991,
"step": 12300
},
{
"epoch": 1.35,
"learning_rate": 0.0006057239057239057,
"loss": 1.0377,
"step": 12400
},
{
"epoch": 1.36,
"learning_rate": 0.0006049636146410341,
"loss": 1.0478,
"step": 12500
},
{
"epoch": 1.37,
"learning_rate": 0.0006042033235581622,
"loss": 1.0773,
"step": 12600
},
{
"epoch": 1.38,
"learning_rate": 0.0006034430324752905,
"loss": 1.0496,
"step": 12700
},
{
"epoch": 1.39,
"learning_rate": 0.0006026827413924188,
"loss": 1.0943,
"step": 12800
},
{
"epoch": 1.4,
"learning_rate": 0.0006019224503095471,
"loss": 1.0303,
"step": 12900
},
{
"epoch": 1.41,
"learning_rate": 0.0006011621592266754,
"loss": 0.9987,
"step": 13000
},
{
"epoch": 1.42,
"learning_rate": 0.0006004018681438037,
"loss": 1.0489,
"step": 13100
},
{
"epoch": 1.43,
"learning_rate": 0.0005996415770609318,
"loss": 1.111,
"step": 13200
},
{
"epoch": 1.44,
"learning_rate": 0.0005988812859780601,
"loss": 1.0514,
"step": 13300
},
{
"epoch": 1.46,
"learning_rate": 0.0005981209948951884,
"loss": 1.0931,
"step": 13400
},
{
"epoch": 1.47,
"learning_rate": 0.0005973607038123167,
"loss": 1.0225,
"step": 13500
},
{
"epoch": 1.48,
"learning_rate": 0.000596600412729445,
"loss": 1.0295,
"step": 13600
},
{
"epoch": 1.49,
"learning_rate": 0.0005958401216465733,
"loss": 1.1068,
"step": 13700
},
{
"epoch": 1.5,
"learning_rate": 0.0005950798305637015,
"loss": 1.0739,
"step": 13800
},
{
"epoch": 1.51,
"learning_rate": 0.0005943195394808299,
"loss": 1.0453,
"step": 13900
},
{
"epoch": 1.52,
"learning_rate": 0.000593559248397958,
"loss": 1.0392,
"step": 14000
},
{
"epoch": 1.53,
"learning_rate": 0.0005927989573150863,
"loss": 1.0482,
"step": 14100
},
{
"epoch": 1.54,
"learning_rate": 0.0005920386662322146,
"loss": 1.0337,
"step": 14200
},
{
"epoch": 1.55,
"learning_rate": 0.0005912783751493429,
"loss": 1.0415,
"step": 14300
},
{
"epoch": 1.56,
"learning_rate": 0.0005905180840664712,
"loss": 1.0534,
"step": 14400
},
{
"epoch": 1.57,
"learning_rate": 0.0005897577929835995,
"loss": 1.028,
"step": 14500
},
{
"epoch": 1.59,
"learning_rate": 0.0005889975019007276,
"loss": 0.9978,
"step": 14600
},
{
"epoch": 1.6,
"learning_rate": 0.0005882372108178559,
"loss": 1.047,
"step": 14700
},
{
"epoch": 1.61,
"learning_rate": 0.0005874769197349843,
"loss": 1.0501,
"step": 14800
},
{
"epoch": 1.62,
"learning_rate": 0.0005867166286521125,
"loss": 1.0165,
"step": 14900
},
{
"epoch": 1.63,
"learning_rate": 0.0005859563375692408,
"loss": 1.0867,
"step": 15000
},
{
"epoch": 1.64,
"learning_rate": 0.0005851960464863691,
"loss": 1.0614,
"step": 15100
},
{
"epoch": 1.65,
"learning_rate": 0.0005844357554034973,
"loss": 1.0512,
"step": 15200
},
{
"epoch": 1.66,
"learning_rate": 0.0005836754643206256,
"loss": 0.9962,
"step": 15300
},
{
"epoch": 1.67,
"learning_rate": 0.0005829151732377539,
"loss": 1.0325,
"step": 15400
},
{
"epoch": 1.68,
"learning_rate": 0.0005821548821548821,
"loss": 1.0313,
"step": 15500
},
{
"epoch": 1.69,
"learning_rate": 0.0005813945910720104,
"loss": 1.0441,
"step": 15600
},
{
"epoch": 1.71,
"learning_rate": 0.0005806342999891387,
"loss": 1.0705,
"step": 15700
},
{
"epoch": 1.72,
"learning_rate": 0.000579874008906267,
"loss": 1.0209,
"step": 15800
},
{
"epoch": 1.73,
"learning_rate": 0.0005791137178233953,
"loss": 1.0627,
"step": 15900
},
{
"epoch": 1.74,
"learning_rate": 0.0005783534267405235,
"loss": 1.076,
"step": 16000
}
],
"max_steps": 92070,
"num_train_epochs": 10,
"total_flos": 3.281082781355213e+16,
"trial_name": null,
"trial_params": null
}