t5_small_qg_ae_hl / trainer_state.json
longcld's picture
update
3a9499c
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 7.325045768617493,
"global_step": 15500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05,
"learning_rate": 9.952741020793951e-05,
"loss": 2.3854,
"step": 100
},
{
"epoch": 0.09,
"learning_rate": 9.905482041587902e-05,
"loss": 2.3565,
"step": 200
},
{
"epoch": 0.14,
"learning_rate": 9.858223062381853e-05,
"loss": 2.288,
"step": 300
},
{
"epoch": 0.19,
"learning_rate": 9.810964083175804e-05,
"loss": 2.2879,
"step": 400
},
{
"epoch": 0.24,
"learning_rate": 9.763705103969755e-05,
"loss": 2.3212,
"step": 500
},
{
"epoch": 0.28,
"learning_rate": 9.716446124763706e-05,
"loss": 2.2266,
"step": 600
},
{
"epoch": 0.33,
"learning_rate": 9.669187145557656e-05,
"loss": 2.2894,
"step": 700
},
{
"epoch": 0.38,
"learning_rate": 9.621928166351607e-05,
"loss": 2.2738,
"step": 800
},
{
"epoch": 0.43,
"learning_rate": 9.574669187145558e-05,
"loss": 2.3342,
"step": 900
},
{
"epoch": 0.47,
"learning_rate": 9.527410207939509e-05,
"loss": 2.2613,
"step": 1000
},
{
"epoch": 0.52,
"learning_rate": 9.48015122873346e-05,
"loss": 2.1509,
"step": 1100
},
{
"epoch": 0.57,
"learning_rate": 9.432892249527411e-05,
"loss": 2.2683,
"step": 1200
},
{
"epoch": 0.61,
"learning_rate": 9.385633270321362e-05,
"loss": 2.2501,
"step": 1300
},
{
"epoch": 0.66,
"learning_rate": 9.338374291115312e-05,
"loss": 2.2351,
"step": 1400
},
{
"epoch": 0.71,
"learning_rate": 9.291115311909263e-05,
"loss": 2.2134,
"step": 1500
},
{
"epoch": 0.76,
"learning_rate": 9.243856332703214e-05,
"loss": 2.2317,
"step": 1600
},
{
"epoch": 0.8,
"learning_rate": 9.196597353497165e-05,
"loss": 2.226,
"step": 1700
},
{
"epoch": 0.85,
"learning_rate": 9.149338374291116e-05,
"loss": 2.1991,
"step": 1800
},
{
"epoch": 0.9,
"learning_rate": 9.102079395085067e-05,
"loss": 2.1648,
"step": 1900
},
{
"epoch": 0.94,
"learning_rate": 9.054820415879018e-05,
"loss": 2.1412,
"step": 2000
},
{
"epoch": 0.99,
"learning_rate": 9.007561436672968e-05,
"loss": 2.2009,
"step": 2100
},
{
"epoch": 1.04,
"learning_rate": 8.960302457466919e-05,
"loss": 2.205,
"step": 2200
},
{
"epoch": 1.09,
"learning_rate": 8.91304347826087e-05,
"loss": 2.1226,
"step": 2300
},
{
"epoch": 1.13,
"learning_rate": 8.865784499054821e-05,
"loss": 2.1963,
"step": 2400
},
{
"epoch": 1.18,
"learning_rate": 8.818525519848772e-05,
"loss": 2.0325,
"step": 2500
},
{
"epoch": 1.23,
"learning_rate": 8.771266540642723e-05,
"loss": 2.1478,
"step": 2600
},
{
"epoch": 1.28,
"learning_rate": 8.724007561436674e-05,
"loss": 2.1116,
"step": 2700
},
{
"epoch": 1.32,
"learning_rate": 8.676748582230624e-05,
"loss": 2.1805,
"step": 2800
},
{
"epoch": 1.37,
"learning_rate": 8.629489603024575e-05,
"loss": 2.109,
"step": 2900
},
{
"epoch": 1.42,
"learning_rate": 8.582230623818526e-05,
"loss": 2.1156,
"step": 3000
},
{
"epoch": 1.46,
"learning_rate": 8.534971644612477e-05,
"loss": 2.152,
"step": 3100
},
{
"epoch": 1.51,
"learning_rate": 8.487712665406428e-05,
"loss": 2.1512,
"step": 3200
},
{
"epoch": 1.56,
"learning_rate": 8.440453686200379e-05,
"loss": 2.1396,
"step": 3300
},
{
"epoch": 1.61,
"learning_rate": 8.39319470699433e-05,
"loss": 2.0951,
"step": 3400
},
{
"epoch": 1.65,
"learning_rate": 8.34593572778828e-05,
"loss": 2.1533,
"step": 3500
},
{
"epoch": 1.7,
"learning_rate": 8.298676748582231e-05,
"loss": 2.0959,
"step": 3600
},
{
"epoch": 1.75,
"learning_rate": 8.251417769376182e-05,
"loss": 2.1345,
"step": 3700
},
{
"epoch": 1.8,
"learning_rate": 8.204158790170132e-05,
"loss": 2.0643,
"step": 3800
},
{
"epoch": 1.84,
"learning_rate": 8.156899810964084e-05,
"loss": 2.0566,
"step": 3900
},
{
"epoch": 1.89,
"learning_rate": 8.109640831758035e-05,
"loss": 2.0729,
"step": 4000
},
{
"epoch": 1.94,
"learning_rate": 8.062381852551986e-05,
"loss": 2.0768,
"step": 4100
},
{
"epoch": 1.98,
"learning_rate": 8.015122873345936e-05,
"loss": 2.1026,
"step": 4200
},
{
"epoch": 2.03,
"learning_rate": 7.967863894139886e-05,
"loss": 2.0804,
"step": 4300
},
{
"epoch": 2.08,
"learning_rate": 7.920604914933838e-05,
"loss": 2.0875,
"step": 4400
},
{
"epoch": 2.13,
"learning_rate": 7.873345935727789e-05,
"loss": 2.039,
"step": 4500
},
{
"epoch": 2.17,
"learning_rate": 7.82608695652174e-05,
"loss": 2.0605,
"step": 4600
},
{
"epoch": 2.22,
"learning_rate": 7.77882797731569e-05,
"loss": 2.0495,
"step": 4700
},
{
"epoch": 2.27,
"learning_rate": 7.731568998109642e-05,
"loss": 2.0146,
"step": 4800
},
{
"epoch": 2.32,
"learning_rate": 7.684310018903592e-05,
"loss": 2.0378,
"step": 4900
},
{
"epoch": 2.36,
"learning_rate": 7.637051039697543e-05,
"loss": 2.0806,
"step": 5000
},
{
"epoch": 2.41,
"learning_rate": 7.589792060491494e-05,
"loss": 2.0675,
"step": 5100
},
{
"epoch": 2.46,
"learning_rate": 7.542533081285445e-05,
"loss": 2.0247,
"step": 5200
},
{
"epoch": 2.5,
"learning_rate": 7.495274102079396e-05,
"loss": 2.0442,
"step": 5300
},
{
"epoch": 2.55,
"learning_rate": 7.448015122873347e-05,
"loss": 2.0232,
"step": 5400
},
{
"epoch": 2.6,
"learning_rate": 7.400756143667296e-05,
"loss": 2.0648,
"step": 5500
},
{
"epoch": 2.65,
"learning_rate": 7.353497164461248e-05,
"loss": 2.0579,
"step": 5600
},
{
"epoch": 2.69,
"learning_rate": 7.306238185255199e-05,
"loss": 2.0438,
"step": 5700
},
{
"epoch": 2.74,
"learning_rate": 7.25897920604915e-05,
"loss": 2.0849,
"step": 5800
},
{
"epoch": 2.79,
"learning_rate": 7.211720226843101e-05,
"loss": 1.9828,
"step": 5900
},
{
"epoch": 2.84,
"learning_rate": 7.16446124763705e-05,
"loss": 1.9923,
"step": 6000
},
{
"epoch": 2.88,
"learning_rate": 7.117202268431003e-05,
"loss": 1.9898,
"step": 6100
},
{
"epoch": 2.93,
"learning_rate": 7.069943289224953e-05,
"loss": 2.0663,
"step": 6200
},
{
"epoch": 2.98,
"learning_rate": 7.022684310018904e-05,
"loss": 1.9967,
"step": 6300
},
{
"epoch": 3.02,
"learning_rate": 6.975425330812855e-05,
"loss": 1.9941,
"step": 6400
},
{
"epoch": 3.07,
"learning_rate": 6.928166351606805e-05,
"loss": 1.9458,
"step": 6500
},
{
"epoch": 3.12,
"learning_rate": 6.880907372400757e-05,
"loss": 2.0411,
"step": 6600
},
{
"epoch": 3.17,
"learning_rate": 6.833648393194706e-05,
"loss": 1.9634,
"step": 6700
},
{
"epoch": 3.21,
"learning_rate": 6.786389413988659e-05,
"loss": 1.9897,
"step": 6800
},
{
"epoch": 3.26,
"learning_rate": 6.73913043478261e-05,
"loss": 1.9798,
"step": 6900
},
{
"epoch": 3.31,
"learning_rate": 6.691871455576559e-05,
"loss": 2.0127,
"step": 7000
},
{
"epoch": 3.36,
"learning_rate": 6.644612476370511e-05,
"loss": 1.9827,
"step": 7100
},
{
"epoch": 3.4,
"learning_rate": 6.59735349716446e-05,
"loss": 1.9949,
"step": 7200
},
{
"epoch": 3.45,
"learning_rate": 6.550094517958413e-05,
"loss": 2.0152,
"step": 7300
},
{
"epoch": 3.5,
"learning_rate": 6.502835538752364e-05,
"loss": 1.959,
"step": 7400
},
{
"epoch": 3.54,
"learning_rate": 6.455576559546313e-05,
"loss": 1.9876,
"step": 7500
},
{
"epoch": 3.59,
"learning_rate": 6.408317580340265e-05,
"loss": 1.9955,
"step": 7600
},
{
"epoch": 3.64,
"learning_rate": 6.361058601134215e-05,
"loss": 1.9812,
"step": 7700
},
{
"epoch": 3.69,
"learning_rate": 6.313799621928167e-05,
"loss": 1.9219,
"step": 7800
},
{
"epoch": 3.73,
"learning_rate": 6.266540642722118e-05,
"loss": 1.9576,
"step": 7900
},
{
"epoch": 3.78,
"learning_rate": 6.219281663516069e-05,
"loss": 1.9608,
"step": 8000
},
{
"epoch": 3.83,
"learning_rate": 6.17202268431002e-05,
"loss": 1.9916,
"step": 8100
},
{
"epoch": 3.87,
"learning_rate": 6.124763705103969e-05,
"loss": 2.0394,
"step": 8200
},
{
"epoch": 3.92,
"learning_rate": 6.0775047258979214e-05,
"loss": 1.9416,
"step": 8300
},
{
"epoch": 3.97,
"learning_rate": 6.0302457466918716e-05,
"loss": 2.0283,
"step": 8400
},
{
"epoch": 4.02,
"learning_rate": 5.982986767485823e-05,
"loss": 2.0244,
"step": 8500
},
{
"epoch": 4.06,
"learning_rate": 5.935727788279773e-05,
"loss": 1.9132,
"step": 8600
},
{
"epoch": 4.11,
"learning_rate": 5.888468809073724e-05,
"loss": 1.9917,
"step": 8700
},
{
"epoch": 4.16,
"learning_rate": 5.841209829867676e-05,
"loss": 1.9355,
"step": 8800
},
{
"epoch": 4.21,
"learning_rate": 5.793950850661626e-05,
"loss": 1.9248,
"step": 8900
},
{
"epoch": 4.25,
"learning_rate": 5.7466918714555774e-05,
"loss": 2.0129,
"step": 9000
},
{
"epoch": 4.3,
"learning_rate": 5.6994328922495276e-05,
"loss": 1.9497,
"step": 9100
},
{
"epoch": 4.35,
"learning_rate": 5.652173913043478e-05,
"loss": 1.8806,
"step": 9200
},
{
"epoch": 4.39,
"learning_rate": 5.604914933837429e-05,
"loss": 1.9262,
"step": 9300
},
{
"epoch": 4.44,
"learning_rate": 5.55765595463138e-05,
"loss": 2.0024,
"step": 9400
},
{
"epoch": 4.49,
"learning_rate": 5.510396975425332e-05,
"loss": 1.9407,
"step": 9500
},
{
"epoch": 4.54,
"learning_rate": 5.463137996219282e-05,
"loss": 1.991,
"step": 9600
},
{
"epoch": 4.58,
"learning_rate": 5.415879017013232e-05,
"loss": 1.964,
"step": 9700
},
{
"epoch": 4.63,
"learning_rate": 5.3686200378071836e-05,
"loss": 1.9025,
"step": 9800
},
{
"epoch": 4.68,
"learning_rate": 5.3213610586011344e-05,
"loss": 1.9489,
"step": 9900
},
{
"epoch": 4.73,
"learning_rate": 5.274102079395086e-05,
"loss": 1.942,
"step": 10000
},
{
"epoch": 4.77,
"learning_rate": 5.226843100189036e-05,
"loss": 1.9868,
"step": 10100
},
{
"epoch": 4.82,
"learning_rate": 5.179584120982986e-05,
"loss": 1.9231,
"step": 10200
},
{
"epoch": 4.87,
"learning_rate": 5.132325141776938e-05,
"loss": 1.9625,
"step": 10300
},
{
"epoch": 4.91,
"learning_rate": 5.085066162570889e-05,
"loss": 1.9306,
"step": 10400
},
{
"epoch": 4.96,
"learning_rate": 5.03780718336484e-05,
"loss": 1.9106,
"step": 10500
},
{
"epoch": 5.01,
"learning_rate": 4.9905482041587904e-05,
"loss": 1.9209,
"step": 10600
},
{
"epoch": 5.06,
"learning_rate": 4.943289224952741e-05,
"loss": 1.925,
"step": 10700
},
{
"epoch": 5.1,
"learning_rate": 4.896030245746692e-05,
"loss": 1.9368,
"step": 10800
},
{
"epoch": 5.15,
"learning_rate": 4.848771266540643e-05,
"loss": 1.9076,
"step": 10900
},
{
"epoch": 5.2,
"learning_rate": 4.801512287334594e-05,
"loss": 1.8892,
"step": 11000
},
{
"epoch": 5.25,
"learning_rate": 4.754253308128545e-05,
"loss": 1.9667,
"step": 11100
},
{
"epoch": 5.29,
"learning_rate": 4.7069943289224955e-05,
"loss": 1.9371,
"step": 11200
},
{
"epoch": 5.34,
"learning_rate": 4.6597353497164464e-05,
"loss": 1.8549,
"step": 11300
},
{
"epoch": 5.39,
"learning_rate": 4.612476370510397e-05,
"loss": 1.9157,
"step": 11400
},
{
"epoch": 5.43,
"learning_rate": 4.565217391304348e-05,
"loss": 1.9051,
"step": 11500
},
{
"epoch": 5.48,
"learning_rate": 4.517958412098299e-05,
"loss": 1.9175,
"step": 11600
},
{
"epoch": 5.53,
"learning_rate": 4.47069943289225e-05,
"loss": 1.9367,
"step": 11700
},
{
"epoch": 5.58,
"learning_rate": 4.423440453686201e-05,
"loss": 1.8351,
"step": 11800
},
{
"epoch": 5.62,
"learning_rate": 4.3761814744801515e-05,
"loss": 1.8961,
"step": 11900
},
{
"epoch": 5.67,
"learning_rate": 4.3289224952741024e-05,
"loss": 1.8583,
"step": 12000
},
{
"epoch": 5.72,
"learning_rate": 4.281663516068053e-05,
"loss": 1.8909,
"step": 12100
},
{
"epoch": 5.77,
"learning_rate": 4.234404536862004e-05,
"loss": 1.8642,
"step": 12200
},
{
"epoch": 5.81,
"learning_rate": 4.187145557655955e-05,
"loss": 1.9229,
"step": 12300
},
{
"epoch": 5.86,
"learning_rate": 4.139886578449906e-05,
"loss": 1.9524,
"step": 12400
},
{
"epoch": 5.91,
"learning_rate": 4.0926275992438567e-05,
"loss": 1.9143,
"step": 12500
},
{
"epoch": 5.95,
"learning_rate": 4.045368620037807e-05,
"loss": 1.9229,
"step": 12600
},
{
"epoch": 6.0,
"learning_rate": 3.9981096408317584e-05,
"loss": 1.9368,
"step": 12700
},
{
"epoch": 6.05,
"learning_rate": 3.950850661625709e-05,
"loss": 1.9183,
"step": 12800
},
{
"epoch": 6.1,
"learning_rate": 3.90359168241966e-05,
"loss": 1.8864,
"step": 12900
},
{
"epoch": 6.14,
"learning_rate": 3.856332703213611e-05,
"loss": 1.8453,
"step": 13000
},
{
"epoch": 6.19,
"learning_rate": 3.809073724007561e-05,
"loss": 1.8785,
"step": 13100
},
{
"epoch": 6.24,
"learning_rate": 3.7618147448015126e-05,
"loss": 1.9145,
"step": 13200
},
{
"epoch": 6.29,
"learning_rate": 3.7145557655954635e-05,
"loss": 1.9477,
"step": 13300
},
{
"epoch": 6.33,
"learning_rate": 3.6672967863894143e-05,
"loss": 1.8378,
"step": 13400
},
{
"epoch": 6.38,
"learning_rate": 3.620037807183365e-05,
"loss": 1.843,
"step": 13500
},
{
"epoch": 6.43,
"learning_rate": 3.572778827977316e-05,
"loss": 1.8386,
"step": 13600
},
{
"epoch": 6.47,
"learning_rate": 3.525519848771266e-05,
"loss": 1.9,
"step": 13700
},
{
"epoch": 6.52,
"learning_rate": 3.478260869565218e-05,
"loss": 1.8437,
"step": 13800
},
{
"epoch": 6.57,
"learning_rate": 3.4310018903591686e-05,
"loss": 1.9301,
"step": 13900
},
{
"epoch": 6.62,
"learning_rate": 3.3837429111531195e-05,
"loss": 1.8447,
"step": 14000
},
{
"epoch": 6.66,
"learning_rate": 3.33648393194707e-05,
"loss": 1.8811,
"step": 14100
},
{
"epoch": 6.71,
"learning_rate": 3.2892249527410205e-05,
"loss": 1.8775,
"step": 14200
},
{
"epoch": 6.76,
"learning_rate": 3.2419659735349714e-05,
"loss": 1.9123,
"step": 14300
},
{
"epoch": 6.81,
"learning_rate": 3.194706994328923e-05,
"loss": 1.8236,
"step": 14400
},
{
"epoch": 6.85,
"learning_rate": 3.147448015122874e-05,
"loss": 1.8434,
"step": 14500
},
{
"epoch": 6.9,
"learning_rate": 3.1001890359168246e-05,
"loss": 1.8975,
"step": 14600
},
{
"epoch": 6.95,
"learning_rate": 3.052930056710775e-05,
"loss": 1.887,
"step": 14700
},
{
"epoch": 6.99,
"learning_rate": 3.005671077504726e-05,
"loss": 1.8665,
"step": 14800
},
{
"epoch": 7.04,
"learning_rate": 2.9584120982986768e-05,
"loss": 1.9142,
"step": 14900
},
{
"epoch": 7.09,
"learning_rate": 2.9111531190926277e-05,
"loss": 1.8892,
"step": 15000
},
{
"epoch": 7.14,
"learning_rate": 2.863894139886579e-05,
"loss": 1.8762,
"step": 15100
},
{
"epoch": 7.18,
"learning_rate": 2.8166351606805297e-05,
"loss": 1.8169,
"step": 15200
},
{
"epoch": 7.23,
"learning_rate": 2.7693761814744802e-05,
"loss": 1.8673,
"step": 15300
},
{
"epoch": 7.28,
"learning_rate": 2.722117202268431e-05,
"loss": 1.8615,
"step": 15400
},
{
"epoch": 7.33,
"learning_rate": 2.674858223062382e-05,
"loss": 1.8072,
"step": 15500
}
],
"max_steps": 21160,
"num_train_epochs": 10,
"total_flos": 1.1710826687824896e+17,
"trial_name": null,
"trial_params": null
}