{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.998038536979347,
"global_step": 1218,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02,
"learning_rate": 1.092896174863388e-06,
"loss": 5.1703,
"step": 10
},
{
"epoch": 0.05,
"learning_rate": 2.185792349726776e-06,
"loss": 4.4905,
"step": 20
},
{
"epoch": 0.07,
"learning_rate": 3.2786885245901638e-06,
"loss": 3.712,
"step": 30
},
{
"epoch": 0.1,
"learning_rate": 4.371584699453552e-06,
"loss": 2.9301,
"step": 40
},
{
"epoch": 0.12,
"learning_rate": 5.464480874316941e-06,
"loss": 2.4493,
"step": 50
},
{
"epoch": 0.15,
"learning_rate": 6.5573770491803276e-06,
"loss": 2.2009,
"step": 60
},
{
"epoch": 0.17,
"learning_rate": 7.650273224043716e-06,
"loss": 2.1093,
"step": 70
},
{
"epoch": 0.2,
"learning_rate": 8.743169398907103e-06,
"loss": 2.0401,
"step": 80
},
{
"epoch": 0.22,
"learning_rate": 9.836065573770493e-06,
"loss": 1.9798,
"step": 90
},
{
"epoch": 0.25,
"learning_rate": 1.0928961748633882e-05,
"loss": 1.9179,
"step": 100
},
{
"epoch": 0.27,
"learning_rate": 1.202185792349727e-05,
"loss": 1.8746,
"step": 110
},
{
"epoch": 0.3,
"learning_rate": 1.3114754098360655e-05,
"loss": 1.8008,
"step": 120
},
{
"epoch": 0.32,
"learning_rate": 1.4207650273224044e-05,
"loss": 1.7745,
"step": 130
},
{
"epoch": 0.34,
"learning_rate": 1.5300546448087432e-05,
"loss": 1.7164,
"step": 140
},
{
"epoch": 0.37,
"learning_rate": 1.639344262295082e-05,
"loss": 1.6755,
"step": 150
},
{
"epoch": 0.39,
"learning_rate": 1.7486338797814207e-05,
"loss": 1.613,
"step": 160
},
{
"epoch": 0.42,
"learning_rate": 1.85792349726776e-05,
"loss": 1.5696,
"step": 170
},
{
"epoch": 0.44,
"learning_rate": 1.9672131147540985e-05,
"loss": 1.5038,
"step": 180
},
{
"epoch": 0.47,
"learning_rate": 1.986473429951691e-05,
"loss": 1.482,
"step": 190
},
{
"epoch": 0.49,
"learning_rate": 1.9671497584541064e-05,
"loss": 1.4213,
"step": 200
},
{
"epoch": 0.52,
"learning_rate": 1.947826086956522e-05,
"loss": 1.3555,
"step": 210
},
{
"epoch": 0.54,
"learning_rate": 1.9285024154589372e-05,
"loss": 1.3546,
"step": 220
},
{
"epoch": 0.57,
"learning_rate": 1.909178743961353e-05,
"loss": 1.3325,
"step": 230
},
{
"epoch": 0.59,
"learning_rate": 1.8898550724637685e-05,
"loss": 1.2889,
"step": 240
},
{
"epoch": 0.62,
"learning_rate": 1.8705314009661837e-05,
"loss": 1.2737,
"step": 250
},
{
"epoch": 0.64,
"learning_rate": 1.8512077294685994e-05,
"loss": 1.2861,
"step": 260
},
{
"epoch": 0.66,
"learning_rate": 1.8318840579710146e-05,
"loss": 1.2707,
"step": 270
},
{
"epoch": 0.69,
"learning_rate": 1.81256038647343e-05,
"loss": 1.2544,
"step": 280
},
{
"epoch": 0.71,
"learning_rate": 1.7932367149758455e-05,
"loss": 1.2397,
"step": 290
},
{
"epoch": 0.74,
"learning_rate": 1.773913043478261e-05,
"loss": 1.2487,
"step": 300
},
{
"epoch": 0.76,
"learning_rate": 1.7545893719806767e-05,
"loss": 1.2469,
"step": 310
},
{
"epoch": 0.79,
"learning_rate": 1.735265700483092e-05,
"loss": 1.2192,
"step": 320
},
{
"epoch": 0.81,
"learning_rate": 1.7159420289855073e-05,
"loss": 1.2422,
"step": 330
},
{
"epoch": 0.84,
"learning_rate": 1.696618357487923e-05,
"loss": 1.23,
"step": 340
},
{
"epoch": 0.86,
"learning_rate": 1.677294685990338e-05,
"loss": 1.2295,
"step": 350
},
{
"epoch": 0.89,
"learning_rate": 1.6579710144927538e-05,
"loss": 1.224,
"step": 360
},
{
"epoch": 0.91,
"learning_rate": 1.6386473429951694e-05,
"loss": 1.2363,
"step": 370
},
{
"epoch": 0.94,
"learning_rate": 1.6193236714975847e-05,
"loss": 1.216,
"step": 380
},
{
"epoch": 0.96,
"learning_rate": 1.6000000000000003e-05,
"loss": 1.205,
"step": 390
},
{
"epoch": 0.98,
"learning_rate": 1.5806763285024155e-05,
"loss": 1.2222,
"step": 400
},
{
"epoch": 1.01,
"learning_rate": 1.5613526570048308e-05,
"loss": 1.1912,
"step": 410
},
{
"epoch": 1.03,
"learning_rate": 1.5420289855072464e-05,
"loss": 1.1828,
"step": 420
},
{
"epoch": 1.06,
"learning_rate": 1.5227053140096619e-05,
"loss": 1.1682,
"step": 430
},
{
"epoch": 1.08,
"learning_rate": 1.5033816425120775e-05,
"loss": 1.1822,
"step": 440
},
{
"epoch": 1.11,
"learning_rate": 1.484057971014493e-05,
"loss": 1.166,
"step": 450
},
{
"epoch": 1.13,
"learning_rate": 1.4647342995169082e-05,
"loss": 1.1727,
"step": 460
},
{
"epoch": 1.16,
"learning_rate": 1.4454106280193238e-05,
"loss": 1.1796,
"step": 470
},
{
"epoch": 1.18,
"learning_rate": 1.4260869565217392e-05,
"loss": 1.1568,
"step": 480
},
{
"epoch": 1.21,
"learning_rate": 1.4067632850241547e-05,
"loss": 1.1816,
"step": 490
},
{
"epoch": 1.23,
"learning_rate": 1.3874396135265701e-05,
"loss": 1.1614,
"step": 500
},
{
"epoch": 1.26,
"learning_rate": 1.3681159420289856e-05,
"loss": 1.1636,
"step": 510
},
{
"epoch": 1.28,
"learning_rate": 1.3487922705314012e-05,
"loss": 1.1602,
"step": 520
},
{
"epoch": 1.3,
"learning_rate": 1.3294685990338165e-05,
"loss": 1.1624,
"step": 530
},
{
"epoch": 1.33,
"learning_rate": 1.3101449275362319e-05,
"loss": 1.1523,
"step": 540
},
{
"epoch": 1.35,
"learning_rate": 1.2908212560386475e-05,
"loss": 1.1672,
"step": 550
},
{
"epoch": 1.38,
"learning_rate": 1.271497584541063e-05,
"loss": 1.1565,
"step": 560
},
{
"epoch": 1.4,
"learning_rate": 1.2521739130434784e-05,
"loss": 1.1585,
"step": 570
},
{
"epoch": 1.43,
"learning_rate": 1.2328502415458938e-05,
"loss": 1.1564,
"step": 580
},
{
"epoch": 1.45,
"learning_rate": 1.2135265700483093e-05,
"loss": 1.1381,
"step": 590
},
{
"epoch": 1.48,
"learning_rate": 1.1942028985507247e-05,
"loss": 1.17,
"step": 600
},
{
"epoch": 1.5,
"learning_rate": 1.1748792270531402e-05,
"loss": 1.1754,
"step": 610
},
{
"epoch": 1.53,
"learning_rate": 1.1555555555555556e-05,
"loss": 1.1292,
"step": 620
},
{
"epoch": 1.55,
"learning_rate": 1.1362318840579712e-05,
"loss": 1.1239,
"step": 630
},
{
"epoch": 1.58,
"learning_rate": 1.1169082125603865e-05,
"loss": 1.1878,
"step": 640
},
{
"epoch": 1.6,
"learning_rate": 1.0975845410628021e-05,
"loss": 1.1506,
"step": 650
},
{
"epoch": 1.62,
"learning_rate": 1.0782608695652175e-05,
"loss": 1.1536,
"step": 660
},
{
"epoch": 1.65,
"learning_rate": 1.0589371980676328e-05,
"loss": 1.1449,
"step": 670
},
{
"epoch": 1.67,
"learning_rate": 1.0396135265700484e-05,
"loss": 1.1287,
"step": 680
},
{
"epoch": 1.7,
"learning_rate": 1.0202898550724639e-05,
"loss": 1.1366,
"step": 690
},
{
"epoch": 1.72,
"learning_rate": 1.0009661835748795e-05,
"loss": 1.1468,
"step": 700
},
{
"epoch": 1.75,
"learning_rate": 9.816425120772947e-06,
"loss": 1.1484,
"step": 710
},
{
"epoch": 1.77,
"learning_rate": 9.623188405797102e-06,
"loss": 1.1549,
"step": 720
},
{
"epoch": 1.8,
"learning_rate": 9.429951690821256e-06,
"loss": 1.1501,
"step": 730
},
{
"epoch": 1.82,
"learning_rate": 9.23671497584541e-06,
"loss": 1.1433,
"step": 740
},
{
"epoch": 1.85,
"learning_rate": 9.043478260869565e-06,
"loss": 1.1315,
"step": 750
},
{
"epoch": 1.87,
"learning_rate": 8.850241545893721e-06,
"loss": 1.1615,
"step": 760
},
{
"epoch": 1.9,
"learning_rate": 8.657004830917876e-06,
"loss": 1.1521,
"step": 770
},
{
"epoch": 1.92,
"learning_rate": 8.46376811594203e-06,
"loss": 1.138,
"step": 780
},
{
"epoch": 1.94,
"learning_rate": 8.270531400966184e-06,
"loss": 1.1608,
"step": 790
},
{
"epoch": 1.97,
"learning_rate": 8.077294685990339e-06,
"loss": 1.1655,
"step": 800
},
{
"epoch": 1.99,
"learning_rate": 7.884057971014493e-06,
"loss": 1.1367,
"step": 810
},
{
"epoch": 2.02,
"learning_rate": 7.690821256038648e-06,
"loss": 1.1201,
"step": 820
},
{
"epoch": 2.04,
"learning_rate": 7.497584541062802e-06,
"loss": 1.1403,
"step": 830
},
{
"epoch": 2.07,
"learning_rate": 7.304347826086957e-06,
"loss": 1.1166,
"step": 840
},
{
"epoch": 2.09,
"learning_rate": 7.111111111111112e-06,
"loss": 1.1243,
"step": 850
},
{
"epoch": 2.12,
"learning_rate": 6.917874396135267e-06,
"loss": 1.0965,
"step": 860
},
{
"epoch": 2.14,
"learning_rate": 6.724637681159421e-06,
"loss": 1.1033,
"step": 870
},
{
"epoch": 2.17,
"learning_rate": 6.531400966183575e-06,
"loss": 1.1162,
"step": 880
},
{
"epoch": 2.19,
"learning_rate": 6.33816425120773e-06,
"loss": 1.1242,
"step": 890
},
{
"epoch": 2.22,
"learning_rate": 6.144927536231885e-06,
"loss": 1.1151,
"step": 900
},
{
"epoch": 2.24,
"learning_rate": 5.951690821256038e-06,
"loss": 1.1068,
"step": 910
},
{
"epoch": 2.26,
"learning_rate": 5.758454106280194e-06,
"loss": 1.1132,
"step": 920
},
{
"epoch": 2.29,
"learning_rate": 5.565217391304348e-06,
"loss": 1.1307,
"step": 930
},
{
"epoch": 2.31,
"learning_rate": 5.371980676328503e-06,
"loss": 1.1174,
"step": 940
},
{
"epoch": 2.34,
"learning_rate": 5.178743961352657e-06,
"loss": 1.1135,
"step": 950
},
{
"epoch": 2.36,
"learning_rate": 4.985507246376812e-06,
"loss": 1.0946,
"step": 960
},
{
"epoch": 2.39,
"learning_rate": 4.7922705314009665e-06,
"loss": 1.1089,
"step": 970
},
{
"epoch": 2.41,
"learning_rate": 4.599033816425121e-06,
"loss": 1.116,
"step": 980
},
{
"epoch": 2.44,
"learning_rate": 4.405797101449275e-06,
"loss": 1.1129,
"step": 990
},
{
"epoch": 2.46,
"learning_rate": 4.212560386473431e-06,
"loss": 1.0921,
"step": 1000
},
{
"epoch": 2.49,
"learning_rate": 4.019323671497585e-06,
"loss": 1.0999,
"step": 1010
},
{
"epoch": 2.51,
"learning_rate": 3.8260869565217395e-06,
"loss": 1.1141,
"step": 1020
},
{
"epoch": 2.54,
"learning_rate": 3.632850241545894e-06,
"loss": 1.1274,
"step": 1030
},
{
"epoch": 2.56,
"learning_rate": 3.4396135265700487e-06,
"loss": 1.1194,
"step": 1040
},
{
"epoch": 2.58,
"learning_rate": 3.2463768115942027e-06,
"loss": 1.1065,
"step": 1050
},
{
"epoch": 2.61,
"learning_rate": 3.0531400966183576e-06,
"loss": 1.1107,
"step": 1060
},
{
"epoch": 2.63,
"learning_rate": 2.8599033816425124e-06,
"loss": 1.112,
"step": 1070
},
{
"epoch": 2.66,
"learning_rate": 2.666666666666667e-06,
"loss": 1.1252,
"step": 1080
},
{
"epoch": 2.68,
"learning_rate": 2.4734299516908212e-06,
"loss": 1.1116,
"step": 1090
},
{
"epoch": 2.71,
"learning_rate": 2.280193236714976e-06,
"loss": 1.0865,
"step": 1100
},
{
"epoch": 2.73,
"learning_rate": 2.0869565217391305e-06,
"loss": 1.1122,
"step": 1110
},
{
"epoch": 2.76,
"learning_rate": 1.8937198067632853e-06,
"loss": 1.1225,
"step": 1120
},
{
"epoch": 2.78,
"learning_rate": 1.7004830917874398e-06,
"loss": 1.1077,
"step": 1130
},
{
"epoch": 2.81,
"learning_rate": 1.5072463768115944e-06,
"loss": 1.1104,
"step": 1140
},
{
"epoch": 2.83,
"learning_rate": 1.314009661835749e-06,
"loss": 1.0929,
"step": 1150
},
{
"epoch": 2.86,
"learning_rate": 1.1207729468599034e-06,
"loss": 1.0986,
"step": 1160
},
{
"epoch": 2.88,
"learning_rate": 9.275362318840581e-07,
"loss": 1.1137,
"step": 1170
},
{
"epoch": 2.9,
"learning_rate": 7.342995169082126e-07,
"loss": 1.0868,
"step": 1180
},
{
"epoch": 2.93,
"learning_rate": 5.410628019323672e-07,
"loss": 1.1027,
"step": 1190
},
{
"epoch": 2.95,
"learning_rate": 3.4782608695652175e-07,
"loss": 1.1193,
"step": 1200
},
{
"epoch": 2.98,
"learning_rate": 1.5458937198067633e-07,
"loss": 1.1247,
"step": 1210
},
{
"epoch": 3.0,
"step": 1218,
"total_flos": 1.1227700242944e+16,
"train_loss": 1.3416433044646565,
"train_runtime": 4283.2723,
"train_samples_per_second": 36.422,
"train_steps_per_second": 0.284
}
],
"max_steps": 1218,
"num_train_epochs": 3,
"total_flos": 1.1227700242944e+16,
"trial_name": null,
"trial_params": null
}