Phi-3-mini-4k-instruct-finetuned / trainer_state.json
satyanshu404's picture
Model save
7eb9800 verified
raw
history blame
No virus
51.3 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 8295,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 6.027727546714888e-09,
"loss": 13.638,
"step": 20
},
{
"epoch": 0.01,
"learning_rate": 1.2055455093429776e-08,
"loss": 13.7323,
"step": 40
},
{
"epoch": 0.02,
"learning_rate": 1.8083182640144666e-08,
"loss": 13.5875,
"step": 60
},
{
"epoch": 0.03,
"learning_rate": 2.4110910186859552e-08,
"loss": 13.6351,
"step": 80
},
{
"epoch": 0.04,
"learning_rate": 3.013863773357444e-08,
"loss": 13.634,
"step": 100
},
{
"epoch": 0.04,
"learning_rate": 3.616636528028933e-08,
"loss": 13.6684,
"step": 120
},
{
"epoch": 0.05,
"learning_rate": 4.2194092827004215e-08,
"loss": 13.6572,
"step": 140
},
{
"epoch": 0.06,
"learning_rate": 4.8221820373719104e-08,
"loss": 13.6528,
"step": 160
},
{
"epoch": 0.07,
"learning_rate": 5.4249547920433994e-08,
"loss": 13.7057,
"step": 180
},
{
"epoch": 0.07,
"learning_rate": 6.027727546714888e-08,
"loss": 13.6378,
"step": 200
},
{
"epoch": 0.08,
"learning_rate": 6.630500301386377e-08,
"loss": 13.7237,
"step": 220
},
{
"epoch": 0.09,
"learning_rate": 7.233273056057866e-08,
"loss": 13.7124,
"step": 240
},
{
"epoch": 0.09,
"learning_rate": 7.836045810729356e-08,
"loss": 13.6658,
"step": 260
},
{
"epoch": 0.1,
"learning_rate": 8.438818565400843e-08,
"loss": 13.6358,
"step": 280
},
{
"epoch": 0.11,
"learning_rate": 9.041591320072333e-08,
"loss": 13.6806,
"step": 300
},
{
"epoch": 0.12,
"learning_rate": 9.644364074743821e-08,
"loss": 13.7341,
"step": 320
},
{
"epoch": 0.12,
"learning_rate": 1.0247136829415309e-07,
"loss": 13.6129,
"step": 340
},
{
"epoch": 0.13,
"learning_rate": 1.0849909584086799e-07,
"loss": 13.6649,
"step": 360
},
{
"epoch": 0.14,
"learning_rate": 1.1452682338758287e-07,
"loss": 13.6915,
"step": 380
},
{
"epoch": 0.14,
"learning_rate": 1.2055455093429777e-07,
"loss": 13.7191,
"step": 400
},
{
"epoch": 0.15,
"learning_rate": 1.2658227848101266e-07,
"loss": 13.5871,
"step": 420
},
{
"epoch": 0.16,
"learning_rate": 1.3261000602772753e-07,
"loss": 13.6615,
"step": 440
},
{
"epoch": 0.17,
"learning_rate": 1.3863773357444243e-07,
"loss": 13.5988,
"step": 460
},
{
"epoch": 0.17,
"learning_rate": 1.4466546112115733e-07,
"loss": 13.5958,
"step": 480
},
{
"epoch": 0.18,
"learning_rate": 1.506931886678722e-07,
"loss": 13.6678,
"step": 500
},
{
"epoch": 0.19,
"learning_rate": 1.5672091621458712e-07,
"loss": 13.6995,
"step": 520
},
{
"epoch": 0.2,
"learning_rate": 1.62748643761302e-07,
"loss": 13.6663,
"step": 540
},
{
"epoch": 0.2,
"learning_rate": 1.6877637130801686e-07,
"loss": 13.659,
"step": 560
},
{
"epoch": 0.21,
"learning_rate": 1.7480409885473178e-07,
"loss": 13.554,
"step": 580
},
{
"epoch": 0.22,
"learning_rate": 1.8083182640144665e-07,
"loss": 13.5805,
"step": 600
},
{
"epoch": 0.22,
"learning_rate": 1.8685955394816152e-07,
"loss": 13.6546,
"step": 620
},
{
"epoch": 0.23,
"learning_rate": 1.9288728149487642e-07,
"loss": 13.5849,
"step": 640
},
{
"epoch": 0.24,
"learning_rate": 1.9891500904159131e-07,
"loss": 13.611,
"step": 660
},
{
"epoch": 0.25,
"learning_rate": 2.0494273658830618e-07,
"loss": 13.5593,
"step": 680
},
{
"epoch": 0.25,
"learning_rate": 2.1097046413502108e-07,
"loss": 13.6788,
"step": 700
},
{
"epoch": 0.26,
"learning_rate": 2.1699819168173598e-07,
"loss": 13.591,
"step": 720
},
{
"epoch": 0.27,
"learning_rate": 2.2302591922845087e-07,
"loss": 13.5295,
"step": 740
},
{
"epoch": 0.27,
"learning_rate": 2.2905364677516574e-07,
"loss": 13.4517,
"step": 760
},
{
"epoch": 0.28,
"learning_rate": 2.3508137432188064e-07,
"loss": 13.4802,
"step": 780
},
{
"epoch": 0.29,
"learning_rate": 2.4110910186859554e-07,
"loss": 13.4712,
"step": 800
},
{
"epoch": 0.3,
"learning_rate": 2.4713682941531043e-07,
"loss": 13.5632,
"step": 820
},
{
"epoch": 0.3,
"learning_rate": 2.5316455696202533e-07,
"loss": 13.6037,
"step": 840
},
{
"epoch": 0.31,
"learning_rate": 2.591922845087402e-07,
"loss": 13.537,
"step": 860
},
{
"epoch": 0.32,
"learning_rate": 2.6522001205545507e-07,
"loss": 13.555,
"step": 880
},
{
"epoch": 0.33,
"learning_rate": 2.7124773960216996e-07,
"loss": 13.5306,
"step": 900
},
{
"epoch": 0.33,
"learning_rate": 2.7727546714888486e-07,
"loss": 13.5647,
"step": 920
},
{
"epoch": 0.34,
"learning_rate": 2.8330319469559976e-07,
"loss": 13.6317,
"step": 940
},
{
"epoch": 0.35,
"learning_rate": 2.8933092224231465e-07,
"loss": 13.4934,
"step": 960
},
{
"epoch": 0.35,
"learning_rate": 2.9535864978902955e-07,
"loss": 13.6209,
"step": 980
},
{
"epoch": 0.36,
"learning_rate": 3.013863773357444e-07,
"loss": 13.5188,
"step": 1000
},
{
"epoch": 0.37,
"learning_rate": 3.074141048824593e-07,
"loss": 13.5084,
"step": 1020
},
{
"epoch": 0.38,
"learning_rate": 3.1344183242917424e-07,
"loss": 13.4541,
"step": 1040
},
{
"epoch": 0.38,
"learning_rate": 3.194695599758891e-07,
"loss": 13.404,
"step": 1060
},
{
"epoch": 0.39,
"learning_rate": 3.25497287522604e-07,
"loss": 13.4051,
"step": 1080
},
{
"epoch": 0.4,
"learning_rate": 3.315250150693188e-07,
"loss": 13.4567,
"step": 1100
},
{
"epoch": 0.41,
"learning_rate": 3.375527426160337e-07,
"loss": 13.4018,
"step": 1120
},
{
"epoch": 0.41,
"learning_rate": 3.435804701627486e-07,
"loss": 13.3846,
"step": 1140
},
{
"epoch": 0.42,
"learning_rate": 3.4960819770946356e-07,
"loss": 13.3325,
"step": 1160
},
{
"epoch": 0.43,
"learning_rate": 3.556359252561784e-07,
"loss": 13.3005,
"step": 1180
},
{
"epoch": 0.43,
"learning_rate": 3.616636528028933e-07,
"loss": 13.3281,
"step": 1200
},
{
"epoch": 0.44,
"learning_rate": 3.6769138034960815e-07,
"loss": 13.258,
"step": 1220
},
{
"epoch": 0.45,
"learning_rate": 3.7371910789632304e-07,
"loss": 13.2784,
"step": 1240
},
{
"epoch": 0.46,
"learning_rate": 3.79746835443038e-07,
"loss": 13.2793,
"step": 1260
},
{
"epoch": 0.46,
"learning_rate": 3.8577456298975284e-07,
"loss": 13.2464,
"step": 1280
},
{
"epoch": 0.47,
"learning_rate": 3.9180229053646773e-07,
"loss": 13.3347,
"step": 1300
},
{
"epoch": 0.48,
"learning_rate": 3.9783001808318263e-07,
"loss": 13.3884,
"step": 1320
},
{
"epoch": 0.48,
"learning_rate": 4.0385774562989747e-07,
"loss": 13.2666,
"step": 1340
},
{
"epoch": 0.49,
"learning_rate": 4.0988547317661237e-07,
"loss": 13.2176,
"step": 1360
},
{
"epoch": 0.5,
"learning_rate": 4.159132007233273e-07,
"loss": 13.233,
"step": 1380
},
{
"epoch": 0.51,
"learning_rate": 4.2194092827004216e-07,
"loss": 13.2696,
"step": 1400
},
{
"epoch": 0.51,
"learning_rate": 4.2796865581675706e-07,
"loss": 13.1597,
"step": 1420
},
{
"epoch": 0.52,
"learning_rate": 4.3399638336347195e-07,
"loss": 13.1763,
"step": 1440
},
{
"epoch": 0.53,
"learning_rate": 4.400241109101868e-07,
"loss": 13.2415,
"step": 1460
},
{
"epoch": 0.54,
"learning_rate": 4.4605183845690175e-07,
"loss": 13.2653,
"step": 1480
},
{
"epoch": 0.54,
"learning_rate": 4.5207956600361664e-07,
"loss": 13.1193,
"step": 1500
},
{
"epoch": 0.55,
"learning_rate": 4.581072935503315e-07,
"loss": 13.1928,
"step": 1520
},
{
"epoch": 0.56,
"learning_rate": 4.641350210970464e-07,
"loss": 13.1678,
"step": 1540
},
{
"epoch": 0.56,
"learning_rate": 4.701627486437613e-07,
"loss": 13.1757,
"step": 1560
},
{
"epoch": 0.57,
"learning_rate": 4.761904761904761e-07,
"loss": 13.1773,
"step": 1580
},
{
"epoch": 0.58,
"learning_rate": 4.822182037371911e-07,
"loss": 13.1095,
"step": 1600
},
{
"epoch": 0.59,
"learning_rate": 4.88245931283906e-07,
"loss": 13.146,
"step": 1620
},
{
"epoch": 0.59,
"learning_rate": 4.942736588306209e-07,
"loss": 13.1242,
"step": 1640
},
{
"epoch": 0.6,
"learning_rate": 4.999999719845892e-07,
"loss": 13.0289,
"step": 1660
},
{
"epoch": 0.61,
"learning_rate": 4.99987645305352e-07,
"loss": 13.1282,
"step": 1680
},
{
"epoch": 0.61,
"learning_rate": 4.999529075720485e-07,
"loss": 13.1208,
"step": 1700
},
{
"epoch": 0.62,
"learning_rate": 4.998957618988693e-07,
"loss": 13.1201,
"step": 1720
},
{
"epoch": 0.63,
"learning_rate": 4.998162134088466e-07,
"loss": 12.9442,
"step": 1740
},
{
"epoch": 0.64,
"learning_rate": 4.997142692333949e-07,
"loss": 13.114,
"step": 1760
},
{
"epoch": 0.64,
"learning_rate": 4.995899385116716e-07,
"loss": 13.0605,
"step": 1780
},
{
"epoch": 0.65,
"learning_rate": 4.994432323897575e-07,
"loss": 13.0292,
"step": 1800
},
{
"epoch": 0.66,
"learning_rate": 4.992741640196581e-07,
"loss": 13.0242,
"step": 1820
},
{
"epoch": 0.67,
"learning_rate": 4.990827485581237e-07,
"loss": 13.0583,
"step": 1840
},
{
"epoch": 0.67,
"learning_rate": 4.988690031652916e-07,
"loss": 13.0788,
"step": 1860
},
{
"epoch": 0.68,
"learning_rate": 4.986329470031468e-07,
"loss": 13.0321,
"step": 1880
},
{
"epoch": 0.69,
"learning_rate": 4.983746012338049e-07,
"loss": 12.9585,
"step": 1900
},
{
"epoch": 0.69,
"learning_rate": 4.980939890176143e-07,
"loss": 13.0254,
"step": 1920
},
{
"epoch": 0.7,
"learning_rate": 4.977911355110807e-07,
"loss": 13.0148,
"step": 1940
},
{
"epoch": 0.71,
"learning_rate": 4.974660678646106e-07,
"loss": 12.8612,
"step": 1960
},
{
"epoch": 0.72,
"learning_rate": 4.971188152200791e-07,
"loss": 12.8905,
"step": 1980
},
{
"epoch": 0.72,
"learning_rate": 4.967494087082155e-07,
"loss": 12.7506,
"step": 2000
},
{
"epoch": 0.73,
"learning_rate": 4.963578814458136e-07,
"loss": 12.7439,
"step": 2020
},
{
"epoch": 0.74,
"learning_rate": 4.959442685327627e-07,
"loss": 12.5732,
"step": 2040
},
{
"epoch": 0.75,
"learning_rate": 4.955086070489006e-07,
"loss": 12.4223,
"step": 2060
},
{
"epoch": 0.75,
"learning_rate": 4.950509360506898e-07,
"loss": 12.4088,
"step": 2080
},
{
"epoch": 0.76,
"learning_rate": 4.945712965677158e-07,
"loss": 12.4423,
"step": 2100
},
{
"epoch": 0.77,
"learning_rate": 4.94069731599009e-07,
"loss": 12.2641,
"step": 2120
},
{
"epoch": 0.77,
"learning_rate": 4.935462861091903e-07,
"loss": 12.3842,
"step": 2140
},
{
"epoch": 0.78,
"learning_rate": 4.93001007024439e-07,
"loss": 12.1959,
"step": 2160
},
{
"epoch": 0.79,
"learning_rate": 4.924339432282875e-07,
"loss": 12.2833,
"step": 2180
},
{
"epoch": 0.8,
"learning_rate": 4.918451455572374e-07,
"loss": 12.146,
"step": 2200
},
{
"epoch": 0.8,
"learning_rate": 4.912346667962032e-07,
"loss": 12.1976,
"step": 2220
},
{
"epoch": 0.81,
"learning_rate": 4.906025616737794e-07,
"loss": 12.0598,
"step": 2240
},
{
"epoch": 0.82,
"learning_rate": 4.899488868573349e-07,
"loss": 12.0777,
"step": 2260
},
{
"epoch": 0.82,
"learning_rate": 4.892737009479322e-07,
"loss": 12.0493,
"step": 2280
},
{
"epoch": 0.83,
"learning_rate": 4.885770644750743e-07,
"loss": 12.0849,
"step": 2300
},
{
"epoch": 0.84,
"learning_rate": 4.878590398912782e-07,
"loss": 11.9523,
"step": 2320
},
{
"epoch": 0.85,
"learning_rate": 4.871196915664761e-07,
"loss": 11.976,
"step": 2340
},
{
"epoch": 0.85,
"learning_rate": 4.863590857822445e-07,
"loss": 11.8329,
"step": 2360
},
{
"epoch": 0.86,
"learning_rate": 4.855772907258626e-07,
"loss": 11.9747,
"step": 2380
},
{
"epoch": 0.87,
"learning_rate": 4.847743764841993e-07,
"loss": 11.9154,
"step": 2400
},
{
"epoch": 0.88,
"learning_rate": 4.839504150374295e-07,
"loss": 11.8741,
"step": 2420
},
{
"epoch": 0.88,
"learning_rate": 4.831054802525819e-07,
"loss": 11.914,
"step": 2440
},
{
"epoch": 0.89,
"learning_rate": 4.822396478769162e-07,
"loss": 11.7922,
"step": 2460
},
{
"epoch": 0.9,
"learning_rate": 4.813529955311333e-07,
"loss": 11.693,
"step": 2480
},
{
"epoch": 0.9,
"learning_rate": 4.804456027024159e-07,
"loss": 11.8023,
"step": 2500
},
{
"epoch": 0.91,
"learning_rate": 4.795175507373028e-07,
"loss": 11.8223,
"step": 2520
},
{
"epoch": 0.92,
"learning_rate": 4.785689228343971e-07,
"loss": 11.6569,
"step": 2540
},
{
"epoch": 0.93,
"learning_rate": 4.77599804036906e-07,
"loss": 11.6978,
"step": 2560
},
{
"epoch": 0.93,
"learning_rate": 4.766102812250183e-07,
"loss": 11.7304,
"step": 2580
},
{
"epoch": 0.94,
"learning_rate": 4.756004431081149e-07,
"loss": 11.6343,
"step": 2600
},
{
"epoch": 0.95,
"learning_rate": 4.745703802168161e-07,
"loss": 11.7166,
"step": 2620
},
{
"epoch": 0.95,
"learning_rate": 4.7352018489486606e-07,
"loss": 11.5801,
"step": 2640
},
{
"epoch": 0.96,
"learning_rate": 4.724499512908541e-07,
"loss": 11.5505,
"step": 2660
},
{
"epoch": 0.97,
"learning_rate": 4.7135977534977425e-07,
"loss": 11.573,
"step": 2680
},
{
"epoch": 0.98,
"learning_rate": 4.702497548044243e-07,
"loss": 11.7114,
"step": 2700
},
{
"epoch": 0.98,
"learning_rate": 4.6911998916664375e-07,
"loss": 11.6111,
"step": 2720
},
{
"epoch": 0.99,
"learning_rate": 4.6797057971839306e-07,
"loss": 11.6444,
"step": 2740
},
{
"epoch": 1.0,
"learning_rate": 4.6680162950267356e-07,
"loss": 11.5578,
"step": 2760
},
{
"epoch": 1.01,
"learning_rate": 4.656132433142901e-07,
"loss": 11.5535,
"step": 2780
},
{
"epoch": 1.01,
"learning_rate": 4.6440552769045604e-07,
"loss": 11.6784,
"step": 2800
},
{
"epoch": 1.02,
"learning_rate": 4.631785909012426e-07,
"loss": 11.5629,
"step": 2820
},
{
"epoch": 1.03,
"learning_rate": 4.619325429398724e-07,
"loss": 11.5986,
"step": 2840
},
{
"epoch": 1.03,
"learning_rate": 4.606674955128586e-07,
"loss": 11.5513,
"step": 2860
},
{
"epoch": 1.04,
"learning_rate": 4.593835620299911e-07,
"loss": 11.4675,
"step": 2880
},
{
"epoch": 1.05,
"learning_rate": 4.580808575941688e-07,
"loss": 11.5263,
"step": 2900
},
{
"epoch": 1.06,
"learning_rate": 4.5675949899108133e-07,
"loss": 11.6017,
"step": 2920
},
{
"epoch": 1.06,
"learning_rate": 4.554196046787392e-07,
"loss": 11.6048,
"step": 2940
},
{
"epoch": 1.07,
"learning_rate": 4.5406129477685406e-07,
"loss": 11.4808,
"step": 2960
},
{
"epoch": 1.08,
"learning_rate": 4.526846910560702e-07,
"loss": 11.5947,
"step": 2980
},
{
"epoch": 1.08,
"learning_rate": 4.512899169270481e-07,
"loss": 11.5756,
"step": 3000
},
{
"epoch": 1.09,
"learning_rate": 4.4987709742940077e-07,
"loss": 11.5105,
"step": 3020
},
{
"epoch": 1.1,
"learning_rate": 4.4844635922048405e-07,
"loss": 11.5413,
"step": 3040
},
{
"epoch": 1.11,
"learning_rate": 4.46997830564042e-07,
"loss": 11.4868,
"step": 3060
},
{
"epoch": 1.11,
"learning_rate": 4.4553164131870813e-07,
"loss": 11.557,
"step": 3080
},
{
"epoch": 1.12,
"learning_rate": 4.440479229263641e-07,
"loss": 11.5823,
"step": 3100
},
{
"epoch": 1.13,
"learning_rate": 4.4254680840035554e-07,
"loss": 11.4501,
"step": 3120
},
{
"epoch": 1.14,
"learning_rate": 4.410284323135682e-07,
"loss": 11.5275,
"step": 3140
},
{
"epoch": 1.14,
"learning_rate": 4.394929307863632e-07,
"loss": 11.5249,
"step": 3160
},
{
"epoch": 1.15,
"learning_rate": 4.3794044147437437e-07,
"loss": 11.479,
"step": 3180
},
{
"epoch": 1.16,
"learning_rate": 4.3637110355616735e-07,
"loss": 11.5742,
"step": 3200
},
{
"epoch": 1.16,
"learning_rate": 4.347850577207626e-07,
"loss": 11.5581,
"step": 3220
},
{
"epoch": 1.17,
"learning_rate": 4.3318244615502254e-07,
"loss": 11.4662,
"step": 3240
},
{
"epoch": 1.18,
"learning_rate": 4.3156341253090524e-07,
"loss": 11.5468,
"step": 3260
},
{
"epoch": 1.19,
"learning_rate": 4.2992810199258373e-07,
"loss": 11.5638,
"step": 3280
},
{
"epoch": 1.19,
"learning_rate": 4.2827666114343463e-07,
"loss": 11.5076,
"step": 3300
},
{
"epoch": 1.2,
"learning_rate": 4.26609238032895e-07,
"loss": 11.5381,
"step": 3320
},
{
"epoch": 1.21,
"learning_rate": 4.2492598214319e-07,
"loss": 11.5322,
"step": 3340
},
{
"epoch": 1.22,
"learning_rate": 4.232270443759319e-07,
"loss": 11.5848,
"step": 3360
},
{
"epoch": 1.22,
"learning_rate": 4.215125770385923e-07,
"loss": 11.5532,
"step": 3380
},
{
"epoch": 1.23,
"learning_rate": 4.197827338308475e-07,
"loss": 11.5282,
"step": 3400
},
{
"epoch": 1.24,
"learning_rate": 4.1803766983080006e-07,
"loss": 11.5016,
"step": 3420
},
{
"epoch": 1.24,
"learning_rate": 4.162775414810757e-07,
"loss": 11.5488,
"step": 3440
},
{
"epoch": 1.25,
"learning_rate": 4.145025065747988e-07,
"loss": 11.4712,
"step": 3460
},
{
"epoch": 1.26,
"learning_rate": 4.1271272424144645e-07,
"loss": 11.6967,
"step": 3480
},
{
"epoch": 1.27,
"learning_rate": 4.1090835493258235e-07,
"loss": 11.5829,
"step": 3500
},
{
"epoch": 1.27,
"learning_rate": 4.0908956040747315e-07,
"loss": 11.5436,
"step": 3520
},
{
"epoch": 1.28,
"learning_rate": 4.0725650371858646e-07,
"loss": 11.6792,
"step": 3540
},
{
"epoch": 1.29,
"learning_rate": 4.054093491969734e-07,
"loss": 11.5023,
"step": 3560
},
{
"epoch": 1.29,
"learning_rate": 4.03548262437537e-07,
"loss": 11.4841,
"step": 3580
},
{
"epoch": 1.3,
"learning_rate": 4.0167341028418655e-07,
"loss": 11.5712,
"step": 3600
},
{
"epoch": 1.31,
"learning_rate": 3.997849608148799e-07,
"loss": 11.566,
"step": 3620
},
{
"epoch": 1.32,
"learning_rate": 3.978830833265563e-07,
"loss": 11.5569,
"step": 3640
},
{
"epoch": 1.32,
"learning_rate": 3.9596794831995863e-07,
"loss": 11.5758,
"step": 3660
},
{
"epoch": 1.33,
"learning_rate": 3.940397274843481e-07,
"loss": 11.4845,
"step": 3680
},
{
"epoch": 1.34,
"learning_rate": 3.9209859368211304e-07,
"loss": 11.5205,
"step": 3700
},
{
"epoch": 1.35,
"learning_rate": 3.9014472093327164e-07,
"loss": 11.5794,
"step": 3720
},
{
"epoch": 1.35,
"learning_rate": 3.8817828439987154e-07,
"loss": 11.5827,
"step": 3740
},
{
"epoch": 1.36,
"learning_rate": 3.8619946037028644e-07,
"loss": 11.5148,
"step": 3760
},
{
"epoch": 1.37,
"learning_rate": 3.842084262434125e-07,
"loss": 11.6089,
"step": 3780
},
{
"epoch": 1.37,
"learning_rate": 3.822053605127645e-07,
"loss": 11.6853,
"step": 3800
},
{
"epoch": 1.38,
"learning_rate": 3.801904427504743e-07,
"loss": 11.5037,
"step": 3820
},
{
"epoch": 1.39,
"learning_rate": 3.781638535911922e-07,
"loss": 11.4557,
"step": 3840
},
{
"epoch": 1.4,
"learning_rate": 3.7612577471589355e-07,
"loss": 11.5509,
"step": 3860
},
{
"epoch": 1.4,
"learning_rate": 3.7407638883559117e-07,
"loss": 11.5338,
"step": 3880
},
{
"epoch": 1.41,
"learning_rate": 3.720158796749556e-07,
"loss": 11.5365,
"step": 3900
},
{
"epoch": 1.42,
"learning_rate": 3.699444319558444e-07,
"loss": 11.5654,
"step": 3920
},
{
"epoch": 1.42,
"learning_rate": 3.6786223138074195e-07,
"loss": 11.5881,
"step": 3940
},
{
"epoch": 1.43,
"learning_rate": 3.657694646161119e-07,
"loss": 11.6041,
"step": 3960
},
{
"epoch": 1.44,
"learning_rate": 3.6366631927566223e-07,
"loss": 11.6121,
"step": 3980
},
{
"epoch": 1.45,
"learning_rate": 3.6155298390352637e-07,
"loss": 11.5077,
"step": 4000
},
{
"epoch": 1.45,
"learning_rate": 3.594296479573602e-07,
"loss": 11.6038,
"step": 4020
},
{
"epoch": 1.46,
"learning_rate": 3.5729650179135743e-07,
"loss": 11.5351,
"step": 4040
},
{
"epoch": 1.47,
"learning_rate": 3.5515373663918504e-07,
"loss": 11.6607,
"step": 4060
},
{
"epoch": 1.48,
"learning_rate": 3.530015445968388e-07,
"loss": 11.5155,
"step": 4080
},
{
"epoch": 1.48,
"learning_rate": 3.508401186054224e-07,
"loss": 11.5073,
"step": 4100
},
{
"epoch": 1.49,
"learning_rate": 3.4866965243385093e-07,
"loss": 11.5617,
"step": 4120
},
{
"epoch": 1.5,
"learning_rate": 3.4649034066147894e-07,
"loss": 11.4584,
"step": 4140
},
{
"epoch": 1.5,
"learning_rate": 3.4430237866065735e-07,
"loss": 11.5588,
"step": 4160
},
{
"epoch": 1.51,
"learning_rate": 3.4210596257921844e-07,
"loss": 11.4431,
"step": 4180
},
{
"epoch": 1.52,
"learning_rate": 3.399012893228912e-07,
"loss": 11.5465,
"step": 4200
},
{
"epoch": 1.53,
"learning_rate": 3.376885565376491e-07,
"loss": 11.5845,
"step": 4220
},
{
"epoch": 1.53,
"learning_rate": 3.3546796259199185e-07,
"loss": 11.6001,
"step": 4240
},
{
"epoch": 1.54,
"learning_rate": 3.3323970655916115e-07,
"loss": 11.5957,
"step": 4260
},
{
"epoch": 1.55,
"learning_rate": 3.3100398819929473e-07,
"loss": 11.5674,
"step": 4280
},
{
"epoch": 1.56,
"learning_rate": 3.2876100794151755e-07,
"loss": 11.554,
"step": 4300
},
{
"epoch": 1.56,
"learning_rate": 3.2651096686597423e-07,
"loss": 11.5188,
"step": 4320
},
{
"epoch": 1.57,
"learning_rate": 3.2425406668580185e-07,
"loss": 11.5757,
"step": 4340
},
{
"epoch": 1.58,
"learning_rate": 3.2199050972904706e-07,
"loss": 11.4447,
"step": 4360
},
{
"epoch": 1.58,
"learning_rate": 3.197204989205276e-07,
"loss": 11.5552,
"step": 4380
},
{
"epoch": 1.59,
"learning_rate": 3.174442377636401e-07,
"loss": 11.5855,
"step": 4400
},
{
"epoch": 1.6,
"learning_rate": 3.151619303221166e-07,
"loss": 11.5776,
"step": 4420
},
{
"epoch": 1.61,
"learning_rate": 3.1287378120173045e-07,
"loss": 11.4827,
"step": 4440
},
{
"epoch": 1.61,
"learning_rate": 3.1057999553195337e-07,
"loss": 11.7251,
"step": 4460
},
{
"epoch": 1.62,
"learning_rate": 3.082807789475662e-07,
"loss": 11.6293,
"step": 4480
},
{
"epoch": 1.63,
"learning_rate": 3.059763375702241e-07,
"loss": 11.494,
"step": 4500
},
{
"epoch": 1.63,
"learning_rate": 3.0366687798997756e-07,
"loss": 11.5105,
"step": 4520
},
{
"epoch": 1.64,
"learning_rate": 3.0135260724675236e-07,
"loss": 11.4956,
"step": 4540
},
{
"epoch": 1.65,
"learning_rate": 2.990337328117886e-07,
"loss": 11.5945,
"step": 4560
},
{
"epoch": 1.66,
"learning_rate": 2.9671046256904075e-07,
"loss": 11.5549,
"step": 4580
},
{
"epoch": 1.66,
"learning_rate": 2.9438300479654196e-07,
"loss": 11.5096,
"step": 4600
},
{
"epoch": 1.67,
"learning_rate": 2.9205156814773143e-07,
"loss": 11.6499,
"step": 4620
},
{
"epoch": 1.68,
"learning_rate": 2.8971636163274924e-07,
"loss": 11.5242,
"step": 4640
},
{
"epoch": 1.69,
"learning_rate": 2.8737759459969925e-07,
"loss": 11.6741,
"step": 4660
},
{
"epoch": 1.69,
"learning_rate": 2.850354767158804e-07,
"loss": 11.5848,
"step": 4680
},
{
"epoch": 1.7,
"learning_rate": 2.826902179489913e-07,
"loss": 11.3855,
"step": 4700
},
{
"epoch": 1.71,
"learning_rate": 2.803420285483062e-07,
"loss": 11.6143,
"step": 4720
},
{
"epoch": 1.71,
"learning_rate": 2.7799111902582693e-07,
"loss": 11.5048,
"step": 4740
},
{
"epoch": 1.72,
"learning_rate": 2.7563770013741036e-07,
"loss": 11.6516,
"step": 4760
},
{
"epoch": 1.73,
"learning_rate": 2.7328198286387473e-07,
"loss": 11.496,
"step": 4780
},
{
"epoch": 1.74,
"learning_rate": 2.7092417839208537e-07,
"loss": 11.5108,
"step": 4800
},
{
"epoch": 1.74,
"learning_rate": 2.685644980960221e-07,
"loss": 11.6525,
"step": 4820
},
{
"epoch": 1.75,
"learning_rate": 2.662031535178298e-07,
"loss": 11.6017,
"step": 4840
},
{
"epoch": 1.76,
"learning_rate": 2.638403563488542e-07,
"loss": 11.5589,
"step": 4860
},
{
"epoch": 1.76,
"learning_rate": 2.614763184106635e-07,
"loss": 11.4828,
"step": 4880
},
{
"epoch": 1.77,
"learning_rate": 2.591112516360594e-07,
"loss": 11.5234,
"step": 4900
},
{
"epoch": 1.78,
"learning_rate": 2.567453680500774e-07,
"loss": 11.4855,
"step": 4920
},
{
"epoch": 1.79,
"learning_rate": 2.5437887975097887e-07,
"loss": 11.4654,
"step": 4940
},
{
"epoch": 1.79,
"learning_rate": 2.5201199889123715e-07,
"loss": 11.5591,
"step": 4960
},
{
"epoch": 1.8,
"learning_rate": 2.4964493765851795e-07,
"loss": 11.5175,
"step": 4980
},
{
"epoch": 1.81,
"learning_rate": 2.4727790825665703e-07,
"loss": 11.5616,
"step": 5000
},
{
"epoch": 1.82,
"learning_rate": 2.4491112288663675e-07,
"loss": 11.6138,
"step": 5020
},
{
"epoch": 1.82,
"learning_rate": 2.4254479372756236e-07,
"loss": 11.5176,
"step": 5040
},
{
"epoch": 1.83,
"learning_rate": 2.4017913291764045e-07,
"loss": 11.5173,
"step": 5060
},
{
"epoch": 1.84,
"learning_rate": 2.3781435253516076e-07,
"loss": 11.4549,
"step": 5080
},
{
"epoch": 1.84,
"learning_rate": 2.354506645794842e-07,
"loss": 11.5263,
"step": 5100
},
{
"epoch": 1.85,
"learning_rate": 2.330882809520369e-07,
"loss": 11.5152,
"step": 5120
},
{
"epoch": 1.86,
"learning_rate": 2.3072741343731383e-07,
"loss": 11.4778,
"step": 5140
},
{
"epoch": 1.87,
"learning_rate": 2.2836827368389245e-07,
"loss": 11.4824,
"step": 5160
},
{
"epoch": 1.87,
"learning_rate": 2.2601107318545874e-07,
"loss": 11.5656,
"step": 5180
},
{
"epoch": 1.88,
"learning_rate": 2.2365602326184714e-07,
"loss": 11.5009,
"step": 5200
},
{
"epoch": 1.89,
"learning_rate": 2.21303335040096e-07,
"loss": 11.5992,
"step": 5220
},
{
"epoch": 1.9,
"learning_rate": 2.1895321943552024e-07,
"loss": 11.6539,
"step": 5240
},
{
"epoch": 1.9,
"learning_rate": 2.1660588713280316e-07,
"loss": 11.5716,
"step": 5260
},
{
"epoch": 1.91,
"learning_rate": 2.142615485671088e-07,
"loss": 11.6155,
"step": 5280
},
{
"epoch": 1.92,
"learning_rate": 2.119204139052167e-07,
"loss": 11.6557,
"step": 5300
},
{
"epoch": 1.92,
"learning_rate": 2.0958269302668075e-07,
"loss": 11.6282,
"step": 5320
},
{
"epoch": 1.93,
"learning_rate": 2.0724859550501393e-07,
"loss": 11.3897,
"step": 5340
},
{
"epoch": 1.94,
"learning_rate": 2.0491833058890002e-07,
"loss": 11.6027,
"step": 5360
},
{
"epoch": 1.95,
"learning_rate": 2.0259210718343495e-07,
"loss": 11.6817,
"step": 5380
},
{
"epoch": 1.95,
"learning_rate": 2.002701338313987e-07,
"loss": 11.6283,
"step": 5400
},
{
"epoch": 1.96,
"learning_rate": 1.9795261869455967e-07,
"loss": 11.6081,
"step": 5420
},
{
"epoch": 1.97,
"learning_rate": 1.9563976953501333e-07,
"loss": 11.6218,
"step": 5440
},
{
"epoch": 1.97,
"learning_rate": 1.933317936965566e-07,
"loss": 11.6083,
"step": 5460
},
{
"epoch": 1.98,
"learning_rate": 1.9102889808609993e-07,
"loss": 11.5382,
"step": 5480
},
{
"epoch": 1.99,
"learning_rate": 1.8873128915511782e-07,
"loss": 11.7335,
"step": 5500
},
{
"epoch": 2.0,
"learning_rate": 1.8643917288114146e-07,
"loss": 11.7195,
"step": 5520
},
{
"epoch": 2.0,
"learning_rate": 1.8415275474929265e-07,
"loss": 11.6062,
"step": 5540
},
{
"epoch": 2.01,
"learning_rate": 1.8187223973386243e-07,
"loss": 11.4377,
"step": 5560
},
{
"epoch": 2.02,
"learning_rate": 1.795978322799354e-07,
"loss": 11.4762,
"step": 5580
},
{
"epoch": 2.03,
"learning_rate": 1.773297362850617e-07,
"loss": 11.7563,
"step": 5600
},
{
"epoch": 2.03,
"learning_rate": 1.7506815508097754e-07,
"loss": 11.5128,
"step": 5620
},
{
"epoch": 2.04,
"learning_rate": 1.728132914153771e-07,
"loss": 11.604,
"step": 5640
},
{
"epoch": 2.05,
"learning_rate": 1.7056534743373617e-07,
"loss": 11.4812,
"step": 5660
},
{
"epoch": 2.05,
"learning_rate": 1.6832452466119039e-07,
"loss": 11.529,
"step": 5680
},
{
"epoch": 2.06,
"learning_rate": 1.6609102398446852e-07,
"loss": 11.5573,
"step": 5700
},
{
"epoch": 2.07,
"learning_rate": 1.6386504563388343e-07,
"loss": 11.5652,
"step": 5720
},
{
"epoch": 2.08,
"learning_rate": 1.6164678916538154e-07,
"loss": 11.594,
"step": 5740
},
{
"epoch": 2.08,
"learning_rate": 1.5943645344265312e-07,
"loss": 11.5252,
"step": 5760
},
{
"epoch": 2.09,
"learning_rate": 1.572342366193042e-07,
"loss": 11.5946,
"step": 5780
},
{
"epoch": 2.1,
"learning_rate": 1.5504033612109246e-07,
"loss": 11.5068,
"step": 5800
},
{
"epoch": 2.1,
"learning_rate": 1.528549486282284e-07,
"loss": 11.5782,
"step": 5820
},
{
"epoch": 2.11,
"learning_rate": 1.5067827005774297e-07,
"loss": 11.5785,
"step": 5840
},
{
"epoch": 2.12,
"learning_rate": 1.4851049554592403e-07,
"loss": 11.4974,
"step": 5860
},
{
"epoch": 2.13,
"learning_rate": 1.4635181943082284e-07,
"loss": 11.636,
"step": 5880
},
{
"epoch": 2.13,
"learning_rate": 1.442024352348314e-07,
"loss": 11.5021,
"step": 5900
},
{
"epoch": 2.14,
"learning_rate": 1.42062535647334e-07,
"loss": 11.4928,
"step": 5920
},
{
"epoch": 2.15,
"learning_rate": 1.3993231250743243e-07,
"loss": 11.5453,
"step": 5940
},
{
"epoch": 2.16,
"learning_rate": 1.3781195678674817e-07,
"loss": 11.5554,
"step": 5960
},
{
"epoch": 2.16,
"learning_rate": 1.3570165857230176e-07,
"loss": 11.4993,
"step": 5980
},
{
"epoch": 2.17,
"learning_rate": 1.3360160704947221e-07,
"loss": 11.5695,
"step": 6000
},
{
"epoch": 2.18,
"learning_rate": 1.3151199048503625e-07,
"loss": 11.5464,
"step": 6020
},
{
"epoch": 2.18,
"learning_rate": 1.2943299621029076e-07,
"loss": 11.5437,
"step": 6040
},
{
"epoch": 2.19,
"learning_rate": 1.273648106042587e-07,
"loss": 11.6056,
"step": 6060
},
{
"epoch": 2.2,
"learning_rate": 1.2530761907698062e-07,
"loss": 11.5232,
"step": 6080
},
{
"epoch": 2.21,
"learning_rate": 1.2326160605289272e-07,
"loss": 11.5292,
"step": 6100
},
{
"epoch": 2.21,
"learning_rate": 1.2122695495429347e-07,
"loss": 11.4655,
"step": 6120
},
{
"epoch": 2.22,
"learning_rate": 1.1920384818490018e-07,
"loss": 11.5054,
"step": 6140
},
{
"epoch": 2.23,
"learning_rate": 1.1719246711349664e-07,
"loss": 11.484,
"step": 6160
},
{
"epoch": 2.24,
"learning_rate": 1.151929920576737e-07,
"loss": 11.4898,
"step": 6180
},
{
"epoch": 2.24,
"learning_rate": 1.1320560226766396e-07,
"loss": 11.5315,
"step": 6200
},
{
"epoch": 2.25,
"learning_rate": 1.112304759102723e-07,
"loss": 11.5258,
"step": 6220
},
{
"epoch": 2.26,
"learning_rate": 1.0926779005290365e-07,
"loss": 11.4383,
"step": 6240
},
{
"epoch": 2.26,
"learning_rate": 1.0731772064768876e-07,
"loss": 11.5047,
"step": 6260
},
{
"epoch": 2.27,
"learning_rate": 1.0538044251571057e-07,
"loss": 11.5129,
"step": 6280
},
{
"epoch": 2.28,
"learning_rate": 1.0345612933133166e-07,
"loss": 11.516,
"step": 6300
},
{
"epoch": 2.29,
"learning_rate": 1.0154495360662463e-07,
"loss": 11.589,
"step": 6320
},
{
"epoch": 2.29,
"learning_rate": 9.964708667590655e-08,
"loss": 11.5469,
"step": 6340
},
{
"epoch": 2.3,
"learning_rate": 9.776269868037928e-08,
"loss": 11.5621,
"step": 6360
},
{
"epoch": 2.31,
"learning_rate": 9.589195855287623e-08,
"loss": 11.5689,
"step": 6380
},
{
"epoch": 2.31,
"learning_rate": 9.403503400271798e-08,
"loss": 11.4723,
"step": 6400
},
{
"epoch": 2.32,
"learning_rate": 9.219209150067725e-08,
"loss": 11.5355,
"step": 6420
},
{
"epoch": 2.33,
"learning_rate": 9.036329626405506e-08,
"loss": 11.402,
"step": 6440
},
{
"epoch": 2.34,
"learning_rate": 8.854881224186933e-08,
"loss": 11.5974,
"step": 6460
},
{
"epoch": 2.34,
"learning_rate": 8.67488021001569e-08,
"loss": 11.6007,
"step": 6480
},
{
"epoch": 2.35,
"learning_rate": 8.496342720739078e-08,
"loss": 11.4786,
"step": 6500
},
{
"epoch": 2.36,
"learning_rate": 8.319284762001371e-08,
"loss": 11.557,
"step": 6520
},
{
"epoch": 2.37,
"learning_rate": 8.143722206808959e-08,
"loss": 11.4715,
"step": 6540
},
{
"epoch": 2.37,
"learning_rate": 7.969670794107294e-08,
"loss": 11.5074,
"step": 6560
},
{
"epoch": 2.38,
"learning_rate": 7.797146127369966e-08,
"loss": 11.533,
"step": 6580
},
{
"epoch": 2.39,
"learning_rate": 7.626163673199848e-08,
"loss": 11.4245,
"step": 6600
},
{
"epoch": 2.39,
"learning_rate": 7.456738759942549e-08,
"loss": 11.6053,
"step": 6620
},
{
"epoch": 2.4,
"learning_rate": 7.288886576312242e-08,
"loss": 11.6554,
"step": 6640
},
{
"epoch": 2.41,
"learning_rate": 7.122622170030016e-08,
"loss": 11.604,
"step": 6660
},
{
"epoch": 2.42,
"learning_rate": 6.957960446474873e-08,
"loss": 11.5528,
"step": 6680
},
{
"epoch": 2.42,
"learning_rate": 6.794916167347473e-08,
"loss": 11.5293,
"step": 6700
},
{
"epoch": 2.43,
"learning_rate": 6.633503949346775e-08,
"loss": 11.4711,
"step": 6720
},
{
"epoch": 2.44,
"learning_rate": 6.473738262859668e-08,
"loss": 11.5405,
"step": 6740
},
{
"epoch": 2.44,
"learning_rate": 6.315633430663708e-08,
"loss": 11.4586,
"step": 6760
},
{
"epoch": 2.45,
"learning_rate": 6.159203626643137e-08,
"loss": 11.547,
"step": 6780
},
{
"epoch": 2.46,
"learning_rate": 6.004462874518157e-08,
"loss": 11.5252,
"step": 6800
},
{
"epoch": 2.47,
"learning_rate": 5.8514250465877826e-08,
"loss": 11.5623,
"step": 6820
},
{
"epoch": 2.47,
"learning_rate": 5.700103862486158e-08,
"loss": 11.5316,
"step": 6840
},
{
"epoch": 2.48,
"learning_rate": 5.550512887952641e-08,
"loss": 11.5136,
"step": 6860
},
{
"epoch": 2.49,
"learning_rate": 5.4026655336156384e-08,
"loss": 11.6511,
"step": 6880
},
{
"epoch": 2.5,
"learning_rate": 5.2565750537903716e-08,
"loss": 11.5241,
"step": 6900
},
{
"epoch": 2.5,
"learning_rate": 5.112254545290634e-08,
"loss": 11.5878,
"step": 6920
},
{
"epoch": 2.51,
"learning_rate": 4.96971694625469e-08,
"loss": 11.4855,
"step": 6940
},
{
"epoch": 2.52,
"learning_rate": 4.8289750349853834e-08,
"loss": 11.5632,
"step": 6960
},
{
"epoch": 2.52,
"learning_rate": 4.6900414288045726e-08,
"loss": 11.6167,
"step": 6980
},
{
"epoch": 2.53,
"learning_rate": 4.552928582922022e-08,
"loss": 11.5751,
"step": 7000
},
{
"epoch": 2.54,
"learning_rate": 4.4176487893187956e-08,
"loss": 11.5228,
"step": 7020
},
{
"epoch": 2.55,
"learning_rate": 4.284214175645301e-08,
"loss": 11.624,
"step": 7040
},
{
"epoch": 2.55,
"learning_rate": 4.152636704134055e-08,
"loss": 11.4761,
"step": 7060
},
{
"epoch": 2.56,
"learning_rate": 4.022928170527315e-08,
"loss": 11.579,
"step": 7080
},
{
"epoch": 2.57,
"learning_rate": 3.8951002030195636e-08,
"loss": 11.5007,
"step": 7100
},
{
"epoch": 2.58,
"learning_rate": 3.7691642612150826e-08,
"loss": 11.6567,
"step": 7120
},
{
"epoch": 2.58,
"learning_rate": 3.6451316351006153e-08,
"loss": 11.517,
"step": 7140
},
{
"epoch": 2.59,
"learning_rate": 3.5230134440332236e-08,
"loss": 11.5614,
"step": 7160
},
{
"epoch": 2.6,
"learning_rate": 3.402820635743464e-08,
"loss": 11.5687,
"step": 7180
},
{
"epoch": 2.6,
"learning_rate": 3.284563985353925e-08,
"loss": 11.4573,
"step": 7200
},
{
"epoch": 2.61,
"learning_rate": 3.168254094413264e-08,
"loss": 11.5399,
"step": 7220
},
{
"epoch": 2.62,
"learning_rate": 3.053901389945798e-08,
"loss": 11.5103,
"step": 7240
},
{
"epoch": 2.63,
"learning_rate": 2.94151612351671e-08,
"loss": 11.5896,
"step": 7260
},
{
"epoch": 2.63,
"learning_rate": 2.8311083703130346e-08,
"loss": 11.4998,
"step": 7280
},
{
"epoch": 2.64,
"learning_rate": 2.722688028240419e-08,
"loss": 11.5447,
"step": 7300
},
{
"epoch": 2.65,
"learning_rate": 2.616264817035793e-08,
"loss": 11.6846,
"step": 7320
},
{
"epoch": 2.65,
"learning_rate": 2.5118482773960002e-08,
"loss": 11.5462,
"step": 7340
},
{
"epoch": 2.66,
"learning_rate": 2.4094477701225163e-08,
"loss": 11.5512,
"step": 7360
},
{
"epoch": 2.67,
"learning_rate": 2.30907247528222e-08,
"loss": 11.4978,
"step": 7380
},
{
"epoch": 2.68,
"learning_rate": 2.2107313913844432e-08,
"loss": 11.4415,
"step": 7400
},
{
"epoch": 2.68,
"learning_rate": 2.114433334574267e-08,
"loss": 11.5456,
"step": 7420
},
{
"epoch": 2.69,
"learning_rate": 2.0201869378421497e-08,
"loss": 11.5887,
"step": 7440
},
{
"epoch": 2.7,
"learning_rate": 1.9280006502500056e-08,
"loss": 11.6359,
"step": 7460
},
{
"epoch": 2.71,
"learning_rate": 1.8378827361737543e-08,
"loss": 11.6285,
"step": 7480
},
{
"epoch": 2.71,
"learning_rate": 1.749841274562422e-08,
"loss": 11.74,
"step": 7500
},
{
"epoch": 2.72,
"learning_rate": 1.6638841582138908e-08,
"loss": 11.5993,
"step": 7520
},
{
"epoch": 2.73,
"learning_rate": 1.580019093067303e-08,
"loss": 11.6595,
"step": 7540
},
{
"epoch": 2.73,
"learning_rate": 1.4982535975122474e-08,
"loss": 11.6267,
"step": 7560
},
{
"epoch": 2.74,
"learning_rate": 1.4185950017147369e-08,
"loss": 11.5287,
"step": 7580
},
{
"epoch": 2.75,
"learning_rate": 1.3410504469600708e-08,
"loss": 11.6259,
"step": 7600
},
{
"epoch": 2.76,
"learning_rate": 1.2656268850126411e-08,
"loss": 11.6673,
"step": 7620
},
{
"epoch": 2.76,
"learning_rate": 1.1923310774926948e-08,
"loss": 11.5539,
"step": 7640
},
{
"epoch": 2.77,
"learning_rate": 1.1211695952701717e-08,
"loss": 11.5689,
"step": 7660
},
{
"epoch": 2.78,
"learning_rate": 1.0521488178756532e-08,
"loss": 11.471,
"step": 7680
},
{
"epoch": 2.78,
"learning_rate": 9.852749329284093e-09,
"loss": 11.5166,
"step": 7700
},
{
"epoch": 2.79,
"learning_rate": 9.20553935581725e-09,
"loss": 11.5288,
"step": 7720
},
{
"epoch": 2.8,
"learning_rate": 8.57991627985416e-09,
"loss": 11.6492,
"step": 7740
},
{
"epoch": 2.81,
"learning_rate": 7.975936187656852e-09,
"loss": 11.4599,
"step": 7760
},
{
"epoch": 2.81,
"learning_rate": 7.393653225223145e-09,
"loss": 11.5627,
"step": 7780
},
{
"epoch": 2.82,
"learning_rate": 6.833119593432607e-09,
"loss": 11.6969,
"step": 7800
},
{
"epoch": 2.83,
"learning_rate": 6.294385543366642e-09,
"loss": 11.651,
"step": 7820
},
{
"epoch": 2.84,
"learning_rate": 5.777499371803745e-09,
"loss": 11.5684,
"step": 7840
},
{
"epoch": 2.84,
"learning_rate": 5.282507416889625e-09,
"loss": 11.6632,
"step": 7860
},
{
"epoch": 2.85,
"learning_rate": 4.809454053983087e-09,
"loss": 11.4329,
"step": 7880
},
{
"epoch": 2.86,
"learning_rate": 4.358381691677931e-09,
"loss": 11.6679,
"step": 7900
},
{
"epoch": 2.86,
"learning_rate": 3.929330768000949e-09,
"loss": 11.479,
"step": 7920
},
{
"epoch": 2.87,
"learning_rate": 3.5223397467867565e-09,
"loss": 11.6362,
"step": 7940
},
{
"epoch": 2.88,
"learning_rate": 3.1374451142295867e-09,
"loss": 11.7412,
"step": 7960
},
{
"epoch": 2.89,
"learning_rate": 2.774681375612292e-09,
"loss": 11.5321,
"step": 7980
},
{
"epoch": 2.89,
"learning_rate": 2.4340810522131274e-09,
"loss": 11.5223,
"step": 8000
},
{
"epoch": 2.9,
"learning_rate": 2.115674678390078e-09,
"loss": 11.6288,
"step": 8020
},
{
"epoch": 2.91,
"learning_rate": 1.8194907988436093e-09,
"loss": 11.5262,
"step": 8040
},
{
"epoch": 2.92,
"learning_rate": 1.5455559660577377e-09,
"loss": 11.5915,
"step": 8060
},
{
"epoch": 2.92,
"learning_rate": 1.293894737919382e-09,
"loss": 11.5543,
"step": 8080
},
{
"epoch": 2.93,
"learning_rate": 1.0645296755171229e-09,
"loss": 11.5801,
"step": 8100
},
{
"epoch": 2.94,
"learning_rate": 8.574813411183213e-10,
"loss": 11.6047,
"step": 8120
},
{
"epoch": 2.94,
"learning_rate": 6.727682963259818e-10,
"loss": 11.5458,
"step": 8140
},
{
"epoch": 2.95,
"learning_rate": 5.10407100414556e-10,
"loss": 11.5238,
"step": 8160
},
{
"epoch": 2.96,
"learning_rate": 3.704123088455191e-10,
"loss": 11.5343,
"step": 8180
},
{
"epoch": 2.97,
"learning_rate": 2.5279647196246867e-10,
"loss": 11.5845,
"step": 8200
},
{
"epoch": 2.97,
"learning_rate": 1.5757013386599738e-10,
"loss": 11.6215,
"step": 8220
},
{
"epoch": 2.98,
"learning_rate": 8.474183146844894e-11,
"loss": 11.5309,
"step": 8240
},
{
"epoch": 2.99,
"learning_rate": 3.431809372853034e-11,
"loss": 11.528,
"step": 8260
},
{
"epoch": 2.99,
"learning_rate": 6.3034410661133574e-12,
"loss": 11.5548,
"step": 8280
},
{
"epoch": 3.0,
"step": 8295,
"total_flos": 2.451174829221888e+17,
"train_loss": 12.036643933186408,
"train_runtime": 6501.8144,
"train_samples_per_second": 2.552,
"train_steps_per_second": 1.276
}
],
"logging_steps": 20,
"max_steps": 8295,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 100,
"total_flos": 2.451174829221888e+17,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}