distilbert-imdb-demo / trainer_state.json
eugenecamus's picture
Training in progress, epoch 1
ee6afe1
{
"best_metric": 0.6920435428619385,
"best_model_checkpoint": "./imdb_outputs/checkpoint-5314",
"epoch": 5.0,
"global_step": 13285,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 1.5048908954100828e-05,
"loss": 0.6896,
"step": 10
},
{
"epoch": 0.01,
"learning_rate": 3.0097817908201657e-05,
"loss": 0.684,
"step": 20
},
{
"epoch": 0.01,
"learning_rate": 4.514672686230249e-05,
"loss": 0.6176,
"step": 30
},
{
"epoch": 0.02,
"learning_rate": 6.0195635816403313e-05,
"loss": 0.4737,
"step": 40
},
{
"epoch": 0.02,
"learning_rate": 7.524454477050414e-05,
"loss": 0.3974,
"step": 50
},
{
"epoch": 0.02,
"learning_rate": 9.029345372460498e-05,
"loss": 0.4758,
"step": 60
},
{
"epoch": 0.03,
"learning_rate": 0.0001053423626787058,
"loss": 0.3866,
"step": 70
},
{
"epoch": 0.03,
"learning_rate": 0.00012039127163280663,
"loss": 0.5445,
"step": 80
},
{
"epoch": 0.03,
"learning_rate": 0.00013544018058690745,
"loss": 0.4375,
"step": 90
},
{
"epoch": 0.04,
"learning_rate": 0.00015048908954100828,
"loss": 0.4384,
"step": 100
},
{
"epoch": 0.04,
"learning_rate": 0.00016553799849510913,
"loss": 0.3477,
"step": 110
},
{
"epoch": 0.05,
"learning_rate": 0.00018058690744920995,
"loss": 0.4281,
"step": 120
},
{
"epoch": 0.05,
"learning_rate": 0.00019563581640331078,
"loss": 0.5325,
"step": 130
},
{
"epoch": 0.05,
"learning_rate": 0.0002106847253574116,
"loss": 0.6339,
"step": 140
},
{
"epoch": 0.06,
"learning_rate": 0.00022573363431151243,
"loss": 0.6693,
"step": 150
},
{
"epoch": 0.06,
"learning_rate": 0.00024078254326561325,
"loss": 0.5224,
"step": 160
},
{
"epoch": 0.06,
"learning_rate": 0.0002558314522197141,
"loss": 0.6391,
"step": 170
},
{
"epoch": 0.07,
"learning_rate": 0.0002708803611738149,
"loss": 0.5047,
"step": 180
},
{
"epoch": 0.07,
"learning_rate": 0.00028592927012791573,
"loss": 0.5958,
"step": 190
},
{
"epoch": 0.08,
"learning_rate": 0.00030097817908201655,
"loss": 0.59,
"step": 200
},
{
"epoch": 0.08,
"learning_rate": 0.0003160270880361174,
"loss": 0.7813,
"step": 210
},
{
"epoch": 0.08,
"learning_rate": 0.00033107599699021826,
"loss": 0.7294,
"step": 220
},
{
"epoch": 0.09,
"learning_rate": 0.0003461249059443191,
"loss": 0.7803,
"step": 230
},
{
"epoch": 0.09,
"learning_rate": 0.0003611738148984199,
"loss": 0.5583,
"step": 240
},
{
"epoch": 0.09,
"learning_rate": 0.00037622272385252073,
"loss": 0.4703,
"step": 250
},
{
"epoch": 0.1,
"learning_rate": 0.00039127163280662156,
"loss": 0.6904,
"step": 260
},
{
"epoch": 0.1,
"learning_rate": 0.0004063205417607224,
"loss": 0.5762,
"step": 270
},
{
"epoch": 0.11,
"learning_rate": 0.0004213694507148232,
"loss": 0.5184,
"step": 280
},
{
"epoch": 0.11,
"learning_rate": 0.00043641835966892403,
"loss": 0.5477,
"step": 290
},
{
"epoch": 0.11,
"learning_rate": 0.00045146726862302486,
"loss": 0.7142,
"step": 300
},
{
"epoch": 0.12,
"learning_rate": 0.0004665161775771257,
"loss": 0.6237,
"step": 310
},
{
"epoch": 0.12,
"learning_rate": 0.0004815650865312265,
"loss": 0.7138,
"step": 320
},
{
"epoch": 0.12,
"learning_rate": 0.0004966139954853273,
"loss": 0.7844,
"step": 330
},
{
"epoch": 0.13,
"learning_rate": 0.0005116629044394282,
"loss": 0.5712,
"step": 340
},
{
"epoch": 0.13,
"learning_rate": 0.000526711813393529,
"loss": 0.7358,
"step": 350
},
{
"epoch": 0.14,
"learning_rate": 0.0005417607223476298,
"loss": 0.665,
"step": 360
},
{
"epoch": 0.14,
"learning_rate": 0.0005568096313017306,
"loss": 0.6351,
"step": 370
},
{
"epoch": 0.14,
"learning_rate": 0.0005718585402558315,
"loss": 0.5413,
"step": 380
},
{
"epoch": 0.15,
"learning_rate": 0.0005869074492099323,
"loss": 0.5889,
"step": 390
},
{
"epoch": 0.15,
"learning_rate": 0.0006019563581640331,
"loss": 0.5156,
"step": 400
},
{
"epoch": 0.15,
"learning_rate": 0.0006170052671181339,
"loss": 0.7493,
"step": 410
},
{
"epoch": 0.16,
"learning_rate": 0.0006320541760722348,
"loss": 0.5993,
"step": 420
},
{
"epoch": 0.16,
"learning_rate": 0.0006471030850263356,
"loss": 0.8799,
"step": 430
},
{
"epoch": 0.17,
"learning_rate": 0.0006621519939804365,
"loss": 0.7169,
"step": 440
},
{
"epoch": 0.17,
"learning_rate": 0.0006772009029345373,
"loss": 0.7097,
"step": 450
},
{
"epoch": 0.17,
"learning_rate": 0.0006922498118886382,
"loss": 0.6949,
"step": 460
},
{
"epoch": 0.18,
"learning_rate": 0.000707298720842739,
"loss": 0.74,
"step": 470
},
{
"epoch": 0.18,
"learning_rate": 0.0007223476297968398,
"loss": 0.6673,
"step": 480
},
{
"epoch": 0.18,
"learning_rate": 0.0007373965387509406,
"loss": 0.7596,
"step": 490
},
{
"epoch": 0.19,
"learning_rate": 0.0007524454477050415,
"loss": 0.6971,
"step": 500
},
{
"epoch": 0.19,
"learning_rate": 0.0007674943566591423,
"loss": 0.712,
"step": 510
},
{
"epoch": 0.2,
"learning_rate": 0.0007825432656132431,
"loss": 0.7116,
"step": 520
},
{
"epoch": 0.2,
"learning_rate": 0.0007975921745673439,
"loss": 0.7192,
"step": 530
},
{
"epoch": 0.2,
"learning_rate": 0.0008126410835214448,
"loss": 0.7081,
"step": 540
},
{
"epoch": 0.21,
"learning_rate": 0.0008276899924755456,
"loss": 0.6847,
"step": 550
},
{
"epoch": 0.21,
"learning_rate": 0.0008427389014296464,
"loss": 0.7171,
"step": 560
},
{
"epoch": 0.21,
"learning_rate": 0.0008577878103837472,
"loss": 0.7274,
"step": 570
},
{
"epoch": 0.22,
"learning_rate": 0.0008728367193378481,
"loss": 0.7037,
"step": 580
},
{
"epoch": 0.22,
"learning_rate": 0.0008878856282919489,
"loss": 0.7008,
"step": 590
},
{
"epoch": 0.23,
"learning_rate": 0.0009029345372460497,
"loss": 0.6321,
"step": 600
},
{
"epoch": 0.23,
"learning_rate": 0.0009179834462001505,
"loss": 0.6766,
"step": 610
},
{
"epoch": 0.23,
"learning_rate": 0.0009330323551542514,
"loss": 0.6484,
"step": 620
},
{
"epoch": 0.24,
"learning_rate": 0.0009480812641083522,
"loss": 0.8377,
"step": 630
},
{
"epoch": 0.24,
"learning_rate": 0.000963130173062453,
"loss": 0.5908,
"step": 640
},
{
"epoch": 0.24,
"learning_rate": 0.0009781790820165537,
"loss": 0.7359,
"step": 650
},
{
"epoch": 0.25,
"learning_rate": 0.0009932279909706547,
"loss": 0.6451,
"step": 660
},
{
"epoch": 0.25,
"learning_rate": 0.0010082768999247556,
"loss": 0.6498,
"step": 670
},
{
"epoch": 0.26,
"learning_rate": 0.0010233258088788563,
"loss": 0.609,
"step": 680
},
{
"epoch": 0.26,
"learning_rate": 0.0010383747178329572,
"loss": 0.5956,
"step": 690
},
{
"epoch": 0.26,
"learning_rate": 0.001053423626787058,
"loss": 0.717,
"step": 700
},
{
"epoch": 0.27,
"learning_rate": 0.001068472535741159,
"loss": 0.6324,
"step": 710
},
{
"epoch": 0.27,
"learning_rate": 0.0010835214446952596,
"loss": 0.5219,
"step": 720
},
{
"epoch": 0.27,
"learning_rate": 0.0010985703536493605,
"loss": 0.6154,
"step": 730
},
{
"epoch": 0.28,
"learning_rate": 0.0011136192626034613,
"loss": 0.6573,
"step": 740
},
{
"epoch": 0.28,
"learning_rate": 0.0011286681715575622,
"loss": 0.6134,
"step": 750
},
{
"epoch": 0.29,
"learning_rate": 0.001143717080511663,
"loss": 0.5372,
"step": 760
},
{
"epoch": 0.29,
"learning_rate": 0.0011587659894657638,
"loss": 0.7781,
"step": 770
},
{
"epoch": 0.29,
"learning_rate": 0.0011738148984198646,
"loss": 0.7011,
"step": 780
},
{
"epoch": 0.3,
"learning_rate": 0.0011888638073739655,
"loss": 0.7259,
"step": 790
},
{
"epoch": 0.3,
"learning_rate": 0.0012039127163280662,
"loss": 0.7233,
"step": 800
},
{
"epoch": 0.3,
"learning_rate": 0.0012189616252821671,
"loss": 0.6953,
"step": 810
},
{
"epoch": 0.31,
"learning_rate": 0.0012340105342362679,
"loss": 0.6927,
"step": 820
},
{
"epoch": 0.31,
"learning_rate": 0.0012490594431903688,
"loss": 0.7095,
"step": 830
},
{
"epoch": 0.32,
"learning_rate": 0.0012641083521444695,
"loss": 0.6834,
"step": 840
},
{
"epoch": 0.32,
"learning_rate": 0.0012791572610985704,
"loss": 0.7397,
"step": 850
},
{
"epoch": 0.32,
"learning_rate": 0.0012942061700526712,
"loss": 0.7163,
"step": 860
},
{
"epoch": 0.33,
"learning_rate": 0.001309255079006772,
"loss": 0.6996,
"step": 870
},
{
"epoch": 0.33,
"learning_rate": 0.001324303987960873,
"loss": 0.695,
"step": 880
},
{
"epoch": 0.33,
"learning_rate": 0.0013393528969149737,
"loss": 0.6984,
"step": 890
},
{
"epoch": 0.34,
"learning_rate": 0.0013544018058690747,
"loss": 0.6935,
"step": 900
},
{
"epoch": 0.34,
"learning_rate": 0.0013694507148231754,
"loss": 0.6973,
"step": 910
},
{
"epoch": 0.35,
"learning_rate": 0.0013844996237772763,
"loss": 0.6817,
"step": 920
},
{
"epoch": 0.35,
"learning_rate": 0.001399548532731377,
"loss": 0.7045,
"step": 930
},
{
"epoch": 0.35,
"learning_rate": 0.001414597441685478,
"loss": 0.694,
"step": 940
},
{
"epoch": 0.36,
"learning_rate": 0.0014296463506395787,
"loss": 0.6823,
"step": 950
},
{
"epoch": 0.36,
"learning_rate": 0.0014446952595936796,
"loss": 0.6979,
"step": 960
},
{
"epoch": 0.37,
"learning_rate": 0.0014597441685477803,
"loss": 0.705,
"step": 970
},
{
"epoch": 0.37,
"learning_rate": 0.0014747930775018813,
"loss": 0.6959,
"step": 980
},
{
"epoch": 0.37,
"learning_rate": 0.001489841986455982,
"loss": 0.6947,
"step": 990
},
{
"epoch": 0.38,
"learning_rate": 0.001504890895410083,
"loss": 0.6952,
"step": 1000
},
{
"epoch": 0.38,
"learning_rate": 0.0015199398043641836,
"loss": 0.6948,
"step": 1010
},
{
"epoch": 0.38,
"learning_rate": 0.0015349887133182846,
"loss": 0.6909,
"step": 1020
},
{
"epoch": 0.39,
"learning_rate": 0.0015500376222723853,
"loss": 0.6985,
"step": 1030
},
{
"epoch": 0.39,
"learning_rate": 0.0015650865312264862,
"loss": 0.7143,
"step": 1040
},
{
"epoch": 0.4,
"learning_rate": 0.001580135440180587,
"loss": 0.697,
"step": 1050
},
{
"epoch": 0.4,
"learning_rate": 0.0015951843491346879,
"loss": 0.6959,
"step": 1060
},
{
"epoch": 0.4,
"learning_rate": 0.0016102332580887886,
"loss": 0.6934,
"step": 1070
},
{
"epoch": 0.41,
"learning_rate": 0.0016252821670428895,
"loss": 0.6992,
"step": 1080
},
{
"epoch": 0.41,
"learning_rate": 0.0016403310759969902,
"loss": 0.691,
"step": 1090
},
{
"epoch": 0.41,
"learning_rate": 0.0016553799849510912,
"loss": 0.6852,
"step": 1100
},
{
"epoch": 0.42,
"learning_rate": 0.001670428893905192,
"loss": 0.7017,
"step": 1110
},
{
"epoch": 0.42,
"learning_rate": 0.0016854778028592928,
"loss": 0.693,
"step": 1120
},
{
"epoch": 0.43,
"learning_rate": 0.0017005267118133935,
"loss": 0.6965,
"step": 1130
},
{
"epoch": 0.43,
"learning_rate": 0.0017155756207674945,
"loss": 0.6937,
"step": 1140
},
{
"epoch": 0.43,
"learning_rate": 0.0017306245297215952,
"loss": 0.7005,
"step": 1150
},
{
"epoch": 0.44,
"learning_rate": 0.0017456734386756961,
"loss": 0.6956,
"step": 1160
},
{
"epoch": 0.44,
"learning_rate": 0.0017607223476297968,
"loss": 0.6928,
"step": 1170
},
{
"epoch": 0.44,
"learning_rate": 0.0017757712565838978,
"loss": 0.6934,
"step": 1180
},
{
"epoch": 0.45,
"learning_rate": 0.0017908201655379985,
"loss": 0.7013,
"step": 1190
},
{
"epoch": 0.45,
"learning_rate": 0.0018058690744920994,
"loss": 0.698,
"step": 1200
},
{
"epoch": 0.46,
"learning_rate": 0.0018209179834462001,
"loss": 0.6863,
"step": 1210
},
{
"epoch": 0.46,
"learning_rate": 0.001835966892400301,
"loss": 0.6931,
"step": 1220
},
{
"epoch": 0.46,
"learning_rate": 0.0018510158013544018,
"loss": 0.6951,
"step": 1230
},
{
"epoch": 0.47,
"learning_rate": 0.0018660647103085027,
"loss": 0.694,
"step": 1240
},
{
"epoch": 0.47,
"learning_rate": 0.0018811136192626034,
"loss": 0.6931,
"step": 1250
},
{
"epoch": 0.47,
"learning_rate": 0.0018961625282167044,
"loss": 0.6925,
"step": 1260
},
{
"epoch": 0.48,
"learning_rate": 0.001911211437170805,
"loss": 0.695,
"step": 1270
},
{
"epoch": 0.48,
"learning_rate": 0.001926260346124906,
"loss": 0.6915,
"step": 1280
},
{
"epoch": 0.49,
"learning_rate": 0.0019413092550790067,
"loss": 0.694,
"step": 1290
},
{
"epoch": 0.49,
"learning_rate": 0.0019563581640331075,
"loss": 0.6902,
"step": 1300
},
{
"epoch": 0.49,
"learning_rate": 0.0019714070729872084,
"loss": 0.6961,
"step": 1310
},
{
"epoch": 0.5,
"learning_rate": 0.0019864559819413093,
"loss": 0.6944,
"step": 1320
},
{
"epoch": 0.5,
"learning_rate": 0.001999999965477842,
"loss": 0.6952,
"step": 1330
},
{
"epoch": 0.5,
"learning_rate": 0.0019999958228217795,
"loss": 0.685,
"step": 1340
},
{
"epoch": 0.51,
"learning_rate": 0.001999984775766913,
"loss": 0.6935,
"step": 1350
},
{
"epoch": 0.51,
"learning_rate": 0.0019999668243895164,
"loss": 0.6931,
"step": 1360
},
{
"epoch": 0.52,
"learning_rate": 0.0019999419688135336,
"loss": 0.6905,
"step": 1370
},
{
"epoch": 0.52,
"learning_rate": 0.001999910209210578,
"loss": 0.6937,
"step": 1380
},
{
"epoch": 0.52,
"learning_rate": 0.001999871545799931,
"loss": 0.6893,
"step": 1390
},
{
"epoch": 0.53,
"learning_rate": 0.0019998259788485424,
"loss": 0.6946,
"step": 1400
},
{
"epoch": 0.53,
"learning_rate": 0.0019997735086710253,
"loss": 0.6913,
"step": 1410
},
{
"epoch": 0.53,
"learning_rate": 0.001999714135629656,
"loss": 0.6934,
"step": 1420
},
{
"epoch": 0.54,
"learning_rate": 0.0019996478601343724,
"loss": 0.6917,
"step": 1430
},
{
"epoch": 0.54,
"learning_rate": 0.0019995746826427675,
"loss": 0.6967,
"step": 1440
},
{
"epoch": 0.55,
"learning_rate": 0.0019994946036600908,
"loss": 0.6893,
"step": 1450
},
{
"epoch": 0.55,
"learning_rate": 0.0019994076237392416,
"loss": 0.694,
"step": 1460
},
{
"epoch": 0.55,
"learning_rate": 0.0019993137434807665,
"loss": 0.6936,
"step": 1470
},
{
"epoch": 0.56,
"learning_rate": 0.0019992129635328553,
"loss": 0.6949,
"step": 1480
},
{
"epoch": 0.56,
"learning_rate": 0.001999105284591335,
"loss": 0.6946,
"step": 1490
},
{
"epoch": 0.56,
"learning_rate": 0.0019989907073996683,
"loss": 0.691,
"step": 1500
},
{
"epoch": 0.57,
"learning_rate": 0.001998869232748944,
"loss": 0.6935,
"step": 1510
},
{
"epoch": 0.57,
"learning_rate": 0.0019987408614778753,
"loss": 0.6934,
"step": 1520
},
{
"epoch": 0.58,
"learning_rate": 0.001998605594472793,
"loss": 0.6913,
"step": 1530
},
{
"epoch": 0.58,
"learning_rate": 0.001998463432667638,
"loss": 0.6945,
"step": 1540
},
{
"epoch": 0.58,
"learning_rate": 0.0019983143770439557,
"loss": 0.6966,
"step": 1550
},
{
"epoch": 0.59,
"learning_rate": 0.00199815842863089,
"loss": 0.6945,
"step": 1560
},
{
"epoch": 0.59,
"learning_rate": 0.0019979955885051763,
"loss": 0.691,
"step": 1570
},
{
"epoch": 0.59,
"learning_rate": 0.0019978258577911315,
"loss": 0.6933,
"step": 1580
},
{
"epoch": 0.6,
"learning_rate": 0.0019976492376606496,
"loss": 0.6921,
"step": 1590
},
{
"epoch": 0.6,
"learning_rate": 0.0019974657293331914,
"loss": 0.6867,
"step": 1600
},
{
"epoch": 0.61,
"learning_rate": 0.0019972753340757763,
"loss": 0.6898,
"step": 1610
},
{
"epoch": 0.61,
"learning_rate": 0.0019970780532029752,
"loss": 0.6981,
"step": 1620
},
{
"epoch": 0.61,
"learning_rate": 0.0019968738880768995,
"loss": 0.6923,
"step": 1630
},
{
"epoch": 0.62,
"learning_rate": 0.0019966628401071928,
"loss": 0.6938,
"step": 1640
},
{
"epoch": 0.62,
"learning_rate": 0.0019964449107510197,
"loss": 0.6926,
"step": 1650
},
{
"epoch": 0.62,
"learning_rate": 0.0019962201015130587,
"loss": 0.6921,
"step": 1660
},
{
"epoch": 0.63,
"learning_rate": 0.0019959884139454883,
"loss": 0.6929,
"step": 1670
},
{
"epoch": 0.63,
"learning_rate": 0.0019957498496479785,
"loss": 0.695,
"step": 1680
},
{
"epoch": 0.64,
"learning_rate": 0.0019955044102676796,
"loss": 0.6921,
"step": 1690
},
{
"epoch": 0.64,
"learning_rate": 0.0019952520974992095,
"loss": 0.6956,
"step": 1700
},
{
"epoch": 0.64,
"learning_rate": 0.001994992913084644,
"loss": 0.693,
"step": 1710
},
{
"epoch": 0.65,
"learning_rate": 0.001994726858813503,
"loss": 0.6938,
"step": 1720
},
{
"epoch": 0.65,
"learning_rate": 0.001994453936522739,
"loss": 0.6935,
"step": 1730
},
{
"epoch": 0.65,
"learning_rate": 0.0019941741480967237,
"loss": 0.6934,
"step": 1740
},
{
"epoch": 0.66,
"learning_rate": 0.001993887495467237,
"loss": 0.6932,
"step": 1750
},
{
"epoch": 0.66,
"learning_rate": 0.0019935939806134504,
"loss": 0.6933,
"step": 1760
},
{
"epoch": 0.67,
"learning_rate": 0.0019932936055619163,
"loss": 0.6933,
"step": 1770
},
{
"epoch": 0.67,
"learning_rate": 0.001992986372386552,
"loss": 0.693,
"step": 1780
},
{
"epoch": 0.67,
"learning_rate": 0.001992672283208628,
"loss": 0.6934,
"step": 1790
},
{
"epoch": 0.68,
"learning_rate": 0.001992351340196749,
"loss": 0.6929,
"step": 1800
},
{
"epoch": 0.68,
"learning_rate": 0.0019920235455668433,
"loss": 0.693,
"step": 1810
},
{
"epoch": 0.68,
"learning_rate": 0.0019916889015821455,
"loss": 0.6924,
"step": 1820
},
{
"epoch": 0.69,
"learning_rate": 0.0019913474105531805,
"loss": 0.6953,
"step": 1830
},
{
"epoch": 0.69,
"learning_rate": 0.0019909990748377482,
"loss": 0.6941,
"step": 1840
},
{
"epoch": 0.7,
"learning_rate": 0.0019906438968409074,
"loss": 0.6936,
"step": 1850
},
{
"epoch": 0.7,
"learning_rate": 0.0019902818790149593,
"loss": 0.6922,
"step": 1860
},
{
"epoch": 0.7,
"learning_rate": 0.001989913023859429,
"loss": 0.6921,
"step": 1870
},
{
"epoch": 0.71,
"learning_rate": 0.0019895373339210514,
"loss": 0.6854,
"step": 1880
},
{
"epoch": 0.71,
"learning_rate": 0.0019891548117937498,
"loss": 0.6922,
"step": 1890
},
{
"epoch": 0.72,
"learning_rate": 0.00198876546011862,
"loss": 0.7152,
"step": 1900
},
{
"epoch": 0.72,
"learning_rate": 0.0019883692815839136,
"loss": 0.6951,
"step": 1910
},
{
"epoch": 0.72,
"learning_rate": 0.0019879662789250158,
"loss": 0.7333,
"step": 1920
},
{
"epoch": 0.73,
"learning_rate": 0.0019875564549244294,
"loss": 0.8859,
"step": 1930
},
{
"epoch": 0.73,
"learning_rate": 0.0019871398124117552,
"loss": 0.7755,
"step": 1940
},
{
"epoch": 0.73,
"learning_rate": 0.0019867163542636704,
"loss": 0.6947,
"step": 1950
},
{
"epoch": 0.74,
"learning_rate": 0.001986286083403912,
"loss": 0.6954,
"step": 1960
},
{
"epoch": 0.74,
"learning_rate": 0.0019858490028032533,
"loss": 0.6845,
"step": 1970
},
{
"epoch": 0.75,
"learning_rate": 0.001985405115479486,
"loss": 0.7309,
"step": 1980
},
{
"epoch": 0.75,
"learning_rate": 0.001984954424497398,
"loss": 0.6955,
"step": 1990
},
{
"epoch": 0.75,
"learning_rate": 0.0019844969329687527,
"loss": 0.6931,
"step": 2000
},
{
"epoch": 0.76,
"learning_rate": 0.0019840326440522665,
"loss": 0.6959,
"step": 2010
},
{
"epoch": 0.76,
"learning_rate": 0.0019835615609535896,
"loss": 0.7009,
"step": 2020
},
{
"epoch": 0.76,
"learning_rate": 0.001983083686925281,
"loss": 0.6944,
"step": 2030
},
{
"epoch": 0.77,
"learning_rate": 0.001982599025266787,
"loss": 0.6921,
"step": 2040
},
{
"epoch": 0.77,
"learning_rate": 0.001982107579324419,
"loss": 0.6901,
"step": 2050
},
{
"epoch": 0.78,
"learning_rate": 0.0019816093524913305,
"loss": 0.6989,
"step": 2060
},
{
"epoch": 0.78,
"learning_rate": 0.001981104348207492,
"loss": 0.6946,
"step": 2070
},
{
"epoch": 0.78,
"learning_rate": 0.001980592569959669,
"loss": 0.6934,
"step": 2080
},
{
"epoch": 0.79,
"learning_rate": 0.0019800740212813977,
"loss": 0.6943,
"step": 2090
},
{
"epoch": 0.79,
"learning_rate": 0.00197954870575296,
"loss": 0.693,
"step": 2100
},
{
"epoch": 0.79,
"learning_rate": 0.0019790166270013585,
"loss": 0.6933,
"step": 2110
},
{
"epoch": 0.8,
"learning_rate": 0.0019784777887002928,
"loss": 0.6935,
"step": 2120
},
{
"epoch": 0.8,
"learning_rate": 0.001977932194570133,
"loss": 0.6927,
"step": 2130
},
{
"epoch": 0.81,
"learning_rate": 0.001977379848377894,
"loss": 0.6936,
"step": 2140
},
{
"epoch": 0.81,
"learning_rate": 0.0019768207539372106,
"loss": 0.6937,
"step": 2150
},
{
"epoch": 0.81,
"learning_rate": 0.001976254915108309,
"loss": 0.6924,
"step": 2160
},
{
"epoch": 0.82,
"learning_rate": 0.001975682335797984,
"loss": 0.694,
"step": 2170
},
{
"epoch": 0.82,
"learning_rate": 0.0019751030199595663,
"loss": 0.6942,
"step": 2180
},
{
"epoch": 0.82,
"learning_rate": 0.0019745169715929016,
"loss": 0.6933,
"step": 2190
},
{
"epoch": 0.83,
"learning_rate": 0.001973924194744317,
"loss": 0.6935,
"step": 2200
},
{
"epoch": 0.83,
"learning_rate": 0.001973324693506599,
"loss": 0.6926,
"step": 2210
},
{
"epoch": 0.84,
"learning_rate": 0.0019727184720189595,
"loss": 0.6913,
"step": 2220
},
{
"epoch": 0.84,
"learning_rate": 0.001972105534467011,
"loss": 0.6933,
"step": 2230
},
{
"epoch": 0.84,
"learning_rate": 0.0019714858850827365,
"loss": 0.683,
"step": 2240
},
{
"epoch": 0.85,
"learning_rate": 0.0019708595281444606,
"loss": 0.6899,
"step": 2250
},
{
"epoch": 0.85,
"learning_rate": 0.001970226467976819,
"loss": 0.6823,
"step": 2260
},
{
"epoch": 0.85,
"learning_rate": 0.0019695867089507303,
"loss": 0.6966,
"step": 2270
},
{
"epoch": 0.86,
"learning_rate": 0.0019689402554833643,
"loss": 0.6996,
"step": 2280
},
{
"epoch": 0.86,
"learning_rate": 0.001968287112038112,
"loss": 0.6908,
"step": 2290
},
{
"epoch": 0.87,
"learning_rate": 0.001967627283124555,
"loss": 0.6949,
"step": 2300
},
{
"epoch": 0.87,
"learning_rate": 0.001966960773298435,
"loss": 0.6946,
"step": 2310
},
{
"epoch": 0.87,
"learning_rate": 0.00196628758716162,
"loss": 0.6923,
"step": 2320
},
{
"epoch": 0.88,
"learning_rate": 0.001965607729362075,
"loss": 0.6897,
"step": 2330
},
{
"epoch": 0.88,
"learning_rate": 0.0019649212045938294,
"loss": 0.6915,
"step": 2340
},
{
"epoch": 0.88,
"learning_rate": 0.001964228017596944,
"loss": 0.6869,
"step": 2350
},
{
"epoch": 0.89,
"learning_rate": 0.0019635281731574775,
"loss": 0.6827,
"step": 2360
},
{
"epoch": 0.89,
"learning_rate": 0.0019628216761074556,
"loss": 0.6839,
"step": 2370
},
{
"epoch": 0.9,
"learning_rate": 0.0019621085313248367,
"loss": 0.6982,
"step": 2380
},
{
"epoch": 0.9,
"learning_rate": 0.0019613887437334764,
"loss": 0.6873,
"step": 2390
},
{
"epoch": 0.9,
"learning_rate": 0.001960662318303096,
"loss": 0.7014,
"step": 2400
},
{
"epoch": 0.91,
"learning_rate": 0.001959929260049248,
"loss": 0.6921,
"step": 2410
},
{
"epoch": 0.91,
"learning_rate": 0.00195918957403328,
"loss": 0.6934,
"step": 2420
},
{
"epoch": 0.91,
"learning_rate": 0.0019584432653623005,
"loss": 0.6919,
"step": 2430
},
{
"epoch": 0.92,
"learning_rate": 0.001957690339189143,
"loss": 0.7,
"step": 2440
},
{
"epoch": 0.92,
"learning_rate": 0.0019569308007123328,
"loss": 0.6966,
"step": 2450
},
{
"epoch": 0.93,
"learning_rate": 0.0019561646551760478,
"loss": 0.694,
"step": 2460
},
{
"epoch": 0.93,
"learning_rate": 0.0019553919078700846,
"loss": 0.691,
"step": 2470
},
{
"epoch": 0.93,
"learning_rate": 0.0019546125641298207,
"loss": 0.7014,
"step": 2480
},
{
"epoch": 0.94,
"learning_rate": 0.001953826629336179,
"loss": 0.6845,
"step": 2490
},
{
"epoch": 0.94,
"learning_rate": 0.0019530341089155892,
"loss": 0.6948,
"step": 2500
},
{
"epoch": 0.94,
"learning_rate": 0.0019522350083399516,
"loss": 0.6923,
"step": 2510
},
{
"epoch": 0.95,
"learning_rate": 0.0019514293331265977,
"loss": 0.6866,
"step": 2520
},
{
"epoch": 0.95,
"learning_rate": 0.0019506170888382544,
"loss": 0.69,
"step": 2530
},
{
"epoch": 0.96,
"learning_rate": 0.0019497982810830028,
"loss": 0.6961,
"step": 2540
},
{
"epoch": 0.96,
"learning_rate": 0.0019489729155142423,
"loss": 0.6899,
"step": 2550
},
{
"epoch": 0.96,
"learning_rate": 0.0019481409978306496,
"loss": 0.6959,
"step": 2560
},
{
"epoch": 0.97,
"learning_rate": 0.0019473025337761408,
"loss": 0.6955,
"step": 2570
},
{
"epoch": 0.97,
"learning_rate": 0.001946457529139829,
"loss": 0.6928,
"step": 2580
},
{
"epoch": 0.97,
"learning_rate": 0.0019456059897559887,
"loss": 0.694,
"step": 2590
},
{
"epoch": 0.98,
"learning_rate": 0.0019447479215040113,
"loss": 0.6924,
"step": 2600
},
{
"epoch": 0.98,
"learning_rate": 0.0019438833303083677,
"loss": 0.6933,
"step": 2610
},
{
"epoch": 0.99,
"learning_rate": 0.0019430122221385645,
"loss": 0.6842,
"step": 2620
},
{
"epoch": 0.99,
"learning_rate": 0.0019421346030091052,
"loss": 0.6916,
"step": 2630
},
{
"epoch": 0.99,
"learning_rate": 0.0019412504789794477,
"loss": 0.6916,
"step": 2640
},
{
"epoch": 1.0,
"learning_rate": 0.0019403598561539624,
"loss": 0.6924,
"step": 2650
},
{
"epoch": 1.0,
"eval_accuracy": 0.504,
"eval_loss": 0.6922000050544739,
"eval_runtime": 38.3652,
"eval_samples_per_second": 97.745,
"eval_steps_per_second": 12.225,
"step": 2657
},
{
"epoch": 1.0,
"learning_rate": 0.0019394627406818903,
"loss": 0.6941,
"step": 2660
},
{
"epoch": 1.0,
"learning_rate": 0.0019385591387572998,
"loss": 0.6897,
"step": 2670
},
{
"epoch": 1.01,
"learning_rate": 0.0019376490566190454,
"loss": 0.6907,
"step": 2680
},
{
"epoch": 1.01,
"learning_rate": 0.0019367325005507233,
"loss": 0.6996,
"step": 2690
},
{
"epoch": 1.02,
"learning_rate": 0.0019358094768806287,
"loss": 0.6957,
"step": 2700
},
{
"epoch": 1.02,
"learning_rate": 0.0019348799919817114,
"loss": 0.6968,
"step": 2710
},
{
"epoch": 1.02,
"learning_rate": 0.0019339440522715327,
"loss": 0.6899,
"step": 2720
},
{
"epoch": 1.03,
"learning_rate": 0.0019330016642122212,
"loss": 0.6922,
"step": 2730
},
{
"epoch": 1.03,
"learning_rate": 0.0019320528343104262,
"loss": 0.692,
"step": 2740
},
{
"epoch": 1.04,
"learning_rate": 0.0019310975691172753,
"loss": 0.6929,
"step": 2750
},
{
"epoch": 1.04,
"learning_rate": 0.0019301358752283285,
"loss": 0.6943,
"step": 2760
},
{
"epoch": 1.04,
"learning_rate": 0.0019291677592835313,
"loss": 0.6964,
"step": 2770
},
{
"epoch": 1.05,
"learning_rate": 0.0019281932279671698,
"loss": 0.6955,
"step": 2780
},
{
"epoch": 1.05,
"learning_rate": 0.0019272122880078255,
"loss": 0.6942,
"step": 2790
},
{
"epoch": 1.05,
"learning_rate": 0.0019262249461783273,
"loss": 0.6941,
"step": 2800
},
{
"epoch": 1.06,
"learning_rate": 0.0019252312092957055,
"loss": 0.6931,
"step": 2810
},
{
"epoch": 1.06,
"learning_rate": 0.0019242310842211442,
"loss": 0.6927,
"step": 2820
},
{
"epoch": 1.07,
"learning_rate": 0.0019232245778599345,
"loss": 0.694,
"step": 2830
},
{
"epoch": 1.07,
"learning_rate": 0.001922211697161427,
"loss": 0.6935,
"step": 2840
},
{
"epoch": 1.07,
"learning_rate": 0.0019211924491189835,
"loss": 0.6931,
"step": 2850
},
{
"epoch": 1.08,
"learning_rate": 0.0019201668407699275,
"loss": 0.6933,
"step": 2860
},
{
"epoch": 1.08,
"learning_rate": 0.0019191348791954988,
"loss": 0.6932,
"step": 2870
},
{
"epoch": 1.08,
"learning_rate": 0.0019180965715208003,
"loss": 0.693,
"step": 2880
},
{
"epoch": 1.09,
"learning_rate": 0.0019170519249147532,
"loss": 0.6929,
"step": 2890
},
{
"epoch": 1.09,
"learning_rate": 0.0019160009465900437,
"loss": 0.6921,
"step": 2900
},
{
"epoch": 1.1,
"learning_rate": 0.0019149436438030756,
"loss": 0.6941,
"step": 2910
},
{
"epoch": 1.1,
"learning_rate": 0.00191388002385392,
"loss": 0.6937,
"step": 2920
},
{
"epoch": 1.1,
"learning_rate": 0.0019128100940862636,
"loss": 0.6909,
"step": 2930
},
{
"epoch": 1.11,
"learning_rate": 0.0019117338618873587,
"loss": 0.6943,
"step": 2940
},
{
"epoch": 1.11,
"learning_rate": 0.0019106513346879735,
"loss": 0.695,
"step": 2950
},
{
"epoch": 1.11,
"learning_rate": 0.0019095625199623378,
"loss": 0.6931,
"step": 2960
},
{
"epoch": 1.12,
"learning_rate": 0.001908467425228095,
"loss": 0.6915,
"step": 2970
},
{
"epoch": 1.12,
"learning_rate": 0.0019073660580462466,
"loss": 0.6907,
"step": 2980
},
{
"epoch": 1.13,
"learning_rate": 0.001906258426021103,
"loss": 0.6924,
"step": 2990
},
{
"epoch": 1.13,
"learning_rate": 0.0019051445368002296,
"loss": 0.6877,
"step": 3000
},
{
"epoch": 1.13,
"learning_rate": 0.0019040243980743936,
"loss": 0.7014,
"step": 3010
},
{
"epoch": 1.14,
"learning_rate": 0.0019028980175775118,
"loss": 0.6954,
"step": 3020
},
{
"epoch": 1.14,
"learning_rate": 0.001901765403086597,
"loss": 0.6839,
"step": 3030
},
{
"epoch": 1.14,
"learning_rate": 0.001900626562421704,
"loss": 0.6973,
"step": 3040
},
{
"epoch": 1.15,
"learning_rate": 0.0018994815034458754,
"loss": 0.6955,
"step": 3050
},
{
"epoch": 1.15,
"learning_rate": 0.0018983302340650889,
"loss": 0.6927,
"step": 3060
},
{
"epoch": 1.16,
"learning_rate": 0.0018971727622281995,
"loss": 0.6924,
"step": 3070
},
{
"epoch": 1.16,
"learning_rate": 0.0018960090959268885,
"loss": 0.693,
"step": 3080
},
{
"epoch": 1.16,
"learning_rate": 0.0018948392431956057,
"loss": 0.6958,
"step": 3090
},
{
"epoch": 1.17,
"learning_rate": 0.0018936632121115142,
"loss": 0.6932,
"step": 3100
},
{
"epoch": 1.17,
"learning_rate": 0.0018924810107944362,
"loss": 0.693,
"step": 3110
},
{
"epoch": 1.17,
"learning_rate": 0.0018912926474067943,
"loss": 0.6938,
"step": 3120
},
{
"epoch": 1.18,
"learning_rate": 0.001890098130153558,
"loss": 0.6848,
"step": 3130
},
{
"epoch": 1.18,
"learning_rate": 0.0018888974672821854,
"loss": 0.6917,
"step": 3140
},
{
"epoch": 1.19,
"learning_rate": 0.0018876906670825663,
"loss": 0.6941,
"step": 3150
},
{
"epoch": 1.19,
"learning_rate": 0.0018864777378869652,
"loss": 0.6902,
"step": 3160
},
{
"epoch": 1.19,
"learning_rate": 0.0018852586880699638,
"loss": 0.6952,
"step": 3170
},
{
"epoch": 1.2,
"learning_rate": 0.0018840335260484039,
"loss": 0.6947,
"step": 3180
},
{
"epoch": 1.2,
"learning_rate": 0.0018828022602813276,
"loss": 0.6905,
"step": 3190
},
{
"epoch": 1.2,
"learning_rate": 0.0018815648992699203,
"loss": 0.6915,
"step": 3200
},
{
"epoch": 1.21,
"learning_rate": 0.0018803214515574521,
"loss": 0.6943,
"step": 3210
},
{
"epoch": 1.21,
"learning_rate": 0.0018790719257292172,
"loss": 0.6906,
"step": 3220
},
{
"epoch": 1.22,
"learning_rate": 0.0018778163304124765,
"loss": 0.6973,
"step": 3230
},
{
"epoch": 1.22,
"learning_rate": 0.001876554674276397,
"loss": 0.6928,
"step": 3240
},
{
"epoch": 1.22,
"learning_rate": 0.001875286966031992,
"loss": 0.695,
"step": 3250
},
{
"epoch": 1.23,
"learning_rate": 0.0018740132144320616,
"loss": 0.6846,
"step": 3260
},
{
"epoch": 1.23,
"learning_rate": 0.0018727334282711318,
"loss": 0.6941,
"step": 3270
},
{
"epoch": 1.23,
"learning_rate": 0.0018714476163853929,
"loss": 0.6929,
"step": 3280
},
{
"epoch": 1.24,
"learning_rate": 0.0018701557876526402,
"loss": 0.6941,
"step": 3290
},
{
"epoch": 1.24,
"learning_rate": 0.001868857950992212,
"loss": 0.6923,
"step": 3300
},
{
"epoch": 1.25,
"learning_rate": 0.0018675541153649277,
"loss": 0.6941,
"step": 3310
},
{
"epoch": 1.25,
"learning_rate": 0.0018662442897730256,
"loss": 0.6907,
"step": 3320
},
{
"epoch": 1.25,
"learning_rate": 0.0018649284832601021,
"loss": 0.6945,
"step": 3330
},
{
"epoch": 1.26,
"learning_rate": 0.001863606704911048,
"loss": 0.6894,
"step": 3340
},
{
"epoch": 1.26,
"learning_rate": 0.001862278963851986,
"loss": 0.6922,
"step": 3350
},
{
"epoch": 1.26,
"learning_rate": 0.0018609452692502084,
"loss": 0.6876,
"step": 3360
},
{
"epoch": 1.27,
"learning_rate": 0.0018596056303141133,
"loss": 0.693,
"step": 3370
},
{
"epoch": 1.27,
"learning_rate": 0.0018582600562931405,
"loss": 0.692,
"step": 3380
},
{
"epoch": 1.28,
"learning_rate": 0.001856908556477708,
"loss": 0.6969,
"step": 3390
},
{
"epoch": 1.28,
"learning_rate": 0.0018555511401991498,
"loss": 0.6972,
"step": 3400
},
{
"epoch": 1.28,
"learning_rate": 0.001854187816829647,
"loss": 0.6985,
"step": 3410
},
{
"epoch": 1.29,
"learning_rate": 0.0018528185957821682,
"loss": 0.6846,
"step": 3420
},
{
"epoch": 1.29,
"learning_rate": 0.0018514434865104007,
"loss": 0.6961,
"step": 3430
},
{
"epoch": 1.29,
"learning_rate": 0.0018500624985086863,
"loss": 0.6936,
"step": 3440
},
{
"epoch": 1.3,
"learning_rate": 0.0018486756413119577,
"loss": 0.6932,
"step": 3450
},
{
"epoch": 1.3,
"learning_rate": 0.0018472829244956695,
"loss": 0.693,
"step": 3460
},
{
"epoch": 1.31,
"learning_rate": 0.0018458843576757344,
"loss": 0.693,
"step": 3470
},
{
"epoch": 1.31,
"learning_rate": 0.001844479950508456,
"loss": 0.6942,
"step": 3480
},
{
"epoch": 1.31,
"learning_rate": 0.0018430697126904617,
"loss": 0.6936,
"step": 3490
},
{
"epoch": 1.32,
"learning_rate": 0.0018416536539586363,
"loss": 0.6939,
"step": 3500
},
{
"epoch": 1.32,
"learning_rate": 0.0018402317840900554,
"loss": 0.6933,
"step": 3510
},
{
"epoch": 1.32,
"learning_rate": 0.001838804112901916,
"loss": 0.6933,
"step": 3520
},
{
"epoch": 1.33,
"learning_rate": 0.0018373706502514708,
"loss": 0.693,
"step": 3530
},
{
"epoch": 1.33,
"learning_rate": 0.0018359314060359595,
"loss": 0.6936,
"step": 3540
},
{
"epoch": 1.34,
"learning_rate": 0.0018344863901925388,
"loss": 0.6929,
"step": 3550
},
{
"epoch": 1.34,
"learning_rate": 0.0018330356126982164,
"loss": 0.6918,
"step": 3560
},
{
"epoch": 1.34,
"learning_rate": 0.0018315790835697805,
"loss": 0.8642,
"step": 3570
},
{
"epoch": 1.35,
"learning_rate": 0.001830116812863731,
"loss": 0.695,
"step": 3580
},
{
"epoch": 1.35,
"learning_rate": 0.0018286488106762104,
"loss": 0.6891,
"step": 3590
},
{
"epoch": 1.35,
"learning_rate": 0.0018271750871429336,
"loss": 0.6978,
"step": 3600
},
{
"epoch": 1.36,
"learning_rate": 0.0018256956524391177,
"loss": 0.6919,
"step": 3610
},
{
"epoch": 1.36,
"learning_rate": 0.001824210516779413,
"loss": 0.6963,
"step": 3620
},
{
"epoch": 1.37,
"learning_rate": 0.001822719690417831,
"loss": 0.6924,
"step": 3630
},
{
"epoch": 1.37,
"learning_rate": 0.0018212231836476744,
"loss": 0.6923,
"step": 3640
},
{
"epoch": 1.37,
"learning_rate": 0.001819721006801466,
"loss": 0.6926,
"step": 3650
},
{
"epoch": 1.38,
"learning_rate": 0.0018182131702508776,
"loss": 0.693,
"step": 3660
},
{
"epoch": 1.38,
"learning_rate": 0.0018166996844066565,
"loss": 0.6935,
"step": 3670
},
{
"epoch": 1.39,
"learning_rate": 0.001815180559718557,
"loss": 0.6959,
"step": 3680
},
{
"epoch": 1.39,
"learning_rate": 0.0018136558066752656,
"loss": 0.6903,
"step": 3690
},
{
"epoch": 1.39,
"learning_rate": 0.0018121254358043292,
"loss": 0.6911,
"step": 3700
},
{
"epoch": 1.4,
"learning_rate": 0.0018105894576720823,
"loss": 0.6977,
"step": 3710
},
{
"epoch": 1.4,
"learning_rate": 0.0018090478828835758,
"loss": 0.6919,
"step": 3720
},
{
"epoch": 1.4,
"learning_rate": 0.0018075007220825002,
"loss": 0.696,
"step": 3730
},
{
"epoch": 1.41,
"learning_rate": 0.001805947985951116,
"loss": 0.692,
"step": 3740
},
{
"epoch": 1.41,
"learning_rate": 0.001804389685210177,
"loss": 0.6922,
"step": 3750
},
{
"epoch": 1.42,
"learning_rate": 0.0018028258306188583,
"loss": 0.6903,
"step": 3760
},
{
"epoch": 1.42,
"learning_rate": 0.0018012564329746808,
"loss": 0.6894,
"step": 3770
},
{
"epoch": 1.42,
"learning_rate": 0.0017996815031134368,
"loss": 0.6913,
"step": 3780
},
{
"epoch": 1.43,
"learning_rate": 0.0017981010519091155,
"loss": 0.6868,
"step": 3790
},
{
"epoch": 1.43,
"learning_rate": 0.001796515090273828,
"loss": 0.6929,
"step": 3800
},
{
"epoch": 1.43,
"learning_rate": 0.0017949236291577315,
"loss": 0.6899,
"step": 3810
},
{
"epoch": 1.44,
"learning_rate": 0.0017933266795489545,
"loss": 0.7009,
"step": 3820
},
{
"epoch": 1.44,
"learning_rate": 0.0017917242524735197,
"loss": 0.7007,
"step": 3830
},
{
"epoch": 1.45,
"learning_rate": 0.001790116358995269,
"loss": 0.6912,
"step": 3840
},
{
"epoch": 1.45,
"learning_rate": 0.0017885030102157869,
"loss": 0.6891,
"step": 3850
},
{
"epoch": 1.45,
"learning_rate": 0.0017868842172743228,
"loss": 0.6999,
"step": 3860
},
{
"epoch": 1.46,
"learning_rate": 0.0017852599913477155,
"loss": 0.6949,
"step": 3870
},
{
"epoch": 1.46,
"learning_rate": 0.0017836303436503155,
"loss": 0.6984,
"step": 3880
},
{
"epoch": 1.46,
"learning_rate": 0.0017819952854339074,
"loss": 0.6908,
"step": 3890
},
{
"epoch": 1.47,
"learning_rate": 0.0017803548279876325,
"loss": 0.7009,
"step": 3900
},
{
"epoch": 1.47,
"learning_rate": 0.0017787089826379104,
"loss": 0.7282,
"step": 3910
},
{
"epoch": 1.48,
"learning_rate": 0.0017770577607483613,
"loss": 0.7153,
"step": 3920
},
{
"epoch": 1.48,
"learning_rate": 0.001775401173719727,
"loss": 0.7886,
"step": 3930
},
{
"epoch": 1.48,
"learning_rate": 0.0017737392329897931,
"loss": 0.707,
"step": 3940
},
{
"epoch": 1.49,
"learning_rate": 0.001772071950033309,
"loss": 0.7407,
"step": 3950
},
{
"epoch": 1.49,
"learning_rate": 0.0017703993363619092,
"loss": 0.7577,
"step": 3960
},
{
"epoch": 1.49,
"learning_rate": 0.0017687214035240343,
"loss": 0.7342,
"step": 3970
},
{
"epoch": 1.5,
"learning_rate": 0.0017670381631048491,
"loss": 0.6956,
"step": 3980
},
{
"epoch": 1.5,
"learning_rate": 0.0017653496267261662,
"loss": 0.7053,
"step": 3990
},
{
"epoch": 1.51,
"learning_rate": 0.0017636558060463625,
"loss": 0.6903,
"step": 4000
},
{
"epoch": 1.51,
"learning_rate": 0.0017619567127603003,
"loss": 0.6924,
"step": 4010
},
{
"epoch": 1.51,
"learning_rate": 0.0017602523585992463,
"loss": 0.6992,
"step": 4020
},
{
"epoch": 1.52,
"learning_rate": 0.0017585427553307904,
"loss": 0.6895,
"step": 4030
},
{
"epoch": 1.52,
"learning_rate": 0.001756827914758765,
"loss": 0.7226,
"step": 4040
},
{
"epoch": 1.52,
"learning_rate": 0.0017551078487231622,
"loss": 0.7086,
"step": 4050
},
{
"epoch": 1.53,
"learning_rate": 0.001753382569100054,
"loss": 0.7017,
"step": 4060
},
{
"epoch": 1.53,
"learning_rate": 0.0017516520878015085,
"loss": 0.6944,
"step": 4070
},
{
"epoch": 1.54,
"learning_rate": 0.0017499164167755087,
"loss": 0.6866,
"step": 4080
},
{
"epoch": 1.54,
"learning_rate": 0.0017481755680058698,
"loss": 0.6916,
"step": 4090
},
{
"epoch": 1.54,
"learning_rate": 0.0017464295535121555,
"loss": 0.6988,
"step": 4100
},
{
"epoch": 1.55,
"learning_rate": 0.001744678385349597,
"loss": 0.7135,
"step": 4110
},
{
"epoch": 1.55,
"learning_rate": 0.0017429220756090085,
"loss": 0.7316,
"step": 4120
},
{
"epoch": 1.55,
"learning_rate": 0.0017411606364167028,
"loss": 0.7037,
"step": 4130
},
{
"epoch": 1.56,
"learning_rate": 0.0017393940799344098,
"loss": 0.7085,
"step": 4140
},
{
"epoch": 1.56,
"learning_rate": 0.001737622418359191,
"loss": 0.706,
"step": 4150
},
{
"epoch": 1.57,
"learning_rate": 0.0017358456639233553,
"loss": 0.6924,
"step": 4160
},
{
"epoch": 1.57,
"learning_rate": 0.0017340638288943753,
"loss": 0.7171,
"step": 4170
},
{
"epoch": 1.57,
"learning_rate": 0.001732276925574802,
"loss": 0.71,
"step": 4180
},
{
"epoch": 1.58,
"learning_rate": 0.0017304849663021798,
"loss": 0.6981,
"step": 4190
},
{
"epoch": 1.58,
"learning_rate": 0.0017286879634489623,
"loss": 0.6916,
"step": 4200
},
{
"epoch": 1.58,
"learning_rate": 0.0017268859294224256,
"loss": 0.6921,
"step": 4210
},
{
"epoch": 1.59,
"learning_rate": 0.0017250788766645828,
"loss": 0.692,
"step": 4220
},
{
"epoch": 1.59,
"learning_rate": 0.0017232668176520997,
"loss": 0.6963,
"step": 4230
},
{
"epoch": 1.6,
"learning_rate": 0.0017214497648962057,
"loss": 0.6856,
"step": 4240
},
{
"epoch": 1.6,
"learning_rate": 0.0017196277309426105,
"loss": 0.6941,
"step": 4250
},
{
"epoch": 1.6,
"learning_rate": 0.0017178007283714161,
"loss": 0.6942,
"step": 4260
},
{
"epoch": 1.61,
"learning_rate": 0.001715968769797029,
"loss": 0.6926,
"step": 4270
},
{
"epoch": 1.61,
"learning_rate": 0.0017141318678680747,
"loss": 0.6929,
"step": 4280
},
{
"epoch": 1.61,
"learning_rate": 0.0017122900352673103,
"loss": 0.694,
"step": 4290
},
{
"epoch": 1.62,
"learning_rate": 0.001710443284711535,
"loss": 0.6938,
"step": 4300
},
{
"epoch": 1.62,
"learning_rate": 0.0017085916289515043,
"loss": 0.6934,
"step": 4310
},
{
"epoch": 1.63,
"learning_rate": 0.0017067350807718421,
"loss": 0.6932,
"step": 4320
},
{
"epoch": 1.63,
"learning_rate": 0.0017048736529909506,
"loss": 0.6845,
"step": 4330
},
{
"epoch": 1.63,
"learning_rate": 0.0017030073584609231,
"loss": 0.6933,
"step": 4340
},
{
"epoch": 1.64,
"learning_rate": 0.0017011362100674554,
"loss": 0.6932,
"step": 4350
},
{
"epoch": 1.64,
"learning_rate": 0.001699260220729756,
"loss": 0.6928,
"step": 4360
},
{
"epoch": 1.64,
"learning_rate": 0.0016973794034004577,
"loss": 0.6925,
"step": 4370
},
{
"epoch": 1.65,
"learning_rate": 0.0016954937710655274,
"loss": 0.6947,
"step": 4380
},
{
"epoch": 1.65,
"learning_rate": 0.0016936033367441774,
"loss": 0.6939,
"step": 4390
},
{
"epoch": 1.66,
"learning_rate": 0.0016917081134887747,
"loss": 0.6937,
"step": 4400
},
{
"epoch": 1.66,
"learning_rate": 0.0016898081143847507,
"loss": 0.6927,
"step": 4410
},
{
"epoch": 1.66,
"learning_rate": 0.0016879033525505126,
"loss": 0.692,
"step": 4420
},
{
"epoch": 1.67,
"learning_rate": 0.0016859938411373499,
"loss": 0.6924,
"step": 4430
},
{
"epoch": 1.67,
"learning_rate": 0.0016840795933293462,
"loss": 0.6933,
"step": 4440
},
{
"epoch": 1.67,
"learning_rate": 0.0016821606223432867,
"loss": 0.6923,
"step": 4450
},
{
"epoch": 1.68,
"learning_rate": 0.0016802369414285687,
"loss": 0.695,
"step": 4460
},
{
"epoch": 1.68,
"learning_rate": 0.0016783085638671068,
"loss": 0.6929,
"step": 4470
},
{
"epoch": 1.69,
"learning_rate": 0.001676375502973245,
"loss": 0.6954,
"step": 4480
},
{
"epoch": 1.69,
"learning_rate": 0.001674437772093662,
"loss": 0.6939,
"step": 4490
},
{
"epoch": 1.69,
"learning_rate": 0.0016724953846072807,
"loss": 0.6935,
"step": 4500
},
{
"epoch": 1.7,
"learning_rate": 0.0016705483539251748,
"loss": 0.6933,
"step": 4510
},
{
"epoch": 1.7,
"learning_rate": 0.0016685966934904767,
"loss": 0.6929,
"step": 4520
},
{
"epoch": 1.7,
"learning_rate": 0.001666640416778285,
"loss": 0.693,
"step": 4530
},
{
"epoch": 1.71,
"learning_rate": 0.0016646795372955703,
"loss": 0.6843,
"step": 4540
},
{
"epoch": 1.71,
"learning_rate": 0.0016627140685810833,
"loss": 0.6918,
"step": 4550
},
{
"epoch": 1.72,
"learning_rate": 0.0016607440242052606,
"loss": 0.6888,
"step": 4560
},
{
"epoch": 1.72,
"learning_rate": 0.001658769417770131,
"loss": 0.6933,
"step": 4570
},
{
"epoch": 1.72,
"learning_rate": 0.0016567902629092217,
"loss": 0.6934,
"step": 4580
},
{
"epoch": 1.73,
"learning_rate": 0.0016548065732874644,
"loss": 0.6847,
"step": 4590
},
{
"epoch": 1.73,
"learning_rate": 0.0016528183626011003,
"loss": 0.6931,
"step": 4600
},
{
"epoch": 1.74,
"learning_rate": 0.0016508256445775867,
"loss": 0.6937,
"step": 4610
},
{
"epoch": 1.74,
"learning_rate": 0.0016488284329755003,
"loss": 0.6928,
"step": 4620
},
{
"epoch": 1.74,
"learning_rate": 0.001646826741584445,
"loss": 0.6928,
"step": 4630
},
{
"epoch": 1.75,
"learning_rate": 0.001644820584224953,
"loss": 0.6938,
"step": 4640
},
{
"epoch": 1.75,
"learning_rate": 0.0016428099747483936,
"loss": 0.6921,
"step": 4650
},
{
"epoch": 1.75,
"learning_rate": 0.001640794927036874,
"loss": 0.6913,
"step": 4660
},
{
"epoch": 1.76,
"learning_rate": 0.0016387754550031457,
"loss": 0.6922,
"step": 4670
},
{
"epoch": 1.76,
"learning_rate": 0.0016367515725905066,
"loss": 0.6955,
"step": 4680
},
{
"epoch": 1.77,
"learning_rate": 0.0016347232937727072,
"loss": 0.6869,
"step": 4690
},
{
"epoch": 1.77,
"learning_rate": 0.0016326906325538513,
"loss": 0.6944,
"step": 4700
},
{
"epoch": 1.77,
"learning_rate": 0.0016306536029683013,
"loss": 0.6939,
"step": 4710
},
{
"epoch": 1.78,
"learning_rate": 0.0016286122190805805,
"loss": 0.6928,
"step": 4720
},
{
"epoch": 1.78,
"learning_rate": 0.0016265664949852766,
"loss": 0.6845,
"step": 4730
},
{
"epoch": 1.78,
"learning_rate": 0.0016245164448069431,
"loss": 0.6936,
"step": 4740
},
{
"epoch": 1.79,
"learning_rate": 0.0016224620827000036,
"loss": 0.6932,
"step": 4750
},
{
"epoch": 1.79,
"learning_rate": 0.0016204034228486524,
"loss": 0.6925,
"step": 4760
},
{
"epoch": 1.8,
"learning_rate": 0.0016183404794667573,
"loss": 0.6948,
"step": 4770
},
{
"epoch": 1.8,
"learning_rate": 0.0016162732667977622,
"loss": 0.6934,
"step": 4780
},
{
"epoch": 1.8,
"learning_rate": 0.0016142017991145866,
"loss": 0.6931,
"step": 4790
},
{
"epoch": 1.81,
"learning_rate": 0.0016121260907195302,
"loss": 0.6934,
"step": 4800
},
{
"epoch": 1.81,
"learning_rate": 0.00161004615594417,
"loss": 0.6933,
"step": 4810
},
{
"epoch": 1.81,
"learning_rate": 0.0016079620091492667,
"loss": 0.6928,
"step": 4820
},
{
"epoch": 1.82,
"learning_rate": 0.0016058736647246604,
"loss": 0.6926,
"step": 4830
},
{
"epoch": 1.82,
"learning_rate": 0.0016037811370891739,
"loss": 0.6848,
"step": 4840
},
{
"epoch": 1.83,
"learning_rate": 0.001601684440690513,
"loss": 0.6931,
"step": 4850
},
{
"epoch": 1.83,
"learning_rate": 0.0015995835900051666,
"loss": 0.694,
"step": 4860
},
{
"epoch": 1.83,
"learning_rate": 0.0015974785995383053,
"loss": 0.6942,
"step": 4870
},
{
"epoch": 1.84,
"learning_rate": 0.0015953694838236846,
"loss": 0.6925,
"step": 4880
},
{
"epoch": 1.84,
"learning_rate": 0.001593256257423541,
"loss": 0.6927,
"step": 4890
},
{
"epoch": 1.84,
"learning_rate": 0.0015911389349284929,
"loss": 0.6927,
"step": 4900
},
{
"epoch": 1.85,
"learning_rate": 0.0015890175309574404,
"loss": 0.6945,
"step": 4910
},
{
"epoch": 1.85,
"learning_rate": 0.0015868920601574642,
"loss": 0.6932,
"step": 4920
},
{
"epoch": 1.86,
"learning_rate": 0.0015847625372037229,
"loss": 0.6941,
"step": 4930
},
{
"epoch": 1.86,
"learning_rate": 0.001582628976799354,
"loss": 0.685,
"step": 4940
},
{
"epoch": 1.86,
"learning_rate": 0.0015804913936753712,
"loss": 0.6841,
"step": 4950
},
{
"epoch": 1.87,
"learning_rate": 0.0015783498025905623,
"loss": 0.6923,
"step": 4960
},
{
"epoch": 1.87,
"learning_rate": 0.001576204218331388,
"loss": 0.6933,
"step": 4970
},
{
"epoch": 1.87,
"learning_rate": 0.0015740546557118796,
"loss": 0.6929,
"step": 4980
},
{
"epoch": 1.88,
"learning_rate": 0.0015719011295735365,
"loss": 0.6938,
"step": 4990
},
{
"epoch": 1.88,
"learning_rate": 0.0015697436547852243,
"loss": 0.6919,
"step": 5000
},
{
"epoch": 1.89,
"learning_rate": 0.0015675822462430714,
"loss": 0.6864,
"step": 5010
},
{
"epoch": 1.89,
"learning_rate": 0.0015654169188703668,
"loss": 0.692,
"step": 5020
},
{
"epoch": 1.89,
"learning_rate": 0.001563247687617457,
"loss": 0.6938,
"step": 5030
},
{
"epoch": 1.9,
"learning_rate": 0.0015610745674616412,
"loss": 0.693,
"step": 5040
},
{
"epoch": 1.9,
"learning_rate": 0.0015588975734070717,
"loss": 0.6924,
"step": 5050
},
{
"epoch": 1.9,
"learning_rate": 0.0015567167204846453,
"loss": 0.6892,
"step": 5060
},
{
"epoch": 1.91,
"learning_rate": 0.0015545320237519036,
"loss": 0.6896,
"step": 5070
},
{
"epoch": 1.91,
"learning_rate": 0.0015523434982929274,
"loss": 0.7004,
"step": 5080
},
{
"epoch": 1.92,
"learning_rate": 0.001550151159218232,
"loss": 0.694,
"step": 5090
},
{
"epoch": 1.92,
"learning_rate": 0.001547955021664664,
"loss": 0.6921,
"step": 5100
},
{
"epoch": 1.92,
"learning_rate": 0.0015457551007952962,
"loss": 0.6929,
"step": 5110
},
{
"epoch": 1.93,
"learning_rate": 0.0015435514117993232,
"loss": 0.6835,
"step": 5120
},
{
"epoch": 1.93,
"learning_rate": 0.001541343969891956,
"loss": 0.6959,
"step": 5130
},
{
"epoch": 1.93,
"learning_rate": 0.001539132790314318,
"loss": 0.6902,
"step": 5140
},
{
"epoch": 1.94,
"learning_rate": 0.0015369178883333382,
"loss": 0.694,
"step": 5150
},
{
"epoch": 1.94,
"learning_rate": 0.001534699279241647,
"loss": 0.6969,
"step": 5160
},
{
"epoch": 1.95,
"learning_rate": 0.0015324769783574706,
"loss": 0.692,
"step": 5170
},
{
"epoch": 1.95,
"learning_rate": 0.0015302510010245243,
"loss": 0.6908,
"step": 5180
},
{
"epoch": 1.95,
"learning_rate": 0.0015280213626119082,
"loss": 0.6924,
"step": 5190
},
{
"epoch": 1.96,
"learning_rate": 0.0015257880785139988,
"loss": 0.6812,
"step": 5200
},
{
"epoch": 1.96,
"learning_rate": 0.0015235511641503447,
"loss": 0.6953,
"step": 5210
},
{
"epoch": 1.96,
"learning_rate": 0.0015213106349655598,
"loss": 0.6959,
"step": 5220
},
{
"epoch": 1.97,
"learning_rate": 0.0015190665064292149,
"loss": 0.6918,
"step": 5230
},
{
"epoch": 1.97,
"learning_rate": 0.0015168187940357335,
"loss": 0.685,
"step": 5240
},
{
"epoch": 1.98,
"learning_rate": 0.001514567513304283,
"loss": 0.6921,
"step": 5250
},
{
"epoch": 1.98,
"learning_rate": 0.0015123126797786685,
"loss": 0.6953,
"step": 5260
},
{
"epoch": 1.98,
"learning_rate": 0.0015100543090272255,
"loss": 0.6974,
"step": 5270
},
{
"epoch": 1.99,
"learning_rate": 0.00150779241664271,
"loss": 0.6934,
"step": 5280
},
{
"epoch": 1.99,
"learning_rate": 0.0015055270182421957,
"loss": 0.6921,
"step": 5290
},
{
"epoch": 1.99,
"learning_rate": 0.0015032581294669612,
"loss": 0.6842,
"step": 5300
},
{
"epoch": 2.0,
"learning_rate": 0.0015009857659823852,
"loss": 0.6919,
"step": 5310
},
{
"epoch": 2.0,
"eval_accuracy": 0.504,
"eval_loss": 0.6920435428619385,
"eval_runtime": 38.4869,
"eval_samples_per_second": 97.436,
"eval_steps_per_second": 12.186,
"step": 5314
},
{
"epoch": 2.0,
"learning_rate": 0.0014987099434778369,
"loss": 0.6916,
"step": 5320
},
{
"epoch": 2.01,
"learning_rate": 0.001496430677666568,
"loss": 0.6905,
"step": 5330
},
{
"epoch": 2.01,
"learning_rate": 0.0014941479842856046,
"loss": 0.6943,
"step": 5340
},
{
"epoch": 2.01,
"learning_rate": 0.0014918618790956376,
"loss": 0.6871,
"step": 5350
},
{
"epoch": 2.02,
"learning_rate": 0.0014895723778809151,
"loss": 0.693,
"step": 5360
},
{
"epoch": 2.02,
"learning_rate": 0.0014872794964491327,
"loss": 0.6978,
"step": 5370
},
{
"epoch": 2.02,
"learning_rate": 0.0014849832506313243,
"loss": 0.7012,
"step": 5380
},
{
"epoch": 2.03,
"learning_rate": 0.0014826836562817527,
"loss": 0.6927,
"step": 5390
},
{
"epoch": 2.03,
"learning_rate": 0.001480380729277801,
"loss": 0.6942,
"step": 5400
},
{
"epoch": 2.04,
"learning_rate": 0.001478074485519862,
"loss": 0.6929,
"step": 5410
},
{
"epoch": 2.04,
"learning_rate": 0.001475764940931229,
"loss": 0.6949,
"step": 5420
},
{
"epoch": 2.04,
"learning_rate": 0.0014734521114579853,
"loss": 0.6947,
"step": 5430
},
{
"epoch": 2.05,
"learning_rate": 0.001471136013068895,
"loss": 0.6929,
"step": 5440
},
{
"epoch": 2.05,
"learning_rate": 0.0014688166617552911,
"loss": 0.6931,
"step": 5450
},
{
"epoch": 2.05,
"learning_rate": 0.0014664940735309676,
"loss": 0.693,
"step": 5460
},
{
"epoch": 2.06,
"learning_rate": 0.0014641682644320666,
"loss": 0.6931,
"step": 5470
},
{
"epoch": 2.06,
"learning_rate": 0.0014618392505169689,
"loss": 0.6939,
"step": 5480
},
{
"epoch": 2.07,
"learning_rate": 0.0014595070478661822,
"loss": 0.6925,
"step": 5490
},
{
"epoch": 2.07,
"learning_rate": 0.0014571716725822314,
"loss": 0.6939,
"step": 5500
},
{
"epoch": 2.07,
"learning_rate": 0.001454833140789546,
"loss": 0.6843,
"step": 5510
},
{
"epoch": 2.08,
"learning_rate": 0.0014524914686343494,
"loss": 0.6937,
"step": 5520
},
{
"epoch": 2.08,
"learning_rate": 0.0014501466722845477,
"loss": 0.6934,
"step": 5530
},
{
"epoch": 2.09,
"learning_rate": 0.0014477987679296174,
"loss": 0.6933,
"step": 5540
},
{
"epoch": 2.09,
"learning_rate": 0.0014454477717804949,
"loss": 0.6932,
"step": 5550
},
{
"epoch": 2.09,
"learning_rate": 0.0014430937000694624,
"loss": 0.6928,
"step": 5560
},
{
"epoch": 2.1,
"learning_rate": 0.0014407365690500377,
"loss": 0.6937,
"step": 5570
},
{
"epoch": 2.1,
"learning_rate": 0.0014383763949968617,
"loss": 0.6925,
"step": 5580
},
{
"epoch": 2.1,
"learning_rate": 0.0014360131942055853,
"loss": 0.6923,
"step": 5590
},
{
"epoch": 2.11,
"learning_rate": 0.0014336469829927573,
"loss": 0.6941,
"step": 5600
},
{
"epoch": 2.11,
"learning_rate": 0.0014312777776957115,
"loss": 0.6927,
"step": 5610
},
{
"epoch": 2.12,
"learning_rate": 0.001428905594672455,
"loss": 0.6839,
"step": 5620
},
{
"epoch": 2.12,
"learning_rate": 0.0014265304503015535,
"loss": 0.6853,
"step": 5630
},
{
"epoch": 2.12,
"learning_rate": 0.0014241523609820198,
"loss": 0.6844,
"step": 5640
},
{
"epoch": 2.13,
"learning_rate": 0.001421771343133199,
"loss": 0.6921,
"step": 5650
},
{
"epoch": 2.13,
"learning_rate": 0.001419387413194657,
"loss": 0.6916,
"step": 5660
},
{
"epoch": 2.13,
"learning_rate": 0.001417000587626065,
"loss": 0.695,
"step": 5670
},
{
"epoch": 2.14,
"learning_rate": 0.0014146108829070883,
"loss": 0.6921,
"step": 5680
},
{
"epoch": 2.14,
"learning_rate": 0.00141221831553727,
"loss": 0.6921,
"step": 5690
},
{
"epoch": 2.15,
"learning_rate": 0.0014098229020359178,
"loss": 0.6944,
"step": 5700
},
{
"epoch": 2.15,
"learning_rate": 0.0014074246589419913,
"loss": 0.696,
"step": 5710
},
{
"epoch": 2.15,
"learning_rate": 0.0014050236028139865,
"loss": 0.6941,
"step": 5720
},
{
"epoch": 2.16,
"learning_rate": 0.0014026197502298215,
"loss": 0.684,
"step": 5730
},
{
"epoch": 2.16,
"learning_rate": 0.0014002131177867226,
"loss": 0.6942,
"step": 5740
},
{
"epoch": 2.16,
"learning_rate": 0.0013978037221011096,
"loss": 0.6945,
"step": 5750
},
{
"epoch": 2.17,
"learning_rate": 0.0013953915798084805,
"loss": 0.6932,
"step": 5760
},
{
"epoch": 2.17,
"learning_rate": 0.0013929767075632968,
"loss": 0.6939,
"step": 5770
},
{
"epoch": 2.18,
"learning_rate": 0.00139055912203887,
"loss": 0.6932,
"step": 5780
},
{
"epoch": 2.18,
"learning_rate": 0.0013881388399272442,
"loss": 0.6933,
"step": 5790
},
{
"epoch": 2.18,
"learning_rate": 0.0013857158779390813,
"loss": 0.6933,
"step": 5800
},
{
"epoch": 2.19,
"learning_rate": 0.001383290252803548,
"loss": 0.6933,
"step": 5810
},
{
"epoch": 2.19,
"learning_rate": 0.0013808619812681977,
"loss": 0.6932,
"step": 5820
},
{
"epoch": 2.19,
"learning_rate": 0.0013784310800988547,
"loss": 0.6933,
"step": 5830
},
{
"epoch": 2.2,
"learning_rate": 0.0013759975660795004,
"loss": 0.6937,
"step": 5840
},
{
"epoch": 2.2,
"learning_rate": 0.0013735614560121566,
"loss": 0.6931,
"step": 5850
},
{
"epoch": 2.21,
"learning_rate": 0.0013711227667167689,
"loss": 0.6936,
"step": 5860
},
{
"epoch": 2.21,
"learning_rate": 0.001368681515031091,
"loss": 0.6939,
"step": 5870
},
{
"epoch": 2.21,
"learning_rate": 0.0013662377178105685,
"loss": 0.6931,
"step": 5880
},
{
"epoch": 2.22,
"learning_rate": 0.0013637913919282226,
"loss": 0.6931,
"step": 5890
},
{
"epoch": 2.22,
"learning_rate": 0.001361342554274533,
"loss": 0.6926,
"step": 5900
},
{
"epoch": 2.22,
"learning_rate": 0.0013588912217573229,
"loss": 0.6936,
"step": 5910
},
{
"epoch": 2.23,
"learning_rate": 0.0013564374113016395,
"loss": 0.6924,
"step": 5920
},
{
"epoch": 2.23,
"learning_rate": 0.00135398113984964,
"loss": 0.694,
"step": 5930
},
{
"epoch": 2.24,
"learning_rate": 0.0013515224243604727,
"loss": 0.6943,
"step": 5940
},
{
"epoch": 2.24,
"learning_rate": 0.0013490612818101606,
"loss": 0.6929,
"step": 5950
},
{
"epoch": 2.24,
"learning_rate": 0.001346597729191485,
"loss": 0.693,
"step": 5960
},
{
"epoch": 2.25,
"learning_rate": 0.001344131783513866,
"loss": 0.6936,
"step": 5970
},
{
"epoch": 2.25,
"learning_rate": 0.0013416634618032477,
"loss": 0.6932,
"step": 5980
},
{
"epoch": 2.25,
"learning_rate": 0.0013391927811019781,
"loss": 0.6849,
"step": 5990
},
{
"epoch": 2.26,
"learning_rate": 0.001336719758468694,
"loss": 0.6932,
"step": 6000
},
{
"epoch": 2.26,
"learning_rate": 0.001334244410978201,
"loss": 0.6933,
"step": 6010
},
{
"epoch": 2.27,
"learning_rate": 0.0013317667557213567,
"loss": 0.6932,
"step": 6020
},
{
"epoch": 2.27,
"learning_rate": 0.001329286809804952,
"loss": 0.6931,
"step": 6030
},
{
"epoch": 2.27,
"learning_rate": 0.0013268045903515953,
"loss": 0.6931,
"step": 6040
},
{
"epoch": 2.28,
"learning_rate": 0.00132432011449959,
"loss": 0.6929,
"step": 6050
},
{
"epoch": 2.28,
"learning_rate": 0.0013218333994028206,
"loss": 0.6935,
"step": 6060
},
{
"epoch": 2.28,
"learning_rate": 0.0013193444622306314,
"loss": 0.6938,
"step": 6070
},
{
"epoch": 2.29,
"learning_rate": 0.001316853320167709,
"loss": 0.6929,
"step": 6080
},
{
"epoch": 2.29,
"learning_rate": 0.001314359990413963,
"loss": 0.694,
"step": 6090
},
{
"epoch": 2.3,
"learning_rate": 0.0013118644901844092,
"loss": 0.693,
"step": 6100
},
{
"epoch": 2.3,
"learning_rate": 0.0013093668367090477,
"loss": 0.6932,
"step": 6110
},
{
"epoch": 2.3,
"learning_rate": 0.0013068670472327465,
"loss": 0.6933,
"step": 6120
},
{
"epoch": 2.31,
"learning_rate": 0.001304365139015121,
"loss": 0.6932,
"step": 6130
},
{
"epoch": 2.31,
"learning_rate": 0.0013018611293304156,
"loss": 0.6932,
"step": 6140
},
{
"epoch": 2.31,
"learning_rate": 0.0012993550354673839,
"loss": 0.6933,
"step": 6150
},
{
"epoch": 2.32,
"learning_rate": 0.0012968468747291695,
"loss": 0.6932,
"step": 6160
},
{
"epoch": 2.32,
"learning_rate": 0.0012943366644331874,
"loss": 0.6846,
"step": 6170
},
{
"epoch": 2.33,
"learning_rate": 0.001291824421911002,
"loss": 0.6929,
"step": 6180
},
{
"epoch": 2.33,
"learning_rate": 0.0012893101645082108,
"loss": 0.6847,
"step": 6190
},
{
"epoch": 2.33,
"learning_rate": 0.0012867939095843218,
"loss": 0.6849,
"step": 6200
},
{
"epoch": 2.34,
"learning_rate": 0.0012842756745126349,
"loss": 0.6935,
"step": 6210
},
{
"epoch": 2.34,
"learning_rate": 0.0012817554766801223,
"loss": 0.6933,
"step": 6220
},
{
"epoch": 2.34,
"learning_rate": 0.0012792333334873075,
"loss": 0.6934,
"step": 6230
},
{
"epoch": 2.35,
"learning_rate": 0.0012767092623481454,
"loss": 0.6925,
"step": 6240
},
{
"epoch": 2.35,
"learning_rate": 0.0012741832806899027,
"loss": 0.6934,
"step": 6250
},
{
"epoch": 2.36,
"learning_rate": 0.0012716554059530373,
"loss": 0.693,
"step": 6260
},
{
"epoch": 2.36,
"learning_rate": 0.001269125655591077,
"loss": 0.694,
"step": 6270
},
{
"epoch": 2.36,
"learning_rate": 0.0012665940470705003,
"loss": 0.6924,
"step": 6280
},
{
"epoch": 2.37,
"learning_rate": 0.0012640605978706148,
"loss": 0.6912,
"step": 6290
},
{
"epoch": 2.37,
"learning_rate": 0.0012615253254834378,
"loss": 0.6936,
"step": 6300
},
{
"epoch": 2.37,
"learning_rate": 0.0012589882474135735,
"loss": 0.694,
"step": 6310
},
{
"epoch": 2.38,
"learning_rate": 0.001256449381178094,
"loss": 0.6929,
"step": 6320
},
{
"epoch": 2.38,
"learning_rate": 0.001253908744306418,
"loss": 0.6943,
"step": 6330
},
{
"epoch": 2.39,
"learning_rate": 0.0012513663543401881,
"loss": 0.6932,
"step": 6340
},
{
"epoch": 2.39,
"learning_rate": 0.0012488222288331524,
"loss": 0.6921,
"step": 6350
},
{
"epoch": 2.39,
"learning_rate": 0.0012462763853510412,
"loss": 0.692,
"step": 6360
},
{
"epoch": 2.4,
"learning_rate": 0.001243728841471447,
"loss": 0.6929,
"step": 6370
},
{
"epoch": 2.4,
"learning_rate": 0.0012411796147837016,
"loss": 0.6857,
"step": 6380
},
{
"epoch": 2.4,
"learning_rate": 0.0012386287228887567,
"loss": 0.6943,
"step": 6390
},
{
"epoch": 2.41,
"learning_rate": 0.0012360761833990605,
"loss": 0.6947,
"step": 6400
},
{
"epoch": 2.41,
"learning_rate": 0.0012335220139384373,
"loss": 0.6929,
"step": 6410
},
{
"epoch": 2.42,
"learning_rate": 0.0012309662321419656,
"loss": 0.6946,
"step": 6420
},
{
"epoch": 2.42,
"learning_rate": 0.0012284088556558555,
"loss": 0.6928,
"step": 6430
},
{
"epoch": 2.42,
"learning_rate": 0.0012258499021373282,
"loss": 0.6933,
"step": 6440
},
{
"epoch": 2.43,
"learning_rate": 0.0012232893892544928,
"loss": 0.6932,
"step": 6450
},
{
"epoch": 2.43,
"learning_rate": 0.0012207273346862256,
"loss": 0.6929,
"step": 6460
},
{
"epoch": 2.44,
"learning_rate": 0.0012181637561220467,
"loss": 0.6935,
"step": 6470
},
{
"epoch": 2.44,
"learning_rate": 0.001215598671261999,
"loss": 0.6933,
"step": 6480
},
{
"epoch": 2.44,
"learning_rate": 0.001213032097816525,
"loss": 0.6935,
"step": 6490
},
{
"epoch": 2.45,
"learning_rate": 0.001210464053506346,
"loss": 0.693,
"step": 6500
},
{
"epoch": 2.45,
"learning_rate": 0.0012078945560623374,
"loss": 0.692,
"step": 6510
},
{
"epoch": 2.45,
"learning_rate": 0.0012053236232254085,
"loss": 0.6953,
"step": 6520
},
{
"epoch": 2.46,
"learning_rate": 0.0012027512727463795,
"loss": 0.6932,
"step": 6530
},
{
"epoch": 2.46,
"learning_rate": 0.001200177522385858,
"loss": 0.6841,
"step": 6540
},
{
"epoch": 2.47,
"learning_rate": 0.001197602389914117,
"loss": 0.6939,
"step": 6550
},
{
"epoch": 2.47,
"learning_rate": 0.0011950258931109723,
"loss": 0.6941,
"step": 6560
},
{
"epoch": 2.47,
"learning_rate": 0.0011924480497656597,
"loss": 0.6922,
"step": 6570
},
{
"epoch": 2.48,
"learning_rate": 0.0011898688776767123,
"loss": 0.6818,
"step": 6580
},
{
"epoch": 2.48,
"learning_rate": 0.001187288394651837,
"loss": 0.7391,
"step": 6590
},
{
"epoch": 2.48,
"learning_rate": 0.0011847066185077919,
"loss": 0.6951,
"step": 6600
},
{
"epoch": 2.49,
"learning_rate": 0.001182123567070264,
"loss": 0.6944,
"step": 6610
},
{
"epoch": 2.49,
"learning_rate": 0.0011795392581737448,
"loss": 0.8711,
"step": 6620
},
{
"epoch": 2.5,
"learning_rate": 0.0011769537096614079,
"loss": 0.6888,
"step": 6630
},
{
"epoch": 2.5,
"learning_rate": 0.0011743669393849863,
"loss": 0.7258,
"step": 6640
},
{
"epoch": 2.5,
"learning_rate": 0.001171778965204648,
"loss": 0.6932,
"step": 6650
},
{
"epoch": 2.51,
"learning_rate": 0.0011691898049888733,
"loss": 0.6774,
"step": 6660
},
{
"epoch": 2.51,
"learning_rate": 0.001166599476614332,
"loss": 0.7073,
"step": 6670
},
{
"epoch": 2.51,
"learning_rate": 0.0011640079979657587,
"loss": 0.7117,
"step": 6680
},
{
"epoch": 2.52,
"learning_rate": 0.0011614153869358307,
"loss": 0.7077,
"step": 6690
},
{
"epoch": 2.52,
"learning_rate": 0.0011588216614250423,
"loss": 0.6959,
"step": 6700
},
{
"epoch": 2.53,
"learning_rate": 0.0011562268393415845,
"loss": 0.6969,
"step": 6710
},
{
"epoch": 2.53,
"learning_rate": 0.0011536309386012181,
"loss": 0.6933,
"step": 6720
},
{
"epoch": 2.53,
"learning_rate": 0.001151033977127152,
"loss": 0.6991,
"step": 6730
},
{
"epoch": 2.54,
"learning_rate": 0.001148435972849919,
"loss": 0.6912,
"step": 6740
},
{
"epoch": 2.54,
"learning_rate": 0.0011458369437072516,
"loss": 0.6984,
"step": 6750
},
{
"epoch": 2.54,
"learning_rate": 0.0011432369076439586,
"loss": 0.6976,
"step": 6760
},
{
"epoch": 2.55,
"learning_rate": 0.0011406358826118004,
"loss": 0.698,
"step": 6770
},
{
"epoch": 2.55,
"learning_rate": 0.0011380338865693664,
"loss": 0.6933,
"step": 6780
},
{
"epoch": 2.56,
"learning_rate": 0.0011354309374819503,
"loss": 0.6949,
"step": 6790
},
{
"epoch": 2.56,
"learning_rate": 0.0011328270533214253,
"loss": 0.6944,
"step": 6800
},
{
"epoch": 2.56,
"learning_rate": 0.0011302222520661214,
"loss": 0.6944,
"step": 6810
},
{
"epoch": 2.57,
"learning_rate": 0.0011276165517007004,
"loss": 0.6929,
"step": 6820
},
{
"epoch": 2.57,
"learning_rate": 0.0011250099702160318,
"loss": 0.7,
"step": 6830
},
{
"epoch": 2.57,
"learning_rate": 0.001122402525609069,
"loss": 0.6856,
"step": 6840
},
{
"epoch": 2.58,
"learning_rate": 0.0011197942358827247,
"loss": 0.6936,
"step": 6850
},
{
"epoch": 2.58,
"learning_rate": 0.0011171851190457463,
"loss": 0.6923,
"step": 6860
},
{
"epoch": 2.59,
"learning_rate": 0.0011145751931125926,
"loss": 0.6961,
"step": 6870
},
{
"epoch": 2.59,
"learning_rate": 0.0011119644761033078,
"loss": 0.6947,
"step": 6880
},
{
"epoch": 2.59,
"learning_rate": 0.0011093529860433994,
"loss": 0.6971,
"step": 6890
},
{
"epoch": 2.6,
"learning_rate": 0.0011067407409637107,
"loss": 0.6933,
"step": 6900
},
{
"epoch": 2.6,
"learning_rate": 0.0011041277589002997,
"loss": 0.692,
"step": 6910
},
{
"epoch": 2.6,
"learning_rate": 0.001101514057894311,
"loss": 0.69,
"step": 6920
},
{
"epoch": 2.61,
"learning_rate": 0.001098899655991855,
"loss": 0.7112,
"step": 6930
},
{
"epoch": 2.61,
"learning_rate": 0.0010962845712438798,
"loss": 0.6939,
"step": 6940
},
{
"epoch": 2.62,
"learning_rate": 0.0010936688217060495,
"loss": 0.6893,
"step": 6950
},
{
"epoch": 2.62,
"learning_rate": 0.001091052425438617,
"loss": 0.7141,
"step": 6960
},
{
"epoch": 2.62,
"learning_rate": 0.001088435400506301,
"loss": 0.6952,
"step": 6970
},
{
"epoch": 2.63,
"learning_rate": 0.0010858177649781613,
"loss": 0.6988,
"step": 6980
},
{
"epoch": 2.63,
"learning_rate": 0.0010831995369274723,
"loss": 0.6745,
"step": 6990
},
{
"epoch": 2.63,
"learning_rate": 0.0010805807344316009,
"loss": 0.7222,
"step": 7000
},
{
"epoch": 2.64,
"learning_rate": 0.0010779613755718788,
"loss": 0.6888,
"step": 7010
},
{
"epoch": 2.64,
"learning_rate": 0.0010753414784334797,
"loss": 0.69,
"step": 7020
},
{
"epoch": 2.65,
"learning_rate": 0.0010727210611052947,
"loss": 0.6989,
"step": 7030
},
{
"epoch": 2.65,
"learning_rate": 0.0010701001416798047,
"loss": 0.682,
"step": 7040
},
{
"epoch": 2.65,
"learning_rate": 0.0010674787382529585,
"loss": 0.6947,
"step": 7050
},
{
"epoch": 2.66,
"learning_rate": 0.0010648568689240467,
"loss": 0.7027,
"step": 7060
},
{
"epoch": 2.66,
"learning_rate": 0.001062234551795576,
"loss": 0.6927,
"step": 7070
},
{
"epoch": 2.66,
"learning_rate": 0.0010596118049731454,
"loss": 0.7177,
"step": 7080
},
{
"epoch": 2.67,
"learning_rate": 0.0010569886465653206,
"loss": 0.7045,
"step": 7090
},
{
"epoch": 2.67,
"learning_rate": 0.0010543650946835091,
"loss": 0.7008,
"step": 7100
},
{
"epoch": 2.68,
"learning_rate": 0.0010517411674418348,
"loss": 0.6965,
"step": 7110
},
{
"epoch": 2.68,
"learning_rate": 0.0010491168829570136,
"loss": 0.7021,
"step": 7120
},
{
"epoch": 2.68,
"learning_rate": 0.001046492259348228,
"loss": 0.6951,
"step": 7130
},
{
"epoch": 2.69,
"learning_rate": 0.0010438673147370016,
"loss": 0.6969,
"step": 7140
},
{
"epoch": 2.69,
"learning_rate": 0.0010412420672470746,
"loss": 0.6841,
"step": 7150
},
{
"epoch": 2.69,
"learning_rate": 0.0010386165350042785,
"loss": 0.6862,
"step": 7160
},
{
"epoch": 2.7,
"learning_rate": 0.0010359907361364104,
"loss": 0.7211,
"step": 7170
},
{
"epoch": 2.7,
"learning_rate": 0.001033364688773109,
"loss": 0.6906,
"step": 7180
},
{
"epoch": 2.71,
"learning_rate": 0.001030738411045728,
"loss": 0.6924,
"step": 7190
},
{
"epoch": 2.71,
"learning_rate": 0.001028111921087212,
"loss": 0.6955,
"step": 7200
},
{
"epoch": 2.71,
"learning_rate": 0.001025485237031971,
"loss": 0.6951,
"step": 7210
},
{
"epoch": 2.72,
"learning_rate": 0.0010228583770157547,
"loss": 0.6955,
"step": 7220
},
{
"epoch": 2.72,
"learning_rate": 0.0010202313591755282,
"loss": 0.6929,
"step": 7230
},
{
"epoch": 2.72,
"learning_rate": 0.0010176042016493464,
"loss": 0.6947,
"step": 7240
},
{
"epoch": 2.73,
"learning_rate": 0.0010149769225762278,
"loss": 0.6934,
"step": 7250
},
{
"epoch": 2.73,
"learning_rate": 0.001012349540096031,
"loss": 0.6949,
"step": 7260
},
{
"epoch": 2.74,
"learning_rate": 0.0010097220723493281,
"loss": 0.692,
"step": 7270
},
{
"epoch": 2.74,
"learning_rate": 0.0010070945374772802,
"loss": 0.6946,
"step": 7280
},
{
"epoch": 2.74,
"learning_rate": 0.0010044669536215113,
"loss": 0.6937,
"step": 7290
},
{
"epoch": 2.75,
"learning_rate": 0.0010018393389239846,
"loss": 0.6931,
"step": 7300
},
{
"epoch": 2.75,
"learning_rate": 0.0009992117115268757,
"loss": 0.6942,
"step": 7310
},
{
"epoch": 2.75,
"learning_rate": 0.0009965840895724471,
"loss": 0.6915,
"step": 7320
},
{
"epoch": 2.76,
"learning_rate": 0.000993956491202925,
"loss": 0.6913,
"step": 7330
},
{
"epoch": 2.76,
"learning_rate": 0.000991328934560372,
"loss": 0.694,
"step": 7340
},
{
"epoch": 2.77,
"learning_rate": 0.0009887014377865631,
"loss": 0.7042,
"step": 7350
},
{
"epoch": 2.77,
"learning_rate": 0.0009860740190228594,
"loss": 0.6927,
"step": 7360
},
{
"epoch": 2.77,
"learning_rate": 0.000983446696410084,
"loss": 0.6935,
"step": 7370
},
{
"epoch": 2.78,
"learning_rate": 0.0009808194880883952,
"loss": 0.6934,
"step": 7380
},
{
"epoch": 2.78,
"learning_rate": 0.000978192412197163,
"loss": 0.6931,
"step": 7390
},
{
"epoch": 2.79,
"learning_rate": 0.000975565486874843,
"loss": 0.6921,
"step": 7400
},
{
"epoch": 2.79,
"learning_rate": 0.000972938730258851,
"loss": 0.693,
"step": 7410
},
{
"epoch": 2.79,
"learning_rate": 0.000970312160485438,
"loss": 0.6944,
"step": 7420
},
{
"epoch": 2.8,
"learning_rate": 0.0009676857956895647,
"loss": 0.6941,
"step": 7430
},
{
"epoch": 2.8,
"learning_rate": 0.000965059654004777,
"loss": 0.6952,
"step": 7440
},
{
"epoch": 2.8,
"learning_rate": 0.0009624337535630802,
"loss": 0.694,
"step": 7450
},
{
"epoch": 2.81,
"learning_rate": 0.0009598081124948137,
"loss": 0.6941,
"step": 7460
},
{
"epoch": 2.81,
"learning_rate": 0.0009571827489285264,
"loss": 0.6955,
"step": 7470
},
{
"epoch": 2.82,
"learning_rate": 0.0009545576809908511,
"loss": 0.6932,
"step": 7480
},
{
"epoch": 2.82,
"learning_rate": 0.0009519329268063794,
"loss": 0.6937,
"step": 7490
},
{
"epoch": 2.82,
"learning_rate": 0.0009493085044975368,
"loss": 0.6936,
"step": 7500
},
{
"epoch": 2.83,
"learning_rate": 0.000946684432184457,
"loss": 0.6932,
"step": 7510
},
{
"epoch": 2.83,
"learning_rate": 0.0009440607279848573,
"loss": 0.6933,
"step": 7520
},
{
"epoch": 2.83,
"learning_rate": 0.0009414374100139139,
"loss": 0.6937,
"step": 7530
},
{
"epoch": 2.84,
"learning_rate": 0.0009388144963841359,
"loss": 0.6931,
"step": 7540
},
{
"epoch": 2.84,
"learning_rate": 0.0009361920052052404,
"loss": 0.6933,
"step": 7550
},
{
"epoch": 2.85,
"learning_rate": 0.0009335699545840282,
"loss": 0.6927,
"step": 7560
},
{
"epoch": 2.85,
"learning_rate": 0.0009309483626242581,
"loss": 0.6943,
"step": 7570
},
{
"epoch": 2.85,
"learning_rate": 0.0009283272474265218,
"loss": 0.6934,
"step": 7580
},
{
"epoch": 2.86,
"learning_rate": 0.0009257066270881199,
"loss": 0.6929,
"step": 7590
},
{
"epoch": 2.86,
"learning_rate": 0.0009230865197029358,
"loss": 0.6933,
"step": 7600
},
{
"epoch": 2.86,
"learning_rate": 0.0009204669433613112,
"loss": 0.6936,
"step": 7610
},
{
"epoch": 2.87,
"learning_rate": 0.0009178479161499217,
"loss": 0.6925,
"step": 7620
},
{
"epoch": 2.87,
"learning_rate": 0.0009152294561516509,
"loss": 0.6928,
"step": 7630
},
{
"epoch": 2.88,
"learning_rate": 0.0009126115814454666,
"loss": 0.693,
"step": 7640
},
{
"epoch": 2.88,
"learning_rate": 0.0009099943101062948,
"loss": 0.6912,
"step": 7650
},
{
"epoch": 2.88,
"learning_rate": 0.0009073776602048968,
"loss": 0.6908,
"step": 7660
},
{
"epoch": 2.89,
"learning_rate": 0.0009047616498077425,
"loss": 0.6951,
"step": 7670
},
{
"epoch": 2.89,
"learning_rate": 0.0009021462969768857,
"loss": 0.6923,
"step": 7680
},
{
"epoch": 2.89,
"learning_rate": 0.0008995316197698415,
"loss": 0.6941,
"step": 7690
},
{
"epoch": 2.9,
"learning_rate": 0.0008969176362394591,
"loss": 0.6934,
"step": 7700
},
{
"epoch": 2.9,
"learning_rate": 0.0008943043644337987,
"loss": 0.6943,
"step": 7710
},
{
"epoch": 2.91,
"learning_rate": 0.0008916918223960066,
"loss": 0.6912,
"step": 7720
},
{
"epoch": 2.91,
"learning_rate": 0.0008890800281641899,
"loss": 0.6918,
"step": 7730
},
{
"epoch": 2.91,
"learning_rate": 0.0008864689997712933,
"loss": 0.6879,
"step": 7740
},
{
"epoch": 2.92,
"learning_rate": 0.0008838587552449731,
"loss": 0.6923,
"step": 7750
},
{
"epoch": 2.92,
"learning_rate": 0.0008812493126074739,
"loss": 0.6935,
"step": 7760
},
{
"epoch": 2.92,
"learning_rate": 0.0008786406898755037,
"loss": 0.6875,
"step": 7770
},
{
"epoch": 2.93,
"learning_rate": 0.000876032905060109,
"loss": 0.7034,
"step": 7780
},
{
"epoch": 2.93,
"learning_rate": 0.0008734259761665518,
"loss": 0.6968,
"step": 7790
},
{
"epoch": 2.94,
"learning_rate": 0.0008708199211941841,
"loss": 0.6948,
"step": 7800
},
{
"epoch": 2.94,
"learning_rate": 0.0008682147581363234,
"loss": 0.6923,
"step": 7810
},
{
"epoch": 2.94,
"learning_rate": 0.0008656105049801299,
"loss": 0.694,
"step": 7820
},
{
"epoch": 2.95,
"learning_rate": 0.0008630071797064811,
"loss": 0.6958,
"step": 7830
},
{
"epoch": 2.95,
"learning_rate": 0.0008604048002898479,
"loss": 0.6937,
"step": 7840
},
{
"epoch": 2.95,
"learning_rate": 0.0008578033846981705,
"loss": 0.6937,
"step": 7850
},
{
"epoch": 2.96,
"learning_rate": 0.0008552029508927349,
"loss": 0.693,
"step": 7860
},
{
"epoch": 2.96,
"learning_rate": 0.0008526035168280482,
"loss": 0.6936,
"step": 7870
},
{
"epoch": 2.97,
"learning_rate": 0.0008500051004517145,
"loss": 0.6931,
"step": 7880
},
{
"epoch": 2.97,
"learning_rate": 0.0008474077197043118,
"loss": 0.694,
"step": 7890
},
{
"epoch": 2.97,
"learning_rate": 0.0008448113925192676,
"loss": 0.6962,
"step": 7900
},
{
"epoch": 2.98,
"learning_rate": 0.0008422161368227349,
"loss": 0.6916,
"step": 7910
},
{
"epoch": 2.98,
"learning_rate": 0.0008396219705334693,
"loss": 0.6928,
"step": 7920
},
{
"epoch": 2.98,
"learning_rate": 0.0008370289115627038,
"loss": 0.6899,
"step": 7930
},
{
"epoch": 2.99,
"learning_rate": 0.0008344369778140267,
"loss": 0.6992,
"step": 7940
},
{
"epoch": 2.99,
"learning_rate": 0.000831846187183257,
"loss": 0.6821,
"step": 7950
},
{
"epoch": 3.0,
"learning_rate": 0.0008292565575583211,
"loss": 0.6652,
"step": 7960
},
{
"epoch": 3.0,
"learning_rate": 0.0008266681068191294,
"loss": 0.7404,
"step": 7970
},
{
"epoch": 3.0,
"eval_accuracy": 0.496,
"eval_loss": 0.7076430916786194,
"eval_runtime": 38.5319,
"eval_samples_per_second": 97.322,
"eval_steps_per_second": 12.172,
"step": 7971
},
{
"epoch": 3.0,
"learning_rate": 0.0008240808528374526,
"loss": 0.6898,
"step": 7980
},
{
"epoch": 3.01,
"learning_rate": 0.0008214948134767989,
"loss": 0.7064,
"step": 7990
},
{
"epoch": 3.01,
"learning_rate": 0.0008189100065922896,
"loss": 0.6986,
"step": 8000
},
{
"epoch": 3.01,
"learning_rate": 0.0008163264500305372,
"loss": 0.6959,
"step": 8010
},
{
"epoch": 3.02,
"learning_rate": 0.0008137441616295207,
"loss": 0.6893,
"step": 8020
},
{
"epoch": 3.02,
"learning_rate": 0.0008111631592184638,
"loss": 0.7089,
"step": 8030
},
{
"epoch": 3.03,
"learning_rate": 0.0008085834606177104,
"loss": 0.6972,
"step": 8040
},
{
"epoch": 3.03,
"learning_rate": 0.0008060050836386035,
"loss": 0.6989,
"step": 8050
},
{
"epoch": 3.03,
"learning_rate": 0.00080342804608336,
"loss": 0.6932,
"step": 8060
},
{
"epoch": 3.04,
"learning_rate": 0.0008008523657449493,
"loss": 0.6927,
"step": 8070
},
{
"epoch": 3.04,
"learning_rate": 0.0007982780604069701,
"loss": 0.6936,
"step": 8080
},
{
"epoch": 3.04,
"learning_rate": 0.0007957051478435272,
"loss": 0.6934,
"step": 8090
},
{
"epoch": 3.05,
"learning_rate": 0.0007931336458191091,
"loss": 0.6929,
"step": 8100
},
{
"epoch": 3.05,
"learning_rate": 0.0007905635720884659,
"loss": 0.6802,
"step": 8110
},
{
"epoch": 3.06,
"learning_rate": 0.0007879949443964851,
"loss": 0.6782,
"step": 8120
},
{
"epoch": 3.06,
"learning_rate": 0.0007854277804780712,
"loss": 0.7108,
"step": 8130
},
{
"epoch": 3.06,
"learning_rate": 0.0007828620980580217,
"loss": 0.6865,
"step": 8140
},
{
"epoch": 3.07,
"learning_rate": 0.0007802979148509049,
"loss": 0.6867,
"step": 8150
},
{
"epoch": 3.07,
"learning_rate": 0.0007777352485609385,
"loss": 0.6991,
"step": 8160
},
{
"epoch": 3.07,
"learning_rate": 0.0007751741168818663,
"loss": 0.6899,
"step": 8170
},
{
"epoch": 3.08,
"learning_rate": 0.000772614537496837,
"loss": 0.6976,
"step": 8180
},
{
"epoch": 3.08,
"learning_rate": 0.000770056528078281,
"loss": 0.6906,
"step": 8190
},
{
"epoch": 3.09,
"learning_rate": 0.0007675001062877892,
"loss": 0.6952,
"step": 8200
},
{
"epoch": 3.09,
"learning_rate": 0.0007649452897759911,
"loss": 0.6864,
"step": 8210
},
{
"epoch": 3.09,
"learning_rate": 0.0007623920961824323,
"loss": 0.7033,
"step": 8220
},
{
"epoch": 3.1,
"learning_rate": 0.0007598405431354533,
"loss": 0.6934,
"step": 8230
},
{
"epoch": 3.1,
"learning_rate": 0.0007572906482520673,
"loss": 0.694,
"step": 8240
},
{
"epoch": 3.11,
"learning_rate": 0.0007547424291378394,
"loss": 0.696,
"step": 8250
},
{
"epoch": 3.11,
"learning_rate": 0.0007521959033867638,
"loss": 0.6949,
"step": 8260
},
{
"epoch": 3.11,
"learning_rate": 0.0007496510885811433,
"loss": 0.6942,
"step": 8270
},
{
"epoch": 3.12,
"learning_rate": 0.0007471080022914678,
"loss": 0.6935,
"step": 8280
},
{
"epoch": 3.12,
"learning_rate": 0.0007445666620762923,
"loss": 0.6903,
"step": 8290
},
{
"epoch": 3.12,
"learning_rate": 0.0007420270854821167,
"loss": 0.6907,
"step": 8300
},
{
"epoch": 3.13,
"learning_rate": 0.0007394892900432635,
"loss": 0.6905,
"step": 8310
},
{
"epoch": 3.13,
"learning_rate": 0.000736953293281758,
"loss": 0.6896,
"step": 8320
},
{
"epoch": 3.14,
"learning_rate": 0.0007344191127072065,
"loss": 0.7011,
"step": 8330
},
{
"epoch": 3.14,
"learning_rate": 0.000731886765816675,
"loss": 0.6993,
"step": 8340
},
{
"epoch": 3.14,
"learning_rate": 0.0007293562700945696,
"loss": 0.6936,
"step": 8350
},
{
"epoch": 3.15,
"learning_rate": 0.0007268276430125149,
"loss": 0.6912,
"step": 8360
},
{
"epoch": 3.15,
"learning_rate": 0.0007243009020292335,
"loss": 0.708,
"step": 8370
},
{
"epoch": 3.15,
"learning_rate": 0.0007217760645904259,
"loss": 0.6942,
"step": 8380
},
{
"epoch": 3.16,
"learning_rate": 0.0007192531481286493,
"loss": 0.6937,
"step": 8390
},
{
"epoch": 3.16,
"learning_rate": 0.0007167321700631981,
"loss": 0.693,
"step": 8400
},
{
"epoch": 3.17,
"learning_rate": 0.0007142131477999826,
"loss": 0.6929,
"step": 8410
},
{
"epoch": 3.17,
"learning_rate": 0.0007116960987314103,
"loss": 0.6935,
"step": 8420
},
{
"epoch": 3.17,
"learning_rate": 0.000709181040236264,
"loss": 0.6935,
"step": 8430
},
{
"epoch": 3.18,
"learning_rate": 0.0007066679896795827,
"loss": 0.6939,
"step": 8440
},
{
"epoch": 3.18,
"learning_rate": 0.0007041569644125429,
"loss": 0.6932,
"step": 8450
},
{
"epoch": 3.18,
"learning_rate": 0.0007016479817723367,
"loss": 0.6929,
"step": 8460
},
{
"epoch": 3.19,
"learning_rate": 0.0006991410590820529,
"loss": 0.693,
"step": 8470
},
{
"epoch": 3.19,
"learning_rate": 0.000696636213650558,
"loss": 0.6938,
"step": 8480
},
{
"epoch": 3.2,
"learning_rate": 0.0006941334627723761,
"loss": 0.6921,
"step": 8490
},
{
"epoch": 3.2,
"learning_rate": 0.0006916328237275694,
"loss": 0.6955,
"step": 8500
},
{
"epoch": 3.2,
"learning_rate": 0.0006891343137816195,
"loss": 0.6915,
"step": 8510
},
{
"epoch": 3.21,
"learning_rate": 0.0006866379501853073,
"loss": 0.6935,
"step": 8520
},
{
"epoch": 3.21,
"learning_rate": 0.0006841437501745947,
"loss": 0.6936,
"step": 8530
},
{
"epoch": 3.21,
"learning_rate": 0.0006816517309705051,
"loss": 0.698,
"step": 8540
},
{
"epoch": 3.22,
"learning_rate": 0.0006791619097790049,
"loss": 0.6925,
"step": 8550
},
{
"epoch": 3.22,
"learning_rate": 0.0006766743037908844,
"loss": 0.6916,
"step": 8560
},
{
"epoch": 3.23,
"learning_rate": 0.0006741889301816391,
"loss": 0.6941,
"step": 8570
},
{
"epoch": 3.23,
"learning_rate": 0.0006717058061113513,
"loss": 0.6939,
"step": 8580
},
{
"epoch": 3.23,
"learning_rate": 0.0006692249487245714,
"loss": 0.6959,
"step": 8590
},
{
"epoch": 3.24,
"learning_rate": 0.0006667463751501996,
"loss": 0.6951,
"step": 8600
},
{
"epoch": 3.24,
"learning_rate": 0.0006642701025013682,
"loss": 0.6924,
"step": 8610
},
{
"epoch": 3.24,
"learning_rate": 0.0006617961478753223,
"loss": 0.6897,
"step": 8620
},
{
"epoch": 3.25,
"learning_rate": 0.0006593245283533025,
"loss": 0.6893,
"step": 8630
},
{
"epoch": 3.25,
"learning_rate": 0.000656855261000427,
"loss": 0.6887,
"step": 8640
},
{
"epoch": 3.26,
"learning_rate": 0.0006543883628655738,
"loss": 0.691,
"step": 8650
},
{
"epoch": 3.26,
"learning_rate": 0.0006519238509812623,
"loss": 0.6933,
"step": 8660
},
{
"epoch": 3.26,
"learning_rate": 0.0006494617423635367,
"loss": 0.703,
"step": 8670
},
{
"epoch": 3.27,
"learning_rate": 0.0006470020540118478,
"loss": 0.6978,
"step": 8680
},
{
"epoch": 3.27,
"learning_rate": 0.0006445448029089356,
"loss": 0.6925,
"step": 8690
},
{
"epoch": 3.27,
"learning_rate": 0.0006420900060207129,
"loss": 0.6941,
"step": 8700
},
{
"epoch": 3.28,
"learning_rate": 0.0006396376802961468,
"loss": 0.6933,
"step": 8710
},
{
"epoch": 3.28,
"learning_rate": 0.0006371878426671431,
"loss": 0.6912,
"step": 8720
},
{
"epoch": 3.29,
"learning_rate": 0.0006347405100484285,
"loss": 0.6882,
"step": 8730
},
{
"epoch": 3.29,
"learning_rate": 0.0006322956993374338,
"loss": 0.6938,
"step": 8740
},
{
"epoch": 3.29,
"learning_rate": 0.0006298534274141778,
"loss": 0.7052,
"step": 8750
},
{
"epoch": 3.3,
"learning_rate": 0.0006274137111411501,
"loss": 0.6937,
"step": 8760
},
{
"epoch": 3.3,
"learning_rate": 0.0006249765673631953,
"loss": 0.6942,
"step": 8770
},
{
"epoch": 3.3,
"learning_rate": 0.0006225420129073962,
"loss": 0.6926,
"step": 8780
},
{
"epoch": 3.31,
"learning_rate": 0.0006201100645829582,
"loss": 0.6935,
"step": 8790
},
{
"epoch": 3.31,
"learning_rate": 0.0006176807391810922,
"loss": 0.6958,
"step": 8800
},
{
"epoch": 3.32,
"learning_rate": 0.0006152540534748998,
"loss": 0.6935,
"step": 8810
},
{
"epoch": 3.32,
"learning_rate": 0.0006128300242192568,
"loss": 0.6928,
"step": 8820
},
{
"epoch": 3.32,
"learning_rate": 0.000610408668150698,
"loss": 0.6935,
"step": 8830
},
{
"epoch": 3.33,
"learning_rate": 0.0006079900019873008,
"loss": 0.6925,
"step": 8840
},
{
"epoch": 3.33,
"learning_rate": 0.0006055740424285712,
"loss": 0.6948,
"step": 8850
},
{
"epoch": 3.33,
"learning_rate": 0.0006031608061553267,
"loss": 0.6939,
"step": 8860
},
{
"epoch": 3.34,
"learning_rate": 0.0006007503098295828,
"loss": 0.6937,
"step": 8870
},
{
"epoch": 3.34,
"learning_rate": 0.000598342570094437,
"loss": 0.6938,
"step": 8880
},
{
"epoch": 3.35,
"learning_rate": 0.000595937603573954,
"loss": 0.6933,
"step": 8890
},
{
"epoch": 3.35,
"learning_rate": 0.0005935354268730509,
"loss": 0.6933,
"step": 8900
},
{
"epoch": 3.35,
"learning_rate": 0.0005911360565773833,
"loss": 0.6932,
"step": 8910
},
{
"epoch": 3.36,
"learning_rate": 0.0005887395092532299,
"loss": 0.694,
"step": 8920
},
{
"epoch": 3.36,
"learning_rate": 0.0005863458014473779,
"loss": 0.6893,
"step": 8930
},
{
"epoch": 3.36,
"learning_rate": 0.0005839549496870098,
"loss": 0.6985,
"step": 8940
},
{
"epoch": 3.37,
"learning_rate": 0.0005815669704795887,
"loss": 0.6949,
"step": 8950
},
{
"epoch": 3.37,
"learning_rate": 0.0005791818803127443,
"loss": 0.6927,
"step": 8960
},
{
"epoch": 3.38,
"learning_rate": 0.0005767996956541583,
"loss": 0.693,
"step": 8970
},
{
"epoch": 3.38,
"learning_rate": 0.0005744204329514533,
"loss": 0.6939,
"step": 8980
},
{
"epoch": 3.38,
"learning_rate": 0.0005720441086320763,
"loss": 0.6931,
"step": 8990
},
{
"epoch": 3.39,
"learning_rate": 0.0005696707391031861,
"loss": 0.6924,
"step": 9000
},
{
"epoch": 3.39,
"learning_rate": 0.0005673003407515417,
"loss": 0.6942,
"step": 9010
},
{
"epoch": 3.39,
"learning_rate": 0.0005649329299433861,
"loss": 0.6939,
"step": 9020
},
{
"epoch": 3.4,
"learning_rate": 0.0005625685230243369,
"loss": 0.6935,
"step": 9030
},
{
"epoch": 3.4,
"learning_rate": 0.0005602071363192695,
"loss": 0.6909,
"step": 9040
},
{
"epoch": 3.41,
"learning_rate": 0.0005578487861322086,
"loss": 0.6917,
"step": 9050
},
{
"epoch": 3.41,
"learning_rate": 0.0005554934887462116,
"loss": 0.6911,
"step": 9060
},
{
"epoch": 3.41,
"learning_rate": 0.0005531412604232595,
"loss": 0.6953,
"step": 9070
},
{
"epoch": 3.42,
"learning_rate": 0.0005507921174041419,
"loss": 0.6914,
"step": 9080
},
{
"epoch": 3.42,
"learning_rate": 0.0005484460759083478,
"loss": 0.6914,
"step": 9090
},
{
"epoch": 3.42,
"learning_rate": 0.0005461031521339498,
"loss": 0.6963,
"step": 9100
},
{
"epoch": 3.43,
"learning_rate": 0.0005437633622574961,
"loss": 0.69,
"step": 9110
},
{
"epoch": 3.43,
"learning_rate": 0.0005414267224338971,
"loss": 0.6921,
"step": 9120
},
{
"epoch": 3.44,
"learning_rate": 0.0005390932487963125,
"loss": 0.6899,
"step": 9130
},
{
"epoch": 3.44,
"learning_rate": 0.0005367629574560431,
"loss": 0.6983,
"step": 9140
},
{
"epoch": 3.44,
"learning_rate": 0.0005344358645024159,
"loss": 0.6912,
"step": 9150
},
{
"epoch": 3.45,
"learning_rate": 0.0005321119860026766,
"loss": 0.6927,
"step": 9160
},
{
"epoch": 3.45,
"learning_rate": 0.0005297913380018758,
"loss": 0.6944,
"step": 9170
},
{
"epoch": 3.46,
"learning_rate": 0.0005274739365227599,
"loss": 0.6939,
"step": 9180
},
{
"epoch": 3.46,
"learning_rate": 0.0005251597975656591,
"loss": 0.6927,
"step": 9190
},
{
"epoch": 3.46,
"learning_rate": 0.0005228489371083794,
"loss": 0.6934,
"step": 9200
},
{
"epoch": 3.47,
"learning_rate": 0.0005205413711060888,
"loss": 0.6897,
"step": 9210
},
{
"epoch": 3.47,
"learning_rate": 0.0005182371154912104,
"loss": 0.6963,
"step": 9220
},
{
"epoch": 3.47,
"learning_rate": 0.0005159361861733099,
"loss": 0.6926,
"step": 9230
},
{
"epoch": 3.48,
"learning_rate": 0.0005136385990389871,
"loss": 0.6953,
"step": 9240
},
{
"epoch": 3.48,
"learning_rate": 0.0005113443699517673,
"loss": 0.6982,
"step": 9250
},
{
"epoch": 3.49,
"learning_rate": 0.0005090535147519877,
"loss": 0.6939,
"step": 9260
},
{
"epoch": 3.49,
"learning_rate": 0.0005067660492566935,
"loss": 0.6924,
"step": 9270
},
{
"epoch": 3.49,
"learning_rate": 0.0005044819892595239,
"loss": 0.6935,
"step": 9280
},
{
"epoch": 3.5,
"learning_rate": 0.0005022013505306067,
"loss": 0.6965,
"step": 9290
},
{
"epoch": 3.5,
"learning_rate": 0.0004999241488164458,
"loss": 0.6936,
"step": 9300
},
{
"epoch": 3.5,
"learning_rate": 0.0004976503998398168,
"loss": 0.6938,
"step": 9310
},
{
"epoch": 3.51,
"learning_rate": 0.0004953801192996543,
"loss": 0.6938,
"step": 9320
},
{
"epoch": 3.51,
"learning_rate": 0.0004931133228709467,
"loss": 0.6909,
"step": 9330
},
{
"epoch": 3.52,
"learning_rate": 0.000490850026204625,
"loss": 0.6926,
"step": 9340
},
{
"epoch": 3.52,
"learning_rate": 0.0004885902449274581,
"loss": 0.693,
"step": 9350
},
{
"epoch": 3.52,
"learning_rate": 0.0004863339946419416,
"loss": 0.693,
"step": 9360
},
{
"epoch": 3.53,
"learning_rate": 0.0004840812909261928,
"loss": 0.6952,
"step": 9370
},
{
"epoch": 3.53,
"learning_rate": 0.0004818321493338418,
"loss": 0.6877,
"step": 9380
},
{
"epoch": 3.53,
"learning_rate": 0.00047958658539392317,
"loss": 0.7037,
"step": 9390
},
{
"epoch": 3.54,
"learning_rate": 0.0004773446146107716,
"loss": 0.6961,
"step": 9400
},
{
"epoch": 3.54,
"learning_rate": 0.00047510625246391126,
"loss": 0.6933,
"step": 9410
},
{
"epoch": 3.55,
"learning_rate": 0.0004728715144079524,
"loss": 0.6932,
"step": 9420
},
{
"epoch": 3.55,
"learning_rate": 0.0004706404158724815,
"loss": 0.6942,
"step": 9430
},
{
"epoch": 3.55,
"learning_rate": 0.00046841297226195777,
"loss": 0.6925,
"step": 9440
},
{
"epoch": 3.56,
"learning_rate": 0.0004661891989556035,
"loss": 0.6919,
"step": 9450
},
{
"epoch": 3.56,
"learning_rate": 0.0004639691113073017,
"loss": 0.6938,
"step": 9460
},
{
"epoch": 3.56,
"learning_rate": 0.00046175272464548566,
"loss": 0.6914,
"step": 9470
},
{
"epoch": 3.57,
"learning_rate": 0.0004595400542730379,
"loss": 0.6967,
"step": 9480
},
{
"epoch": 3.57,
"learning_rate": 0.00045733111546718,
"loss": 0.6926,
"step": 9490
},
{
"epoch": 3.58,
"learning_rate": 0.0004551259234793704,
"loss": 0.6952,
"step": 9500
},
{
"epoch": 3.58,
"learning_rate": 0.00045292449353519817,
"loss": 0.693,
"step": 9510
},
{
"epoch": 3.58,
"learning_rate": 0.00045072684083427616,
"loss": 0.694,
"step": 9520
},
{
"epoch": 3.59,
"learning_rate": 0.00044853298055013937,
"loss": 0.6932,
"step": 9530
},
{
"epoch": 3.59,
"learning_rate": 0.0004463429278301364,
"loss": 0.6926,
"step": 9540
},
{
"epoch": 3.59,
"learning_rate": 0.00044415669779532876,
"loss": 0.6922,
"step": 9550
},
{
"epoch": 3.6,
"learning_rate": 0.00044197430554038263,
"loss": 0.695,
"step": 9560
},
{
"epoch": 3.6,
"learning_rate": 0.0004397957661334684,
"loss": 0.694,
"step": 9570
},
{
"epoch": 3.61,
"learning_rate": 0.0004376210946161529,
"loss": 0.6929,
"step": 9580
},
{
"epoch": 3.61,
"learning_rate": 0.0004354503060032988,
"loss": 0.694,
"step": 9590
},
{
"epoch": 3.61,
"learning_rate": 0.00043328341528295854,
"loss": 0.6932,
"step": 9600
},
{
"epoch": 3.62,
"learning_rate": 0.000431120437416273,
"loss": 0.693,
"step": 9610
},
{
"epoch": 3.62,
"learning_rate": 0.0004289613873373657,
"loss": 0.6935,
"step": 9620
},
{
"epoch": 3.62,
"learning_rate": 0.00042680627995324196,
"loss": 0.6929,
"step": 9630
},
{
"epoch": 3.63,
"learning_rate": 0.0004246551301436851,
"loss": 0.6937,
"step": 9640
},
{
"epoch": 3.63,
"learning_rate": 0.0004225079527611526,
"loss": 0.6933,
"step": 9650
},
{
"epoch": 3.64,
"learning_rate": 0.0004203647626306761,
"loss": 0.6905,
"step": 9660
},
{
"epoch": 3.64,
"learning_rate": 0.0004182255745497562,
"loss": 0.6992,
"step": 9670
},
{
"epoch": 3.64,
"learning_rate": 0.00041609040328826274,
"loss": 0.6947,
"step": 9680
},
{
"epoch": 3.65,
"learning_rate": 0.00041395926358833067,
"loss": 0.6949,
"step": 9690
},
{
"epoch": 3.65,
"learning_rate": 0.00041183217016426034,
"loss": 0.6935,
"step": 9700
},
{
"epoch": 3.65,
"learning_rate": 0.00040970913770241427,
"loss": 0.6932,
"step": 9710
},
{
"epoch": 3.66,
"learning_rate": 0.000407590180861116,
"loss": 0.6939,
"step": 9720
},
{
"epoch": 3.66,
"learning_rate": 0.0004054753142705504,
"loss": 0.6935,
"step": 9730
},
{
"epoch": 3.67,
"learning_rate": 0.0004033645525326601,
"loss": 0.6925,
"step": 9740
},
{
"epoch": 3.67,
"learning_rate": 0.00040125791022104776,
"loss": 0.6932,
"step": 9750
},
{
"epoch": 3.67,
"learning_rate": 0.00039915540188087173,
"loss": 0.692,
"step": 9760
},
{
"epoch": 3.68,
"learning_rate": 0.0003970570420287496,
"loss": 0.6951,
"step": 9770
},
{
"epoch": 3.68,
"learning_rate": 0.00039496284515265446,
"loss": 0.6932,
"step": 9780
},
{
"epoch": 3.68,
"learning_rate": 0.0003928728257118178,
"loss": 0.6932,
"step": 9790
},
{
"epoch": 3.69,
"learning_rate": 0.0003907869981366269,
"loss": 0.6936,
"step": 9800
},
{
"epoch": 3.69,
"learning_rate": 0.000388705376828528,
"loss": 0.6931,
"step": 9810
},
{
"epoch": 3.7,
"learning_rate": 0.00038662797615992417,
"loss": 0.6932,
"step": 9820
},
{
"epoch": 3.7,
"learning_rate": 0.00038455481047407826,
"loss": 0.6947,
"step": 9830
},
{
"epoch": 3.7,
"learning_rate": 0.00038248589408501334,
"loss": 0.692,
"step": 9840
},
{
"epoch": 3.71,
"learning_rate": 0.00038042124127741193,
"loss": 0.6947,
"step": 9850
},
{
"epoch": 3.71,
"learning_rate": 0.00037836086630652077,
"loss": 0.6932,
"step": 9860
},
{
"epoch": 3.71,
"learning_rate": 0.00037630478339804906,
"loss": 0.694,
"step": 9870
},
{
"epoch": 3.72,
"learning_rate": 0.0003742530067480733,
"loss": 0.6935,
"step": 9880
},
{
"epoch": 3.72,
"learning_rate": 0.000372205550522936,
"loss": 0.693,
"step": 9890
},
{
"epoch": 3.73,
"learning_rate": 0.0003701624288591514,
"loss": 0.6929,
"step": 9900
},
{
"epoch": 3.73,
"learning_rate": 0.0003681236558633044,
"loss": 0.6931,
"step": 9910
},
{
"epoch": 3.73,
"learning_rate": 0.00036608924561195634,
"loss": 0.6936,
"step": 9920
},
{
"epoch": 3.74,
"learning_rate": 0.00036405921215154494,
"loss": 0.6939,
"step": 9930
},
{
"epoch": 3.74,
"learning_rate": 0.00036203356949829005,
"loss": 0.6926,
"step": 9940
},
{
"epoch": 3.74,
"learning_rate": 0.000360012331638094,
"loss": 0.6931,
"step": 9950
},
{
"epoch": 3.75,
"learning_rate": 0.00035799551252644777,
"loss": 0.6933,
"step": 9960
},
{
"epoch": 3.75,
"learning_rate": 0.0003559831260883334,
"loss": 0.6934,
"step": 9970
},
{
"epoch": 3.76,
"learning_rate": 0.0003539751862181267,
"loss": 0.6939,
"step": 9980
},
{
"epoch": 3.76,
"learning_rate": 0.00035197170677950387,
"loss": 0.6931,
"step": 9990
},
{
"epoch": 3.76,
"learning_rate": 0.0003499727016053431,
"loss": 0.6933,
"step": 10000
},
{
"epoch": 3.77,
"learning_rate": 0.0003479781844976316,
"loss": 0.6932,
"step": 10010
},
{
"epoch": 3.77,
"learning_rate": 0.0003459881692273678,
"loss": 0.6917,
"step": 10020
},
{
"epoch": 3.77,
"learning_rate": 0.0003440026695344688,
"loss": 0.6925,
"step": 10030
},
{
"epoch": 3.78,
"learning_rate": 0.0003420216991276729,
"loss": 0.6926,
"step": 10040
},
{
"epoch": 3.78,
"learning_rate": 0.00034004527168444754,
"loss": 0.6949,
"step": 10050
},
{
"epoch": 3.79,
"learning_rate": 0.00033807340085089256,
"loss": 0.6928,
"step": 10060
},
{
"epoch": 3.79,
"learning_rate": 0.0003361061002416479,
"loss": 0.6933,
"step": 10070
},
{
"epoch": 3.79,
"learning_rate": 0.00033414338343979756,
"loss": 0.6946,
"step": 10080
},
{
"epoch": 3.8,
"learning_rate": 0.0003321852639967783,
"loss": 0.6939,
"step": 10090
},
{
"epoch": 3.8,
"learning_rate": 0.0003302317554322841,
"loss": 0.6933,
"step": 10100
},
{
"epoch": 3.81,
"learning_rate": 0.00032828287123417324,
"loss": 0.6928,
"step": 10110
},
{
"epoch": 3.81,
"learning_rate": 0.00032633862485837594,
"loss": 0.6926,
"step": 10120
},
{
"epoch": 3.81,
"learning_rate": 0.0003243990297288002,
"loss": 0.6938,
"step": 10130
},
{
"epoch": 3.82,
"learning_rate": 0.00032246409923724083,
"loss": 0.6925,
"step": 10140
},
{
"epoch": 3.82,
"learning_rate": 0.00032053384674328477,
"loss": 0.6931,
"step": 10150
},
{
"epoch": 3.82,
"learning_rate": 0.0003186082855742213,
"loss": 0.6925,
"step": 10160
},
{
"epoch": 3.83,
"learning_rate": 0.00031668742902494763,
"loss": 0.6926,
"step": 10170
},
{
"epoch": 3.83,
"learning_rate": 0.00031477129035787944,
"loss": 0.6923,
"step": 10180
},
{
"epoch": 3.84,
"learning_rate": 0.0003128598828028568,
"loss": 0.6927,
"step": 10190
},
{
"epoch": 3.84,
"learning_rate": 0.0003109532195570556,
"loss": 0.6913,
"step": 10200
},
{
"epoch": 3.84,
"learning_rate": 0.0003090513137848936,
"loss": 0.6948,
"step": 10210
},
{
"epoch": 3.85,
"learning_rate": 0.0003071541786179418,
"loss": 0.6943,
"step": 10220
},
{
"epoch": 3.85,
"learning_rate": 0.0003052618271548332,
"loss": 0.6923,
"step": 10230
},
{
"epoch": 3.85,
"learning_rate": 0.0003033742724611709,
"loss": 0.6922,
"step": 10240
},
{
"epoch": 3.86,
"learning_rate": 0.0003014915275694401,
"loss": 0.6886,
"step": 10250
},
{
"epoch": 3.86,
"learning_rate": 0.00029961360547891636,
"loss": 0.6932,
"step": 10260
},
{
"epoch": 3.87,
"learning_rate": 0.0002977405191555773,
"loss": 0.6934,
"step": 10270
},
{
"epoch": 3.87,
"learning_rate": 0.0002958722815320114,
"loss": 0.6911,
"step": 10280
},
{
"epoch": 3.87,
"learning_rate": 0.00029400890550733064,
"loss": 0.6908,
"step": 10290
},
{
"epoch": 3.88,
"learning_rate": 0.0002921504039470795,
"loss": 0.6829,
"step": 10300
},
{
"epoch": 3.88,
"learning_rate": 0.0002902967896831481,
"loss": 0.697,
"step": 10310
},
{
"epoch": 3.88,
"learning_rate": 0.00028844807551368137,
"loss": 0.7049,
"step": 10320
},
{
"epoch": 3.89,
"learning_rate": 0.0002866042742029931,
"loss": 0.6985,
"step": 10330
},
{
"epoch": 3.89,
"learning_rate": 0.00028476539848147544,
"loss": 0.6987,
"step": 10340
},
{
"epoch": 3.9,
"learning_rate": 0.00028293146104551307,
"loss": 0.6929,
"step": 10350
},
{
"epoch": 3.9,
"learning_rate": 0.00028110247455739466,
"loss": 0.6934,
"step": 10360
},
{
"epoch": 3.9,
"learning_rate": 0.00027927845164522447,
"loss": 0.6924,
"step": 10370
},
{
"epoch": 3.91,
"learning_rate": 0.00027745940490283725,
"loss": 0.6944,
"step": 10380
},
{
"epoch": 3.91,
"learning_rate": 0.0002756453468897092,
"loss": 0.6945,
"step": 10390
},
{
"epoch": 3.91,
"learning_rate": 0.00027383629013087295,
"loss": 0.693,
"step": 10400
},
{
"epoch": 3.92,
"learning_rate": 0.0002720322471168296,
"loss": 0.6932,
"step": 10410
},
{
"epoch": 3.92,
"learning_rate": 0.00027023323030346416,
"loss": 0.6931,
"step": 10420
},
{
"epoch": 3.93,
"learning_rate": 0.00026843925211195743,
"loss": 0.6932,
"step": 10430
},
{
"epoch": 3.93,
"learning_rate": 0.00026665032492870255,
"loss": 0.6927,
"step": 10440
},
{
"epoch": 3.93,
"learning_rate": 0.00026486646110521737,
"loss": 0.6932,
"step": 10450
},
{
"epoch": 3.94,
"learning_rate": 0.00026308767295806114,
"loss": 0.6933,
"step": 10460
},
{
"epoch": 3.94,
"learning_rate": 0.0002613139727687472,
"loss": 0.6925,
"step": 10470
},
{
"epoch": 3.94,
"learning_rate": 0.0002595453727836606,
"loss": 0.6925,
"step": 10480
},
{
"epoch": 3.95,
"learning_rate": 0.0002577818852139723,
"loss": 0.6928,
"step": 10490
},
{
"epoch": 3.95,
"learning_rate": 0.0002560235222355539,
"loss": 0.6917,
"step": 10500
},
{
"epoch": 3.96,
"learning_rate": 0.00025427029598889606,
"loss": 0.6929,
"step": 10510
},
{
"epoch": 3.96,
"learning_rate": 0.0002525222185790219,
"loss": 0.6959,
"step": 10520
},
{
"epoch": 3.96,
"learning_rate": 0.000250779302075406,
"loss": 0.6946,
"step": 10530
},
{
"epoch": 3.97,
"learning_rate": 0.0002490415585118887,
"loss": 0.6934,
"step": 10540
},
{
"epoch": 3.97,
"learning_rate": 0.0002473089998865953,
"loss": 0.6931,
"step": 10550
},
{
"epoch": 3.97,
"learning_rate": 0.00024558163816185076,
"loss": 0.6948,
"step": 10560
},
{
"epoch": 3.98,
"learning_rate": 0.00024385948526409984,
"loss": 0.6935,
"step": 10570
},
{
"epoch": 3.98,
"learning_rate": 0.00024214255308382195,
"loss": 0.6922,
"step": 10580
},
{
"epoch": 3.99,
"learning_rate": 0.0002404308534754517,
"loss": 0.6937,
"step": 10590
},
{
"epoch": 3.99,
"learning_rate": 0.00023872439825729463,
"loss": 0.6922,
"step": 10600
},
{
"epoch": 3.99,
"learning_rate": 0.00023702319921144798,
"loss": 0.694,
"step": 10610
},
{
"epoch": 4.0,
"learning_rate": 0.00023532726808371673,
"loss": 0.6939,
"step": 10620
},
{
"epoch": 4.0,
"eval_accuracy": 0.496,
"eval_loss": 0.6932018399238586,
"eval_runtime": 38.6192,
"eval_samples_per_second": 97.102,
"eval_steps_per_second": 12.144,
"step": 10628
},
{
"epoch": 4.0,
"learning_rate": 0.00023363661658353508,
"loss": 0.6935,
"step": 10630
},
{
"epoch": 4.0,
"learning_rate": 0.00023195125638388425,
"loss": 0.6942,
"step": 10640
},
{
"epoch": 4.01,
"learning_rate": 0.00023027119912121107,
"loss": 0.6928,
"step": 10650
},
{
"epoch": 4.01,
"learning_rate": 0.0002285964563953501,
"loss": 0.6944,
"step": 10660
},
{
"epoch": 4.02,
"learning_rate": 0.00022692703976944062,
"loss": 0.694,
"step": 10670
},
{
"epoch": 4.02,
"learning_rate": 0.00022526296076984953,
"loss": 0.6931,
"step": 10680
},
{
"epoch": 4.02,
"learning_rate": 0.00022360423088608938,
"loss": 0.6924,
"step": 10690
},
{
"epoch": 4.03,
"learning_rate": 0.00022195086157074118,
"loss": 0.6938,
"step": 10700
},
{
"epoch": 4.03,
"learning_rate": 0.00022030286423937307,
"loss": 0.6934,
"step": 10710
},
{
"epoch": 4.03,
"learning_rate": 0.0002186602502704641,
"loss": 0.6929,
"step": 10720
},
{
"epoch": 4.04,
"learning_rate": 0.00021702303100532285,
"loss": 0.6926,
"step": 10730
},
{
"epoch": 4.04,
"learning_rate": 0.00021539121774801195,
"loss": 0.6941,
"step": 10740
},
{
"epoch": 4.05,
"learning_rate": 0.00021376482176526735,
"loss": 0.6938,
"step": 10750
},
{
"epoch": 4.05,
"learning_rate": 0.0002121438542864228,
"loss": 0.6926,
"step": 10760
},
{
"epoch": 4.05,
"learning_rate": 0.0002105283265033311,
"loss": 0.6927,
"step": 10770
},
{
"epoch": 4.06,
"learning_rate": 0.0002089182495702866,
"loss": 0.6948,
"step": 10780
},
{
"epoch": 4.06,
"learning_rate": 0.00020731363460394926,
"loss": 0.6931,
"step": 10790
},
{
"epoch": 4.06,
"learning_rate": 0.0002057144926832667,
"loss": 0.6926,
"step": 10800
},
{
"epoch": 4.07,
"learning_rate": 0.00020412083484939902,
"loss": 0.6941,
"step": 10810
},
{
"epoch": 4.07,
"learning_rate": 0.00020253267210564098,
"loss": 0.6938,
"step": 10820
},
{
"epoch": 4.08,
"learning_rate": 0.0002009500154173477,
"loss": 0.694,
"step": 10830
},
{
"epoch": 4.08,
"learning_rate": 0.00019937287571185748,
"loss": 0.6936,
"step": 10840
},
{
"epoch": 4.08,
"learning_rate": 0.00019780126387841768,
"loss": 0.6936,
"step": 10850
},
{
"epoch": 4.09,
"learning_rate": 0.00019623519076810792,
"loss": 0.6926,
"step": 10860
},
{
"epoch": 4.09,
"learning_rate": 0.00019467466719376726,
"loss": 0.6932,
"step": 10870
},
{
"epoch": 4.09,
"learning_rate": 0.0001931197039299174,
"loss": 0.6938,
"step": 10880
},
{
"epoch": 4.1,
"learning_rate": 0.0001915703117126897,
"loss": 0.6933,
"step": 10890
},
{
"epoch": 4.1,
"learning_rate": 0.0001900265012397514,
"loss": 0.6933,
"step": 10900
},
{
"epoch": 4.11,
"learning_rate": 0.0001884882831702289,
"loss": 0.6935,
"step": 10910
},
{
"epoch": 4.11,
"learning_rate": 0.0001869556681246386,
"loss": 0.6934,
"step": 10920
},
{
"epoch": 4.11,
"learning_rate": 0.0001854286666848096,
"loss": 0.6932,
"step": 10930
},
{
"epoch": 4.12,
"learning_rate": 0.0001839072893938133,
"loss": 0.6929,
"step": 10940
},
{
"epoch": 4.12,
"learning_rate": 0.00018239154675588854,
"loss": 0.6929,
"step": 10950
},
{
"epoch": 4.12,
"learning_rate": 0.00018088144923637117,
"loss": 0.6937,
"step": 10960
},
{
"epoch": 4.13,
"learning_rate": 0.00017937700726162033,
"loss": 0.6933,
"step": 10970
},
{
"epoch": 4.13,
"learning_rate": 0.0001778782312189463,
"loss": 0.6929,
"step": 10980
},
{
"epoch": 4.14,
"learning_rate": 0.00017638513145654033,
"loss": 0.6933,
"step": 10990
},
{
"epoch": 4.14,
"learning_rate": 0.0001748977182834013,
"loss": 0.6929,
"step": 11000
},
{
"epoch": 4.14,
"learning_rate": 0.00017341600196926632,
"loss": 0.6926,
"step": 11010
},
{
"epoch": 4.15,
"learning_rate": 0.00017193999274453775,
"loss": 0.6929,
"step": 11020
},
{
"epoch": 4.15,
"learning_rate": 0.0001704697008002153,
"loss": 0.693,
"step": 11030
},
{
"epoch": 4.16,
"learning_rate": 0.00016900513628782254,
"loss": 0.6932,
"step": 11040
},
{
"epoch": 4.16,
"learning_rate": 0.0001675463093193399,
"loss": 0.6937,
"step": 11050
},
{
"epoch": 4.16,
"learning_rate": 0.00016609322996713195,
"loss": 0.6928,
"step": 11060
},
{
"epoch": 4.17,
"learning_rate": 0.0001646459082638806,
"loss": 0.6921,
"step": 11070
},
{
"epoch": 4.17,
"learning_rate": 0.00016320435420251323,
"loss": 0.692,
"step": 11080
},
{
"epoch": 4.17,
"learning_rate": 0.00016176857773613575,
"loss": 0.6936,
"step": 11090
},
{
"epoch": 4.18,
"learning_rate": 0.00016033858877796326,
"loss": 0.6964,
"step": 11100
},
{
"epoch": 4.18,
"learning_rate": 0.00015891439720125056,
"loss": 0.6933,
"step": 11110
},
{
"epoch": 4.19,
"learning_rate": 0.00015749601283922567,
"loss": 0.6933,
"step": 11120
},
{
"epoch": 4.19,
"learning_rate": 0.0001560834454850205,
"loss": 0.693,
"step": 11130
},
{
"epoch": 4.19,
"learning_rate": 0.00015467670489160447,
"loss": 0.6941,
"step": 11140
},
{
"epoch": 4.2,
"learning_rate": 0.0001532758007717159,
"loss": 0.6933,
"step": 11150
},
{
"epoch": 4.2,
"learning_rate": 0.0001518807427977963,
"loss": 0.6932,
"step": 11160
},
{
"epoch": 4.2,
"learning_rate": 0.00015049154060192194,
"loss": 0.6934,
"step": 11170
},
{
"epoch": 4.21,
"learning_rate": 0.00014910820377573954,
"loss": 0.6937,
"step": 11180
},
{
"epoch": 4.21,
"learning_rate": 0.00014773074187039758,
"loss": 0.6927,
"step": 11190
},
{
"epoch": 4.22,
"learning_rate": 0.00014635916439648255,
"loss": 0.6937,
"step": 11200
},
{
"epoch": 4.22,
"learning_rate": 0.00014499348082395137,
"loss": 0.6934,
"step": 11210
},
{
"epoch": 4.22,
"learning_rate": 0.0001436337005820678,
"loss": 0.6935,
"step": 11220
},
{
"epoch": 4.23,
"learning_rate": 0.00014227983305933634,
"loss": 0.6931,
"step": 11230
},
{
"epoch": 4.23,
"learning_rate": 0.00014093188760343678,
"loss": 0.6927,
"step": 11240
},
{
"epoch": 4.23,
"learning_rate": 0.0001395898735211616,
"loss": 0.6932,
"step": 11250
},
{
"epoch": 4.24,
"learning_rate": 0.00013825380007834932,
"loss": 0.6928,
"step": 11260
},
{
"epoch": 4.24,
"learning_rate": 0.00013692367649982296,
"loss": 0.6939,
"step": 11270
},
{
"epoch": 4.25,
"learning_rate": 0.00013559951196932384,
"loss": 0.6936,
"step": 11280
},
{
"epoch": 4.25,
"learning_rate": 0.0001342813156294509,
"loss": 0.6933,
"step": 11290
},
{
"epoch": 4.25,
"learning_rate": 0.00013296909658159473,
"loss": 0.6932,
"step": 11300
},
{
"epoch": 4.26,
"learning_rate": 0.00013166286388587733,
"loss": 0.6935,
"step": 11310
},
{
"epoch": 4.26,
"learning_rate": 0.00013036262656108743,
"loss": 0.6933,
"step": 11320
},
{
"epoch": 4.26,
"learning_rate": 0.00012906839358461987,
"loss": 0.6929,
"step": 11330
},
{
"epoch": 4.27,
"learning_rate": 0.00012778017389241224,
"loss": 0.6936,
"step": 11340
},
{
"epoch": 4.27,
"learning_rate": 0.0001264979763788845,
"loss": 0.6932,
"step": 11350
},
{
"epoch": 4.28,
"learning_rate": 0.0001252218098968766,
"loss": 0.6925,
"step": 11360
},
{
"epoch": 4.28,
"learning_rate": 0.00012395168325758744,
"loss": 0.6938,
"step": 11370
},
{
"epoch": 4.28,
"learning_rate": 0.0001226876052305148,
"loss": 0.6931,
"step": 11380
},
{
"epoch": 4.29,
"learning_rate": 0.0001214295845433937,
"loss": 0.6933,
"step": 11390
},
{
"epoch": 4.29,
"learning_rate": 0.00012017762988213721,
"loss": 0.693,
"step": 11400
},
{
"epoch": 4.29,
"learning_rate": 0.00011893174989077537,
"loss": 0.6937,
"step": 11410
},
{
"epoch": 4.3,
"learning_rate": 0.0001176919531713968,
"loss": 0.6931,
"step": 11420
},
{
"epoch": 4.3,
"learning_rate": 0.0001164582482840878,
"loss": 0.6933,
"step": 11430
},
{
"epoch": 4.31,
"learning_rate": 0.00011523064374687497,
"loss": 0.6936,
"step": 11440
},
{
"epoch": 4.31,
"learning_rate": 0.00011400914803566464,
"loss": 0.6926,
"step": 11450
},
{
"epoch": 4.31,
"learning_rate": 0.00011279376958418597,
"loss": 0.693,
"step": 11460
},
{
"epoch": 4.32,
"learning_rate": 0.00011158451678393111,
"loss": 0.6928,
"step": 11470
},
{
"epoch": 4.32,
"learning_rate": 0.00011038139798409862,
"loss": 0.6927,
"step": 11480
},
{
"epoch": 4.32,
"learning_rate": 0.00010918442149153551,
"loss": 0.6937,
"step": 11490
},
{
"epoch": 4.33,
"learning_rate": 0.000107993595570679,
"loss": 0.6938,
"step": 11500
},
{
"epoch": 4.33,
"learning_rate": 0.00010680892844350076,
"loss": 0.6934,
"step": 11510
},
{
"epoch": 4.34,
"learning_rate": 0.00010563042828944902,
"loss": 0.6927,
"step": 11520
},
{
"epoch": 4.34,
"learning_rate": 0.00010445810324539306,
"loss": 0.6934,
"step": 11530
},
{
"epoch": 4.34,
"learning_rate": 0.00010329196140556608,
"loss": 0.6936,
"step": 11540
},
{
"epoch": 4.35,
"learning_rate": 0.00010213201082151036,
"loss": 0.6936,
"step": 11550
},
{
"epoch": 4.35,
"learning_rate": 0.00010097825950202033,
"loss": 0.6937,
"step": 11560
},
{
"epoch": 4.35,
"learning_rate": 9.983071541308897e-05,
"loss": 0.6929,
"step": 11570
},
{
"epoch": 4.36,
"learning_rate": 9.868938647785097e-05,
"loss": 0.6934,
"step": 11580
},
{
"epoch": 4.36,
"learning_rate": 9.755428057652982e-05,
"loss": 0.6932,
"step": 11590
},
{
"epoch": 4.37,
"learning_rate": 9.642540554638169e-05,
"loss": 0.6932,
"step": 11600
},
{
"epoch": 4.37,
"learning_rate": 9.530276918164271e-05,
"loss": 0.6933,
"step": 11610
},
{
"epoch": 4.37,
"learning_rate": 9.418637923347462e-05,
"loss": 0.6934,
"step": 11620
},
{
"epoch": 4.38,
"learning_rate": 9.307624340991072e-05,
"loss": 0.6935,
"step": 11630
},
{
"epoch": 4.38,
"learning_rate": 9.197236937580378e-05,
"loss": 0.6932,
"step": 11640
},
{
"epoch": 4.38,
"learning_rate": 9.08747647527718e-05,
"loss": 0.6933,
"step": 11650
},
{
"epoch": 4.39,
"learning_rate": 8.978343711914694e-05,
"loss": 0.6933,
"step": 11660
},
{
"epoch": 4.39,
"learning_rate": 8.869839400992153e-05,
"loss": 0.6931,
"step": 11670
},
{
"epoch": 4.4,
"learning_rate": 8.761964291669756e-05,
"loss": 0.6926,
"step": 11680
},
{
"epoch": 4.4,
"learning_rate": 8.654719128763333e-05,
"loss": 0.6933,
"step": 11690
},
{
"epoch": 4.4,
"learning_rate": 8.548104652739419e-05,
"loss": 0.6935,
"step": 11700
},
{
"epoch": 4.41,
"learning_rate": 8.442121599709896e-05,
"loss": 0.6932,
"step": 11710
},
{
"epoch": 4.41,
"learning_rate": 8.336770701427132e-05,
"loss": 0.6933,
"step": 11720
},
{
"epoch": 4.41,
"learning_rate": 8.232052685278735e-05,
"loss": 0.6932,
"step": 11730
},
{
"epoch": 4.42,
"learning_rate": 8.127968274282683e-05,
"loss": 0.693,
"step": 11740
},
{
"epoch": 4.42,
"learning_rate": 8.024518187082297e-05,
"loss": 0.693,
"step": 11750
},
{
"epoch": 4.43,
"learning_rate": 7.921703137941172e-05,
"loss": 0.6929,
"step": 11760
},
{
"epoch": 4.43,
"learning_rate": 7.819523836738406e-05,
"loss": 0.6927,
"step": 11770
},
{
"epoch": 4.43,
"learning_rate": 7.717980988963569e-05,
"loss": 0.6929,
"step": 11780
},
{
"epoch": 4.44,
"learning_rate": 7.61707529571194e-05,
"loss": 0.6929,
"step": 11790
},
{
"epoch": 4.44,
"learning_rate": 7.516807453679531e-05,
"loss": 0.6932,
"step": 11800
},
{
"epoch": 4.44,
"learning_rate": 7.417178155158444e-05,
"loss": 0.6931,
"step": 11810
},
{
"epoch": 4.45,
"learning_rate": 7.318188088031929e-05,
"loss": 0.6932,
"step": 11820
},
{
"epoch": 4.45,
"learning_rate": 7.219837935769768e-05,
"loss": 0.6933,
"step": 11830
},
{
"epoch": 4.46,
"learning_rate": 7.122128377423454e-05,
"loss": 0.6936,
"step": 11840
},
{
"epoch": 4.46,
"learning_rate": 7.025060087621571e-05,
"loss": 0.6928,
"step": 11850
},
{
"epoch": 4.46,
"learning_rate": 6.928633736565093e-05,
"loss": 0.6932,
"step": 11860
},
{
"epoch": 4.47,
"learning_rate": 6.832849990022793e-05,
"loss": 0.6933,
"step": 11870
},
{
"epoch": 4.47,
"learning_rate": 6.737709509326628e-05,
"loss": 0.6931,
"step": 11880
},
{
"epoch": 4.47,
"learning_rate": 6.64321295136714e-05,
"loss": 0.6933,
"step": 11890
},
{
"epoch": 4.48,
"learning_rate": 6.549360968589002e-05,
"loss": 0.6931,
"step": 11900
},
{
"epoch": 4.48,
"learning_rate": 6.45615420898641e-05,
"loss": 0.6934,
"step": 11910
},
{
"epoch": 4.49,
"learning_rate": 6.363593316098725e-05,
"loss": 0.6932,
"step": 11920
},
{
"epoch": 4.49,
"learning_rate": 6.271678929005897e-05,
"loss": 0.6932,
"step": 11930
},
{
"epoch": 4.49,
"learning_rate": 6.180411682324194e-05,
"loss": 0.6931,
"step": 11940
},
{
"epoch": 4.5,
"learning_rate": 6.089792206201694e-05,
"loss": 0.6932,
"step": 11950
},
{
"epoch": 4.5,
"learning_rate": 5.9998211263140314e-05,
"loss": 0.6927,
"step": 11960
},
{
"epoch": 4.51,
"learning_rate": 5.9104990638599976e-05,
"loss": 0.6933,
"step": 11970
},
{
"epoch": 4.51,
"learning_rate": 5.8218266355573255e-05,
"loss": 0.6935,
"step": 11980
},
{
"epoch": 4.51,
"learning_rate": 5.7338044536383694e-05,
"loss": 0.6932,
"step": 11990
},
{
"epoch": 4.52,
"learning_rate": 5.646433125845929e-05,
"loss": 0.6932,
"step": 12000
},
{
"epoch": 4.52,
"learning_rate": 5.559713255429011e-05,
"loss": 0.6924,
"step": 12010
},
{
"epoch": 4.52,
"learning_rate": 5.4736454411386746e-05,
"loss": 0.693,
"step": 12020
},
{
"epoch": 4.53,
"learning_rate": 5.388230277223938e-05,
"loss": 0.6936,
"step": 12030
},
{
"epoch": 4.53,
"learning_rate": 5.303468353427587e-05,
"loss": 0.6935,
"step": 12040
},
{
"epoch": 4.54,
"learning_rate": 5.2193602549822307e-05,
"loss": 0.6932,
"step": 12050
},
{
"epoch": 4.54,
"learning_rate": 5.1359065626061186e-05,
"loss": 0.6932,
"step": 12060
},
{
"epoch": 4.54,
"learning_rate": 5.05310785249925e-05,
"loss": 0.6933,
"step": 12070
},
{
"epoch": 4.55,
"learning_rate": 4.9709646963393084e-05,
"loss": 0.6927,
"step": 12080
},
{
"epoch": 4.55,
"learning_rate": 4.889477661277797e-05,
"loss": 0.693,
"step": 12090
},
{
"epoch": 4.55,
"learning_rate": 4.808647309936043e-05,
"loss": 0.6934,
"step": 12100
},
{
"epoch": 4.56,
"learning_rate": 4.7284742004013445e-05,
"loss": 0.6932,
"step": 12110
},
{
"epoch": 4.56,
"learning_rate": 4.6489588862231534e-05,
"loss": 0.6931,
"step": 12120
},
{
"epoch": 4.57,
"learning_rate": 4.5701019164091865e-05,
"loss": 0.6929,
"step": 12130
},
{
"epoch": 4.57,
"learning_rate": 4.491903835421707e-05,
"loss": 0.6937,
"step": 12140
},
{
"epoch": 4.57,
"learning_rate": 4.4143651831736964e-05,
"loss": 0.6927,
"step": 12150
},
{
"epoch": 4.58,
"learning_rate": 4.337486495025167e-05,
"loss": 0.6936,
"step": 12160
},
{
"epoch": 4.58,
"learning_rate": 4.261268301779453e-05,
"loss": 0.6927,
"step": 12170
},
{
"epoch": 4.58,
"learning_rate": 4.185711129679581e-05,
"loss": 0.6934,
"step": 12180
},
{
"epoch": 4.59,
"learning_rate": 4.110815500404541e-05,
"loss": 0.6929,
"step": 12190
},
{
"epoch": 4.59,
"learning_rate": 4.0365819310658194e-05,
"loss": 0.6929,
"step": 12200
},
{
"epoch": 4.6,
"learning_rate": 3.963010934203692e-05,
"loss": 0.6933,
"step": 12210
},
{
"epoch": 4.6,
"learning_rate": 3.890103017783808e-05,
"loss": 0.6936,
"step": 12220
},
{
"epoch": 4.6,
"learning_rate": 3.817858685193609e-05,
"loss": 0.6937,
"step": 12230
},
{
"epoch": 4.61,
"learning_rate": 3.7462784352388365e-05,
"loss": 0.6929,
"step": 12240
},
{
"epoch": 4.61,
"learning_rate": 3.6753627621401666e-05,
"loss": 0.6921,
"step": 12250
},
{
"epoch": 4.61,
"learning_rate": 3.605112155529722e-05,
"loss": 0.6934,
"step": 12260
},
{
"epoch": 4.62,
"learning_rate": 3.535527100447733e-05,
"loss": 0.6936,
"step": 12270
},
{
"epoch": 4.62,
"learning_rate": 3.466608077339184e-05,
"loss": 0.6932,
"step": 12280
},
{
"epoch": 4.63,
"learning_rate": 3.3983555620504815e-05,
"loss": 0.6927,
"step": 12290
},
{
"epoch": 4.63,
"learning_rate": 3.330770025826169e-05,
"loss": 0.6936,
"step": 12300
},
{
"epoch": 4.63,
"learning_rate": 3.263851935305717e-05,
"loss": 0.6928,
"step": 12310
},
{
"epoch": 4.64,
"learning_rate": 3.1976017525202074e-05,
"loss": 0.6927,
"step": 12320
},
{
"epoch": 4.64,
"learning_rate": 3.132019934889252e-05,
"loss": 0.6934,
"step": 12330
},
{
"epoch": 4.64,
"learning_rate": 3.067106935217767e-05,
"loss": 0.6936,
"step": 12340
},
{
"epoch": 4.65,
"learning_rate": 3.0028632016928404e-05,
"loss": 0.6934,
"step": 12350
},
{
"epoch": 4.65,
"learning_rate": 2.9392891778807218e-05,
"loss": 0.6932,
"step": 12360
},
{
"epoch": 4.66,
"learning_rate": 2.8763853027236276e-05,
"loss": 0.6933,
"step": 12370
},
{
"epoch": 4.66,
"learning_rate": 2.814152010536819e-05,
"loss": 0.6931,
"step": 12380
},
{
"epoch": 4.66,
"learning_rate": 2.7525897310055504e-05,
"loss": 0.693,
"step": 12390
},
{
"epoch": 4.67,
"learning_rate": 2.6916988891821594e-05,
"loss": 0.6932,
"step": 12400
},
{
"epoch": 4.67,
"learning_rate": 2.631479905483014e-05,
"loss": 0.6932,
"step": 12410
},
{
"epoch": 4.67,
"learning_rate": 2.5719331956857607e-05,
"loss": 0.693,
"step": 12420
},
{
"epoch": 4.68,
"learning_rate": 2.5130591709263352e-05,
"loss": 0.6932,
"step": 12430
},
{
"epoch": 4.68,
"learning_rate": 2.454858237696189e-05,
"loss": 0.6931,
"step": 12440
},
{
"epoch": 4.69,
"learning_rate": 2.3973307978394475e-05,
"loss": 0.6928,
"step": 12450
},
{
"epoch": 4.69,
"learning_rate": 2.3404772485501657e-05,
"loss": 0.6931,
"step": 12460
},
{
"epoch": 4.69,
"learning_rate": 2.284297982369543e-05,
"loss": 0.6928,
"step": 12470
},
{
"epoch": 4.7,
"learning_rate": 2.22879338718327e-05,
"loss": 0.6934,
"step": 12480
},
{
"epoch": 4.7,
"learning_rate": 2.1739638462188293e-05,
"loss": 0.6931,
"step": 12490
},
{
"epoch": 4.7,
"learning_rate": 2.1198097380427773e-05,
"loss": 0.693,
"step": 12500
},
{
"epoch": 4.71,
"learning_rate": 2.0663314365582663e-05,
"loss": 0.6931,
"step": 12510
},
{
"epoch": 4.71,
"learning_rate": 2.013529311002349e-05,
"loss": 0.6929,
"step": 12520
},
{
"epoch": 4.72,
"learning_rate": 1.961403725943478e-05,
"loss": 0.6932,
"step": 12530
},
{
"epoch": 4.72,
"learning_rate": 1.9099550412789878e-05,
"loss": 0.6934,
"step": 12540
},
{
"epoch": 4.72,
"learning_rate": 1.8591836122326068e-05,
"loss": 0.6929,
"step": 12550
},
{
"epoch": 4.73,
"learning_rate": 1.80908978935197e-05,
"loss": 0.6928,
"step": 12560
},
{
"epoch": 4.73,
"learning_rate": 1.759673918506277e-05,
"loss": 0.6934,
"step": 12570
},
{
"epoch": 4.73,
"learning_rate": 1.7109363408838173e-05,
"loss": 0.693,
"step": 12580
},
{
"epoch": 4.74,
"learning_rate": 1.6628773929896922e-05,
"loss": 0.6934,
"step": 12590
},
{
"epoch": 4.74,
"learning_rate": 1.615497406643396e-05,
"loss": 0.6934,
"step": 12600
},
{
"epoch": 4.75,
"learning_rate": 1.5687967089766386e-05,
"loss": 0.6933,
"step": 12610
},
{
"epoch": 4.75,
"learning_rate": 1.5227756224310052e-05,
"loss": 0.693,
"step": 12620
},
{
"epoch": 4.75,
"learning_rate": 1.4774344647557558e-05,
"loss": 0.6929,
"step": 12630
},
{
"epoch": 4.76,
"learning_rate": 1.4327735490056281e-05,
"loss": 0.6935,
"step": 12640
},
{
"epoch": 4.76,
"learning_rate": 1.3887931835386947e-05,
"loss": 0.6935,
"step": 12650
},
{
"epoch": 4.76,
"learning_rate": 1.3454936720141975e-05,
"loss": 0.6935,
"step": 12660
},
{
"epoch": 4.77,
"learning_rate": 1.3028753133904836e-05,
"loss": 0.6933,
"step": 12670
},
{
"epoch": 4.77,
"learning_rate": 1.2609384019229286e-05,
"loss": 0.6933,
"step": 12680
},
{
"epoch": 4.78,
"learning_rate": 1.219683227161894e-05,
"loss": 0.6927,
"step": 12690
},
{
"epoch": 4.78,
"learning_rate": 1.1791100739507622e-05,
"loss": 0.6929,
"step": 12700
},
{
"epoch": 4.78,
"learning_rate": 1.1392192224239261e-05,
"loss": 0.6929,
"step": 12710
},
{
"epoch": 4.79,
"learning_rate": 1.1000109480048926e-05,
"loss": 0.6931,
"step": 12720
},
{
"epoch": 4.79,
"learning_rate": 1.0614855214043373e-05,
"loss": 0.6935,
"step": 12730
},
{
"epoch": 4.79,
"learning_rate": 1.0236432086182968e-05,
"loss": 0.6931,
"step": 12740
},
{
"epoch": 4.8,
"learning_rate": 9.8648427092628e-06,
"loss": 0.6933,
"step": 12750
},
{
"epoch": 4.8,
"learning_rate": 9.500089648894706e-06,
"loss": 0.6927,
"step": 12760
},
{
"epoch": 4.81,
"learning_rate": 9.142175423490052e-06,
"loss": 0.693,
"step": 12770
},
{
"epoch": 4.81,
"learning_rate": 8.791102504241422e-06,
"loss": 0.6936,
"step": 12780
},
{
"epoch": 4.81,
"learning_rate": 8.446873315106629e-06,
"loss": 0.6936,
"step": 12790
},
{
"epoch": 4.82,
"learning_rate": 8.109490232790951e-06,
"loss": 0.6932,
"step": 12800
},
{
"epoch": 4.82,
"learning_rate": 7.778955586731696e-06,
"loss": 0.6929,
"step": 12810
},
{
"epoch": 4.82,
"learning_rate": 7.455271659081331e-06,
"loss": 0.6928,
"step": 12820
},
{
"epoch": 4.83,
"learning_rate": 7.138440684692049e-06,
"loss": 0.6934,
"step": 12830
},
{
"epoch": 4.83,
"learning_rate": 6.828464851100558e-06,
"loss": 0.6931,
"step": 12840
},
{
"epoch": 4.84,
"learning_rate": 6.5253462985123135e-06,
"loss": 0.6931,
"step": 12850
},
{
"epoch": 4.84,
"learning_rate": 6.229087119787646e-06,
"loss": 0.6931,
"step": 12860
},
{
"epoch": 4.84,
"learning_rate": 5.939689360426548e-06,
"loss": 0.6929,
"step": 12870
},
{
"epoch": 4.85,
"learning_rate": 5.657155018555016e-06,
"loss": 0.693,
"step": 12880
},
{
"epoch": 4.85,
"learning_rate": 5.381486044910733e-06,
"loss": 0.693,
"step": 12890
},
{
"epoch": 4.86,
"learning_rate": 5.1126843428302985e-06,
"loss": 0.6932,
"step": 12900
},
{
"epoch": 4.86,
"learning_rate": 4.850751768235684e-06,
"loss": 0.693,
"step": 12910
},
{
"epoch": 4.86,
"learning_rate": 4.5956901296214655e-06,
"loss": 0.693,
"step": 12920
},
{
"epoch": 4.87,
"learning_rate": 4.347501188041947e-06,
"loss": 0.6929,
"step": 12930
},
{
"epoch": 4.87,
"learning_rate": 4.106186657100053e-06,
"loss": 0.6935,
"step": 12940
},
{
"epoch": 4.87,
"learning_rate": 3.871748202934344e-06,
"loss": 0.6937,
"step": 12950
},
{
"epoch": 4.88,
"learning_rate": 3.644187444208247e-06,
"loss": 0.6928,
"step": 12960
},
{
"epoch": 4.88,
"learning_rate": 3.4235059520983934e-06,
"loss": 0.693,
"step": 12970
},
{
"epoch": 4.89,
"learning_rate": 3.2097052502843004e-06,
"loss": 0.6931,
"step": 12980
},
{
"epoch": 4.89,
"learning_rate": 3.002786814937486e-06,
"loss": 0.6932,
"step": 12990
},
{
"epoch": 4.89,
"learning_rate": 2.8027520747112566e-06,
"loss": 0.6932,
"step": 13000
},
{
"epoch": 4.9,
"learning_rate": 2.609602410730938e-06,
"loss": 0.6929,
"step": 13010
},
{
"epoch": 4.9,
"learning_rate": 2.4233391565845475e-06,
"loss": 0.6929,
"step": 13020
},
{
"epoch": 4.9,
"learning_rate": 2.243963598313137e-06,
"loss": 0.6935,
"step": 13030
},
{
"epoch": 4.91,
"learning_rate": 2.071476974402353e-06,
"loss": 0.6933,
"step": 13040
},
{
"epoch": 4.91,
"learning_rate": 1.9058804757735582e-06,
"loss": 0.6935,
"step": 13050
},
{
"epoch": 4.92,
"learning_rate": 1.7471752457757228e-06,
"loss": 0.6929,
"step": 13060
},
{
"epoch": 4.92,
"learning_rate": 1.5953623801778783e-06,
"loss": 0.6932,
"step": 13070
},
{
"epoch": 4.92,
"learning_rate": 1.4504429271606778e-06,
"loss": 0.6936,
"step": 13080
},
{
"epoch": 4.93,
"learning_rate": 1.312417887310069e-06,
"loss": 0.6927,
"step": 13090
},
{
"epoch": 4.93,
"learning_rate": 1.1812882136100767e-06,
"loss": 0.6932,
"step": 13100
},
{
"epoch": 4.93,
"learning_rate": 1.05705481143592e-06,
"loss": 0.6934,
"step": 13110
},
{
"epoch": 4.94,
"learning_rate": 9.397185385482398e-07,
"loss": 0.693,
"step": 13120
},
{
"epoch": 4.94,
"learning_rate": 8.292802050867687e-07,
"loss": 0.693,
"step": 13130
},
{
"epoch": 4.95,
"learning_rate": 7.257405735650036e-07,
"loss": 0.6931,
"step": 13140
},
{
"epoch": 4.95,
"learning_rate": 6.291003588648759e-07,
"loss": 0.6937,
"step": 13150
},
{
"epoch": 4.95,
"learning_rate": 5.393602282317555e-07,
"loss": 0.6929,
"step": 13160
},
{
"epoch": 4.96,
"learning_rate": 4.565208012697886e-07,
"loss": 0.6936,
"step": 13170
},
{
"epoch": 4.96,
"learning_rate": 3.8058264993778846e-07,
"loss": 0.6931,
"step": 13180
},
{
"epoch": 4.96,
"learning_rate": 3.1154629854546203e-07,
"loss": 0.6933,
"step": 13190
},
{
"epoch": 4.97,
"learning_rate": 2.494122237490792e-07,
"loss": 0.693,
"step": 13200
},
{
"epoch": 4.97,
"learning_rate": 1.9418085454880886e-07,
"loss": 0.6931,
"step": 13210
},
{
"epoch": 4.98,
"learning_rate": 1.4585257228594272e-07,
"loss": 0.693,
"step": 13220
},
{
"epoch": 4.98,
"learning_rate": 1.044277106393432e-07,
"loss": 0.6932,
"step": 13230
},
{
"epoch": 4.98,
"learning_rate": 6.990655562411074e-08,
"loss": 0.6929,
"step": 13240
},
{
"epoch": 4.99,
"learning_rate": 4.2289345588919505e-08,
"loss": 0.6929,
"step": 13250
},
{
"epoch": 4.99,
"learning_rate": 2.1576271214907017e-08,
"loss": 0.6934,
"step": 13260
},
{
"epoch": 4.99,
"learning_rate": 7.767475514008915e-09,
"loss": 0.6932,
"step": 13270
},
{
"epoch": 5.0,
"learning_rate": 8.630538280707256e-10,
"loss": 0.6935,
"step": 13280
},
{
"epoch": 5.0,
"eval_accuracy": 0.504,
"eval_loss": 0.6931441426277161,
"eval_runtime": 38.7711,
"eval_samples_per_second": 96.722,
"eval_steps_per_second": 12.097,
"step": 13285
},
{
"epoch": 5.0,
"step": 13285,
"total_flos": 1.3244422721025072e+16,
"train_loss": 0.6908724272659411,
"train_runtime": 3467.8448,
"train_samples_per_second": 30.639,
"train_steps_per_second": 3.831
}
],
"max_steps": 13285,
"num_train_epochs": 5,
"total_flos": 1.3244422721025072e+16,
"trial_name": null,
"trial_params": null
}