bloom-1b1-random-sw / trainer_state.json
atsuki-yamaguchi's picture
Upload folder using huggingface_hub
f2072e6 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9353078721745908,
"eval_steps": 500,
"global_step": 9000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 5e-06,
"loss": 11.5375,
"step": 5
},
{
"epoch": 0.0,
"learning_rate": 1e-05,
"loss": 11.2877,
"step": 10
},
{
"epoch": 0.0,
"learning_rate": 1.5e-05,
"loss": 10.631,
"step": 15
},
{
"epoch": 0.0,
"learning_rate": 2e-05,
"loss": 9.8892,
"step": 20
},
{
"epoch": 0.0,
"learning_rate": 2.5e-05,
"loss": 9.0342,
"step": 25
},
{
"epoch": 0.0,
"learning_rate": 3e-05,
"loss": 8.2813,
"step": 30
},
{
"epoch": 0.0,
"learning_rate": 3.5e-05,
"loss": 7.8183,
"step": 35
},
{
"epoch": 0.0,
"learning_rate": 4e-05,
"loss": 7.4274,
"step": 40
},
{
"epoch": 0.0,
"learning_rate": 4.5e-05,
"loss": 7.3671,
"step": 45
},
{
"epoch": 0.01,
"learning_rate": 5e-05,
"loss": 7.2528,
"step": 50
},
{
"epoch": 0.01,
"learning_rate": 5.500000000000001e-05,
"loss": 7.1583,
"step": 55
},
{
"epoch": 0.01,
"learning_rate": 6e-05,
"loss": 7.0445,
"step": 60
},
{
"epoch": 0.01,
"learning_rate": 6.500000000000001e-05,
"loss": 6.9933,
"step": 65
},
{
"epoch": 0.01,
"learning_rate": 7e-05,
"loss": 6.826,
"step": 70
},
{
"epoch": 0.01,
"learning_rate": 7.500000000000001e-05,
"loss": 6.7937,
"step": 75
},
{
"epoch": 0.01,
"learning_rate": 8e-05,
"loss": 6.7451,
"step": 80
},
{
"epoch": 0.01,
"learning_rate": 8.5e-05,
"loss": 6.4212,
"step": 85
},
{
"epoch": 0.01,
"learning_rate": 9e-05,
"loss": 6.4033,
"step": 90
},
{
"epoch": 0.01,
"learning_rate": 9.5e-05,
"loss": 6.3906,
"step": 95
},
{
"epoch": 0.01,
"learning_rate": 0.0001,
"loss": 6.1864,
"step": 100
},
{
"epoch": 0.01,
"learning_rate": 9.99999319664223e-05,
"loss": 6.0284,
"step": 105
},
{
"epoch": 0.01,
"learning_rate": 9.999972786587433e-05,
"loss": 5.7979,
"step": 110
},
{
"epoch": 0.01,
"learning_rate": 9.99993876989115e-05,
"loss": 5.9609,
"step": 115
},
{
"epoch": 0.01,
"learning_rate": 9.999891146645955e-05,
"loss": 5.7689,
"step": 120
},
{
"epoch": 0.01,
"learning_rate": 9.999829916981445e-05,
"loss": 5.6893,
"step": 125
},
{
"epoch": 0.01,
"learning_rate": 9.999755081064248e-05,
"loss": 5.6071,
"step": 130
},
{
"epoch": 0.01,
"learning_rate": 9.999666639098018e-05,
"loss": 5.5475,
"step": 135
},
{
"epoch": 0.01,
"learning_rate": 9.999564591323436e-05,
"loss": 5.5319,
"step": 140
},
{
"epoch": 0.02,
"learning_rate": 9.99944893801821e-05,
"loss": 5.3992,
"step": 145
},
{
"epoch": 0.02,
"learning_rate": 9.99931967949707e-05,
"loss": 5.3752,
"step": 150
},
{
"epoch": 0.02,
"learning_rate": 9.999176816111775e-05,
"loss": 5.3229,
"step": 155
},
{
"epoch": 0.02,
"learning_rate": 9.999020348251104e-05,
"loss": 5.1756,
"step": 160
},
{
"epoch": 0.02,
"learning_rate": 9.99885027634086e-05,
"loss": 5.1552,
"step": 165
},
{
"epoch": 0.02,
"learning_rate": 9.998666600843867e-05,
"loss": 5.1344,
"step": 170
},
{
"epoch": 0.02,
"learning_rate": 9.99846932225997e-05,
"loss": 5.1621,
"step": 175
},
{
"epoch": 0.02,
"learning_rate": 9.99825844112603e-05,
"loss": 5.0381,
"step": 180
},
{
"epoch": 0.02,
"learning_rate": 9.998033958015928e-05,
"loss": 5.1088,
"step": 185
},
{
"epoch": 0.02,
"learning_rate": 9.997795873540561e-05,
"loss": 5.0639,
"step": 190
},
{
"epoch": 0.02,
"learning_rate": 9.997544188347834e-05,
"loss": 4.9108,
"step": 195
},
{
"epoch": 0.02,
"learning_rate": 9.997278903122673e-05,
"loss": 4.9241,
"step": 200
},
{
"epoch": 0.02,
"learning_rate": 9.997000018587011e-05,
"loss": 4.9309,
"step": 205
},
{
"epoch": 0.02,
"learning_rate": 9.996707535499784e-05,
"loss": 4.8356,
"step": 210
},
{
"epoch": 0.02,
"learning_rate": 9.99640145465694e-05,
"loss": 4.9571,
"step": 215
},
{
"epoch": 0.02,
"learning_rate": 9.996081776891434e-05,
"loss": 4.8575,
"step": 220
},
{
"epoch": 0.02,
"learning_rate": 9.995748503073215e-05,
"loss": 4.8101,
"step": 225
},
{
"epoch": 0.02,
"learning_rate": 9.995401634109236e-05,
"loss": 4.7393,
"step": 230
},
{
"epoch": 0.02,
"learning_rate": 9.995041170943447e-05,
"loss": 4.6531,
"step": 235
},
{
"epoch": 0.02,
"learning_rate": 9.994667114556792e-05,
"loss": 4.7505,
"step": 240
},
{
"epoch": 0.03,
"learning_rate": 9.994279465967206e-05,
"loss": 4.7601,
"step": 245
},
{
"epoch": 0.03,
"learning_rate": 9.993878226229616e-05,
"loss": 4.7105,
"step": 250
},
{
"epoch": 0.03,
"learning_rate": 9.993463396435929e-05,
"loss": 4.5959,
"step": 255
},
{
"epoch": 0.03,
"learning_rate": 9.993034977715044e-05,
"loss": 4.7773,
"step": 260
},
{
"epoch": 0.03,
"learning_rate": 9.992592971232833e-05,
"loss": 4.6829,
"step": 265
},
{
"epoch": 0.03,
"learning_rate": 9.992137378192145e-05,
"loss": 4.7622,
"step": 270
},
{
"epoch": 0.03,
"learning_rate": 9.991668199832809e-05,
"loss": 4.5865,
"step": 275
},
{
"epoch": 0.03,
"learning_rate": 9.991185437431618e-05,
"loss": 4.5472,
"step": 280
},
{
"epoch": 0.03,
"learning_rate": 9.990689092302336e-05,
"loss": 4.5461,
"step": 285
},
{
"epoch": 0.03,
"learning_rate": 9.990179165795686e-05,
"loss": 4.5179,
"step": 290
},
{
"epoch": 0.03,
"learning_rate": 9.989655659299353e-05,
"loss": 4.625,
"step": 295
},
{
"epoch": 0.03,
"learning_rate": 9.98911857423798e-05,
"loss": 4.5834,
"step": 300
},
{
"epoch": 0.03,
"learning_rate": 9.988567912073158e-05,
"loss": 4.6043,
"step": 305
},
{
"epoch": 0.03,
"learning_rate": 9.98800367430343e-05,
"loss": 4.5443,
"step": 310
},
{
"epoch": 0.03,
"learning_rate": 9.987425862464277e-05,
"loss": 4.5195,
"step": 315
},
{
"epoch": 0.03,
"learning_rate": 9.986834478128127e-05,
"loss": 4.497,
"step": 320
},
{
"epoch": 0.03,
"learning_rate": 9.986229522904336e-05,
"loss": 4.4615,
"step": 325
},
{
"epoch": 0.03,
"learning_rate": 9.985610998439197e-05,
"loss": 4.4588,
"step": 330
},
{
"epoch": 0.03,
"learning_rate": 9.984978906415927e-05,
"loss": 4.4382,
"step": 335
},
{
"epoch": 0.04,
"learning_rate": 9.984333248554665e-05,
"loss": 4.4523,
"step": 340
},
{
"epoch": 0.04,
"learning_rate": 9.983674026612467e-05,
"loss": 4.4456,
"step": 345
},
{
"epoch": 0.04,
"learning_rate": 9.983001242383302e-05,
"loss": 4.386,
"step": 350
},
{
"epoch": 0.04,
"learning_rate": 9.98231489769805e-05,
"loss": 4.4249,
"step": 355
},
{
"epoch": 0.04,
"learning_rate": 9.981614994424487e-05,
"loss": 4.3047,
"step": 360
},
{
"epoch": 0.04,
"learning_rate": 9.980901534467291e-05,
"loss": 4.3131,
"step": 365
},
{
"epoch": 0.04,
"learning_rate": 9.980174519768031e-05,
"loss": 4.3479,
"step": 370
},
{
"epoch": 0.04,
"learning_rate": 9.979433952305163e-05,
"loss": 4.3042,
"step": 375
},
{
"epoch": 0.04,
"learning_rate": 9.978679834094025e-05,
"loss": 4.2703,
"step": 380
},
{
"epoch": 0.04,
"learning_rate": 9.977912167186834e-05,
"loss": 4.1616,
"step": 385
},
{
"epoch": 0.04,
"learning_rate": 9.977130953672673e-05,
"loss": 4.413,
"step": 390
},
{
"epoch": 0.04,
"learning_rate": 9.976336195677491e-05,
"loss": 4.2935,
"step": 395
},
{
"epoch": 0.04,
"learning_rate": 9.975527895364098e-05,
"loss": 4.3209,
"step": 400
},
{
"epoch": 0.04,
"learning_rate": 9.974706054932158e-05,
"loss": 4.2606,
"step": 405
},
{
"epoch": 0.04,
"learning_rate": 9.973870676618179e-05,
"loss": 4.4179,
"step": 410
},
{
"epoch": 0.04,
"learning_rate": 9.973021762695514e-05,
"loss": 4.1093,
"step": 415
},
{
"epoch": 0.04,
"learning_rate": 9.972159315474348e-05,
"loss": 4.2495,
"step": 420
},
{
"epoch": 0.04,
"learning_rate": 9.971283337301694e-05,
"loss": 4.2522,
"step": 425
},
{
"epoch": 0.04,
"learning_rate": 9.97039383056139e-05,
"loss": 4.2719,
"step": 430
},
{
"epoch": 0.05,
"learning_rate": 9.969490797674091e-05,
"loss": 4.2665,
"step": 435
},
{
"epoch": 0.05,
"learning_rate": 9.968574241097259e-05,
"loss": 4.2985,
"step": 440
},
{
"epoch": 0.05,
"learning_rate": 9.967644163325156e-05,
"loss": 4.2518,
"step": 445
},
{
"epoch": 0.05,
"learning_rate": 9.966700566888847e-05,
"loss": 4.211,
"step": 450
},
{
"epoch": 0.05,
"learning_rate": 9.965743454356178e-05,
"loss": 4.3097,
"step": 455
},
{
"epoch": 0.05,
"learning_rate": 9.964772828331781e-05,
"loss": 4.2268,
"step": 460
},
{
"epoch": 0.05,
"learning_rate": 9.963788691457066e-05,
"loss": 4.1852,
"step": 465
},
{
"epoch": 0.05,
"learning_rate": 9.962791046410202e-05,
"loss": 4.2971,
"step": 470
},
{
"epoch": 0.05,
"learning_rate": 9.961779895906127e-05,
"loss": 4.2119,
"step": 475
},
{
"epoch": 0.05,
"learning_rate": 9.960755242696528e-05,
"loss": 4.2781,
"step": 480
},
{
"epoch": 0.05,
"learning_rate": 9.959717089569837e-05,
"loss": 4.2193,
"step": 485
},
{
"epoch": 0.05,
"learning_rate": 9.958665439351224e-05,
"loss": 4.1444,
"step": 490
},
{
"epoch": 0.05,
"learning_rate": 9.957600294902593e-05,
"loss": 4.2276,
"step": 495
},
{
"epoch": 0.05,
"learning_rate": 9.956521659122567e-05,
"loss": 4.1808,
"step": 500
},
{
"epoch": 0.05,
"learning_rate": 9.95542953494648e-05,
"loss": 4.2218,
"step": 505
},
{
"epoch": 0.05,
"learning_rate": 9.954323925346381e-05,
"loss": 4.1615,
"step": 510
},
{
"epoch": 0.05,
"learning_rate": 9.953204833331011e-05,
"loss": 4.1061,
"step": 515
},
{
"epoch": 0.05,
"learning_rate": 9.952072261945804e-05,
"loss": 4.0275,
"step": 520
},
{
"epoch": 0.05,
"learning_rate": 9.950926214272876e-05,
"loss": 3.9907,
"step": 525
},
{
"epoch": 0.06,
"learning_rate": 9.949766693431015e-05,
"loss": 4.0716,
"step": 530
},
{
"epoch": 0.06,
"learning_rate": 9.948593702575675e-05,
"loss": 4.1418,
"step": 535
},
{
"epoch": 0.06,
"learning_rate": 9.947407244898967e-05,
"loss": 4.0215,
"step": 540
},
{
"epoch": 0.06,
"learning_rate": 9.946207323629648e-05,
"loss": 4.0912,
"step": 545
},
{
"epoch": 0.06,
"learning_rate": 9.944993942033119e-05,
"loss": 4.1922,
"step": 550
},
{
"epoch": 0.06,
"learning_rate": 9.943767103411403e-05,
"loss": 4.1028,
"step": 555
},
{
"epoch": 0.06,
"learning_rate": 9.942526811103152e-05,
"loss": 4.129,
"step": 560
},
{
"epoch": 0.06,
"learning_rate": 9.941273068483625e-05,
"loss": 4.0818,
"step": 565
},
{
"epoch": 0.06,
"learning_rate": 9.940005878964689e-05,
"loss": 4.1768,
"step": 570
},
{
"epoch": 0.06,
"learning_rate": 9.938725245994798e-05,
"loss": 4.0915,
"step": 575
},
{
"epoch": 0.06,
"learning_rate": 9.937431173058996e-05,
"loss": 4.0007,
"step": 580
},
{
"epoch": 0.06,
"learning_rate": 9.9361236636789e-05,
"loss": 3.9288,
"step": 585
},
{
"epoch": 0.06,
"learning_rate": 9.934802721412686e-05,
"loss": 3.9529,
"step": 590
},
{
"epoch": 0.06,
"learning_rate": 9.933468349855099e-05,
"loss": 4.0354,
"step": 595
},
{
"epoch": 0.06,
"learning_rate": 9.932120552637418e-05,
"loss": 4.0737,
"step": 600
},
{
"epoch": 0.06,
"learning_rate": 9.930759333427459e-05,
"loss": 4.0666,
"step": 605
},
{
"epoch": 0.06,
"learning_rate": 9.929384695929572e-05,
"loss": 4.0967,
"step": 610
},
{
"epoch": 0.06,
"learning_rate": 9.927996643884614e-05,
"loss": 4.0246,
"step": 615
},
{
"epoch": 0.06,
"learning_rate": 9.92659518106995e-05,
"loss": 3.8967,
"step": 620
},
{
"epoch": 0.06,
"learning_rate": 9.925180311299446e-05,
"loss": 4.1447,
"step": 625
},
{
"epoch": 0.07,
"learning_rate": 9.923752038423443e-05,
"loss": 4.089,
"step": 630
},
{
"epoch": 0.07,
"learning_rate": 9.922310366328763e-05,
"loss": 3.9846,
"step": 635
},
{
"epoch": 0.07,
"learning_rate": 9.920855298938691e-05,
"loss": 4.0517,
"step": 640
},
{
"epoch": 0.07,
"learning_rate": 9.919386840212964e-05,
"loss": 4.138,
"step": 645
},
{
"epoch": 0.07,
"learning_rate": 9.917904994147764e-05,
"loss": 4.069,
"step": 650
},
{
"epoch": 0.07,
"learning_rate": 9.916409764775699e-05,
"loss": 4.006,
"step": 655
},
{
"epoch": 0.07,
"learning_rate": 9.914901156165804e-05,
"loss": 3.932,
"step": 660
},
{
"epoch": 0.07,
"learning_rate": 9.91337917242352e-05,
"loss": 4.0524,
"step": 665
},
{
"epoch": 0.07,
"learning_rate": 9.911843817690686e-05,
"loss": 3.9053,
"step": 670
},
{
"epoch": 0.07,
"learning_rate": 9.91029509614553e-05,
"loss": 3.9072,
"step": 675
},
{
"epoch": 0.07,
"learning_rate": 9.908733012002655e-05,
"loss": 3.9842,
"step": 680
},
{
"epoch": 0.07,
"learning_rate": 9.907157569513027e-05,
"loss": 3.95,
"step": 685
},
{
"epoch": 0.07,
"learning_rate": 9.905568772963967e-05,
"loss": 3.9991,
"step": 690
},
{
"epoch": 0.07,
"learning_rate": 9.903966626679133e-05,
"loss": 3.9474,
"step": 695
},
{
"epoch": 0.07,
"learning_rate": 9.902351135018517e-05,
"loss": 3.9754,
"step": 700
},
{
"epoch": 0.07,
"learning_rate": 9.900722302378425e-05,
"loss": 3.989,
"step": 705
},
{
"epoch": 0.07,
"learning_rate": 9.89908013319147e-05,
"loss": 3.9484,
"step": 710
},
{
"epoch": 0.07,
"learning_rate": 9.897424631926557e-05,
"loss": 4.0325,
"step": 715
},
{
"epoch": 0.07,
"learning_rate": 9.895755803088874e-05,
"loss": 4.0261,
"step": 720
},
{
"epoch": 0.08,
"learning_rate": 9.894073651219877e-05,
"loss": 3.8771,
"step": 725
},
{
"epoch": 0.08,
"learning_rate": 9.892378180897277e-05,
"loss": 3.8138,
"step": 730
},
{
"epoch": 0.08,
"learning_rate": 9.890669396735032e-05,
"loss": 3.9609,
"step": 735
},
{
"epoch": 0.08,
"learning_rate": 9.888947303383329e-05,
"loss": 3.9424,
"step": 740
},
{
"epoch": 0.08,
"learning_rate": 9.887211905528576e-05,
"loss": 3.9631,
"step": 745
},
{
"epoch": 0.08,
"learning_rate": 9.885463207893383e-05,
"loss": 3.8561,
"step": 750
},
{
"epoch": 0.08,
"learning_rate": 9.88370121523656e-05,
"loss": 3.8925,
"step": 755
},
{
"epoch": 0.08,
"learning_rate": 9.881925932353091e-05,
"loss": 3.8925,
"step": 760
},
{
"epoch": 0.08,
"learning_rate": 9.880137364074132e-05,
"loss": 3.8758,
"step": 765
},
{
"epoch": 0.08,
"learning_rate": 9.878335515266989e-05,
"loss": 3.806,
"step": 770
},
{
"epoch": 0.08,
"learning_rate": 9.876520390835112e-05,
"loss": 3.9016,
"step": 775
},
{
"epoch": 0.08,
"learning_rate": 9.874691995718077e-05,
"loss": 3.7877,
"step": 780
},
{
"epoch": 0.08,
"learning_rate": 9.872850334891575e-05,
"loss": 3.866,
"step": 785
},
{
"epoch": 0.08,
"learning_rate": 9.870995413367397e-05,
"loss": 3.8179,
"step": 790
},
{
"epoch": 0.08,
"learning_rate": 9.869127236193419e-05,
"loss": 3.9106,
"step": 795
},
{
"epoch": 0.08,
"learning_rate": 9.867245808453594e-05,
"loss": 3.9662,
"step": 800
},
{
"epoch": 0.08,
"learning_rate": 9.865351135267931e-05,
"loss": 3.8928,
"step": 805
},
{
"epoch": 0.08,
"learning_rate": 9.863443221792487e-05,
"loss": 3.8844,
"step": 810
},
{
"epoch": 0.08,
"learning_rate": 9.861522073219349e-05,
"loss": 3.7855,
"step": 815
},
{
"epoch": 0.09,
"learning_rate": 9.859587694776621e-05,
"loss": 3.8677,
"step": 820
},
{
"epoch": 0.09,
"learning_rate": 9.857640091728411e-05,
"loss": 3.8445,
"step": 825
},
{
"epoch": 0.09,
"learning_rate": 9.855679269374816e-05,
"loss": 3.8411,
"step": 830
},
{
"epoch": 0.09,
"learning_rate": 9.853705233051904e-05,
"loss": 3.8302,
"step": 835
},
{
"epoch": 0.09,
"learning_rate": 9.851717988131705e-05,
"loss": 3.8467,
"step": 840
},
{
"epoch": 0.09,
"learning_rate": 9.849717540022199e-05,
"loss": 3.8176,
"step": 845
},
{
"epoch": 0.09,
"learning_rate": 9.847703894167286e-05,
"loss": 3.7066,
"step": 850
},
{
"epoch": 0.09,
"learning_rate": 9.84567705604679e-05,
"loss": 3.7558,
"step": 855
},
{
"epoch": 0.09,
"learning_rate": 9.843637031176433e-05,
"loss": 3.7346,
"step": 860
},
{
"epoch": 0.09,
"learning_rate": 9.841583825107821e-05,
"loss": 3.7371,
"step": 865
},
{
"epoch": 0.09,
"learning_rate": 9.839517443428434e-05,
"loss": 3.8364,
"step": 870
},
{
"epoch": 0.09,
"learning_rate": 9.837437891761606e-05,
"loss": 3.7334,
"step": 875
},
{
"epoch": 0.09,
"learning_rate": 9.835345175766509e-05,
"loss": 3.865,
"step": 880
},
{
"epoch": 0.09,
"learning_rate": 9.833239301138142e-05,
"loss": 3.7821,
"step": 885
},
{
"epoch": 0.09,
"learning_rate": 9.83112027360731e-05,
"loss": 3.6758,
"step": 890
},
{
"epoch": 0.09,
"learning_rate": 9.828988098940619e-05,
"loss": 3.7046,
"step": 895
},
{
"epoch": 0.09,
"learning_rate": 9.826842782940445e-05,
"loss": 3.9129,
"step": 900
},
{
"epoch": 0.09,
"learning_rate": 9.824684331444927e-05,
"loss": 3.8495,
"step": 905
},
{
"epoch": 0.09,
"learning_rate": 9.822512750327953e-05,
"loss": 3.8108,
"step": 910
},
{
"epoch": 0.1,
"learning_rate": 9.820328045499143e-05,
"loss": 3.753,
"step": 915
},
{
"epoch": 0.1,
"learning_rate": 9.818130222903828e-05,
"loss": 3.8771,
"step": 920
},
{
"epoch": 0.1,
"learning_rate": 9.815919288523035e-05,
"loss": 3.7107,
"step": 925
},
{
"epoch": 0.1,
"learning_rate": 9.813695248373474e-05,
"loss": 3.7011,
"step": 930
},
{
"epoch": 0.1,
"learning_rate": 9.811458108507527e-05,
"loss": 3.7602,
"step": 935
},
{
"epoch": 0.1,
"learning_rate": 9.809207875013213e-05,
"loss": 3.8076,
"step": 940
},
{
"epoch": 0.1,
"learning_rate": 9.806944554014193e-05,
"loss": 3.7961,
"step": 945
},
{
"epoch": 0.1,
"learning_rate": 9.80466815166974e-05,
"loss": 3.7413,
"step": 950
},
{
"epoch": 0.1,
"learning_rate": 9.802378674174724e-05,
"loss": 3.7162,
"step": 955
},
{
"epoch": 0.1,
"learning_rate": 9.800076127759599e-05,
"loss": 3.7689,
"step": 960
},
{
"epoch": 0.1,
"learning_rate": 9.797760518690385e-05,
"loss": 3.629,
"step": 965
},
{
"epoch": 0.1,
"learning_rate": 9.795431853268648e-05,
"loss": 3.7742,
"step": 970
},
{
"epoch": 0.1,
"learning_rate": 9.793090137831486e-05,
"loss": 3.6976,
"step": 975
},
{
"epoch": 0.1,
"learning_rate": 9.79073537875151e-05,
"loss": 3.7106,
"step": 980
},
{
"epoch": 0.1,
"learning_rate": 9.788367582436828e-05,
"loss": 3.7185,
"step": 985
},
{
"epoch": 0.1,
"learning_rate": 9.785986755331025e-05,
"loss": 3.7897,
"step": 990
},
{
"epoch": 0.1,
"learning_rate": 9.78359290391315e-05,
"loss": 3.6348,
"step": 995
},
{
"epoch": 0.1,
"learning_rate": 9.781186034697692e-05,
"loss": 3.5979,
"step": 1000
},
{
"epoch": 0.1,
"learning_rate": 9.778766154234569e-05,
"loss": 3.7523,
"step": 1005
},
{
"epoch": 0.1,
"learning_rate": 9.776333269109107e-05,
"loss": 3.7874,
"step": 1010
},
{
"epoch": 0.11,
"learning_rate": 9.77388738594202e-05,
"loss": 3.7577,
"step": 1015
},
{
"epoch": 0.11,
"learning_rate": 9.771428511389395e-05,
"loss": 3.7654,
"step": 1020
},
{
"epoch": 0.11,
"learning_rate": 9.768956652142674e-05,
"loss": 3.8066,
"step": 1025
},
{
"epoch": 0.11,
"learning_rate": 9.766471814928635e-05,
"loss": 3.7506,
"step": 1030
},
{
"epoch": 0.11,
"learning_rate": 9.76397400650937e-05,
"loss": 3.8497,
"step": 1035
},
{
"epoch": 0.11,
"learning_rate": 9.761463233682276e-05,
"loss": 3.6532,
"step": 1040
},
{
"epoch": 0.11,
"learning_rate": 9.758939503280024e-05,
"loss": 3.7796,
"step": 1045
},
{
"epoch": 0.11,
"learning_rate": 9.756402822170553e-05,
"loss": 3.6971,
"step": 1050
},
{
"epoch": 0.11,
"learning_rate": 9.753853197257042e-05,
"loss": 3.8117,
"step": 1055
},
{
"epoch": 0.11,
"learning_rate": 9.751290635477895e-05,
"loss": 3.7334,
"step": 1060
},
{
"epoch": 0.11,
"learning_rate": 9.748715143806719e-05,
"loss": 3.8371,
"step": 1065
},
{
"epoch": 0.11,
"learning_rate": 9.746126729252316e-05,
"loss": 3.6015,
"step": 1070
},
{
"epoch": 0.11,
"learning_rate": 9.743525398858646e-05,
"loss": 3.6297,
"step": 1075
},
{
"epoch": 0.11,
"learning_rate": 9.740911159704824e-05,
"loss": 3.6828,
"step": 1080
},
{
"epoch": 0.11,
"learning_rate": 9.738284018905091e-05,
"loss": 3.7113,
"step": 1085
},
{
"epoch": 0.11,
"learning_rate": 9.735643983608797e-05,
"loss": 3.7889,
"step": 1090
},
{
"epoch": 0.11,
"learning_rate": 9.732991061000385e-05,
"loss": 3.5754,
"step": 1095
},
{
"epoch": 0.11,
"learning_rate": 9.730325258299366e-05,
"loss": 3.6713,
"step": 1100
},
{
"epoch": 0.11,
"learning_rate": 9.727646582760308e-05,
"loss": 3.5681,
"step": 1105
},
{
"epoch": 0.12,
"learning_rate": 9.724955041672803e-05,
"loss": 3.7093,
"step": 1110
},
{
"epoch": 0.12,
"learning_rate": 9.722250642361456e-05,
"loss": 3.514,
"step": 1115
},
{
"epoch": 0.12,
"learning_rate": 9.719533392185872e-05,
"loss": 3.6651,
"step": 1120
},
{
"epoch": 0.12,
"learning_rate": 9.716803298540613e-05,
"loss": 3.707,
"step": 1125
},
{
"epoch": 0.12,
"learning_rate": 9.714060368855206e-05,
"loss": 3.6944,
"step": 1130
},
{
"epoch": 0.12,
"learning_rate": 9.711304610594104e-05,
"loss": 3.7269,
"step": 1135
},
{
"epoch": 0.12,
"learning_rate": 9.708536031256666e-05,
"loss": 3.6314,
"step": 1140
},
{
"epoch": 0.12,
"learning_rate": 9.705754638377151e-05,
"loss": 3.5697,
"step": 1145
},
{
"epoch": 0.12,
"learning_rate": 9.70296043952468e-05,
"loss": 3.7098,
"step": 1150
},
{
"epoch": 0.12,
"learning_rate": 9.70015344230323e-05,
"loss": 3.696,
"step": 1155
},
{
"epoch": 0.12,
"learning_rate": 9.697333654351602e-05,
"loss": 3.7505,
"step": 1160
},
{
"epoch": 0.12,
"learning_rate": 9.694501083343405e-05,
"loss": 3.5885,
"step": 1165
},
{
"epoch": 0.12,
"learning_rate": 9.691655736987038e-05,
"loss": 3.7345,
"step": 1170
},
{
"epoch": 0.12,
"learning_rate": 9.688797623025666e-05,
"loss": 3.5453,
"step": 1175
},
{
"epoch": 0.12,
"learning_rate": 9.685926749237195e-05,
"loss": 3.6337,
"step": 1180
},
{
"epoch": 0.12,
"learning_rate": 9.683043123434261e-05,
"loss": 3.5826,
"step": 1185
},
{
"epoch": 0.12,
"learning_rate": 9.680146753464197e-05,
"loss": 3.7254,
"step": 1190
},
{
"epoch": 0.12,
"learning_rate": 9.677237647209017e-05,
"loss": 3.6509,
"step": 1195
},
{
"epoch": 0.12,
"learning_rate": 9.674315812585402e-05,
"loss": 3.7221,
"step": 1200
},
{
"epoch": 0.13,
"learning_rate": 9.671381257544665e-05,
"loss": 3.7415,
"step": 1205
},
{
"epoch": 0.13,
"learning_rate": 9.668433990072737e-05,
"loss": 3.6499,
"step": 1210
},
{
"epoch": 0.13,
"learning_rate": 9.665474018190142e-05,
"loss": 3.5825,
"step": 1215
},
{
"epoch": 0.13,
"learning_rate": 9.662501349951981e-05,
"loss": 3.5002,
"step": 1220
},
{
"epoch": 0.13,
"learning_rate": 9.659515993447904e-05,
"loss": 3.5725,
"step": 1225
},
{
"epoch": 0.13,
"learning_rate": 9.656517956802092e-05,
"loss": 3.6267,
"step": 1230
},
{
"epoch": 0.13,
"learning_rate": 9.653507248173227e-05,
"loss": 3.6351,
"step": 1235
},
{
"epoch": 0.13,
"learning_rate": 9.650483875754483e-05,
"loss": 3.4773,
"step": 1240
},
{
"epoch": 0.13,
"learning_rate": 9.647447847773497e-05,
"loss": 3.7241,
"step": 1245
},
{
"epoch": 0.13,
"learning_rate": 9.644399172492336e-05,
"loss": 3.5076,
"step": 1250
},
{
"epoch": 0.13,
"learning_rate": 9.641337858207495e-05,
"loss": 3.7403,
"step": 1255
},
{
"epoch": 0.13,
"learning_rate": 9.638263913249861e-05,
"loss": 3.5662,
"step": 1260
},
{
"epoch": 0.13,
"learning_rate": 9.635177345984692e-05,
"loss": 3.6316,
"step": 1265
},
{
"epoch": 0.13,
"learning_rate": 9.632078164811598e-05,
"loss": 3.5475,
"step": 1270
},
{
"epoch": 0.13,
"learning_rate": 9.628966378164513e-05,
"loss": 3.5504,
"step": 1275
},
{
"epoch": 0.13,
"learning_rate": 9.625841994511675e-05,
"loss": 3.6074,
"step": 1280
},
{
"epoch": 0.13,
"learning_rate": 9.622705022355607e-05,
"loss": 3.5816,
"step": 1285
},
{
"epoch": 0.13,
"learning_rate": 9.619555470233083e-05,
"loss": 3.5766,
"step": 1290
},
{
"epoch": 0.13,
"learning_rate": 9.616393346715119e-05,
"loss": 3.6748,
"step": 1295
},
{
"epoch": 0.14,
"learning_rate": 9.613218660406933e-05,
"loss": 3.5986,
"step": 1300
},
{
"epoch": 0.14,
"learning_rate": 9.61003141994794e-05,
"loss": 3.731,
"step": 1305
},
{
"epoch": 0.14,
"learning_rate": 9.606831634011712e-05,
"loss": 3.6098,
"step": 1310
},
{
"epoch": 0.14,
"learning_rate": 9.603619311305968e-05,
"loss": 3.5826,
"step": 1315
},
{
"epoch": 0.14,
"learning_rate": 9.600394460572537e-05,
"loss": 3.635,
"step": 1320
},
{
"epoch": 0.14,
"learning_rate": 9.597157090587346e-05,
"loss": 3.5129,
"step": 1325
},
{
"epoch": 0.14,
"learning_rate": 9.593907210160387e-05,
"loss": 3.5163,
"step": 1330
},
{
"epoch": 0.14,
"learning_rate": 9.590644828135702e-05,
"loss": 3.4268,
"step": 1335
},
{
"epoch": 0.14,
"learning_rate": 9.587369953391352e-05,
"loss": 3.5015,
"step": 1340
},
{
"epoch": 0.14,
"learning_rate": 9.584082594839394e-05,
"loss": 3.6242,
"step": 1345
},
{
"epoch": 0.14,
"learning_rate": 9.580782761425859e-05,
"loss": 3.5733,
"step": 1350
},
{
"epoch": 0.14,
"learning_rate": 9.577470462130726e-05,
"loss": 3.5834,
"step": 1355
},
{
"epoch": 0.14,
"learning_rate": 9.574145705967898e-05,
"loss": 3.4741,
"step": 1360
},
{
"epoch": 0.14,
"learning_rate": 9.570808501985175e-05,
"loss": 3.5284,
"step": 1365
},
{
"epoch": 0.14,
"learning_rate": 9.567458859264238e-05,
"loss": 3.6601,
"step": 1370
},
{
"epoch": 0.14,
"learning_rate": 9.56409678692061e-05,
"loss": 3.7271,
"step": 1375
},
{
"epoch": 0.14,
"learning_rate": 9.560722294103646e-05,
"loss": 3.56,
"step": 1380
},
{
"epoch": 0.14,
"learning_rate": 9.557335389996499e-05,
"loss": 3.7228,
"step": 1385
},
{
"epoch": 0.14,
"learning_rate": 9.553936083816095e-05,
"loss": 3.6206,
"step": 1390
},
{
"epoch": 0.14,
"learning_rate": 9.550524384813114e-05,
"loss": 3.5725,
"step": 1395
},
{
"epoch": 0.15,
"learning_rate": 9.54710030227196e-05,
"loss": 3.63,
"step": 1400
},
{
"epoch": 0.15,
"learning_rate": 9.543663845510736e-05,
"loss": 3.5948,
"step": 1405
},
{
"epoch": 0.15,
"learning_rate": 9.540215023881219e-05,
"loss": 3.5457,
"step": 1410
},
{
"epoch": 0.15,
"learning_rate": 9.536753846768835e-05,
"loss": 3.7054,
"step": 1415
},
{
"epoch": 0.15,
"learning_rate": 9.53328032359264e-05,
"loss": 3.6052,
"step": 1420
},
{
"epoch": 0.15,
"learning_rate": 9.529794463805275e-05,
"loss": 3.4509,
"step": 1425
},
{
"epoch": 0.15,
"learning_rate": 9.526296276892965e-05,
"loss": 3.4801,
"step": 1430
},
{
"epoch": 0.15,
"learning_rate": 9.522785772375475e-05,
"loss": 3.5778,
"step": 1435
},
{
"epoch": 0.15,
"learning_rate": 9.519262959806095e-05,
"loss": 3.5404,
"step": 1440
},
{
"epoch": 0.15,
"learning_rate": 9.515727848771605e-05,
"loss": 3.6708,
"step": 1445
},
{
"epoch": 0.15,
"learning_rate": 9.512180448892254e-05,
"loss": 3.4611,
"step": 1450
},
{
"epoch": 0.15,
"learning_rate": 9.508620769821734e-05,
"loss": 3.6322,
"step": 1455
},
{
"epoch": 0.15,
"learning_rate": 9.505048821247156e-05,
"loss": 3.5811,
"step": 1460
},
{
"epoch": 0.15,
"learning_rate": 9.501464612889015e-05,
"loss": 3.6099,
"step": 1465
},
{
"epoch": 0.15,
"learning_rate": 9.497868154501172e-05,
"loss": 3.6795,
"step": 1470
},
{
"epoch": 0.15,
"learning_rate": 9.494259455870824e-05,
"loss": 3.4475,
"step": 1475
},
{
"epoch": 0.15,
"learning_rate": 9.49063852681848e-05,
"loss": 3.5502,
"step": 1480
},
{
"epoch": 0.15,
"learning_rate": 9.48700537719793e-05,
"loss": 3.4725,
"step": 1485
},
{
"epoch": 0.15,
"learning_rate": 9.48336001689622e-05,
"loss": 3.536,
"step": 1490
},
{
"epoch": 0.16,
"learning_rate": 9.479702455833626e-05,
"loss": 3.4377,
"step": 1495
},
{
"epoch": 0.16,
"learning_rate": 9.476032703963625e-05,
"loss": 3.4536,
"step": 1500
},
{
"epoch": 0.16,
"learning_rate": 9.472350771272875e-05,
"loss": 3.5803,
"step": 1505
},
{
"epoch": 0.16,
"learning_rate": 9.468656667781174e-05,
"loss": 3.5074,
"step": 1510
},
{
"epoch": 0.16,
"learning_rate": 9.464950403541447e-05,
"loss": 3.5433,
"step": 1515
},
{
"epoch": 0.16,
"learning_rate": 9.461231988639711e-05,
"loss": 3.4499,
"step": 1520
},
{
"epoch": 0.16,
"learning_rate": 9.457501433195048e-05,
"loss": 3.5926,
"step": 1525
},
{
"epoch": 0.16,
"learning_rate": 9.45375874735958e-05,
"loss": 3.5056,
"step": 1530
},
{
"epoch": 0.16,
"learning_rate": 9.450003941318438e-05,
"loss": 3.5455,
"step": 1535
},
{
"epoch": 0.16,
"learning_rate": 9.44623702528974e-05,
"loss": 3.5595,
"step": 1540
},
{
"epoch": 0.16,
"learning_rate": 9.442458009524554e-05,
"loss": 3.3979,
"step": 1545
},
{
"epoch": 0.16,
"learning_rate": 9.43866690430688e-05,
"loss": 3.5457,
"step": 1550
},
{
"epoch": 0.16,
"learning_rate": 9.434863719953618e-05,
"loss": 3.4334,
"step": 1555
},
{
"epoch": 0.16,
"learning_rate": 9.431048466814534e-05,
"loss": 3.5655,
"step": 1560
},
{
"epoch": 0.16,
"learning_rate": 9.427221155272241e-05,
"loss": 3.4996,
"step": 1565
},
{
"epoch": 0.16,
"learning_rate": 9.42338179574217e-05,
"loss": 3.5764,
"step": 1570
},
{
"epoch": 0.16,
"learning_rate": 9.419530398672533e-05,
"loss": 3.5163,
"step": 1575
},
{
"epoch": 0.16,
"learning_rate": 9.415666974544305e-05,
"loss": 3.4246,
"step": 1580
},
{
"epoch": 0.16,
"learning_rate": 9.411791533871186e-05,
"loss": 3.4725,
"step": 1585
},
{
"epoch": 0.17,
"learning_rate": 9.407904087199582e-05,
"loss": 3.3925,
"step": 1590
},
{
"epoch": 0.17,
"learning_rate": 9.404004645108568e-05,
"loss": 3.4705,
"step": 1595
},
{
"epoch": 0.17,
"learning_rate": 9.400093218209865e-05,
"loss": 3.4844,
"step": 1600
},
{
"epoch": 0.17,
"learning_rate": 9.396169817147805e-05,
"loss": 3.4981,
"step": 1605
},
{
"epoch": 0.17,
"learning_rate": 9.392234452599312e-05,
"loss": 3.4456,
"step": 1610
},
{
"epoch": 0.17,
"learning_rate": 9.388287135273861e-05,
"loss": 3.5698,
"step": 1615
},
{
"epoch": 0.17,
"learning_rate": 9.384327875913457e-05,
"loss": 3.4827,
"step": 1620
},
{
"epoch": 0.17,
"learning_rate": 9.380356685292603e-05,
"loss": 3.5414,
"step": 1625
},
{
"epoch": 0.17,
"learning_rate": 9.376373574218272e-05,
"loss": 3.5588,
"step": 1630
},
{
"epoch": 0.17,
"learning_rate": 9.372378553529875e-05,
"loss": 3.4879,
"step": 1635
},
{
"epoch": 0.17,
"learning_rate": 9.368371634099236e-05,
"loss": 3.5712,
"step": 1640
},
{
"epoch": 0.17,
"learning_rate": 9.364352826830555e-05,
"loss": 3.5887,
"step": 1645
},
{
"epoch": 0.17,
"learning_rate": 9.360322142660388e-05,
"loss": 3.5668,
"step": 1650
},
{
"epoch": 0.17,
"learning_rate": 9.356279592557607e-05,
"loss": 3.4418,
"step": 1655
},
{
"epoch": 0.17,
"learning_rate": 9.352225187523378e-05,
"loss": 3.3832,
"step": 1660
},
{
"epoch": 0.17,
"learning_rate": 9.34815893859113e-05,
"loss": 3.5223,
"step": 1665
},
{
"epoch": 0.17,
"learning_rate": 9.344080856826521e-05,
"loss": 3.5331,
"step": 1670
},
{
"epoch": 0.17,
"learning_rate": 9.339990953327411e-05,
"loss": 3.484,
"step": 1675
},
{
"epoch": 0.17,
"learning_rate": 9.33588923922383e-05,
"loss": 3.4791,
"step": 1680
},
{
"epoch": 0.18,
"learning_rate": 9.331775725677949e-05,
"loss": 3.4218,
"step": 1685
},
{
"epoch": 0.18,
"learning_rate": 9.327650423884051e-05,
"loss": 3.42,
"step": 1690
},
{
"epoch": 0.18,
"learning_rate": 9.323513345068496e-05,
"loss": 3.5308,
"step": 1695
},
{
"epoch": 0.18,
"learning_rate": 9.319364500489696e-05,
"loss": 3.3971,
"step": 1700
},
{
"epoch": 0.18,
"learning_rate": 9.315203901438082e-05,
"loss": 3.4279,
"step": 1705
},
{
"epoch": 0.18,
"learning_rate": 9.311031559236067e-05,
"loss": 3.4812,
"step": 1710
},
{
"epoch": 0.18,
"learning_rate": 9.30684748523803e-05,
"loss": 3.3762,
"step": 1715
},
{
"epoch": 0.18,
"learning_rate": 9.302651690830271e-05,
"loss": 3.4395,
"step": 1720
},
{
"epoch": 0.18,
"learning_rate": 9.298444187430986e-05,
"loss": 3.497,
"step": 1725
},
{
"epoch": 0.18,
"learning_rate": 9.294224986490236e-05,
"loss": 3.4741,
"step": 1730
},
{
"epoch": 0.18,
"learning_rate": 9.289994099489913e-05,
"loss": 3.415,
"step": 1735
},
{
"epoch": 0.18,
"learning_rate": 9.285751537943712e-05,
"loss": 3.5428,
"step": 1740
},
{
"epoch": 0.18,
"learning_rate": 9.281497313397101e-05,
"loss": 3.3536,
"step": 1745
},
{
"epoch": 0.18,
"learning_rate": 9.27723143742728e-05,
"loss": 3.4309,
"step": 1750
},
{
"epoch": 0.18,
"learning_rate": 9.272953921643168e-05,
"loss": 3.4613,
"step": 1755
},
{
"epoch": 0.18,
"learning_rate": 9.268664777685345e-05,
"loss": 3.5508,
"step": 1760
},
{
"epoch": 0.18,
"learning_rate": 9.264364017226051e-05,
"loss": 3.536,
"step": 1765
},
{
"epoch": 0.18,
"learning_rate": 9.260051651969128e-05,
"loss": 3.4405,
"step": 1770
},
{
"epoch": 0.18,
"learning_rate": 9.255727693649998e-05,
"loss": 3.4646,
"step": 1775
},
{
"epoch": 0.18,
"learning_rate": 9.251392154035641e-05,
"loss": 3.4193,
"step": 1780
},
{
"epoch": 0.19,
"learning_rate": 9.247045044924542e-05,
"loss": 3.4704,
"step": 1785
},
{
"epoch": 0.19,
"learning_rate": 9.242686378146682e-05,
"loss": 3.3396,
"step": 1790
},
{
"epoch": 0.19,
"learning_rate": 9.238316165563485e-05,
"loss": 3.5639,
"step": 1795
},
{
"epoch": 0.19,
"learning_rate": 9.233934419067802e-05,
"loss": 3.41,
"step": 1800
},
{
"epoch": 0.19,
"learning_rate": 9.229541150583865e-05,
"loss": 3.47,
"step": 1805
},
{
"epoch": 0.19,
"learning_rate": 9.225136372067266e-05,
"loss": 3.4233,
"step": 1810
},
{
"epoch": 0.19,
"learning_rate": 9.22072009550492e-05,
"loss": 3.3309,
"step": 1815
},
{
"epoch": 0.19,
"learning_rate": 9.216292332915031e-05,
"loss": 3.5253,
"step": 1820
},
{
"epoch": 0.19,
"learning_rate": 9.211853096347058e-05,
"loss": 3.4263,
"step": 1825
},
{
"epoch": 0.19,
"learning_rate": 9.20740239788169e-05,
"loss": 3.5337,
"step": 1830
},
{
"epoch": 0.19,
"learning_rate": 9.202940249630801e-05,
"loss": 3.3848,
"step": 1835
},
{
"epoch": 0.19,
"learning_rate": 9.198466663737431e-05,
"loss": 3.3998,
"step": 1840
},
{
"epoch": 0.19,
"learning_rate": 9.193981652375738e-05,
"loss": 3.4307,
"step": 1845
},
{
"epoch": 0.19,
"learning_rate": 9.189485227750981e-05,
"loss": 3.4196,
"step": 1850
},
{
"epoch": 0.19,
"learning_rate": 9.18497740209947e-05,
"loss": 3.4977,
"step": 1855
},
{
"epoch": 0.19,
"learning_rate": 9.180458187688549e-05,
"loss": 3.3734,
"step": 1860
},
{
"epoch": 0.19,
"learning_rate": 9.175927596816549e-05,
"loss": 3.3519,
"step": 1865
},
{
"epoch": 0.19,
"learning_rate": 9.171385641812763e-05,
"loss": 3.3348,
"step": 1870
},
{
"epoch": 0.19,
"learning_rate": 9.166832335037408e-05,
"loss": 3.3296,
"step": 1875
},
{
"epoch": 0.2,
"learning_rate": 9.162267688881596e-05,
"loss": 3.4177,
"step": 1880
},
{
"epoch": 0.2,
"learning_rate": 9.157691715767293e-05,
"loss": 3.5309,
"step": 1885
},
{
"epoch": 0.2,
"learning_rate": 9.153104428147294e-05,
"loss": 3.3949,
"step": 1890
},
{
"epoch": 0.2,
"learning_rate": 9.148505838505181e-05,
"loss": 3.4182,
"step": 1895
},
{
"epoch": 0.2,
"learning_rate": 9.143895959355293e-05,
"loss": 3.4144,
"step": 1900
},
{
"epoch": 0.2,
"learning_rate": 9.139274803242697e-05,
"loss": 3.4485,
"step": 1905
},
{
"epoch": 0.2,
"learning_rate": 9.13464238274314e-05,
"loss": 3.3505,
"step": 1910
},
{
"epoch": 0.2,
"learning_rate": 9.12999871046303e-05,
"loss": 3.4214,
"step": 1915
},
{
"epoch": 0.2,
"learning_rate": 9.125343799039394e-05,
"loss": 3.6238,
"step": 1920
},
{
"epoch": 0.2,
"learning_rate": 9.120677661139839e-05,
"loss": 3.5022,
"step": 1925
},
{
"epoch": 0.2,
"learning_rate": 9.11600030946253e-05,
"loss": 3.2656,
"step": 1930
},
{
"epoch": 0.2,
"learning_rate": 9.111311756736145e-05,
"loss": 3.4656,
"step": 1935
},
{
"epoch": 0.2,
"learning_rate": 9.106612015719845e-05,
"loss": 3.4179,
"step": 1940
},
{
"epoch": 0.2,
"learning_rate": 9.101901099203239e-05,
"loss": 3.4982,
"step": 1945
},
{
"epoch": 0.2,
"learning_rate": 9.097179020006343e-05,
"loss": 3.4987,
"step": 1950
},
{
"epoch": 0.2,
"learning_rate": 9.09244579097956e-05,
"loss": 3.4155,
"step": 1955
},
{
"epoch": 0.2,
"learning_rate": 9.087701425003627e-05,
"loss": 3.3359,
"step": 1960
},
{
"epoch": 0.2,
"learning_rate": 9.082945934989591e-05,
"loss": 3.3327,
"step": 1965
},
{
"epoch": 0.2,
"learning_rate": 9.078179333878774e-05,
"loss": 3.4443,
"step": 1970
},
{
"epoch": 0.21,
"learning_rate": 9.073401634642733e-05,
"loss": 3.4336,
"step": 1975
},
{
"epoch": 0.21,
"learning_rate": 9.068612850283227e-05,
"loss": 3.3127,
"step": 1980
},
{
"epoch": 0.21,
"learning_rate": 9.063812993832179e-05,
"loss": 3.402,
"step": 1985
},
{
"epoch": 0.21,
"learning_rate": 9.059002078351648e-05,
"loss": 3.3454,
"step": 1990
},
{
"epoch": 0.21,
"learning_rate": 9.054180116933783e-05,
"loss": 3.5579,
"step": 1995
},
{
"epoch": 0.21,
"learning_rate": 9.049347122700799e-05,
"loss": 3.313,
"step": 2000
},
{
"epoch": 0.21,
"learning_rate": 9.04450310880493e-05,
"loss": 3.3515,
"step": 2005
},
{
"epoch": 0.21,
"learning_rate": 9.039648088428397e-05,
"loss": 3.4245,
"step": 2010
},
{
"epoch": 0.21,
"learning_rate": 9.034782074783379e-05,
"loss": 3.3838,
"step": 2015
},
{
"epoch": 0.21,
"learning_rate": 9.02990508111197e-05,
"loss": 3.423,
"step": 2020
},
{
"epoch": 0.21,
"learning_rate": 9.025017120686142e-05,
"loss": 3.4128,
"step": 2025
},
{
"epoch": 0.21,
"learning_rate": 9.02011820680771e-05,
"loss": 3.4144,
"step": 2030
},
{
"epoch": 0.21,
"learning_rate": 9.015208352808301e-05,
"loss": 3.4192,
"step": 2035
},
{
"epoch": 0.21,
"learning_rate": 9.010287572049314e-05,
"loss": 3.4221,
"step": 2040
},
{
"epoch": 0.21,
"learning_rate": 9.00535587792188e-05,
"loss": 3.4177,
"step": 2045
},
{
"epoch": 0.21,
"learning_rate": 9.000413283846831e-05,
"loss": 3.3476,
"step": 2050
},
{
"epoch": 0.21,
"learning_rate": 8.995459803274664e-05,
"loss": 3.3584,
"step": 2055
},
{
"epoch": 0.21,
"learning_rate": 8.990495449685494e-05,
"loss": 3.338,
"step": 2060
},
{
"epoch": 0.21,
"learning_rate": 8.985520236589036e-05,
"loss": 3.4804,
"step": 2065
},
{
"epoch": 0.22,
"learning_rate": 8.980534177524547e-05,
"loss": 3.3623,
"step": 2070
},
{
"epoch": 0.22,
"learning_rate": 8.97553728606081e-05,
"loss": 3.3682,
"step": 2075
},
{
"epoch": 0.22,
"learning_rate": 8.970529575796073e-05,
"loss": 3.3774,
"step": 2080
},
{
"epoch": 0.22,
"learning_rate": 8.965511060358043e-05,
"loss": 3.3451,
"step": 2085
},
{
"epoch": 0.22,
"learning_rate": 8.960481753403815e-05,
"loss": 3.3615,
"step": 2090
},
{
"epoch": 0.22,
"learning_rate": 8.955441668619864e-05,
"loss": 3.3598,
"step": 2095
},
{
"epoch": 0.22,
"learning_rate": 8.950390819721986e-05,
"loss": 3.4915,
"step": 2100
},
{
"epoch": 0.22,
"learning_rate": 8.945329220455279e-05,
"loss": 3.2871,
"step": 2105
},
{
"epoch": 0.22,
"learning_rate": 8.940256884594085e-05,
"loss": 3.3905,
"step": 2110
},
{
"epoch": 0.22,
"learning_rate": 8.935173825941973e-05,
"loss": 3.3348,
"step": 2115
},
{
"epoch": 0.22,
"learning_rate": 8.930080058331691e-05,
"loss": 3.4252,
"step": 2120
},
{
"epoch": 0.22,
"learning_rate": 8.924975595625128e-05,
"loss": 3.316,
"step": 2125
},
{
"epoch": 0.22,
"learning_rate": 8.919860451713277e-05,
"loss": 3.3096,
"step": 2130
},
{
"epoch": 0.22,
"learning_rate": 8.914734640516201e-05,
"loss": 3.3754,
"step": 2135
},
{
"epoch": 0.22,
"learning_rate": 8.909598175982989e-05,
"loss": 3.3597,
"step": 2140
},
{
"epoch": 0.22,
"learning_rate": 8.904451072091724e-05,
"loss": 3.3051,
"step": 2145
},
{
"epoch": 0.22,
"learning_rate": 8.899293342849444e-05,
"loss": 3.1761,
"step": 2150
},
{
"epoch": 0.22,
"learning_rate": 8.894125002292097e-05,
"loss": 3.4439,
"step": 2155
},
{
"epoch": 0.22,
"learning_rate": 8.888946064484514e-05,
"loss": 3.4385,
"step": 2160
},
{
"epoch": 0.22,
"learning_rate": 8.883756543520359e-05,
"loss": 3.3402,
"step": 2165
},
{
"epoch": 0.23,
"learning_rate": 8.8785564535221e-05,
"loss": 3.3721,
"step": 2170
},
{
"epoch": 0.23,
"learning_rate": 8.873345808640964e-05,
"loss": 3.3845,
"step": 2175
},
{
"epoch": 0.23,
"learning_rate": 8.86812462305691e-05,
"loss": 3.4275,
"step": 2180
},
{
"epoch": 0.23,
"learning_rate": 8.862892910978568e-05,
"loss": 3.4157,
"step": 2185
},
{
"epoch": 0.23,
"learning_rate": 8.857650686643226e-05,
"loss": 3.3359,
"step": 2190
},
{
"epoch": 0.23,
"learning_rate": 8.852397964316773e-05,
"loss": 3.3565,
"step": 2195
},
{
"epoch": 0.23,
"learning_rate": 8.847134758293669e-05,
"loss": 3.3251,
"step": 2200
},
{
"epoch": 0.23,
"learning_rate": 8.841861082896905e-05,
"loss": 3.4929,
"step": 2205
},
{
"epoch": 0.23,
"learning_rate": 8.836576952477959e-05,
"loss": 3.4097,
"step": 2210
},
{
"epoch": 0.23,
"learning_rate": 8.831282381416764e-05,
"loss": 3.2644,
"step": 2215
},
{
"epoch": 0.23,
"learning_rate": 8.825977384121666e-05,
"loss": 3.3957,
"step": 2220
},
{
"epoch": 0.23,
"learning_rate": 8.820661975029381e-05,
"loss": 3.3589,
"step": 2225
},
{
"epoch": 0.23,
"learning_rate": 8.81533616860496e-05,
"loss": 3.2689,
"step": 2230
},
{
"epoch": 0.23,
"learning_rate": 8.80999997934175e-05,
"loss": 3.4637,
"step": 2235
},
{
"epoch": 0.23,
"learning_rate": 8.804653421761355e-05,
"loss": 3.3386,
"step": 2240
},
{
"epoch": 0.23,
"learning_rate": 8.799296510413592e-05,
"loss": 3.4938,
"step": 2245
},
{
"epoch": 0.23,
"learning_rate": 8.793929259876453e-05,
"loss": 3.3582,
"step": 2250
},
{
"epoch": 0.23,
"learning_rate": 8.78855168475607e-05,
"loss": 3.3692,
"step": 2255
},
{
"epoch": 0.23,
"learning_rate": 8.783163799686669e-05,
"loss": 3.2405,
"step": 2260
},
{
"epoch": 0.24,
"learning_rate": 8.777765619330534e-05,
"loss": 3.226,
"step": 2265
},
{
"epoch": 0.24,
"learning_rate": 8.772357158377966e-05,
"loss": 3.2608,
"step": 2270
},
{
"epoch": 0.24,
"learning_rate": 8.766938431547242e-05,
"loss": 3.354,
"step": 2275
},
{
"epoch": 0.24,
"learning_rate": 8.761509453584579e-05,
"loss": 3.3514,
"step": 2280
},
{
"epoch": 0.24,
"learning_rate": 8.75607023926409e-05,
"loss": 3.4127,
"step": 2285
},
{
"epoch": 0.24,
"learning_rate": 8.75062080338774e-05,
"loss": 3.4036,
"step": 2290
},
{
"epoch": 0.24,
"learning_rate": 8.745161160785312e-05,
"loss": 3.3129,
"step": 2295
},
{
"epoch": 0.24,
"learning_rate": 8.739691326314373e-05,
"loss": 3.2615,
"step": 2300
},
{
"epoch": 0.24,
"learning_rate": 8.734211314860215e-05,
"loss": 3.3197,
"step": 2305
},
{
"epoch": 0.24,
"learning_rate": 8.728721141335829e-05,
"loss": 3.3757,
"step": 2310
},
{
"epoch": 0.24,
"learning_rate": 8.723220820681863e-05,
"loss": 3.3568,
"step": 2315
},
{
"epoch": 0.24,
"learning_rate": 8.717710367866579e-05,
"loss": 3.2747,
"step": 2320
},
{
"epoch": 0.24,
"learning_rate": 8.712189797885804e-05,
"loss": 3.3347,
"step": 2325
},
{
"epoch": 0.24,
"learning_rate": 8.706659125762905e-05,
"loss": 3.2547,
"step": 2330
},
{
"epoch": 0.24,
"learning_rate": 8.701118366548741e-05,
"loss": 3.2634,
"step": 2335
},
{
"epoch": 0.24,
"learning_rate": 8.695567535321616e-05,
"loss": 3.3791,
"step": 2340
},
{
"epoch": 0.24,
"learning_rate": 8.690006647187249e-05,
"loss": 3.3769,
"step": 2345
},
{
"epoch": 0.24,
"learning_rate": 8.684435717278723e-05,
"loss": 3.3526,
"step": 2350
},
{
"epoch": 0.24,
"learning_rate": 8.67885476075645e-05,
"loss": 3.4036,
"step": 2355
},
{
"epoch": 0.25,
"learning_rate": 8.673263792808126e-05,
"loss": 3.3095,
"step": 2360
},
{
"epoch": 0.25,
"learning_rate": 8.667662828648695e-05,
"loss": 3.1342,
"step": 2365
},
{
"epoch": 0.25,
"learning_rate": 8.662051883520304e-05,
"loss": 3.4006,
"step": 2370
},
{
"epoch": 0.25,
"learning_rate": 8.656430972692254e-05,
"loss": 3.2912,
"step": 2375
},
{
"epoch": 0.25,
"learning_rate": 8.650800111460978e-05,
"loss": 3.315,
"step": 2380
},
{
"epoch": 0.25,
"learning_rate": 8.645159315149978e-05,
"loss": 3.2575,
"step": 2385
},
{
"epoch": 0.25,
"learning_rate": 8.639508599109798e-05,
"loss": 3.4536,
"step": 2390
},
{
"epoch": 0.25,
"learning_rate": 8.633847978717972e-05,
"loss": 3.2418,
"step": 2395
},
{
"epoch": 0.25,
"learning_rate": 8.628177469378995e-05,
"loss": 3.3163,
"step": 2400
},
{
"epoch": 0.25,
"learning_rate": 8.622497086524266e-05,
"loss": 3.3075,
"step": 2405
},
{
"epoch": 0.25,
"learning_rate": 8.616806845612055e-05,
"loss": 3.3349,
"step": 2410
},
{
"epoch": 0.25,
"learning_rate": 8.61110676212746e-05,
"loss": 3.4049,
"step": 2415
},
{
"epoch": 0.25,
"learning_rate": 8.605396851582366e-05,
"loss": 3.3102,
"step": 2420
},
{
"epoch": 0.25,
"learning_rate": 8.599677129515397e-05,
"loss": 3.3275,
"step": 2425
},
{
"epoch": 0.25,
"learning_rate": 8.593947611491879e-05,
"loss": 3.3388,
"step": 2430
},
{
"epoch": 0.25,
"learning_rate": 8.588208313103798e-05,
"loss": 3.271,
"step": 2435
},
{
"epoch": 0.25,
"learning_rate": 8.582459249969752e-05,
"loss": 3.3382,
"step": 2440
},
{
"epoch": 0.25,
"learning_rate": 8.576700437734916e-05,
"loss": 3.3221,
"step": 2445
},
{
"epoch": 0.25,
"learning_rate": 8.570931892070992e-05,
"loss": 3.2905,
"step": 2450
},
{
"epoch": 0.26,
"learning_rate": 8.565153628676175e-05,
"loss": 3.3314,
"step": 2455
},
{
"epoch": 0.26,
"learning_rate": 8.559365663275101e-05,
"loss": 3.2394,
"step": 2460
},
{
"epoch": 0.26,
"learning_rate": 8.553568011618809e-05,
"loss": 3.3375,
"step": 2465
},
{
"epoch": 0.26,
"learning_rate": 8.547760689484698e-05,
"loss": 3.2946,
"step": 2470
},
{
"epoch": 0.26,
"learning_rate": 8.541943712676484e-05,
"loss": 3.3408,
"step": 2475
},
{
"epoch": 0.26,
"learning_rate": 8.536117097024158e-05,
"loss": 3.3747,
"step": 2480
},
{
"epoch": 0.26,
"learning_rate": 8.530280858383942e-05,
"loss": 3.3417,
"step": 2485
},
{
"epoch": 0.26,
"learning_rate": 8.524435012638241e-05,
"loss": 3.3327,
"step": 2490
},
{
"epoch": 0.26,
"learning_rate": 8.518579575695606e-05,
"loss": 3.2889,
"step": 2495
},
{
"epoch": 0.26,
"learning_rate": 8.512714563490694e-05,
"loss": 3.3986,
"step": 2500
},
{
"epoch": 0.26,
"learning_rate": 8.506839991984212e-05,
"loss": 3.2444,
"step": 2505
},
{
"epoch": 0.26,
"learning_rate": 8.500955877162886e-05,
"loss": 3.267,
"step": 2510
},
{
"epoch": 0.26,
"learning_rate": 8.495062235039411e-05,
"loss": 3.2563,
"step": 2515
},
{
"epoch": 0.26,
"learning_rate": 8.48915908165241e-05,
"loss": 3.2339,
"step": 2520
},
{
"epoch": 0.26,
"learning_rate": 8.483246433066389e-05,
"loss": 3.284,
"step": 2525
},
{
"epoch": 0.26,
"learning_rate": 8.477324305371692e-05,
"loss": 3.3038,
"step": 2530
},
{
"epoch": 0.26,
"learning_rate": 8.471392714684462e-05,
"loss": 3.3061,
"step": 2535
},
{
"epoch": 0.26,
"learning_rate": 8.465451677146592e-05,
"loss": 3.4221,
"step": 2540
},
{
"epoch": 0.26,
"learning_rate": 8.459501208925681e-05,
"loss": 3.3609,
"step": 2545
},
{
"epoch": 0.27,
"learning_rate": 8.453541326215e-05,
"loss": 3.214,
"step": 2550
},
{
"epoch": 0.27,
"learning_rate": 8.44757204523343e-05,
"loss": 3.3066,
"step": 2555
},
{
"epoch": 0.27,
"learning_rate": 8.441593382225434e-05,
"loss": 3.3019,
"step": 2560
},
{
"epoch": 0.27,
"learning_rate": 8.435605353461007e-05,
"loss": 3.1965,
"step": 2565
},
{
"epoch": 0.27,
"learning_rate": 8.42960797523563e-05,
"loss": 3.3415,
"step": 2570
},
{
"epoch": 0.27,
"learning_rate": 8.423601263870222e-05,
"loss": 3.258,
"step": 2575
},
{
"epoch": 0.27,
"learning_rate": 8.417585235711112e-05,
"loss": 3.3111,
"step": 2580
},
{
"epoch": 0.27,
"learning_rate": 8.41155990712997e-05,
"loss": 3.3403,
"step": 2585
},
{
"epoch": 0.27,
"learning_rate": 8.405525294523791e-05,
"loss": 3.3113,
"step": 2590
},
{
"epoch": 0.27,
"learning_rate": 8.39948141431482e-05,
"loss": 3.2371,
"step": 2595
},
{
"epoch": 0.27,
"learning_rate": 8.393428282950529e-05,
"loss": 3.224,
"step": 2600
},
{
"epoch": 0.27,
"learning_rate": 8.38736591690357e-05,
"loss": 3.2535,
"step": 2605
},
{
"epoch": 0.27,
"learning_rate": 8.381294332671714e-05,
"loss": 3.2127,
"step": 2610
},
{
"epoch": 0.27,
"learning_rate": 8.375213546777829e-05,
"loss": 3.3192,
"step": 2615
},
{
"epoch": 0.27,
"learning_rate": 8.369123575769822e-05,
"loss": 3.263,
"step": 2620
},
{
"epoch": 0.27,
"learning_rate": 8.36302443622059e-05,
"loss": 3.2654,
"step": 2625
},
{
"epoch": 0.27,
"learning_rate": 8.356916144727985e-05,
"loss": 3.1785,
"step": 2630
},
{
"epoch": 0.27,
"learning_rate": 8.350798717914766e-05,
"loss": 3.2524,
"step": 2635
},
{
"epoch": 0.27,
"learning_rate": 8.344672172428547e-05,
"loss": 3.2547,
"step": 2640
},
{
"epoch": 0.27,
"learning_rate": 8.338536524941763e-05,
"loss": 3.2824,
"step": 2645
},
{
"epoch": 0.28,
"learning_rate": 8.332391792151613e-05,
"loss": 3.2834,
"step": 2650
},
{
"epoch": 0.28,
"learning_rate": 8.326237990780028e-05,
"loss": 3.3773,
"step": 2655
},
{
"epoch": 0.28,
"learning_rate": 8.320075137573609e-05,
"loss": 3.2653,
"step": 2660
},
{
"epoch": 0.28,
"learning_rate": 8.313903249303595e-05,
"loss": 3.1791,
"step": 2665
},
{
"epoch": 0.28,
"learning_rate": 8.307722342765813e-05,
"loss": 3.2213,
"step": 2670
},
{
"epoch": 0.28,
"learning_rate": 8.30153243478063e-05,
"loss": 3.2951,
"step": 2675
},
{
"epoch": 0.28,
"learning_rate": 8.295333542192907e-05,
"loss": 3.1415,
"step": 2680
},
{
"epoch": 0.28,
"learning_rate": 8.289125681871961e-05,
"loss": 3.2968,
"step": 2685
},
{
"epoch": 0.28,
"learning_rate": 8.282908870711507e-05,
"loss": 3.299,
"step": 2690
},
{
"epoch": 0.28,
"learning_rate": 8.276683125629625e-05,
"loss": 3.2173,
"step": 2695
},
{
"epoch": 0.28,
"learning_rate": 8.270448463568697e-05,
"loss": 3.3282,
"step": 2700
},
{
"epoch": 0.28,
"learning_rate": 8.264204901495385e-05,
"loss": 3.3958,
"step": 2705
},
{
"epoch": 0.28,
"learning_rate": 8.257952456400559e-05,
"loss": 3.3657,
"step": 2710
},
{
"epoch": 0.28,
"learning_rate": 8.251691145299269e-05,
"loss": 3.2049,
"step": 2715
},
{
"epoch": 0.28,
"learning_rate": 8.245420985230691e-05,
"loss": 3.3265,
"step": 2720
},
{
"epoch": 0.28,
"learning_rate": 8.239141993258082e-05,
"loss": 3.2992,
"step": 2725
},
{
"epoch": 0.28,
"learning_rate": 8.23285418646873e-05,
"loss": 3.3062,
"step": 2730
},
{
"epoch": 0.28,
"learning_rate": 8.226557581973919e-05,
"loss": 3.2234,
"step": 2735
},
{
"epoch": 0.28,
"learning_rate": 8.22025219690887e-05,
"loss": 3.2928,
"step": 2740
},
{
"epoch": 0.29,
"learning_rate": 8.213938048432697e-05,
"loss": 3.2126,
"step": 2745
},
{
"epoch": 0.29,
"learning_rate": 8.207615153728365e-05,
"loss": 3.1843,
"step": 2750
},
{
"epoch": 0.29,
"learning_rate": 8.201283530002641e-05,
"loss": 3.1663,
"step": 2755
},
{
"epoch": 0.29,
"learning_rate": 8.194943194486045e-05,
"loss": 3.1322,
"step": 2760
},
{
"epoch": 0.29,
"learning_rate": 8.188594164432806e-05,
"loss": 3.2378,
"step": 2765
},
{
"epoch": 0.29,
"learning_rate": 8.18223645712081e-05,
"loss": 3.2049,
"step": 2770
},
{
"epoch": 0.29,
"learning_rate": 8.175870089851564e-05,
"loss": 3.3502,
"step": 2775
},
{
"epoch": 0.29,
"learning_rate": 8.169495079950138e-05,
"loss": 3.2646,
"step": 2780
},
{
"epoch": 0.29,
"learning_rate": 8.163111444765119e-05,
"loss": 3.3117,
"step": 2785
},
{
"epoch": 0.29,
"learning_rate": 8.156719201668567e-05,
"loss": 3.264,
"step": 2790
},
{
"epoch": 0.29,
"learning_rate": 8.150318368055973e-05,
"loss": 3.1868,
"step": 2795
},
{
"epoch": 0.29,
"learning_rate": 8.143908961346197e-05,
"loss": 3.218,
"step": 2800
},
{
"epoch": 0.29,
"learning_rate": 8.137490998981437e-05,
"loss": 3.1588,
"step": 2805
},
{
"epoch": 0.29,
"learning_rate": 8.13106449842717e-05,
"loss": 3.2438,
"step": 2810
},
{
"epoch": 0.29,
"learning_rate": 8.124629477172107e-05,
"loss": 3.2773,
"step": 2815
},
{
"epoch": 0.29,
"learning_rate": 8.118185952728152e-05,
"loss": 3.1023,
"step": 2820
},
{
"epoch": 0.29,
"learning_rate": 8.111733942630345e-05,
"loss": 3.2641,
"step": 2825
},
{
"epoch": 0.29,
"learning_rate": 8.105273464436816e-05,
"loss": 3.185,
"step": 2830
},
{
"epoch": 0.29,
"learning_rate": 8.098804535728746e-05,
"loss": 3.2098,
"step": 2835
},
{
"epoch": 0.3,
"learning_rate": 8.092327174110309e-05,
"loss": 3.2594,
"step": 2840
},
{
"epoch": 0.3,
"learning_rate": 8.085841397208628e-05,
"loss": 3.2184,
"step": 2845
},
{
"epoch": 0.3,
"learning_rate": 8.079347222673727e-05,
"loss": 3.2367,
"step": 2850
},
{
"epoch": 0.3,
"learning_rate": 8.072844668178485e-05,
"loss": 3.298,
"step": 2855
},
{
"epoch": 0.3,
"learning_rate": 8.066333751418583e-05,
"loss": 3.2417,
"step": 2860
},
{
"epoch": 0.3,
"learning_rate": 8.059814490112456e-05,
"loss": 3.1765,
"step": 2865
},
{
"epoch": 0.3,
"learning_rate": 8.053286902001256e-05,
"loss": 3.2471,
"step": 2870
},
{
"epoch": 0.3,
"learning_rate": 8.046751004848787e-05,
"loss": 3.1743,
"step": 2875
},
{
"epoch": 0.3,
"learning_rate": 8.040206816441469e-05,
"loss": 3.2923,
"step": 2880
},
{
"epoch": 0.3,
"learning_rate": 8.033654354588282e-05,
"loss": 3.2636,
"step": 2885
},
{
"epoch": 0.3,
"learning_rate": 8.027093637120725e-05,
"loss": 3.3305,
"step": 2890
},
{
"epoch": 0.3,
"learning_rate": 8.020524681892761e-05,
"loss": 3.2531,
"step": 2895
},
{
"epoch": 0.3,
"learning_rate": 8.013947506780768e-05,
"loss": 3.15,
"step": 2900
},
{
"epoch": 0.3,
"learning_rate": 8.007362129683501e-05,
"loss": 3.1532,
"step": 2905
},
{
"epoch": 0.3,
"learning_rate": 8.000768568522028e-05,
"loss": 3.1407,
"step": 2910
},
{
"epoch": 0.3,
"learning_rate": 7.994166841239692e-05,
"loss": 3.2168,
"step": 2915
},
{
"epoch": 0.3,
"learning_rate": 7.987556965802056e-05,
"loss": 3.1482,
"step": 2920
},
{
"epoch": 0.3,
"learning_rate": 7.980938960196861e-05,
"loss": 3.1502,
"step": 2925
},
{
"epoch": 0.3,
"learning_rate": 7.974312842433971e-05,
"loss": 3.1375,
"step": 2930
},
{
"epoch": 0.31,
"learning_rate": 7.967678630545326e-05,
"loss": 3.1295,
"step": 2935
},
{
"epoch": 0.31,
"learning_rate": 7.961036342584891e-05,
"loss": 3.2147,
"step": 2940
},
{
"epoch": 0.31,
"learning_rate": 7.954385996628611e-05,
"loss": 3.2256,
"step": 2945
},
{
"epoch": 0.31,
"learning_rate": 7.947727610774361e-05,
"loss": 3.272,
"step": 2950
},
{
"epoch": 0.31,
"learning_rate": 7.941061203141893e-05,
"loss": 3.2253,
"step": 2955
},
{
"epoch": 0.31,
"learning_rate": 7.934386791872787e-05,
"loss": 3.2288,
"step": 2960
},
{
"epoch": 0.31,
"learning_rate": 7.927704395130408e-05,
"loss": 3.2354,
"step": 2965
},
{
"epoch": 0.31,
"learning_rate": 7.921014031099852e-05,
"loss": 3.2107,
"step": 2970
},
{
"epoch": 0.31,
"learning_rate": 7.914315717987892e-05,
"loss": 3.2168,
"step": 2975
},
{
"epoch": 0.31,
"learning_rate": 7.907609474022939e-05,
"loss": 3.2445,
"step": 2980
},
{
"epoch": 0.31,
"learning_rate": 7.900895317454981e-05,
"loss": 3.1948,
"step": 2985
},
{
"epoch": 0.31,
"learning_rate": 7.894173266555545e-05,
"loss": 3.1794,
"step": 2990
},
{
"epoch": 0.31,
"learning_rate": 7.887443339617633e-05,
"loss": 3.1999,
"step": 2995
},
{
"epoch": 0.31,
"learning_rate": 7.880705554955689e-05,
"loss": 3.2405,
"step": 3000
},
{
"epoch": 0.31,
"learning_rate": 7.873959930905536e-05,
"loss": 3.2159,
"step": 3005
},
{
"epoch": 0.31,
"learning_rate": 7.867206485824332e-05,
"loss": 3.2134,
"step": 3010
},
{
"epoch": 0.31,
"learning_rate": 7.860445238090518e-05,
"loss": 3.2412,
"step": 3015
},
{
"epoch": 0.31,
"learning_rate": 7.853676206103768e-05,
"loss": 3.058,
"step": 3020
},
{
"epoch": 0.31,
"learning_rate": 7.846899408284942e-05,
"loss": 3.1502,
"step": 3025
},
{
"epoch": 0.31,
"learning_rate": 7.840114863076031e-05,
"loss": 3.2644,
"step": 3030
},
{
"epoch": 0.32,
"learning_rate": 7.833322588940111e-05,
"loss": 3.1445,
"step": 3035
},
{
"epoch": 0.32,
"learning_rate": 7.826522604361289e-05,
"loss": 3.2321,
"step": 3040
},
{
"epoch": 0.32,
"learning_rate": 7.819714927844658e-05,
"loss": 3.2214,
"step": 3045
},
{
"epoch": 0.32,
"learning_rate": 7.81289957791624e-05,
"loss": 3.1481,
"step": 3050
},
{
"epoch": 0.32,
"learning_rate": 7.806076573122942e-05,
"loss": 3.1775,
"step": 3055
},
{
"epoch": 0.32,
"learning_rate": 7.7992459320325e-05,
"loss": 3.1945,
"step": 3060
},
{
"epoch": 0.32,
"learning_rate": 7.792407673233433e-05,
"loss": 3.1555,
"step": 3065
},
{
"epoch": 0.32,
"learning_rate": 7.78556181533499e-05,
"loss": 3.1334,
"step": 3070
},
{
"epoch": 0.32,
"learning_rate": 7.778708376967096e-05,
"loss": 3.1462,
"step": 3075
},
{
"epoch": 0.32,
"learning_rate": 7.77184737678031e-05,
"loss": 3.2397,
"step": 3080
},
{
"epoch": 0.32,
"learning_rate": 7.76497883344577e-05,
"loss": 3.0973,
"step": 3085
},
{
"epoch": 0.32,
"learning_rate": 7.758102765655137e-05,
"loss": 3.1067,
"step": 3090
},
{
"epoch": 0.32,
"learning_rate": 7.751219192120549e-05,
"loss": 3.2301,
"step": 3095
},
{
"epoch": 0.32,
"learning_rate": 7.744328131574575e-05,
"loss": 3.1452,
"step": 3100
},
{
"epoch": 0.32,
"learning_rate": 7.737429602770152e-05,
"loss": 3.173,
"step": 3105
},
{
"epoch": 0.32,
"learning_rate": 7.730523624480546e-05,
"loss": 3.2283,
"step": 3110
},
{
"epoch": 0.32,
"learning_rate": 7.723610215499291e-05,
"loss": 3.2139,
"step": 3115
},
{
"epoch": 0.32,
"learning_rate": 7.716689394640148e-05,
"loss": 3.2393,
"step": 3120
},
{
"epoch": 0.32,
"learning_rate": 7.709761180737043e-05,
"loss": 3.1475,
"step": 3125
},
{
"epoch": 0.33,
"learning_rate": 7.702825592644025e-05,
"loss": 3.1161,
"step": 3130
},
{
"epoch": 0.33,
"learning_rate": 7.695882649235206e-05,
"loss": 3.2175,
"step": 3135
},
{
"epoch": 0.33,
"learning_rate": 7.68893236940472e-05,
"loss": 3.0892,
"step": 3140
},
{
"epoch": 0.33,
"learning_rate": 7.68197477206666e-05,
"loss": 3.2444,
"step": 3145
},
{
"epoch": 0.33,
"learning_rate": 7.675009876155038e-05,
"loss": 3.2191,
"step": 3150
},
{
"epoch": 0.33,
"learning_rate": 7.668037700623724e-05,
"loss": 3.2492,
"step": 3155
},
{
"epoch": 0.33,
"learning_rate": 7.661058264446403e-05,
"loss": 3.2339,
"step": 3160
},
{
"epoch": 0.33,
"learning_rate": 7.654071586616513e-05,
"loss": 3.1808,
"step": 3165
},
{
"epoch": 0.33,
"learning_rate": 7.647077686147202e-05,
"loss": 3.1602,
"step": 3170
},
{
"epoch": 0.33,
"learning_rate": 7.640076582071271e-05,
"loss": 3.1421,
"step": 3175
},
{
"epoch": 0.33,
"learning_rate": 7.63306829344113e-05,
"loss": 3.1525,
"step": 3180
},
{
"epoch": 0.33,
"learning_rate": 7.626052839328734e-05,
"loss": 3.1926,
"step": 3185
},
{
"epoch": 0.33,
"learning_rate": 7.619030238825541e-05,
"loss": 3.1067,
"step": 3190
},
{
"epoch": 0.33,
"learning_rate": 7.612000511042457e-05,
"loss": 3.148,
"step": 3195
},
{
"epoch": 0.33,
"learning_rate": 7.604963675109786e-05,
"loss": 3.1369,
"step": 3200
},
{
"epoch": 0.33,
"learning_rate": 7.597919750177168e-05,
"loss": 3.0461,
"step": 3205
},
{
"epoch": 0.33,
"learning_rate": 7.590868755413543e-05,
"loss": 3.127,
"step": 3210
},
{
"epoch": 0.33,
"learning_rate": 7.583810710007087e-05,
"loss": 3.2704,
"step": 3215
},
{
"epoch": 0.33,
"learning_rate": 7.576745633165159e-05,
"loss": 3.2469,
"step": 3220
},
{
"epoch": 0.34,
"learning_rate": 7.569673544114264e-05,
"loss": 3.1398,
"step": 3225
},
{
"epoch": 0.34,
"learning_rate": 7.562594462099977e-05,
"loss": 3.1535,
"step": 3230
},
{
"epoch": 0.34,
"learning_rate": 7.555508406386912e-05,
"loss": 3.0938,
"step": 3235
},
{
"epoch": 0.34,
"learning_rate": 7.548415396258657e-05,
"loss": 3.1336,
"step": 3240
},
{
"epoch": 0.34,
"learning_rate": 7.541315451017725e-05,
"loss": 3.2197,
"step": 3245
},
{
"epoch": 0.34,
"learning_rate": 7.534208589985507e-05,
"loss": 3.2466,
"step": 3250
},
{
"epoch": 0.34,
"learning_rate": 7.527094832502206e-05,
"loss": 3.1807,
"step": 3255
},
{
"epoch": 0.34,
"learning_rate": 7.519974197926801e-05,
"loss": 3.1906,
"step": 3260
},
{
"epoch": 0.34,
"learning_rate": 7.512846705636975e-05,
"loss": 3.113,
"step": 3265
},
{
"epoch": 0.34,
"learning_rate": 7.505712375029085e-05,
"loss": 3.0678,
"step": 3270
},
{
"epoch": 0.34,
"learning_rate": 7.498571225518096e-05,
"loss": 3.1234,
"step": 3275
},
{
"epoch": 0.34,
"learning_rate": 7.491423276537519e-05,
"loss": 3.2494,
"step": 3280
},
{
"epoch": 0.34,
"learning_rate": 7.48426854753938e-05,
"loss": 3.1463,
"step": 3285
},
{
"epoch": 0.34,
"learning_rate": 7.477107057994149e-05,
"loss": 3.1431,
"step": 3290
},
{
"epoch": 0.34,
"learning_rate": 7.469938827390697e-05,
"loss": 3.1355,
"step": 3295
},
{
"epoch": 0.34,
"learning_rate": 7.46276387523624e-05,
"loss": 3.1921,
"step": 3300
},
{
"epoch": 0.34,
"learning_rate": 7.455582221056282e-05,
"loss": 3.1494,
"step": 3305
},
{
"epoch": 0.34,
"learning_rate": 7.448393884394573e-05,
"loss": 3.2458,
"step": 3310
},
{
"epoch": 0.34,
"learning_rate": 7.441198884813039e-05,
"loss": 3.0447,
"step": 3315
},
{
"epoch": 0.35,
"learning_rate": 7.433997241891742e-05,
"loss": 3.1718,
"step": 3320
},
{
"epoch": 0.35,
"learning_rate": 7.426788975228828e-05,
"loss": 3.1271,
"step": 3325
},
{
"epoch": 0.35,
"learning_rate": 7.41957410444046e-05,
"loss": 3.0893,
"step": 3330
},
{
"epoch": 0.35,
"learning_rate": 7.412352649160779e-05,
"loss": 3.1669,
"step": 3335
},
{
"epoch": 0.35,
"learning_rate": 7.405124629041838e-05,
"loss": 3.0632,
"step": 3340
},
{
"epoch": 0.35,
"learning_rate": 7.397890063753565e-05,
"loss": 3.1882,
"step": 3345
},
{
"epoch": 0.35,
"learning_rate": 7.390648972983693e-05,
"loss": 3.1121,
"step": 3350
},
{
"epoch": 0.35,
"learning_rate": 7.383401376437716e-05,
"loss": 3.1172,
"step": 3355
},
{
"epoch": 0.35,
"learning_rate": 7.376147293838827e-05,
"loss": 3.0802,
"step": 3360
},
{
"epoch": 0.35,
"learning_rate": 7.368886744927875e-05,
"loss": 3.013,
"step": 3365
},
{
"epoch": 0.35,
"learning_rate": 7.361619749463306e-05,
"loss": 3.0762,
"step": 3370
},
{
"epoch": 0.35,
"learning_rate": 7.354346327221106e-05,
"loss": 3.2118,
"step": 3375
},
{
"epoch": 0.35,
"learning_rate": 7.347066497994756e-05,
"loss": 3.2867,
"step": 3380
},
{
"epoch": 0.35,
"learning_rate": 7.339780281595166e-05,
"loss": 3.1968,
"step": 3385
},
{
"epoch": 0.35,
"learning_rate": 7.332487697850633e-05,
"loss": 3.0677,
"step": 3390
},
{
"epoch": 0.35,
"learning_rate": 7.325188766606776e-05,
"loss": 3.2039,
"step": 3395
},
{
"epoch": 0.35,
"learning_rate": 7.317883507726496e-05,
"loss": 3.2133,
"step": 3400
},
{
"epoch": 0.35,
"learning_rate": 7.310571941089906e-05,
"loss": 3.054,
"step": 3405
},
{
"epoch": 0.35,
"learning_rate": 7.303254086594287e-05,
"loss": 3.2114,
"step": 3410
},
{
"epoch": 0.35,
"learning_rate": 7.295929964154033e-05,
"loss": 3.074,
"step": 3415
},
{
"epoch": 0.36,
"learning_rate": 7.288599593700592e-05,
"loss": 3.1789,
"step": 3420
},
{
"epoch": 0.36,
"learning_rate": 7.281262995182422e-05,
"loss": 3.07,
"step": 3425
},
{
"epoch": 0.36,
"learning_rate": 7.273920188564921e-05,
"loss": 3.166,
"step": 3430
},
{
"epoch": 0.36,
"learning_rate": 7.266571193830387e-05,
"loss": 3.1676,
"step": 3435
},
{
"epoch": 0.36,
"learning_rate": 7.259216030977954e-05,
"loss": 3.158,
"step": 3440
},
{
"epoch": 0.36,
"learning_rate": 7.251854720023545e-05,
"loss": 3.1018,
"step": 3445
},
{
"epoch": 0.36,
"learning_rate": 7.244487280999813e-05,
"loss": 3.1207,
"step": 3450
},
{
"epoch": 0.36,
"learning_rate": 7.237113733956087e-05,
"loss": 3.1879,
"step": 3455
},
{
"epoch": 0.36,
"learning_rate": 7.22973409895832e-05,
"loss": 3.0686,
"step": 3460
},
{
"epoch": 0.36,
"learning_rate": 7.222348396089029e-05,
"loss": 3.1069,
"step": 3465
},
{
"epoch": 0.36,
"learning_rate": 7.214956645447244e-05,
"loss": 3.1542,
"step": 3470
},
{
"epoch": 0.36,
"learning_rate": 7.207558867148458e-05,
"loss": 3.0487,
"step": 3475
},
{
"epoch": 0.36,
"learning_rate": 7.200155081324565e-05,
"loss": 3.1688,
"step": 3480
},
{
"epoch": 0.36,
"learning_rate": 7.192745308123802e-05,
"loss": 3.1539,
"step": 3485
},
{
"epoch": 0.36,
"learning_rate": 7.185329567710706e-05,
"loss": 3.1241,
"step": 3490
},
{
"epoch": 0.36,
"learning_rate": 7.177907880266051e-05,
"loss": 3.164,
"step": 3495
},
{
"epoch": 0.36,
"learning_rate": 7.170480265986798e-05,
"loss": 3.1179,
"step": 3500
},
{
"epoch": 0.36,
"learning_rate": 7.16304674508603e-05,
"loss": 3.1395,
"step": 3505
},
{
"epoch": 0.36,
"learning_rate": 7.155607337792912e-05,
"loss": 3.1721,
"step": 3510
},
{
"epoch": 0.37,
"learning_rate": 7.148162064352619e-05,
"loss": 3.2053,
"step": 3515
},
{
"epoch": 0.37,
"learning_rate": 7.140710945026296e-05,
"loss": 3.1553,
"step": 3520
},
{
"epoch": 0.37,
"learning_rate": 7.133254000090997e-05,
"loss": 3.129,
"step": 3525
},
{
"epoch": 0.37,
"learning_rate": 7.125791249839626e-05,
"loss": 3.2526,
"step": 3530
},
{
"epoch": 0.37,
"learning_rate": 7.118322714580888e-05,
"loss": 3.1089,
"step": 3535
},
{
"epoch": 0.37,
"learning_rate": 7.110848414639229e-05,
"loss": 3.2093,
"step": 3540
},
{
"epoch": 0.37,
"learning_rate": 7.103368370354783e-05,
"loss": 3.2533,
"step": 3545
},
{
"epoch": 0.37,
"learning_rate": 7.095882602083322e-05,
"loss": 3.1246,
"step": 3550
},
{
"epoch": 0.37,
"learning_rate": 7.088391130196184e-05,
"loss": 3.0657,
"step": 3555
},
{
"epoch": 0.37,
"learning_rate": 7.080893975080237e-05,
"loss": 3.2354,
"step": 3560
},
{
"epoch": 0.37,
"learning_rate": 7.07339115713781e-05,
"loss": 3.1765,
"step": 3565
},
{
"epoch": 0.37,
"learning_rate": 7.065882696786646e-05,
"loss": 3.1773,
"step": 3570
},
{
"epoch": 0.37,
"learning_rate": 7.058368614459845e-05,
"loss": 3.1519,
"step": 3575
},
{
"epoch": 0.37,
"learning_rate": 7.0508489306058e-05,
"loss": 3.0885,
"step": 3580
},
{
"epoch": 0.37,
"learning_rate": 7.043323665688153e-05,
"loss": 3.1815,
"step": 3585
},
{
"epoch": 0.37,
"learning_rate": 7.035792840185728e-05,
"loss": 3.0248,
"step": 3590
},
{
"epoch": 0.37,
"learning_rate": 7.02825647459249e-05,
"loss": 3.1001,
"step": 3595
},
{
"epoch": 0.37,
"learning_rate": 7.020714589417471e-05,
"loss": 3.1015,
"step": 3600
},
{
"epoch": 0.37,
"learning_rate": 7.013167205184732e-05,
"loss": 3.085,
"step": 3605
},
{
"epoch": 0.38,
"learning_rate": 7.005614342433291e-05,
"loss": 3.1053,
"step": 3610
},
{
"epoch": 0.38,
"learning_rate": 6.998056021717083e-05,
"loss": 3.1589,
"step": 3615
},
{
"epoch": 0.38,
"learning_rate": 6.990492263604889e-05,
"loss": 3.1577,
"step": 3620
},
{
"epoch": 0.38,
"learning_rate": 6.982923088680293e-05,
"loss": 3.2004,
"step": 3625
},
{
"epoch": 0.38,
"learning_rate": 6.975348517541614e-05,
"loss": 3.1613,
"step": 3630
},
{
"epoch": 0.38,
"learning_rate": 6.967768570801862e-05,
"loss": 3.1487,
"step": 3635
},
{
"epoch": 0.38,
"learning_rate": 6.960183269088669e-05,
"loss": 3.1031,
"step": 3640
},
{
"epoch": 0.38,
"learning_rate": 6.952592633044247e-05,
"loss": 3.1244,
"step": 3645
},
{
"epoch": 0.38,
"learning_rate": 6.944996683325319e-05,
"loss": 3.2323,
"step": 3650
},
{
"epoch": 0.38,
"learning_rate": 6.937395440603073e-05,
"loss": 3.1502,
"step": 3655
},
{
"epoch": 0.38,
"learning_rate": 6.929788925563095e-05,
"loss": 3.1587,
"step": 3660
},
{
"epoch": 0.38,
"learning_rate": 6.922177158905325e-05,
"loss": 3.0308,
"step": 3665
},
{
"epoch": 0.38,
"learning_rate": 6.914560161343991e-05,
"loss": 3.0768,
"step": 3670
},
{
"epoch": 0.38,
"learning_rate": 6.906937953607555e-05,
"loss": 3.0908,
"step": 3675
},
{
"epoch": 0.38,
"learning_rate": 6.899310556438663e-05,
"loss": 3.1149,
"step": 3680
},
{
"epoch": 0.38,
"learning_rate": 6.891677990594075e-05,
"loss": 3.0646,
"step": 3685
},
{
"epoch": 0.38,
"learning_rate": 6.884040276844627e-05,
"loss": 3.1447,
"step": 3690
},
{
"epoch": 0.38,
"learning_rate": 6.876397435975152e-05,
"loss": 3.1473,
"step": 3695
},
{
"epoch": 0.38,
"learning_rate": 6.86874948878445e-05,
"loss": 3.1373,
"step": 3700
},
{
"epoch": 0.39,
"learning_rate": 6.861096456085206e-05,
"loss": 2.9776,
"step": 3705
},
{
"epoch": 0.39,
"learning_rate": 6.853438358703946e-05,
"loss": 3.1365,
"step": 3710
},
{
"epoch": 0.39,
"learning_rate": 6.845775217480981e-05,
"loss": 3.164,
"step": 3715
},
{
"epoch": 0.39,
"learning_rate": 6.838107053270348e-05,
"loss": 3.0909,
"step": 3720
},
{
"epoch": 0.39,
"learning_rate": 6.830433886939754e-05,
"loss": 3.0687,
"step": 3725
},
{
"epoch": 0.39,
"learning_rate": 6.822755739370518e-05,
"loss": 3.146,
"step": 3730
},
{
"epoch": 0.39,
"learning_rate": 6.815072631457512e-05,
"loss": 3.0139,
"step": 3735
},
{
"epoch": 0.39,
"learning_rate": 6.80738458410911e-05,
"loss": 3.1475,
"step": 3740
},
{
"epoch": 0.39,
"learning_rate": 6.799691618247124e-05,
"loss": 3.1542,
"step": 3745
},
{
"epoch": 0.39,
"learning_rate": 6.791993754806757e-05,
"loss": 3.084,
"step": 3750
},
{
"epoch": 0.39,
"learning_rate": 6.784291014736534e-05,
"loss": 3.0876,
"step": 3755
},
{
"epoch": 0.39,
"learning_rate": 6.776583418998255e-05,
"loss": 3.1758,
"step": 3760
},
{
"epoch": 0.39,
"learning_rate": 6.768870988566934e-05,
"loss": 3.1975,
"step": 3765
},
{
"epoch": 0.39,
"learning_rate": 6.761153744430739e-05,
"loss": 3.0681,
"step": 3770
},
{
"epoch": 0.39,
"learning_rate": 6.753431707590939e-05,
"loss": 3.0481,
"step": 3775
},
{
"epoch": 0.39,
"learning_rate": 6.745704899061843e-05,
"loss": 3.1188,
"step": 3780
},
{
"epoch": 0.39,
"learning_rate": 6.737973339870753e-05,
"loss": 3.105,
"step": 3785
},
{
"epoch": 0.39,
"learning_rate": 6.730237051057892e-05,
"loss": 2.9708,
"step": 3790
},
{
"epoch": 0.39,
"learning_rate": 6.722496053676356e-05,
"loss": 3.0351,
"step": 3795
},
{
"epoch": 0.39,
"learning_rate": 6.714750368792055e-05,
"loss": 3.1342,
"step": 3800
},
{
"epoch": 0.4,
"learning_rate": 6.707000017483655e-05,
"loss": 3.2341,
"step": 3805
},
{
"epoch": 0.4,
"learning_rate": 6.699245020842524e-05,
"loss": 3.0404,
"step": 3810
},
{
"epoch": 0.4,
"learning_rate": 6.691485399972665e-05,
"loss": 3.1263,
"step": 3815
},
{
"epoch": 0.4,
"learning_rate": 6.68372117599067e-05,
"loss": 3.1401,
"step": 3820
},
{
"epoch": 0.4,
"learning_rate": 6.675952370025656e-05,
"loss": 3.0714,
"step": 3825
},
{
"epoch": 0.4,
"learning_rate": 6.66817900321921e-05,
"loss": 3.095,
"step": 3830
},
{
"epoch": 0.4,
"learning_rate": 6.660401096725331e-05,
"loss": 2.995,
"step": 3835
},
{
"epoch": 0.4,
"learning_rate": 6.652618671710369e-05,
"loss": 3.0723,
"step": 3840
},
{
"epoch": 0.4,
"learning_rate": 6.644831749352974e-05,
"loss": 3.1983,
"step": 3845
},
{
"epoch": 0.4,
"learning_rate": 6.637040350844035e-05,
"loss": 3.2112,
"step": 3850
},
{
"epoch": 0.4,
"learning_rate": 6.629244497386619e-05,
"loss": 3.0518,
"step": 3855
},
{
"epoch": 0.4,
"learning_rate": 6.62144421019592e-05,
"loss": 2.9698,
"step": 3860
},
{
"epoch": 0.4,
"learning_rate": 6.613639510499193e-05,
"loss": 3.0943,
"step": 3865
},
{
"epoch": 0.4,
"learning_rate": 6.605830419535702e-05,
"loss": 3.0889,
"step": 3870
},
{
"epoch": 0.4,
"learning_rate": 6.598016958556667e-05,
"loss": 3.1251,
"step": 3875
},
{
"epoch": 0.4,
"learning_rate": 6.590199148825197e-05,
"loss": 3.1085,
"step": 3880
},
{
"epoch": 0.4,
"learning_rate": 6.582377011616231e-05,
"loss": 3.0247,
"step": 3885
},
{
"epoch": 0.4,
"learning_rate": 6.574550568216491e-05,
"loss": 3.1328,
"step": 3890
},
{
"epoch": 0.4,
"learning_rate": 6.566719839924412e-05,
"loss": 3.1151,
"step": 3895
},
{
"epoch": 0.41,
"learning_rate": 6.558884848050094e-05,
"loss": 3.04,
"step": 3900
},
{
"epoch": 0.41,
"learning_rate": 6.55104561391524e-05,
"loss": 3.1058,
"step": 3905
},
{
"epoch": 0.41,
"learning_rate": 6.543202158853091e-05,
"loss": 3.0598,
"step": 3910
},
{
"epoch": 0.41,
"learning_rate": 6.535354504208385e-05,
"loss": 3.0513,
"step": 3915
},
{
"epoch": 0.41,
"learning_rate": 6.527502671337278e-05,
"loss": 3.0771,
"step": 3920
},
{
"epoch": 0.41,
"learning_rate": 6.519646681607305e-05,
"loss": 3.1076,
"step": 3925
},
{
"epoch": 0.41,
"learning_rate": 6.511786556397308e-05,
"loss": 3.1108,
"step": 3930
},
{
"epoch": 0.41,
"learning_rate": 6.503922317097385e-05,
"loss": 3.1839,
"step": 3935
},
{
"epoch": 0.41,
"learning_rate": 6.496053985108829e-05,
"loss": 3.1131,
"step": 3940
},
{
"epoch": 0.41,
"learning_rate": 6.48818158184407e-05,
"loss": 3.0072,
"step": 3945
},
{
"epoch": 0.41,
"learning_rate": 6.480305128726618e-05,
"loss": 3.0962,
"step": 3950
},
{
"epoch": 0.41,
"learning_rate": 6.472424647191007e-05,
"loss": 3.1292,
"step": 3955
},
{
"epoch": 0.41,
"learning_rate": 6.464540158682731e-05,
"loss": 3.0864,
"step": 3960
},
{
"epoch": 0.41,
"learning_rate": 6.456651684658188e-05,
"loss": 3.0322,
"step": 3965
},
{
"epoch": 0.41,
"learning_rate": 6.44875924658462e-05,
"loss": 3.1003,
"step": 3970
},
{
"epoch": 0.41,
"learning_rate": 6.44086286594006e-05,
"loss": 3.1056,
"step": 3975
},
{
"epoch": 0.41,
"learning_rate": 6.432962564213272e-05,
"loss": 3.1058,
"step": 3980
},
{
"epoch": 0.41,
"learning_rate": 6.425058362903684e-05,
"loss": 2.9969,
"step": 3985
},
{
"epoch": 0.41,
"learning_rate": 6.417150283521341e-05,
"loss": 3.07,
"step": 3990
},
{
"epoch": 0.42,
"learning_rate": 6.40923834758684e-05,
"loss": 3.038,
"step": 3995
},
{
"epoch": 0.42,
"learning_rate": 6.401322576631276e-05,
"loss": 3.01,
"step": 4000
},
{
"epoch": 0.42,
"learning_rate": 6.393402992196176e-05,
"loss": 3.0808,
"step": 4005
},
{
"epoch": 0.42,
"learning_rate": 6.385479615833445e-05,
"loss": 3.0561,
"step": 4010
},
{
"epoch": 0.42,
"learning_rate": 6.37755246910531e-05,
"loss": 2.95,
"step": 4015
},
{
"epoch": 0.42,
"learning_rate": 6.369621573584256e-05,
"loss": 3.0461,
"step": 4020
},
{
"epoch": 0.42,
"learning_rate": 6.361686950852971e-05,
"loss": 3.0327,
"step": 4025
},
{
"epoch": 0.42,
"learning_rate": 6.353748622504288e-05,
"loss": 2.9993,
"step": 4030
},
{
"epoch": 0.42,
"learning_rate": 6.345806610141121e-05,
"loss": 3.0402,
"step": 4035
},
{
"epoch": 0.42,
"learning_rate": 6.33786093537641e-05,
"loss": 3.0468,
"step": 4040
},
{
"epoch": 0.42,
"learning_rate": 6.329911619833063e-05,
"loss": 3.1014,
"step": 4045
},
{
"epoch": 0.42,
"learning_rate": 6.321958685143894e-05,
"loss": 3.0086,
"step": 4050
},
{
"epoch": 0.42,
"learning_rate": 6.314002152951569e-05,
"loss": 3.1249,
"step": 4055
},
{
"epoch": 0.42,
"learning_rate": 6.30604204490854e-05,
"loss": 3.0183,
"step": 4060
},
{
"epoch": 0.42,
"learning_rate": 6.298078382676993e-05,
"loss": 3.0314,
"step": 4065
},
{
"epoch": 0.42,
"learning_rate": 6.290111187928784e-05,
"loss": 3.0055,
"step": 4070
},
{
"epoch": 0.42,
"learning_rate": 6.282140482345387e-05,
"loss": 2.9902,
"step": 4075
},
{
"epoch": 0.42,
"learning_rate": 6.274166287617824e-05,
"loss": 3.0762,
"step": 4080
},
{
"epoch": 0.42,
"learning_rate": 6.266188625446615e-05,
"loss": 3.1289,
"step": 4085
},
{
"epoch": 0.43,
"learning_rate": 6.258207517541717e-05,
"loss": 3.0808,
"step": 4090
},
{
"epoch": 0.43,
"learning_rate": 6.250222985622461e-05,
"loss": 3.0976,
"step": 4095
},
{
"epoch": 0.43,
"learning_rate": 6.2422350514175e-05,
"loss": 3.1466,
"step": 4100
},
{
"epoch": 0.43,
"learning_rate": 6.234243736664742e-05,
"loss": 3.0128,
"step": 4105
},
{
"epoch": 0.43,
"learning_rate": 6.226249063111299e-05,
"loss": 3.0281,
"step": 4110
},
{
"epoch": 0.43,
"learning_rate": 6.218251052513418e-05,
"loss": 2.986,
"step": 4115
},
{
"epoch": 0.43,
"learning_rate": 6.210249726636431e-05,
"loss": 3.1206,
"step": 4120
},
{
"epoch": 0.43,
"learning_rate": 6.202245107254693e-05,
"loss": 3.0731,
"step": 4125
},
{
"epoch": 0.43,
"learning_rate": 6.194237216151516e-05,
"loss": 3.036,
"step": 4130
},
{
"epoch": 0.43,
"learning_rate": 6.186226075119123e-05,
"loss": 3.1311,
"step": 4135
},
{
"epoch": 0.43,
"learning_rate": 6.178211705958572e-05,
"loss": 3.1126,
"step": 4140
},
{
"epoch": 0.43,
"learning_rate": 6.170194130479717e-05,
"loss": 2.972,
"step": 4145
},
{
"epoch": 0.43,
"learning_rate": 6.16217337050113e-05,
"loss": 3.0654,
"step": 4150
},
{
"epoch": 0.43,
"learning_rate": 6.15414944785005e-05,
"loss": 3.0943,
"step": 4155
},
{
"epoch": 0.43,
"learning_rate": 6.146122384362326e-05,
"loss": 3.0756,
"step": 4160
},
{
"epoch": 0.43,
"learning_rate": 6.138092201882349e-05,
"loss": 3.0197,
"step": 4165
},
{
"epoch": 0.43,
"learning_rate": 6.130058922263e-05,
"loss": 3.1162,
"step": 4170
},
{
"epoch": 0.43,
"learning_rate": 6.122022567365592e-05,
"loss": 3.087,
"step": 4175
},
{
"epoch": 0.43,
"learning_rate": 6.113983159059803e-05,
"loss": 3.0941,
"step": 4180
},
{
"epoch": 0.43,
"learning_rate": 6.105940719223621e-05,
"loss": 2.9955,
"step": 4185
},
{
"epoch": 0.44,
"learning_rate": 6.097895269743287e-05,
"loss": 3.0133,
"step": 4190
},
{
"epoch": 0.44,
"learning_rate": 6.089846832513224e-05,
"loss": 3.0481,
"step": 4195
},
{
"epoch": 0.44,
"learning_rate": 6.0817954294359955e-05,
"loss": 2.8821,
"step": 4200
},
{
"epoch": 0.44,
"learning_rate": 6.0737410824222316e-05,
"loss": 3.035,
"step": 4205
},
{
"epoch": 0.44,
"learning_rate": 6.0656838133905727e-05,
"loss": 2.9994,
"step": 4210
},
{
"epoch": 0.44,
"learning_rate": 6.0576236442676104e-05,
"loss": 3.1524,
"step": 4215
},
{
"epoch": 0.44,
"learning_rate": 6.049560596987833e-05,
"loss": 3.0584,
"step": 4220
},
{
"epoch": 0.44,
"learning_rate": 6.04149469349356e-05,
"loss": 2.9864,
"step": 4225
},
{
"epoch": 0.44,
"learning_rate": 6.03342595573488e-05,
"loss": 3.0731,
"step": 4230
},
{
"epoch": 0.44,
"learning_rate": 6.0253544056695976e-05,
"loss": 3.0851,
"step": 4235
},
{
"epoch": 0.44,
"learning_rate": 6.01728006526317e-05,
"loss": 3.0273,
"step": 4240
},
{
"epoch": 0.44,
"learning_rate": 6.009202956488647e-05,
"loss": 3.09,
"step": 4245
},
{
"epoch": 0.44,
"learning_rate": 6.001123101326613e-05,
"loss": 3.0294,
"step": 4250
},
{
"epoch": 0.44,
"learning_rate": 5.9930405217651274e-05,
"loss": 3.0173,
"step": 4255
},
{
"epoch": 0.44,
"learning_rate": 5.984955239799661e-05,
"loss": 3.1223,
"step": 4260
},
{
"epoch": 0.44,
"learning_rate": 5.976867277433043e-05,
"loss": 3.0614,
"step": 4265
},
{
"epoch": 0.44,
"learning_rate": 5.968776656675389e-05,
"loss": 2.9893,
"step": 4270
},
{
"epoch": 0.44,
"learning_rate": 5.9606833995440584e-05,
"loss": 3.0115,
"step": 4275
},
{
"epoch": 0.44,
"learning_rate": 5.952587528063579e-05,
"loss": 3.1049,
"step": 4280
},
{
"epoch": 0.45,
"learning_rate": 5.944489064265595e-05,
"loss": 3.1301,
"step": 4285
},
{
"epoch": 0.45,
"learning_rate": 5.936388030188804e-05,
"loss": 3.06,
"step": 4290
},
{
"epoch": 0.45,
"learning_rate": 5.9282844478789014e-05,
"loss": 2.9623,
"step": 4295
},
{
"epoch": 0.45,
"learning_rate": 5.920178339388513e-05,
"loss": 3.0338,
"step": 4300
},
{
"epoch": 0.45,
"learning_rate": 5.912069726777144e-05,
"loss": 3.0864,
"step": 4305
},
{
"epoch": 0.45,
"learning_rate": 5.9039586321111096e-05,
"loss": 3.0815,
"step": 4310
},
{
"epoch": 0.45,
"learning_rate": 5.895845077463482e-05,
"loss": 3.1435,
"step": 4315
},
{
"epoch": 0.45,
"learning_rate": 5.887729084914024e-05,
"loss": 2.9858,
"step": 4320
},
{
"epoch": 0.45,
"learning_rate": 5.8796106765491424e-05,
"loss": 3.0735,
"step": 4325
},
{
"epoch": 0.45,
"learning_rate": 5.8714898744618055e-05,
"loss": 3.0907,
"step": 4330
},
{
"epoch": 0.45,
"learning_rate": 5.8633667007515056e-05,
"loss": 3.0273,
"step": 4335
},
{
"epoch": 0.45,
"learning_rate": 5.8552411775241845e-05,
"loss": 3.0662,
"step": 4340
},
{
"epoch": 0.45,
"learning_rate": 5.8471133268921794e-05,
"loss": 3.0199,
"step": 4345
},
{
"epoch": 0.45,
"learning_rate": 5.8389831709741605e-05,
"loss": 3.0126,
"step": 4350
},
{
"epoch": 0.45,
"learning_rate": 5.830850731895071e-05,
"loss": 3.1145,
"step": 4355
},
{
"epoch": 0.45,
"learning_rate": 5.822716031786069e-05,
"loss": 2.9883,
"step": 4360
},
{
"epoch": 0.45,
"learning_rate": 5.8145790927844614e-05,
"loss": 3.0954,
"step": 4365
},
{
"epoch": 0.45,
"learning_rate": 5.806439937033654e-05,
"loss": 3.0301,
"step": 4370
},
{
"epoch": 0.45,
"learning_rate": 5.798298586683084e-05,
"loss": 3.0994,
"step": 4375
},
{
"epoch": 0.46,
"learning_rate": 5.790155063888155e-05,
"loss": 3.0692,
"step": 4380
},
{
"epoch": 0.46,
"learning_rate": 5.78200939081019e-05,
"loss": 3.0307,
"step": 4385
},
{
"epoch": 0.46,
"learning_rate": 5.7738615896163586e-05,
"loss": 3.057,
"step": 4390
},
{
"epoch": 0.46,
"learning_rate": 5.765711682479621e-05,
"loss": 3.0394,
"step": 4395
},
{
"epoch": 0.46,
"learning_rate": 5.7575596915786754e-05,
"loss": 3.0503,
"step": 4400
},
{
"epoch": 0.46,
"learning_rate": 5.749405639097882e-05,
"loss": 3.004,
"step": 4405
},
{
"epoch": 0.46,
"learning_rate": 5.74124954722722e-05,
"loss": 3.0699,
"step": 4410
},
{
"epoch": 0.46,
"learning_rate": 5.7330914381622104e-05,
"loss": 3.1242,
"step": 4415
},
{
"epoch": 0.46,
"learning_rate": 5.724931334103867e-05,
"loss": 3.0656,
"step": 4420
},
{
"epoch": 0.46,
"learning_rate": 5.716769257258634e-05,
"loss": 3.0341,
"step": 4425
},
{
"epoch": 0.46,
"learning_rate": 5.708605229838323e-05,
"loss": 3.0247,
"step": 4430
},
{
"epoch": 0.46,
"learning_rate": 5.700439274060053e-05,
"loss": 3.0948,
"step": 4435
},
{
"epoch": 0.46,
"learning_rate": 5.692271412146192e-05,
"loss": 2.9225,
"step": 4440
},
{
"epoch": 0.46,
"learning_rate": 5.6841016663242916e-05,
"loss": 2.972,
"step": 4445
},
{
"epoch": 0.46,
"learning_rate": 5.675930058827039e-05,
"loss": 3.0104,
"step": 4450
},
{
"epoch": 0.46,
"learning_rate": 5.66775661189218e-05,
"loss": 3.049,
"step": 4455
},
{
"epoch": 0.46,
"learning_rate": 5.659581347762466e-05,
"loss": 2.9567,
"step": 4460
},
{
"epoch": 0.46,
"learning_rate": 5.651404288685598e-05,
"loss": 3.1455,
"step": 4465
},
{
"epoch": 0.46,
"learning_rate": 5.643225456914156e-05,
"loss": 3.0998,
"step": 4470
},
{
"epoch": 0.47,
"learning_rate": 5.635044874705551e-05,
"loss": 3.0398,
"step": 4475
},
{
"epoch": 0.47,
"learning_rate": 5.626862564321952e-05,
"loss": 3.0931,
"step": 4480
},
{
"epoch": 0.47,
"learning_rate": 5.618678548030235e-05,
"loss": 3.0014,
"step": 4485
},
{
"epoch": 0.47,
"learning_rate": 5.610492848101915e-05,
"loss": 2.9121,
"step": 4490
},
{
"epoch": 0.47,
"learning_rate": 5.602305486813089e-05,
"loss": 3.0461,
"step": 4495
},
{
"epoch": 0.47,
"learning_rate": 5.5941164864443785e-05,
"loss": 3.1352,
"step": 4500
},
{
"epoch": 0.47,
"learning_rate": 5.585925869280861e-05,
"loss": 2.9522,
"step": 4505
},
{
"epoch": 0.47,
"learning_rate": 5.577733657612019e-05,
"loss": 3.0116,
"step": 4510
},
{
"epoch": 0.47,
"learning_rate": 5.5695398737316685e-05,
"loss": 3.0293,
"step": 4515
},
{
"epoch": 0.47,
"learning_rate": 5.561344539937907e-05,
"loss": 2.9686,
"step": 4520
},
{
"epoch": 0.47,
"learning_rate": 5.55314767853305e-05,
"loss": 3.0624,
"step": 4525
},
{
"epoch": 0.47,
"learning_rate": 5.5449493118235715e-05,
"loss": 3.0677,
"step": 4530
},
{
"epoch": 0.47,
"learning_rate": 5.536749462120039e-05,
"loss": 3.0206,
"step": 4535
},
{
"epoch": 0.47,
"learning_rate": 5.528548151737056e-05,
"loss": 3.0829,
"step": 4540
},
{
"epoch": 0.47,
"learning_rate": 5.520345402993203e-05,
"loss": 3.1213,
"step": 4545
},
{
"epoch": 0.47,
"learning_rate": 5.512141238210974e-05,
"loss": 3.0561,
"step": 4550
},
{
"epoch": 0.47,
"learning_rate": 5.5039356797167155e-05,
"loss": 3.1096,
"step": 4555
},
{
"epoch": 0.47,
"learning_rate": 5.4957287498405686e-05,
"loss": 2.9436,
"step": 4560
},
{
"epoch": 0.47,
"learning_rate": 5.487520470916404e-05,
"loss": 3.0278,
"step": 4565
},
{
"epoch": 0.47,
"learning_rate": 5.479310865281766e-05,
"loss": 3.0438,
"step": 4570
},
{
"epoch": 0.48,
"learning_rate": 5.471099955277809e-05,
"loss": 3.0986,
"step": 4575
},
{
"epoch": 0.48,
"learning_rate": 5.462887763249235e-05,
"loss": 2.9591,
"step": 4580
},
{
"epoch": 0.48,
"learning_rate": 5.454674311544235e-05,
"loss": 2.9862,
"step": 4585
},
{
"epoch": 0.48,
"learning_rate": 5.446459622514433e-05,
"loss": 3.0033,
"step": 4590
},
{
"epoch": 0.48,
"learning_rate": 5.4382437185148125e-05,
"loss": 2.9845,
"step": 4595
},
{
"epoch": 0.48,
"learning_rate": 5.430026621903669e-05,
"loss": 2.9696,
"step": 4600
},
{
"epoch": 0.48,
"learning_rate": 5.4218083550425416e-05,
"loss": 2.9929,
"step": 4605
},
{
"epoch": 0.48,
"learning_rate": 5.4135889402961547e-05,
"loss": 3.1634,
"step": 4610
},
{
"epoch": 0.48,
"learning_rate": 5.405368400032357e-05,
"loss": 3.0643,
"step": 4615
},
{
"epoch": 0.48,
"learning_rate": 5.3971467566220546e-05,
"loss": 3.0154,
"step": 4620
},
{
"epoch": 0.48,
"learning_rate": 5.3889240324391645e-05,
"loss": 3.0445,
"step": 4625
},
{
"epoch": 0.48,
"learning_rate": 5.380700249860538e-05,
"loss": 3.0543,
"step": 4630
},
{
"epoch": 0.48,
"learning_rate": 5.372475431265912e-05,
"loss": 2.9665,
"step": 4635
},
{
"epoch": 0.48,
"learning_rate": 5.3642495990378386e-05,
"loss": 3.0134,
"step": 4640
},
{
"epoch": 0.48,
"learning_rate": 5.356022775561627e-05,
"loss": 3.0128,
"step": 4645
},
{
"epoch": 0.48,
"learning_rate": 5.347794983225289e-05,
"loss": 3.0196,
"step": 4650
},
{
"epoch": 0.48,
"learning_rate": 5.339566244419471e-05,
"loss": 3.1539,
"step": 4655
},
{
"epoch": 0.48,
"learning_rate": 5.331336581537395e-05,
"loss": 2.9392,
"step": 4660
},
{
"epoch": 0.48,
"learning_rate": 5.323106016974795e-05,
"loss": 3.0064,
"step": 4665
},
{
"epoch": 0.49,
"learning_rate": 5.314874573129862e-05,
"loss": 2.903,
"step": 4670
},
{
"epoch": 0.49,
"learning_rate": 5.3066422724031794e-05,
"loss": 3.0323,
"step": 4675
},
{
"epoch": 0.49,
"learning_rate": 5.298409137197663e-05,
"loss": 3.0275,
"step": 4680
},
{
"epoch": 0.49,
"learning_rate": 5.2901751899184984e-05,
"loss": 2.9456,
"step": 4685
},
{
"epoch": 0.49,
"learning_rate": 5.2819404529730796e-05,
"loss": 3.0856,
"step": 4690
},
{
"epoch": 0.49,
"learning_rate": 5.273704948770951e-05,
"loss": 3.0298,
"step": 4695
},
{
"epoch": 0.49,
"learning_rate": 5.265468699723748e-05,
"loss": 2.8966,
"step": 4700
},
{
"epoch": 0.49,
"learning_rate": 5.2572317282451286e-05,
"loss": 2.9719,
"step": 4705
},
{
"epoch": 0.49,
"learning_rate": 5.2489940567507165e-05,
"loss": 2.9296,
"step": 4710
},
{
"epoch": 0.49,
"learning_rate": 5.240755707658046e-05,
"loss": 2.9683,
"step": 4715
},
{
"epoch": 0.49,
"learning_rate": 5.232516703386489e-05,
"loss": 2.9552,
"step": 4720
},
{
"epoch": 0.49,
"learning_rate": 5.2242770663572027e-05,
"loss": 3.03,
"step": 4725
},
{
"epoch": 0.49,
"learning_rate": 5.2160368189930694e-05,
"loss": 2.9519,
"step": 4730
},
{
"epoch": 0.49,
"learning_rate": 5.2077959837186263e-05,
"loss": 3.116,
"step": 4735
},
{
"epoch": 0.49,
"learning_rate": 5.1995545829600166e-05,
"loss": 3.0781,
"step": 4740
},
{
"epoch": 0.49,
"learning_rate": 5.191312639144915e-05,
"loss": 3.0392,
"step": 4745
},
{
"epoch": 0.49,
"learning_rate": 5.1830701747024825e-05,
"loss": 3.0529,
"step": 4750
},
{
"epoch": 0.49,
"learning_rate": 5.1748272120632925e-05,
"loss": 2.9831,
"step": 4755
},
{
"epoch": 0.49,
"learning_rate": 5.1665837736592736e-05,
"loss": 3.0534,
"step": 4760
},
{
"epoch": 0.5,
"learning_rate": 5.15833988192365e-05,
"loss": 2.9701,
"step": 4765
},
{
"epoch": 0.5,
"learning_rate": 5.15009555929088e-05,
"loss": 2.9424,
"step": 4770
},
{
"epoch": 0.5,
"learning_rate": 5.141850828196595e-05,
"loss": 3.005,
"step": 4775
},
{
"epoch": 0.5,
"learning_rate": 5.133605711077536e-05,
"loss": 3.0597,
"step": 4780
},
{
"epoch": 0.5,
"learning_rate": 5.1253602303714945e-05,
"loss": 3.0784,
"step": 4785
},
{
"epoch": 0.5,
"learning_rate": 5.1171144085172553e-05,
"loss": 3.0135,
"step": 4790
},
{
"epoch": 0.5,
"learning_rate": 5.108868267954526e-05,
"loss": 3.0173,
"step": 4795
},
{
"epoch": 0.5,
"learning_rate": 5.1006218311238884e-05,
"loss": 2.9668,
"step": 4800
},
{
"epoch": 0.5,
"learning_rate": 5.092375120466724e-05,
"loss": 2.9921,
"step": 4805
},
{
"epoch": 0.5,
"learning_rate": 5.084128158425161e-05,
"loss": 2.9828,
"step": 4810
},
{
"epoch": 0.5,
"learning_rate": 5.075880967442014e-05,
"loss": 2.9582,
"step": 4815
},
{
"epoch": 0.5,
"learning_rate": 5.067633569960718e-05,
"loss": 3.0719,
"step": 4820
},
{
"epoch": 0.5,
"learning_rate": 5.059385988425274e-05,
"loss": 2.9479,
"step": 4825
},
{
"epoch": 0.5,
"learning_rate": 5.0511382452801794e-05,
"loss": 3.0166,
"step": 4830
},
{
"epoch": 0.5,
"learning_rate": 5.0428903629703737e-05,
"loss": 3.0477,
"step": 4835
},
{
"epoch": 0.5,
"learning_rate": 5.034642363941174e-05,
"loss": 3.0076,
"step": 4840
},
{
"epoch": 0.5,
"learning_rate": 5.026394270638215e-05,
"loss": 3.0304,
"step": 4845
},
{
"epoch": 0.5,
"learning_rate": 5.0181461055073907e-05,
"loss": 3.0762,
"step": 4850
},
{
"epoch": 0.5,
"learning_rate": 5.009897890994788e-05,
"loss": 2.9893,
"step": 4855
},
{
"epoch": 0.51,
"learning_rate": 5.001649649546626e-05,
"loss": 2.9915,
"step": 4860
},
{
"epoch": 0.51,
"learning_rate": 4.993401403609203e-05,
"loss": 3.0271,
"step": 4865
},
{
"epoch": 0.51,
"learning_rate": 4.9851531756288234e-05,
"loss": 3.0551,
"step": 4870
},
{
"epoch": 0.51,
"learning_rate": 4.976904988051748e-05,
"loss": 2.9895,
"step": 4875
},
{
"epoch": 0.51,
"learning_rate": 4.968656863324124e-05,
"loss": 3.0064,
"step": 4880
},
{
"epoch": 0.51,
"learning_rate": 4.96040882389193e-05,
"loss": 3.074,
"step": 4885
},
{
"epoch": 0.51,
"learning_rate": 4.952160892200909e-05,
"loss": 3.1097,
"step": 4890
},
{
"epoch": 0.51,
"learning_rate": 4.9439130906965144e-05,
"loss": 2.9972,
"step": 4895
},
{
"epoch": 0.51,
"learning_rate": 4.9356654418238444e-05,
"loss": 2.9557,
"step": 4900
},
{
"epoch": 0.51,
"learning_rate": 4.927417968027581e-05,
"loss": 2.9784,
"step": 4905
},
{
"epoch": 0.51,
"learning_rate": 4.91917069175193e-05,
"loss": 2.9535,
"step": 4910
},
{
"epoch": 0.51,
"learning_rate": 4.910923635440558e-05,
"loss": 2.94,
"step": 4915
},
{
"epoch": 0.51,
"learning_rate": 4.9026768215365386e-05,
"loss": 3.0684,
"step": 4920
},
{
"epoch": 0.51,
"learning_rate": 4.8944302724822793e-05,
"loss": 3.0555,
"step": 4925
},
{
"epoch": 0.51,
"learning_rate": 4.886184010719471e-05,
"loss": 2.9557,
"step": 4930
},
{
"epoch": 0.51,
"learning_rate": 4.877938058689022e-05,
"loss": 2.9694,
"step": 4935
},
{
"epoch": 0.51,
"learning_rate": 4.8696924388309946e-05,
"loss": 2.9223,
"step": 4940
},
{
"epoch": 0.51,
"learning_rate": 4.86144717358455e-05,
"loss": 2.9208,
"step": 4945
},
{
"epoch": 0.51,
"learning_rate": 4.853202285387886e-05,
"loss": 3.019,
"step": 4950
},
{
"epoch": 0.51,
"learning_rate": 4.84495779667817e-05,
"loss": 3.043,
"step": 4955
},
{
"epoch": 0.52,
"learning_rate": 4.836713729891487e-05,
"loss": 2.9888,
"step": 4960
},
{
"epoch": 0.52,
"learning_rate": 4.8284701074627694e-05,
"loss": 2.9286,
"step": 4965
},
{
"epoch": 0.52,
"learning_rate": 4.820226951825741e-05,
"loss": 2.9686,
"step": 4970
},
{
"epoch": 0.52,
"learning_rate": 4.81198428541286e-05,
"loss": 3.1102,
"step": 4975
},
{
"epoch": 0.52,
"learning_rate": 4.8037421306552485e-05,
"loss": 3.0098,
"step": 4980
},
{
"epoch": 0.52,
"learning_rate": 4.795500509982636e-05,
"loss": 2.9617,
"step": 4985
},
{
"epoch": 0.52,
"learning_rate": 4.7872594458233e-05,
"loss": 3.0371,
"step": 4990
},
{
"epoch": 0.52,
"learning_rate": 4.779018960604007e-05,
"loss": 3.0059,
"step": 4995
},
{
"epoch": 0.52,
"learning_rate": 4.770779076749941e-05,
"loss": 3.022,
"step": 5000
},
{
"epoch": 0.52,
"learning_rate": 4.7625398166846565e-05,
"loss": 2.9216,
"step": 5005
},
{
"epoch": 0.52,
"learning_rate": 4.754301202830006e-05,
"loss": 3.0249,
"step": 5010
},
{
"epoch": 0.52,
"learning_rate": 4.746063257606082e-05,
"loss": 2.8526,
"step": 5015
},
{
"epoch": 0.52,
"learning_rate": 4.737826003431162e-05,
"loss": 3.0094,
"step": 5020
},
{
"epoch": 0.52,
"learning_rate": 4.729589462721642e-05,
"loss": 3.1334,
"step": 5025
},
{
"epoch": 0.52,
"learning_rate": 4.7213536578919734e-05,
"loss": 3.0408,
"step": 5030
},
{
"epoch": 0.52,
"learning_rate": 4.7131186113546076e-05,
"loss": 2.9893,
"step": 5035
},
{
"epoch": 0.52,
"learning_rate": 4.704884345519932e-05,
"loss": 3.0477,
"step": 5040
},
{
"epoch": 0.52,
"learning_rate": 4.6966508827962075e-05,
"loss": 2.9936,
"step": 5045
},
{
"epoch": 0.52,
"learning_rate": 4.688418245589514e-05,
"loss": 3.034,
"step": 5050
},
{
"epoch": 0.53,
"learning_rate": 4.6801864563036805e-05,
"loss": 2.982,
"step": 5055
},
{
"epoch": 0.53,
"learning_rate": 4.6719555373402294e-05,
"loss": 3.0017,
"step": 5060
},
{
"epoch": 0.53,
"learning_rate": 4.6637255110983154e-05,
"loss": 3.0727,
"step": 5065
},
{
"epoch": 0.53,
"learning_rate": 4.655496399974664e-05,
"loss": 2.9249,
"step": 5070
},
{
"epoch": 0.53,
"learning_rate": 4.6472682263635106e-05,
"loss": 2.974,
"step": 5075
},
{
"epoch": 0.53,
"learning_rate": 4.63904101265654e-05,
"loss": 3.0252,
"step": 5080
},
{
"epoch": 0.53,
"learning_rate": 4.630814781242821e-05,
"loss": 2.8929,
"step": 5085
},
{
"epoch": 0.53,
"learning_rate": 4.6225895545087535e-05,
"loss": 2.9902,
"step": 5090
},
{
"epoch": 0.53,
"learning_rate": 4.614365354838e-05,
"loss": 3.0188,
"step": 5095
},
{
"epoch": 0.53,
"learning_rate": 4.60614220461143e-05,
"loss": 2.9119,
"step": 5100
},
{
"epoch": 0.53,
"learning_rate": 4.597920126207058e-05,
"loss": 2.952,
"step": 5105
},
{
"epoch": 0.53,
"learning_rate": 4.5896991419999796e-05,
"loss": 2.893,
"step": 5110
},
{
"epoch": 0.53,
"learning_rate": 4.581479274362314e-05,
"loss": 2.9398,
"step": 5115
},
{
"epoch": 0.53,
"learning_rate": 4.5732605456631386e-05,
"loss": 2.93,
"step": 5120
},
{
"epoch": 0.53,
"learning_rate": 4.565042978268438e-05,
"loss": 2.9845,
"step": 5125
},
{
"epoch": 0.53,
"learning_rate": 4.55682659454103e-05,
"loss": 2.9244,
"step": 5130
},
{
"epoch": 0.53,
"learning_rate": 4.5486114168405156e-05,
"loss": 3.0629,
"step": 5135
},
{
"epoch": 0.53,
"learning_rate": 4.5403974675232104e-05,
"loss": 2.9369,
"step": 5140
},
{
"epoch": 0.53,
"learning_rate": 4.532184768942089e-05,
"loss": 2.9826,
"step": 5145
},
{
"epoch": 0.54,
"learning_rate": 4.523973343446723e-05,
"loss": 3.097,
"step": 5150
},
{
"epoch": 0.54,
"learning_rate": 4.5157632133832185e-05,
"loss": 3.043,
"step": 5155
},
{
"epoch": 0.54,
"learning_rate": 4.5075544010941564e-05,
"loss": 3.0134,
"step": 5160
},
{
"epoch": 0.54,
"learning_rate": 4.499346928918531e-05,
"loss": 2.988,
"step": 5165
},
{
"epoch": 0.54,
"learning_rate": 4.491140819191691e-05,
"loss": 3.057,
"step": 5170
},
{
"epoch": 0.54,
"learning_rate": 4.482936094245274e-05,
"loss": 2.9222,
"step": 5175
},
{
"epoch": 0.54,
"learning_rate": 4.4747327764071543e-05,
"loss": 3.0549,
"step": 5180
},
{
"epoch": 0.54,
"learning_rate": 4.466530888001374e-05,
"loss": 3.0851,
"step": 5185
},
{
"epoch": 0.54,
"learning_rate": 4.458330451348085e-05,
"loss": 3.0138,
"step": 5190
},
{
"epoch": 0.54,
"learning_rate": 4.450131488763488e-05,
"loss": 3.032,
"step": 5195
},
{
"epoch": 0.54,
"learning_rate": 4.441934022559776e-05,
"loss": 3.0443,
"step": 5200
},
{
"epoch": 0.54,
"learning_rate": 4.433738075045065e-05,
"loss": 3.0077,
"step": 5205
},
{
"epoch": 0.54,
"learning_rate": 4.425543668523341e-05,
"loss": 3.1165,
"step": 5210
},
{
"epoch": 0.54,
"learning_rate": 4.417350825294395e-05,
"loss": 2.9383,
"step": 5215
},
{
"epoch": 0.54,
"learning_rate": 4.409159567653765e-05,
"loss": 2.9451,
"step": 5220
},
{
"epoch": 0.54,
"learning_rate": 4.400969917892674e-05,
"loss": 2.9542,
"step": 5225
},
{
"epoch": 0.54,
"learning_rate": 4.392781898297969e-05,
"loss": 3.0694,
"step": 5230
},
{
"epoch": 0.54,
"learning_rate": 4.384595531152061e-05,
"loss": 2.9181,
"step": 5235
},
{
"epoch": 0.54,
"learning_rate": 4.376410838732863e-05,
"loss": 2.9679,
"step": 5240
},
{
"epoch": 0.55,
"learning_rate": 4.368227843313731e-05,
"loss": 2.9548,
"step": 5245
},
{
"epoch": 0.55,
"learning_rate": 4.360046567163403e-05,
"loss": 3.0431,
"step": 5250
},
{
"epoch": 0.55,
"learning_rate": 4.351867032545939e-05,
"loss": 2.9602,
"step": 5255
},
{
"epoch": 0.55,
"learning_rate": 4.3436892617206605e-05,
"loss": 2.908,
"step": 5260
},
{
"epoch": 0.55,
"learning_rate": 4.335513276942086e-05,
"loss": 2.9248,
"step": 5265
},
{
"epoch": 0.55,
"learning_rate": 4.3273391004598745e-05,
"loss": 2.9548,
"step": 5270
},
{
"epoch": 0.55,
"learning_rate": 4.319166754518768e-05,
"loss": 2.9168,
"step": 5275
},
{
"epoch": 0.55,
"learning_rate": 4.3109962613585214e-05,
"loss": 2.8516,
"step": 5280
},
{
"epoch": 0.55,
"learning_rate": 4.302827643213851e-05,
"loss": 2.9677,
"step": 5285
},
{
"epoch": 0.55,
"learning_rate": 4.294660922314367e-05,
"loss": 3.1188,
"step": 5290
},
{
"epoch": 0.55,
"learning_rate": 4.286496120884522e-05,
"loss": 2.9361,
"step": 5295
},
{
"epoch": 0.55,
"learning_rate": 4.278333261143541e-05,
"loss": 2.9715,
"step": 5300
},
{
"epoch": 0.55,
"learning_rate": 4.270172365305367e-05,
"loss": 2.9817,
"step": 5305
},
{
"epoch": 0.55,
"learning_rate": 4.2620134555785976e-05,
"loss": 2.878,
"step": 5310
},
{
"epoch": 0.55,
"learning_rate": 4.2538565541664245e-05,
"loss": 3.0183,
"step": 5315
},
{
"epoch": 0.55,
"learning_rate": 4.245701683266575e-05,
"loss": 2.9741,
"step": 5320
},
{
"epoch": 0.55,
"learning_rate": 4.237548865071251e-05,
"loss": 3.0651,
"step": 5325
},
{
"epoch": 0.55,
"learning_rate": 4.229398121767069e-05,
"loss": 3.0065,
"step": 5330
},
{
"epoch": 0.55,
"learning_rate": 4.221249475534997e-05,
"loss": 2.9313,
"step": 5335
},
{
"epoch": 0.55,
"learning_rate": 4.2131029485502995e-05,
"loss": 3.0811,
"step": 5340
},
{
"epoch": 0.56,
"learning_rate": 4.204958562982468e-05,
"loss": 2.9344,
"step": 5345
},
{
"epoch": 0.56,
"learning_rate": 4.196816340995172e-05,
"loss": 2.9898,
"step": 5350
},
{
"epoch": 0.56,
"learning_rate": 4.1886763047461925e-05,
"loss": 3.1129,
"step": 5355
},
{
"epoch": 0.56,
"learning_rate": 4.180538476387359e-05,
"loss": 3.0285,
"step": 5360
},
{
"epoch": 0.56,
"learning_rate": 4.172402878064495e-05,
"loss": 3.0211,
"step": 5365
},
{
"epoch": 0.56,
"learning_rate": 4.164269531917354e-05,
"loss": 3.0428,
"step": 5370
},
{
"epoch": 0.56,
"learning_rate": 4.156138460079563e-05,
"loss": 2.9048,
"step": 5375
},
{
"epoch": 0.56,
"learning_rate": 4.14800968467856e-05,
"loss": 2.9882,
"step": 5380
},
{
"epoch": 0.56,
"learning_rate": 4.1398832278355296e-05,
"loss": 3.0682,
"step": 5385
},
{
"epoch": 0.56,
"learning_rate": 4.131759111665349e-05,
"loss": 3.0709,
"step": 5390
},
{
"epoch": 0.56,
"learning_rate": 4.123637358276526e-05,
"loss": 2.898,
"step": 5395
},
{
"epoch": 0.56,
"learning_rate": 4.1155179897711374e-05,
"loss": 2.9611,
"step": 5400
},
{
"epoch": 0.56,
"learning_rate": 4.107401028244773e-05,
"loss": 3.0413,
"step": 5405
},
{
"epoch": 0.56,
"learning_rate": 4.0992864957864694e-05,
"loss": 2.9746,
"step": 5410
},
{
"epoch": 0.56,
"learning_rate": 4.0911744144786523e-05,
"loss": 3.0358,
"step": 5415
},
{
"epoch": 0.56,
"learning_rate": 4.083064806397078e-05,
"loss": 2.9667,
"step": 5420
},
{
"epoch": 0.56,
"learning_rate": 4.074957693610775e-05,
"loss": 3.0153,
"step": 5425
},
{
"epoch": 0.56,
"learning_rate": 4.0668530981819776e-05,
"loss": 2.8481,
"step": 5430
},
{
"epoch": 0.56,
"learning_rate": 4.0587510421660704e-05,
"loss": 2.9695,
"step": 5435
},
{
"epoch": 0.57,
"learning_rate": 4.050651547611526e-05,
"loss": 3.0039,
"step": 5440
},
{
"epoch": 0.57,
"learning_rate": 4.04255463655985e-05,
"loss": 2.9089,
"step": 5445
},
{
"epoch": 0.57,
"learning_rate": 4.0344603310455154e-05,
"loss": 3.101,
"step": 5450
},
{
"epoch": 0.57,
"learning_rate": 4.026368653095906e-05,
"loss": 2.9567,
"step": 5455
},
{
"epoch": 0.57,
"learning_rate": 4.0182796247312526e-05,
"loss": 3.0554,
"step": 5460
},
{
"epoch": 0.57,
"learning_rate": 4.010193267964576e-05,
"loss": 2.981,
"step": 5465
},
{
"epoch": 0.57,
"learning_rate": 4.0021096048016297e-05,
"loss": 3.0242,
"step": 5470
},
{
"epoch": 0.57,
"learning_rate": 3.99402865724083e-05,
"loss": 2.975,
"step": 5475
},
{
"epoch": 0.57,
"learning_rate": 3.985950447273214e-05,
"loss": 2.9443,
"step": 5480
},
{
"epoch": 0.57,
"learning_rate": 3.9778749968823586e-05,
"loss": 2.9989,
"step": 5485
},
{
"epoch": 0.57,
"learning_rate": 3.969802328044337e-05,
"loss": 2.9051,
"step": 5490
},
{
"epoch": 0.57,
"learning_rate": 3.9617324627276487e-05,
"loss": 2.8675,
"step": 5495
},
{
"epoch": 0.57,
"learning_rate": 3.9536654228931685e-05,
"loss": 2.8715,
"step": 5500
},
{
"epoch": 0.57,
"learning_rate": 3.945601230494079e-05,
"loss": 3.0438,
"step": 5505
},
{
"epoch": 0.57,
"learning_rate": 3.937539907475814e-05,
"loss": 3.0196,
"step": 5510
},
{
"epoch": 0.57,
"learning_rate": 3.9294814757759985e-05,
"loss": 2.9482,
"step": 5515
},
{
"epoch": 0.57,
"learning_rate": 3.921425957324392e-05,
"loss": 3.0152,
"step": 5520
},
{
"epoch": 0.57,
"learning_rate": 3.913373374042822e-05,
"loss": 3.037,
"step": 5525
},
{
"epoch": 0.57,
"learning_rate": 3.905323747845133e-05,
"loss": 2.8559,
"step": 5530
},
{
"epoch": 0.58,
"learning_rate": 3.8972771006371193e-05,
"loss": 2.9082,
"step": 5535
},
{
"epoch": 0.58,
"learning_rate": 3.8892334543164675e-05,
"loss": 2.9559,
"step": 5540
},
{
"epoch": 0.58,
"learning_rate": 3.8811928307727005e-05,
"loss": 2.977,
"step": 5545
},
{
"epoch": 0.58,
"learning_rate": 3.8731552518871115e-05,
"loss": 2.9108,
"step": 5550
},
{
"epoch": 0.58,
"learning_rate": 3.8651207395327135e-05,
"loss": 2.9273,
"step": 5555
},
{
"epoch": 0.58,
"learning_rate": 3.8570893155741686e-05,
"loss": 3.0549,
"step": 5560
},
{
"epoch": 0.58,
"learning_rate": 3.849061001867739e-05,
"loss": 3.0003,
"step": 5565
},
{
"epoch": 0.58,
"learning_rate": 3.8410358202612184e-05,
"loss": 2.7616,
"step": 5570
},
{
"epoch": 0.58,
"learning_rate": 3.8330137925938825e-05,
"loss": 3.0067,
"step": 5575
},
{
"epoch": 0.58,
"learning_rate": 3.82499494069642e-05,
"loss": 2.9336,
"step": 5580
},
{
"epoch": 0.58,
"learning_rate": 3.816979286390877e-05,
"loss": 3.0131,
"step": 5585
},
{
"epoch": 0.58,
"learning_rate": 3.808966851490599e-05,
"loss": 2.9417,
"step": 5590
},
{
"epoch": 0.58,
"learning_rate": 3.800957657800173e-05,
"loss": 2.9149,
"step": 5595
},
{
"epoch": 0.58,
"learning_rate": 3.792951727115359e-05,
"loss": 2.8807,
"step": 5600
},
{
"epoch": 0.58,
"learning_rate": 3.784949081223046e-05,
"loss": 2.9032,
"step": 5605
},
{
"epoch": 0.58,
"learning_rate": 3.7769497419011766e-05,
"loss": 2.9831,
"step": 5610
},
{
"epoch": 0.58,
"learning_rate": 3.768953730918699e-05,
"loss": 2.976,
"step": 5615
},
{
"epoch": 0.58,
"learning_rate": 3.760961070035501e-05,
"loss": 2.9412,
"step": 5620
},
{
"epoch": 0.58,
"learning_rate": 3.752971781002355e-05,
"loss": 2.9446,
"step": 5625
},
{
"epoch": 0.59,
"learning_rate": 3.74498588556086e-05,
"loss": 2.9199,
"step": 5630
},
{
"epoch": 0.59,
"learning_rate": 3.737003405443376e-05,
"loss": 2.9459,
"step": 5635
},
{
"epoch": 0.59,
"learning_rate": 3.72902436237297e-05,
"loss": 2.9315,
"step": 5640
},
{
"epoch": 0.59,
"learning_rate": 3.7210487780633564e-05,
"loss": 2.9288,
"step": 5645
},
{
"epoch": 0.59,
"learning_rate": 3.713076674218836e-05,
"loss": 2.9751,
"step": 5650
},
{
"epoch": 0.59,
"learning_rate": 3.70510807253424e-05,
"loss": 2.9068,
"step": 5655
},
{
"epoch": 0.59,
"learning_rate": 3.6971429946948665e-05,
"loss": 2.9021,
"step": 5660
},
{
"epoch": 0.59,
"learning_rate": 3.6891814623764254e-05,
"loss": 3.0042,
"step": 5665
},
{
"epoch": 0.59,
"learning_rate": 3.6812234972449774e-05,
"loss": 3.0421,
"step": 5670
},
{
"epoch": 0.59,
"learning_rate": 3.673269120956875e-05,
"loss": 3.0051,
"step": 5675
},
{
"epoch": 0.59,
"learning_rate": 3.6653183551587086e-05,
"loss": 2.9744,
"step": 5680
},
{
"epoch": 0.59,
"learning_rate": 3.657371221487239e-05,
"loss": 2.9316,
"step": 5685
},
{
"epoch": 0.59,
"learning_rate": 3.6494277415693424e-05,
"loss": 3.0179,
"step": 5690
},
{
"epoch": 0.59,
"learning_rate": 3.6414879370219526e-05,
"loss": 2.9394,
"step": 5695
},
{
"epoch": 0.59,
"learning_rate": 3.633551829452003e-05,
"loss": 2.8471,
"step": 5700
},
{
"epoch": 0.59,
"learning_rate": 3.625619440456366e-05,
"loss": 2.9791,
"step": 5705
},
{
"epoch": 0.59,
"learning_rate": 3.617690791621793e-05,
"loss": 2.9971,
"step": 5710
},
{
"epoch": 0.59,
"learning_rate": 3.609765904524857e-05,
"loss": 2.9016,
"step": 5715
},
{
"epoch": 0.59,
"learning_rate": 3.601844800731895e-05,
"loss": 2.9531,
"step": 5720
},
{
"epoch": 0.59,
"learning_rate": 3.59392750179895e-05,
"loss": 2.9115,
"step": 5725
},
{
"epoch": 0.6,
"learning_rate": 3.586014029271708e-05,
"loss": 2.9262,
"step": 5730
},
{
"epoch": 0.6,
"learning_rate": 3.5781044046854416e-05,
"loss": 2.9121,
"step": 5735
},
{
"epoch": 0.6,
"learning_rate": 3.5701986495649555e-05,
"loss": 2.9112,
"step": 5740
},
{
"epoch": 0.6,
"learning_rate": 3.562296785424518e-05,
"loss": 3.0054,
"step": 5745
},
{
"epoch": 0.6,
"learning_rate": 3.554398833767816e-05,
"loss": 2.9217,
"step": 5750
},
{
"epoch": 0.6,
"learning_rate": 3.546504816087886e-05,
"loss": 3.0008,
"step": 5755
},
{
"epoch": 0.6,
"learning_rate": 3.538614753867058e-05,
"loss": 2.8544,
"step": 5760
},
{
"epoch": 0.6,
"learning_rate": 3.530728668576898e-05,
"loss": 3.0141,
"step": 5765
},
{
"epoch": 0.6,
"learning_rate": 3.52284658167815e-05,
"loss": 2.8511,
"step": 5770
},
{
"epoch": 0.6,
"learning_rate": 3.5149685146206756e-05,
"loss": 3.0225,
"step": 5775
},
{
"epoch": 0.6,
"learning_rate": 3.507094488843401e-05,
"loss": 2.8771,
"step": 5780
},
{
"epoch": 0.6,
"learning_rate": 3.499224525774251e-05,
"loss": 3.0127,
"step": 5785
},
{
"epoch": 0.6,
"learning_rate": 3.4913586468300934e-05,
"loss": 2.8895,
"step": 5790
},
{
"epoch": 0.6,
"learning_rate": 3.483496873416685e-05,
"loss": 2.92,
"step": 5795
},
{
"epoch": 0.6,
"learning_rate": 3.4756392269286095e-05,
"loss": 2.907,
"step": 5800
},
{
"epoch": 0.6,
"learning_rate": 3.467785728749219e-05,
"loss": 2.9166,
"step": 5805
},
{
"epoch": 0.6,
"learning_rate": 3.4599364002505755e-05,
"loss": 3.049,
"step": 5810
},
{
"epoch": 0.6,
"learning_rate": 3.452091262793395e-05,
"loss": 2.9727,
"step": 5815
},
{
"epoch": 0.6,
"learning_rate": 3.444250337726989e-05,
"loss": 2.8995,
"step": 5820
},
{
"epoch": 0.61,
"learning_rate": 3.4364136463892024e-05,
"loss": 2.9885,
"step": 5825
},
{
"epoch": 0.61,
"learning_rate": 3.428581210106366e-05,
"loss": 3.0731,
"step": 5830
},
{
"epoch": 0.61,
"learning_rate": 3.420753050193223e-05,
"loss": 2.8772,
"step": 5835
},
{
"epoch": 0.61,
"learning_rate": 3.412929187952884e-05,
"loss": 2.9897,
"step": 5840
},
{
"epoch": 0.61,
"learning_rate": 3.4051096446767615e-05,
"loss": 3.023,
"step": 5845
},
{
"epoch": 0.61,
"learning_rate": 3.397294441644515e-05,
"loss": 2.9753,
"step": 5850
},
{
"epoch": 0.61,
"learning_rate": 3.3894836001239946e-05,
"loss": 2.97,
"step": 5855
},
{
"epoch": 0.61,
"learning_rate": 3.3816771413711815e-05,
"loss": 2.8528,
"step": 5860
},
{
"epoch": 0.61,
"learning_rate": 3.373875086630125e-05,
"loss": 2.8336,
"step": 5865
},
{
"epoch": 0.61,
"learning_rate": 3.3660774571328944e-05,
"loss": 2.8747,
"step": 5870
},
{
"epoch": 0.61,
"learning_rate": 3.358284274099516e-05,
"loss": 2.9985,
"step": 5875
},
{
"epoch": 0.61,
"learning_rate": 3.3504955587379147e-05,
"loss": 3.0302,
"step": 5880
},
{
"epoch": 0.61,
"learning_rate": 3.342711332243856e-05,
"loss": 2.877,
"step": 5885
},
{
"epoch": 0.61,
"learning_rate": 3.334931615800892e-05,
"loss": 3.0142,
"step": 5890
},
{
"epoch": 0.61,
"learning_rate": 3.3271564305802994e-05,
"loss": 2.949,
"step": 5895
},
{
"epoch": 0.61,
"learning_rate": 3.319385797741025e-05,
"loss": 3.0545,
"step": 5900
},
{
"epoch": 0.61,
"learning_rate": 3.311619738429628e-05,
"loss": 2.9261,
"step": 5905
},
{
"epoch": 0.61,
"learning_rate": 3.303858273780221e-05,
"loss": 2.8461,
"step": 5910
},
{
"epoch": 0.61,
"learning_rate": 3.296101424914411e-05,
"loss": 2.9121,
"step": 5915
},
{
"epoch": 0.62,
"learning_rate": 3.288349212941246e-05,
"loss": 2.9994,
"step": 5920
},
{
"epoch": 0.62,
"learning_rate": 3.280601658957153e-05,
"loss": 2.9453,
"step": 5925
},
{
"epoch": 0.62,
"learning_rate": 3.272858784045887e-05,
"loss": 2.8655,
"step": 5930
},
{
"epoch": 0.62,
"learning_rate": 3.265120609278466e-05,
"loss": 2.9476,
"step": 5935
},
{
"epoch": 0.62,
"learning_rate": 3.257387155713119e-05,
"loss": 2.9384,
"step": 5940
},
{
"epoch": 0.62,
"learning_rate": 3.249658444395224e-05,
"loss": 2.8973,
"step": 5945
},
{
"epoch": 0.62,
"learning_rate": 3.24193449635726e-05,
"loss": 2.8922,
"step": 5950
},
{
"epoch": 0.62,
"learning_rate": 3.234215332618739e-05,
"loss": 2.961,
"step": 5955
},
{
"epoch": 0.62,
"learning_rate": 3.2265009741861516e-05,
"loss": 3.077,
"step": 5960
},
{
"epoch": 0.62,
"learning_rate": 3.2187914420529174e-05,
"loss": 2.8913,
"step": 5965
},
{
"epoch": 0.62,
"learning_rate": 3.211086757199315e-05,
"loss": 2.949,
"step": 5970
},
{
"epoch": 0.62,
"learning_rate": 3.2033869405924386e-05,
"loss": 2.9077,
"step": 5975
},
{
"epoch": 0.62,
"learning_rate": 3.195692013186127e-05,
"loss": 2.8589,
"step": 5980
},
{
"epoch": 0.62,
"learning_rate": 3.1880019959209225e-05,
"loss": 2.9046,
"step": 5985
},
{
"epoch": 0.62,
"learning_rate": 3.180316909724e-05,
"loss": 2.9405,
"step": 5990
},
{
"epoch": 0.62,
"learning_rate": 3.172636775509114e-05,
"loss": 2.7514,
"step": 5995
},
{
"epoch": 0.62,
"learning_rate": 3.164961614176543e-05,
"loss": 2.9554,
"step": 6000
},
{
"epoch": 0.62,
"learning_rate": 3.1572914466130385e-05,
"loss": 2.9045,
"step": 6005
},
{
"epoch": 0.62,
"learning_rate": 3.1496262936917564e-05,
"loss": 2.9913,
"step": 6010
},
{
"epoch": 0.63,
"learning_rate": 3.141966176272208e-05,
"loss": 2.9642,
"step": 6015
},
{
"epoch": 0.63,
"learning_rate": 3.134311115200199e-05,
"loss": 2.8793,
"step": 6020
},
{
"epoch": 0.63,
"learning_rate": 3.126661131307781e-05,
"loss": 2.9875,
"step": 6025
},
{
"epoch": 0.63,
"learning_rate": 3.119016245413182e-05,
"loss": 2.9724,
"step": 6030
},
{
"epoch": 0.63,
"learning_rate": 3.11137647832076e-05,
"loss": 2.9,
"step": 6035
},
{
"epoch": 0.63,
"learning_rate": 3.103741850820944e-05,
"loss": 2.9434,
"step": 6040
},
{
"epoch": 0.63,
"learning_rate": 3.0961123836901715e-05,
"loss": 3.0319,
"step": 6045
},
{
"epoch": 0.63,
"learning_rate": 3.088488097690844e-05,
"loss": 2.8912,
"step": 6050
},
{
"epoch": 0.63,
"learning_rate": 3.080869013571257e-05,
"loss": 2.93,
"step": 6055
},
{
"epoch": 0.63,
"learning_rate": 3.073255152065555e-05,
"loss": 2.9719,
"step": 6060
},
{
"epoch": 0.63,
"learning_rate": 3.065646533893667e-05,
"loss": 2.9573,
"step": 6065
},
{
"epoch": 0.63,
"learning_rate": 3.058043179761252e-05,
"loss": 2.9199,
"step": 6070
},
{
"epoch": 0.63,
"learning_rate": 3.050445110359646e-05,
"loss": 2.9656,
"step": 6075
},
{
"epoch": 0.63,
"learning_rate": 3.0428523463658044e-05,
"loss": 2.9889,
"step": 6080
},
{
"epoch": 0.63,
"learning_rate": 3.0352649084422414e-05,
"loss": 2.8312,
"step": 6085
},
{
"epoch": 0.63,
"learning_rate": 3.0276828172369796e-05,
"loss": 2.9099,
"step": 6090
},
{
"epoch": 0.63,
"learning_rate": 3.020106093383489e-05,
"loss": 2.8909,
"step": 6095
},
{
"epoch": 0.63,
"learning_rate": 3.012534757500638e-05,
"loss": 3.1077,
"step": 6100
},
{
"epoch": 0.63,
"learning_rate": 3.004968830192627e-05,
"loss": 2.9411,
"step": 6105
},
{
"epoch": 0.63,
"learning_rate": 2.9974083320489415e-05,
"loss": 3.0338,
"step": 6110
},
{
"epoch": 0.64,
"learning_rate": 2.98985328364429e-05,
"loss": 2.9307,
"step": 6115
},
{
"epoch": 0.64,
"learning_rate": 2.9823037055385506e-05,
"loss": 2.9608,
"step": 6120
},
{
"epoch": 0.64,
"learning_rate": 2.9747596182767178e-05,
"loss": 2.9714,
"step": 6125
},
{
"epoch": 0.64,
"learning_rate": 2.967221042388838e-05,
"loss": 2.8675,
"step": 6130
},
{
"epoch": 0.64,
"learning_rate": 2.9596879983899672e-05,
"loss": 2.9623,
"step": 6135
},
{
"epoch": 0.64,
"learning_rate": 2.9521605067801016e-05,
"loss": 2.8146,
"step": 6140
},
{
"epoch": 0.64,
"learning_rate": 2.9446385880441274e-05,
"loss": 2.8223,
"step": 6145
},
{
"epoch": 0.64,
"learning_rate": 2.9371222626517648e-05,
"loss": 2.9929,
"step": 6150
},
{
"epoch": 0.64,
"learning_rate": 2.9296115510575177e-05,
"loss": 2.816,
"step": 6155
},
{
"epoch": 0.64,
"learning_rate": 2.9221064737006077e-05,
"loss": 2.9419,
"step": 6160
},
{
"epoch": 0.64,
"learning_rate": 2.914607051004924e-05,
"loss": 2.8513,
"step": 6165
},
{
"epoch": 0.64,
"learning_rate": 2.907113303378972e-05,
"loss": 3.023,
"step": 6170
},
{
"epoch": 0.64,
"learning_rate": 2.8996252512158062e-05,
"loss": 2.8467,
"step": 6175
},
{
"epoch": 0.64,
"learning_rate": 2.8921429148929903e-05,
"loss": 2.8152,
"step": 6180
},
{
"epoch": 0.64,
"learning_rate": 2.8846663147725238e-05,
"loss": 2.889,
"step": 6185
},
{
"epoch": 0.64,
"learning_rate": 2.8771954712008038e-05,
"loss": 3.0059,
"step": 6190
},
{
"epoch": 0.64,
"learning_rate": 2.86973040450856e-05,
"loss": 2.9976,
"step": 6195
},
{
"epoch": 0.64,
"learning_rate": 2.8622711350107966e-05,
"loss": 2.8969,
"step": 6200
},
{
"epoch": 0.64,
"learning_rate": 2.8548176830067474e-05,
"loss": 2.8465,
"step": 6205
},
{
"epoch": 0.65,
"learning_rate": 2.8473700687798134e-05,
"loss": 2.8471,
"step": 6210
},
{
"epoch": 0.65,
"learning_rate": 2.839928312597506e-05,
"loss": 2.9779,
"step": 6215
},
{
"epoch": 0.65,
"learning_rate": 2.8324924347114013e-05,
"loss": 2.7989,
"step": 6220
},
{
"epoch": 0.65,
"learning_rate": 2.8250624553570694e-05,
"loss": 2.8406,
"step": 6225
},
{
"epoch": 0.65,
"learning_rate": 2.8176383947540376e-05,
"loss": 2.9092,
"step": 6230
},
{
"epoch": 0.65,
"learning_rate": 2.810220273105718e-05,
"loss": 2.9164,
"step": 6235
},
{
"epoch": 0.65,
"learning_rate": 2.802808110599367e-05,
"loss": 2.834,
"step": 6240
},
{
"epoch": 0.65,
"learning_rate": 2.795401927406024e-05,
"loss": 2.9236,
"step": 6245
},
{
"epoch": 0.65,
"learning_rate": 2.7880017436804497e-05,
"loss": 2.9497,
"step": 6250
},
{
"epoch": 0.65,
"learning_rate": 2.780607579561088e-05,
"loss": 2.9739,
"step": 6255
},
{
"epoch": 0.65,
"learning_rate": 2.7732194551699923e-05,
"loss": 2.9375,
"step": 6260
},
{
"epoch": 0.65,
"learning_rate": 2.7658373906127867e-05,
"loss": 2.7406,
"step": 6265
},
{
"epoch": 0.65,
"learning_rate": 2.7584614059786016e-05,
"loss": 2.9097,
"step": 6270
},
{
"epoch": 0.65,
"learning_rate": 2.7510915213400202e-05,
"loss": 2.8839,
"step": 6275
},
{
"epoch": 0.65,
"learning_rate": 2.743727756753028e-05,
"loss": 2.9065,
"step": 6280
},
{
"epoch": 0.65,
"learning_rate": 2.7363701322569557e-05,
"loss": 2.8454,
"step": 6285
},
{
"epoch": 0.65,
"learning_rate": 2.7290186678744235e-05,
"loss": 2.8946,
"step": 6290
},
{
"epoch": 0.65,
"learning_rate": 2.72167338361129e-05,
"loss": 2.8756,
"step": 6295
},
{
"epoch": 0.65,
"learning_rate": 2.7143342994565913e-05,
"loss": 2.9266,
"step": 6300
},
{
"epoch": 0.66,
"learning_rate": 2.7070014353824956e-05,
"loss": 3.0255,
"step": 6305
},
{
"epoch": 0.66,
"learning_rate": 2.6996748113442394e-05,
"loss": 3.0523,
"step": 6310
},
{
"epoch": 0.66,
"learning_rate": 2.6923544472800826e-05,
"loss": 3.0364,
"step": 6315
},
{
"epoch": 0.66,
"learning_rate": 2.6850403631112487e-05,
"loss": 2.9404,
"step": 6320
},
{
"epoch": 0.66,
"learning_rate": 2.6777325787418674e-05,
"loss": 2.9068,
"step": 6325
},
{
"epoch": 0.66,
"learning_rate": 2.6704311140589293e-05,
"loss": 2.9351,
"step": 6330
},
{
"epoch": 0.66,
"learning_rate": 2.6631359889322228e-05,
"loss": 2.8579,
"step": 6335
},
{
"epoch": 0.66,
"learning_rate": 2.6558472232142877e-05,
"loss": 2.9638,
"step": 6340
},
{
"epoch": 0.66,
"learning_rate": 2.6485648367403587e-05,
"loss": 2.8894,
"step": 6345
},
{
"epoch": 0.66,
"learning_rate": 2.641288849328304e-05,
"loss": 2.9169,
"step": 6350
},
{
"epoch": 0.66,
"learning_rate": 2.634019280778583e-05,
"loss": 2.9032,
"step": 6355
},
{
"epoch": 0.66,
"learning_rate": 2.6267561508741868e-05,
"loss": 2.942,
"step": 6360
},
{
"epoch": 0.66,
"learning_rate": 2.6194994793805837e-05,
"loss": 2.8697,
"step": 6365
},
{
"epoch": 0.66,
"learning_rate": 2.612249286045669e-05,
"loss": 3.0204,
"step": 6370
},
{
"epoch": 0.66,
"learning_rate": 2.6050055905997018e-05,
"loss": 2.9513,
"step": 6375
},
{
"epoch": 0.66,
"learning_rate": 2.5977684127552665e-05,
"loss": 2.8773,
"step": 6380
},
{
"epoch": 0.66,
"learning_rate": 2.5905377722072045e-05,
"loss": 2.9156,
"step": 6385
},
{
"epoch": 0.66,
"learning_rate": 2.5833136886325705e-05,
"loss": 3.0443,
"step": 6390
},
{
"epoch": 0.66,
"learning_rate": 2.576096181690576e-05,
"loss": 2.9093,
"step": 6395
},
{
"epoch": 0.67,
"learning_rate": 2.5688852710225315e-05,
"loss": 2.9503,
"step": 6400
},
{
"epoch": 0.67,
"learning_rate": 2.561680976251802e-05,
"loss": 3.0527,
"step": 6405
},
{
"epoch": 0.67,
"learning_rate": 2.554483316983741e-05,
"loss": 2.9008,
"step": 6410
},
{
"epoch": 0.67,
"learning_rate": 2.5472923128056526e-05,
"loss": 2.8578,
"step": 6415
},
{
"epoch": 0.67,
"learning_rate": 2.5401079832867265e-05,
"loss": 3.1319,
"step": 6420
},
{
"epoch": 0.67,
"learning_rate": 2.5329303479779854e-05,
"loss": 2.8457,
"step": 6425
},
{
"epoch": 0.67,
"learning_rate": 2.5257594264122397e-05,
"loss": 2.9139,
"step": 6430
},
{
"epoch": 0.67,
"learning_rate": 2.5185952381040266e-05,
"loss": 2.985,
"step": 6435
},
{
"epoch": 0.67,
"learning_rate": 2.5114378025495622e-05,
"loss": 2.8521,
"step": 6440
},
{
"epoch": 0.67,
"learning_rate": 2.504287139226684e-05,
"loss": 2.9344,
"step": 6445
},
{
"epoch": 0.67,
"learning_rate": 2.4971432675947986e-05,
"loss": 3.0079,
"step": 6450
},
{
"epoch": 0.67,
"learning_rate": 2.490006207094835e-05,
"loss": 2.9127,
"step": 6455
},
{
"epoch": 0.67,
"learning_rate": 2.4828759771491794e-05,
"loss": 2.9941,
"step": 6460
},
{
"epoch": 0.67,
"learning_rate": 2.4757525971616362e-05,
"loss": 2.9152,
"step": 6465
},
{
"epoch": 0.67,
"learning_rate": 2.468636086517368e-05,
"loss": 2.9293,
"step": 6470
},
{
"epoch": 0.67,
"learning_rate": 2.4615264645828385e-05,
"loss": 2.9036,
"step": 6475
},
{
"epoch": 0.67,
"learning_rate": 2.4544237507057723e-05,
"loss": 2.9462,
"step": 6480
},
{
"epoch": 0.67,
"learning_rate": 2.4473279642150864e-05,
"loss": 2.9845,
"step": 6485
},
{
"epoch": 0.67,
"learning_rate": 2.4402391244208527e-05,
"loss": 3.0078,
"step": 6490
},
{
"epoch": 0.67,
"learning_rate": 2.4331572506142392e-05,
"loss": 2.9214,
"step": 6495
},
{
"epoch": 0.68,
"learning_rate": 2.4260823620674494e-05,
"loss": 2.9839,
"step": 6500
},
{
"epoch": 0.68,
"learning_rate": 2.419014478033685e-05,
"loss": 2.9875,
"step": 6505
},
{
"epoch": 0.68,
"learning_rate": 2.411953617747083e-05,
"loss": 2.9337,
"step": 6510
},
{
"epoch": 0.68,
"learning_rate": 2.4048998004226675e-05,
"loss": 2.9176,
"step": 6515
},
{
"epoch": 0.68,
"learning_rate": 2.3978530452562974e-05,
"loss": 2.9536,
"step": 6520
},
{
"epoch": 0.68,
"learning_rate": 2.3908133714246078e-05,
"loss": 2.8946,
"step": 6525
},
{
"epoch": 0.68,
"learning_rate": 2.3837807980849696e-05,
"loss": 2.9506,
"step": 6530
},
{
"epoch": 0.68,
"learning_rate": 2.3767553443754243e-05,
"loss": 2.91,
"step": 6535
},
{
"epoch": 0.68,
"learning_rate": 2.3697370294146444e-05,
"loss": 2.9307,
"step": 6540
},
{
"epoch": 0.68,
"learning_rate": 2.3627258723018737e-05,
"loss": 2.8291,
"step": 6545
},
{
"epoch": 0.68,
"learning_rate": 2.3557218921168735e-05,
"loss": 2.8649,
"step": 6550
},
{
"epoch": 0.68,
"learning_rate": 2.34872510791988e-05,
"loss": 2.9299,
"step": 6555
},
{
"epoch": 0.68,
"learning_rate": 2.341735538751541e-05,
"loss": 2.7958,
"step": 6560
},
{
"epoch": 0.68,
"learning_rate": 2.3347532036328735e-05,
"loss": 2.8647,
"step": 6565
},
{
"epoch": 0.68,
"learning_rate": 2.327778121565209e-05,
"loss": 2.8582,
"step": 6570
},
{
"epoch": 0.68,
"learning_rate": 2.3208103115301354e-05,
"loss": 2.79,
"step": 6575
},
{
"epoch": 0.68,
"learning_rate": 2.3138497924894558e-05,
"loss": 2.8581,
"step": 6580
},
{
"epoch": 0.68,
"learning_rate": 2.306896583385132e-05,
"loss": 2.9129,
"step": 6585
},
{
"epoch": 0.68,
"learning_rate": 2.2999507031392303e-05,
"loss": 2.8519,
"step": 6590
},
{
"epoch": 0.69,
"learning_rate": 2.2930121706538764e-05,
"loss": 2.6874,
"step": 6595
},
{
"epoch": 0.69,
"learning_rate": 2.2860810048111946e-05,
"loss": 2.9205,
"step": 6600
},
{
"epoch": 0.69,
"learning_rate": 2.279157224473269e-05,
"loss": 3.0293,
"step": 6605
},
{
"epoch": 0.69,
"learning_rate": 2.2722408484820767e-05,
"loss": 2.8354,
"step": 6610
},
{
"epoch": 0.69,
"learning_rate": 2.2653318956594526e-05,
"loss": 2.9465,
"step": 6615
},
{
"epoch": 0.69,
"learning_rate": 2.25843038480703e-05,
"loss": 2.976,
"step": 6620
},
{
"epoch": 0.69,
"learning_rate": 2.2515363347061837e-05,
"loss": 2.9244,
"step": 6625
},
{
"epoch": 0.69,
"learning_rate": 2.2446497641179927e-05,
"loss": 2.8918,
"step": 6630
},
{
"epoch": 0.69,
"learning_rate": 2.2377706917831753e-05,
"loss": 3.0385,
"step": 6635
},
{
"epoch": 0.69,
"learning_rate": 2.2308991364220493e-05,
"loss": 2.9077,
"step": 6640
},
{
"epoch": 0.69,
"learning_rate": 2.2240351167344763e-05,
"loss": 2.8777,
"step": 6645
},
{
"epoch": 0.69,
"learning_rate": 2.2171786513998056e-05,
"loss": 3.0116,
"step": 6650
},
{
"epoch": 0.69,
"learning_rate": 2.2103297590768334e-05,
"loss": 2.9378,
"step": 6655
},
{
"epoch": 0.69,
"learning_rate": 2.2034884584037458e-05,
"loss": 2.8471,
"step": 6660
},
{
"epoch": 0.69,
"learning_rate": 2.1966547679980692e-05,
"loss": 2.8558,
"step": 6665
},
{
"epoch": 0.69,
"learning_rate": 2.1898287064566216e-05,
"loss": 2.8937,
"step": 6670
},
{
"epoch": 0.69,
"learning_rate": 2.1830102923554546e-05,
"loss": 2.8693,
"step": 6675
},
{
"epoch": 0.69,
"learning_rate": 2.176199544249817e-05,
"loss": 2.912,
"step": 6680
},
{
"epoch": 0.69,
"learning_rate": 2.1693964806740868e-05,
"loss": 2.8784,
"step": 6685
},
{
"epoch": 0.7,
"learning_rate": 2.1626011201417363e-05,
"loss": 2.8303,
"step": 6690
},
{
"epoch": 0.7,
"learning_rate": 2.1558134811452745e-05,
"loss": 3.0053,
"step": 6695
},
{
"epoch": 0.7,
"learning_rate": 2.1490335821561923e-05,
"loss": 2.8993,
"step": 6700
},
{
"epoch": 0.7,
"learning_rate": 2.142261441624926e-05,
"loss": 2.9888,
"step": 6705
},
{
"epoch": 0.7,
"learning_rate": 2.1354970779807882e-05,
"loss": 2.9881,
"step": 6710
},
{
"epoch": 0.7,
"learning_rate": 2.128740509631936e-05,
"loss": 2.9555,
"step": 6715
},
{
"epoch": 0.7,
"learning_rate": 2.1219917549653123e-05,
"loss": 2.9375,
"step": 6720
},
{
"epoch": 0.7,
"learning_rate": 2.1152508323465897e-05,
"loss": 2.9425,
"step": 6725
},
{
"epoch": 0.7,
"learning_rate": 2.108517760120134e-05,
"loss": 2.9474,
"step": 6730
},
{
"epoch": 0.7,
"learning_rate": 2.1017925566089435e-05,
"loss": 2.8324,
"step": 6735
},
{
"epoch": 0.7,
"learning_rate": 2.095075240114606e-05,
"loss": 2.8604,
"step": 6740
},
{
"epoch": 0.7,
"learning_rate": 2.0883658289172464e-05,
"loss": 2.863,
"step": 6745
},
{
"epoch": 0.7,
"learning_rate": 2.0816643412754694e-05,
"loss": 2.9563,
"step": 6750
},
{
"epoch": 0.7,
"learning_rate": 2.074970795426327e-05,
"loss": 2.8494,
"step": 6755
},
{
"epoch": 0.7,
"learning_rate": 2.0682852095852505e-05,
"loss": 2.9259,
"step": 6760
},
{
"epoch": 0.7,
"learning_rate": 2.061607601946014e-05,
"loss": 2.9161,
"step": 6765
},
{
"epoch": 0.7,
"learning_rate": 2.0549379906806815e-05,
"loss": 2.9611,
"step": 6770
},
{
"epoch": 0.7,
"learning_rate": 2.0482763939395495e-05,
"loss": 2.706,
"step": 6775
},
{
"epoch": 0.7,
"learning_rate": 2.041622829851112e-05,
"loss": 2.8842,
"step": 6780
},
{
"epoch": 0.71,
"learning_rate": 2.0349773165219965e-05,
"loss": 2.7763,
"step": 6785
},
{
"epoch": 0.71,
"learning_rate": 2.0283398720369267e-05,
"loss": 2.8833,
"step": 6790
},
{
"epoch": 0.71,
"learning_rate": 2.0217105144586678e-05,
"loss": 2.9402,
"step": 6795
},
{
"epoch": 0.71,
"learning_rate": 2.015089261827973e-05,
"loss": 2.8516,
"step": 6800
},
{
"epoch": 0.71,
"learning_rate": 2.0084761321635454e-05,
"loss": 2.9836,
"step": 6805
},
{
"epoch": 0.71,
"learning_rate": 2.001871143461978e-05,
"loss": 2.8904,
"step": 6810
},
{
"epoch": 0.71,
"learning_rate": 1.995274313697712e-05,
"loss": 2.8632,
"step": 6815
},
{
"epoch": 0.71,
"learning_rate": 1.9886856608229864e-05,
"loss": 2.8973,
"step": 6820
},
{
"epoch": 0.71,
"learning_rate": 1.9821052027677833e-05,
"loss": 2.9326,
"step": 6825
},
{
"epoch": 0.71,
"learning_rate": 1.9755329574397897e-05,
"loss": 2.9903,
"step": 6830
},
{
"epoch": 0.71,
"learning_rate": 1.968968942724337e-05,
"loss": 2.8984,
"step": 6835
},
{
"epoch": 0.71,
"learning_rate": 1.962413176484362e-05,
"loss": 2.9656,
"step": 6840
},
{
"epoch": 0.71,
"learning_rate": 1.9558656765603583e-05,
"loss": 2.9501,
"step": 6845
},
{
"epoch": 0.71,
"learning_rate": 1.9493264607703138e-05,
"loss": 2.9534,
"step": 6850
},
{
"epoch": 0.71,
"learning_rate": 1.9427955469096825e-05,
"loss": 2.8833,
"step": 6855
},
{
"epoch": 0.71,
"learning_rate": 1.936272952751319e-05,
"loss": 2.859,
"step": 6860
},
{
"epoch": 0.71,
"learning_rate": 1.9297586960454404e-05,
"loss": 2.92,
"step": 6865
},
{
"epoch": 0.71,
"learning_rate": 1.9232527945195778e-05,
"loss": 2.9643,
"step": 6870
},
{
"epoch": 0.71,
"learning_rate": 1.9167552658785165e-05,
"loss": 2.927,
"step": 6875
},
{
"epoch": 0.71,
"learning_rate": 1.910266127804264e-05,
"loss": 2.9116,
"step": 6880
},
{
"epoch": 0.72,
"learning_rate": 1.9037853979559923e-05,
"loss": 2.8302,
"step": 6885
},
{
"epoch": 0.72,
"learning_rate": 1.8973130939699858e-05,
"loss": 2.9509,
"step": 6890
},
{
"epoch": 0.72,
"learning_rate": 1.8908492334596122e-05,
"loss": 2.8555,
"step": 6895
},
{
"epoch": 0.72,
"learning_rate": 1.884393834015248e-05,
"loss": 3.0024,
"step": 6900
},
{
"epoch": 0.72,
"learning_rate": 1.8779469132042534e-05,
"loss": 2.9214,
"step": 6905
},
{
"epoch": 0.72,
"learning_rate": 1.8715084885709084e-05,
"loss": 2.8809,
"step": 6910
},
{
"epoch": 0.72,
"learning_rate": 1.8650785776363772e-05,
"loss": 2.8551,
"step": 6915
},
{
"epoch": 0.72,
"learning_rate": 1.8586571978986562e-05,
"loss": 2.8375,
"step": 6920
},
{
"epoch": 0.72,
"learning_rate": 1.852244366832518e-05,
"loss": 2.8239,
"step": 6925
},
{
"epoch": 0.72,
"learning_rate": 1.8458401018894807e-05,
"loss": 2.9718,
"step": 6930
},
{
"epoch": 0.72,
"learning_rate": 1.8394444204977428e-05,
"loss": 2.9168,
"step": 6935
},
{
"epoch": 0.72,
"learning_rate": 1.833057340062149e-05,
"loss": 2.8779,
"step": 6940
},
{
"epoch": 0.72,
"learning_rate": 1.8266788779641396e-05,
"loss": 2.931,
"step": 6945
},
{
"epoch": 0.72,
"learning_rate": 1.8203090515616946e-05,
"loss": 2.928,
"step": 6950
},
{
"epoch": 0.72,
"learning_rate": 1.8139478781892977e-05,
"loss": 2.9042,
"step": 6955
},
{
"epoch": 0.72,
"learning_rate": 1.8075953751578872e-05,
"loss": 2.8956,
"step": 6960
},
{
"epoch": 0.72,
"learning_rate": 1.801251559754798e-05,
"loss": 2.9258,
"step": 6965
},
{
"epoch": 0.72,
"learning_rate": 1.7949164492437348e-05,
"loss": 2.8822,
"step": 6970
},
{
"epoch": 0.72,
"learning_rate": 1.7885900608647028e-05,
"loss": 2.9457,
"step": 6975
},
{
"epoch": 0.73,
"learning_rate": 1.782272411833978e-05,
"loss": 2.8477,
"step": 6980
},
{
"epoch": 0.73,
"learning_rate": 1.7759635193440476e-05,
"loss": 2.9183,
"step": 6985
},
{
"epoch": 0.73,
"learning_rate": 1.7696634005635753e-05,
"loss": 2.9364,
"step": 6990
},
{
"epoch": 0.73,
"learning_rate": 1.7633720726373465e-05,
"loss": 2.8664,
"step": 6995
},
{
"epoch": 0.73,
"learning_rate": 1.7570895526862202e-05,
"loss": 2.8978,
"step": 7000
},
{
"epoch": 0.73,
"learning_rate": 1.7508158578070928e-05,
"loss": 2.8647,
"step": 7005
},
{
"epoch": 0.73,
"learning_rate": 1.7445510050728358e-05,
"loss": 2.8948,
"step": 7010
},
{
"epoch": 0.73,
"learning_rate": 1.738295011532266e-05,
"loss": 2.9046,
"step": 7015
},
{
"epoch": 0.73,
"learning_rate": 1.7320478942100894e-05,
"loss": 2.9088,
"step": 7020
},
{
"epoch": 0.73,
"learning_rate": 1.7258096701068522e-05,
"loss": 2.9672,
"step": 7025
},
{
"epoch": 0.73,
"learning_rate": 1.7195803561989048e-05,
"loss": 2.8572,
"step": 7030
},
{
"epoch": 0.73,
"learning_rate": 1.713359969438349e-05,
"loss": 2.92,
"step": 7035
},
{
"epoch": 0.73,
"learning_rate": 1.7071485267529864e-05,
"loss": 2.8882,
"step": 7040
},
{
"epoch": 0.73,
"learning_rate": 1.70094604504629e-05,
"loss": 2.9217,
"step": 7045
},
{
"epoch": 0.73,
"learning_rate": 1.6947525411973364e-05,
"loss": 2.8347,
"step": 7050
},
{
"epoch": 0.73,
"learning_rate": 1.6885680320607776e-05,
"loss": 2.9347,
"step": 7055
},
{
"epoch": 0.73,
"learning_rate": 1.6823925344667818e-05,
"loss": 2.8641,
"step": 7060
},
{
"epoch": 0.73,
"learning_rate": 1.6762260652209983e-05,
"loss": 2.8052,
"step": 7065
},
{
"epoch": 0.73,
"learning_rate": 1.6700686411045075e-05,
"loss": 2.8709,
"step": 7070
},
{
"epoch": 0.74,
"learning_rate": 1.6639202788737694e-05,
"loss": 2.9251,
"step": 7075
},
{
"epoch": 0.74,
"learning_rate": 1.6577809952605904e-05,
"loss": 2.7836,
"step": 7080
},
{
"epoch": 0.74,
"learning_rate": 1.6516508069720653e-05,
"loss": 2.8996,
"step": 7085
},
{
"epoch": 0.74,
"learning_rate": 1.6455297306905404e-05,
"loss": 2.9278,
"step": 7090
},
{
"epoch": 0.74,
"learning_rate": 1.6394177830735668e-05,
"loss": 2.98,
"step": 7095
},
{
"epoch": 0.74,
"learning_rate": 1.6333149807538473e-05,
"loss": 2.7282,
"step": 7100
},
{
"epoch": 0.74,
"learning_rate": 1.6272213403392023e-05,
"loss": 2.856,
"step": 7105
},
{
"epoch": 0.74,
"learning_rate": 1.6211368784125204e-05,
"loss": 2.9134,
"step": 7110
},
{
"epoch": 0.74,
"learning_rate": 1.6150616115317052e-05,
"loss": 2.873,
"step": 7115
},
{
"epoch": 0.74,
"learning_rate": 1.6089955562296483e-05,
"loss": 3.0006,
"step": 7120
},
{
"epoch": 0.74,
"learning_rate": 1.6029387290141633e-05,
"loss": 2.8852,
"step": 7125
},
{
"epoch": 0.74,
"learning_rate": 1.596891146367958e-05,
"loss": 2.8632,
"step": 7130
},
{
"epoch": 0.74,
"learning_rate": 1.5908528247485767e-05,
"loss": 2.8915,
"step": 7135
},
{
"epoch": 0.74,
"learning_rate": 1.584823780588366e-05,
"loss": 2.9205,
"step": 7140
},
{
"epoch": 0.74,
"learning_rate": 1.5788040302944245e-05,
"loss": 2.9478,
"step": 7145
},
{
"epoch": 0.74,
"learning_rate": 1.5727935902485563e-05,
"loss": 2.9553,
"step": 7150
},
{
"epoch": 0.74,
"learning_rate": 1.5667924768072335e-05,
"loss": 2.8743,
"step": 7155
},
{
"epoch": 0.74,
"learning_rate": 1.560800706301542e-05,
"loss": 2.8947,
"step": 7160
},
{
"epoch": 0.74,
"learning_rate": 1.554818295037146e-05,
"loss": 2.8892,
"step": 7165
},
{
"epoch": 0.75,
"learning_rate": 1.548845259294241e-05,
"loss": 2.927,
"step": 7170
},
{
"epoch": 0.75,
"learning_rate": 1.5428816153275038e-05,
"loss": 2.9317,
"step": 7175
},
{
"epoch": 0.75,
"learning_rate": 1.5369273793660573e-05,
"loss": 2.9011,
"step": 7180
},
{
"epoch": 0.75,
"learning_rate": 1.5309825676134217e-05,
"loss": 2.9101,
"step": 7185
},
{
"epoch": 0.75,
"learning_rate": 1.5250471962474656e-05,
"loss": 2.9748,
"step": 7190
},
{
"epoch": 0.75,
"learning_rate": 1.5191212814203765e-05,
"loss": 2.8822,
"step": 7195
},
{
"epoch": 0.75,
"learning_rate": 1.5132048392585974e-05,
"loss": 2.8518,
"step": 7200
},
{
"epoch": 0.75,
"learning_rate": 1.5072978858628006e-05,
"loss": 3.0075,
"step": 7205
},
{
"epoch": 0.75,
"learning_rate": 1.5014004373078294e-05,
"loss": 2.85,
"step": 7210
},
{
"epoch": 0.75,
"learning_rate": 1.4955125096426675e-05,
"loss": 2.7951,
"step": 7215
},
{
"epoch": 0.75,
"learning_rate": 1.4896341188903867e-05,
"loss": 2.88,
"step": 7220
},
{
"epoch": 0.75,
"learning_rate": 1.4837652810481023e-05,
"loss": 3.0368,
"step": 7225
},
{
"epoch": 0.75,
"learning_rate": 1.4779060120869392e-05,
"loss": 2.7971,
"step": 7230
},
{
"epoch": 0.75,
"learning_rate": 1.4720563279519756e-05,
"loss": 2.7829,
"step": 7235
},
{
"epoch": 0.75,
"learning_rate": 1.4662162445622097e-05,
"loss": 2.844,
"step": 7240
},
{
"epoch": 0.75,
"learning_rate": 1.4603857778105152e-05,
"loss": 2.9245,
"step": 7245
},
{
"epoch": 0.75,
"learning_rate": 1.4545649435635878e-05,
"loss": 2.9121,
"step": 7250
},
{
"epoch": 0.75,
"learning_rate": 1.4487537576619176e-05,
"loss": 2.9732,
"step": 7255
},
{
"epoch": 0.75,
"learning_rate": 1.4429522359197362e-05,
"loss": 2.8346,
"step": 7260
},
{
"epoch": 0.76,
"learning_rate": 1.43716039412497e-05,
"loss": 2.824,
"step": 7265
},
{
"epoch": 0.76,
"learning_rate": 1.4313782480392147e-05,
"loss": 2.9197,
"step": 7270
},
{
"epoch": 0.76,
"learning_rate": 1.4256058133976686e-05,
"loss": 2.8023,
"step": 7275
},
{
"epoch": 0.76,
"learning_rate": 1.41984310590911e-05,
"loss": 2.8808,
"step": 7280
},
{
"epoch": 0.76,
"learning_rate": 1.41409014125584e-05,
"loss": 2.887,
"step": 7285
},
{
"epoch": 0.76,
"learning_rate": 1.4083469350936513e-05,
"loss": 2.9202,
"step": 7290
},
{
"epoch": 0.76,
"learning_rate": 1.4026135030517796e-05,
"loss": 2.8866,
"step": 7295
},
{
"epoch": 0.76,
"learning_rate": 1.3968898607328573e-05,
"loss": 2.8599,
"step": 7300
},
{
"epoch": 0.76,
"learning_rate": 1.3911760237128829e-05,
"loss": 2.9011,
"step": 7305
},
{
"epoch": 0.76,
"learning_rate": 1.3854720075411637e-05,
"loss": 2.9945,
"step": 7310
},
{
"epoch": 0.76,
"learning_rate": 1.3797778277402862e-05,
"loss": 2.9268,
"step": 7315
},
{
"epoch": 0.76,
"learning_rate": 1.3740934998060694e-05,
"loss": 2.7898,
"step": 7320
},
{
"epoch": 0.76,
"learning_rate": 1.3684190392075164e-05,
"loss": 2.9648,
"step": 7325
},
{
"epoch": 0.76,
"learning_rate": 1.3627544613867853e-05,
"loss": 2.812,
"step": 7330
},
{
"epoch": 0.76,
"learning_rate": 1.3570997817591318e-05,
"loss": 2.9451,
"step": 7335
},
{
"epoch": 0.76,
"learning_rate": 1.3514550157128814e-05,
"loss": 2.8403,
"step": 7340
},
{
"epoch": 0.76,
"learning_rate": 1.3458201786093794e-05,
"loss": 2.6865,
"step": 7345
},
{
"epoch": 0.76,
"learning_rate": 1.3401952857829509e-05,
"loss": 2.7784,
"step": 7350
},
{
"epoch": 0.76,
"learning_rate": 1.3345803525408606e-05,
"loss": 2.8942,
"step": 7355
},
{
"epoch": 0.76,
"learning_rate": 1.3289753941632649e-05,
"loss": 2.9349,
"step": 7360
},
{
"epoch": 0.77,
"learning_rate": 1.3233804259031813e-05,
"loss": 2.9317,
"step": 7365
},
{
"epoch": 0.77,
"learning_rate": 1.3177954629864386e-05,
"loss": 2.9265,
"step": 7370
},
{
"epoch": 0.77,
"learning_rate": 1.3122205206116345e-05,
"loss": 2.9964,
"step": 7375
},
{
"epoch": 0.77,
"learning_rate": 1.306655613950103e-05,
"loss": 2.9694,
"step": 7380
},
{
"epoch": 0.77,
"learning_rate": 1.3011007581458611e-05,
"loss": 2.7936,
"step": 7385
},
{
"epoch": 0.77,
"learning_rate": 1.2955559683155799e-05,
"loss": 2.879,
"step": 7390
},
{
"epoch": 0.77,
"learning_rate": 1.2900212595485356e-05,
"loss": 2.8039,
"step": 7395
},
{
"epoch": 0.77,
"learning_rate": 1.2844966469065672e-05,
"loss": 2.8918,
"step": 7400
},
{
"epoch": 0.77,
"learning_rate": 1.2789821454240435e-05,
"loss": 2.9659,
"step": 7405
},
{
"epoch": 0.77,
"learning_rate": 1.2734777701078133e-05,
"loss": 2.9603,
"step": 7410
},
{
"epoch": 0.77,
"learning_rate": 1.2679835359371706e-05,
"loss": 2.9423,
"step": 7415
},
{
"epoch": 0.77,
"learning_rate": 1.2624994578638127e-05,
"loss": 2.8943,
"step": 7420
},
{
"epoch": 0.77,
"learning_rate": 1.2570255508117978e-05,
"loss": 2.8678,
"step": 7425
},
{
"epoch": 0.77,
"learning_rate": 1.2515618296775056e-05,
"loss": 2.8788,
"step": 7430
},
{
"epoch": 0.77,
"learning_rate": 1.246108309329594e-05,
"loss": 2.8355,
"step": 7435
},
{
"epoch": 0.77,
"learning_rate": 1.2406650046089635e-05,
"loss": 2.8009,
"step": 7440
},
{
"epoch": 0.77,
"learning_rate": 1.2352319303287163e-05,
"loss": 2.7742,
"step": 7445
},
{
"epoch": 0.77,
"learning_rate": 1.229809101274108e-05,
"loss": 2.8486,
"step": 7450
},
{
"epoch": 0.77,
"learning_rate": 1.2243965322025202e-05,
"loss": 2.8947,
"step": 7455
},
{
"epoch": 0.78,
"learning_rate": 1.2189942378434083e-05,
"loss": 2.8822,
"step": 7460
},
{
"epoch": 0.78,
"learning_rate": 1.2136022328982693e-05,
"loss": 2.8753,
"step": 7465
},
{
"epoch": 0.78,
"learning_rate": 1.2082205320406004e-05,
"loss": 2.881,
"step": 7470
},
{
"epoch": 0.78,
"learning_rate": 1.2028491499158534e-05,
"loss": 2.868,
"step": 7475
},
{
"epoch": 0.78,
"learning_rate": 1.1974881011414046e-05,
"loss": 2.8985,
"step": 7480
},
{
"epoch": 0.78,
"learning_rate": 1.1921374003065044e-05,
"loss": 2.8125,
"step": 7485
},
{
"epoch": 0.78,
"learning_rate": 1.186797061972248e-05,
"loss": 2.9751,
"step": 7490
},
{
"epoch": 0.78,
"learning_rate": 1.1814671006715267e-05,
"loss": 2.9307,
"step": 7495
},
{
"epoch": 0.78,
"learning_rate": 1.1761475309089948e-05,
"loss": 3.0034,
"step": 7500
},
{
"epoch": 0.78,
"learning_rate": 1.1708383671610284e-05,
"loss": 2.9065,
"step": 7505
},
{
"epoch": 0.78,
"learning_rate": 1.1655396238756805e-05,
"loss": 2.9129,
"step": 7510
},
{
"epoch": 0.78,
"learning_rate": 1.1602513154726512e-05,
"loss": 2.8419,
"step": 7515
},
{
"epoch": 0.78,
"learning_rate": 1.1549734563432436e-05,
"loss": 2.907,
"step": 7520
},
{
"epoch": 0.78,
"learning_rate": 1.1497060608503202e-05,
"loss": 2.8648,
"step": 7525
},
{
"epoch": 0.78,
"learning_rate": 1.1444491433282739e-05,
"loss": 2.9375,
"step": 7530
},
{
"epoch": 0.78,
"learning_rate": 1.1392027180829795e-05,
"loss": 2.8749,
"step": 7535
},
{
"epoch": 0.78,
"learning_rate": 1.1339667993917602e-05,
"loss": 2.8733,
"step": 7540
},
{
"epoch": 0.78,
"learning_rate": 1.1287414015033493e-05,
"loss": 2.8616,
"step": 7545
},
{
"epoch": 0.78,
"learning_rate": 1.1235265386378446e-05,
"loss": 2.8642,
"step": 7550
},
{
"epoch": 0.79,
"learning_rate": 1.1183222249866793e-05,
"loss": 2.8825,
"step": 7555
},
{
"epoch": 0.79,
"learning_rate": 1.1131284747125747e-05,
"loss": 2.9617,
"step": 7560
},
{
"epoch": 0.79,
"learning_rate": 1.107945301949508e-05,
"loss": 2.9455,
"step": 7565
},
{
"epoch": 0.79,
"learning_rate": 1.1027727208026711e-05,
"loss": 2.876,
"step": 7570
},
{
"epoch": 0.79,
"learning_rate": 1.0976107453484314e-05,
"loss": 2.8741,
"step": 7575
},
{
"epoch": 0.79,
"learning_rate": 1.0924593896342977e-05,
"loss": 2.8908,
"step": 7580
},
{
"epoch": 0.79,
"learning_rate": 1.0873186676788726e-05,
"loss": 2.8954,
"step": 7585
},
{
"epoch": 0.79,
"learning_rate": 1.0821885934718262e-05,
"loss": 2.872,
"step": 7590
},
{
"epoch": 0.79,
"learning_rate": 1.0770691809738514e-05,
"loss": 2.896,
"step": 7595
},
{
"epoch": 0.79,
"learning_rate": 1.0719604441166247e-05,
"loss": 2.7755,
"step": 7600
},
{
"epoch": 0.79,
"learning_rate": 1.066862396802773e-05,
"loss": 2.8576,
"step": 7605
},
{
"epoch": 0.79,
"learning_rate": 1.0617750529058312e-05,
"loss": 2.8723,
"step": 7610
},
{
"epoch": 0.79,
"learning_rate": 1.0566984262702073e-05,
"loss": 2.9218,
"step": 7615
},
{
"epoch": 0.79,
"learning_rate": 1.0516325307111468e-05,
"loss": 2.8971,
"step": 7620
},
{
"epoch": 0.79,
"learning_rate": 1.0465773800146862e-05,
"loss": 2.9214,
"step": 7625
},
{
"epoch": 0.79,
"learning_rate": 1.0415329879376278e-05,
"loss": 2.8813,
"step": 7630
},
{
"epoch": 0.79,
"learning_rate": 1.0364993682074915e-05,
"loss": 2.8865,
"step": 7635
},
{
"epoch": 0.79,
"learning_rate": 1.0314765345224831e-05,
"loss": 2.9054,
"step": 7640
},
{
"epoch": 0.79,
"learning_rate": 1.0264645005514584e-05,
"loss": 2.8731,
"step": 7645
},
{
"epoch": 0.8,
"learning_rate": 1.0214632799338802e-05,
"loss": 2.8814,
"step": 7650
},
{
"epoch": 0.8,
"learning_rate": 1.0164728862797878e-05,
"loss": 2.9242,
"step": 7655
},
{
"epoch": 0.8,
"learning_rate": 1.0114933331697513e-05,
"loss": 2.8808,
"step": 7660
},
{
"epoch": 0.8,
"learning_rate": 1.006524634154845e-05,
"loss": 2.871,
"step": 7665
},
{
"epoch": 0.8,
"learning_rate": 1.0015668027566056e-05,
"loss": 2.8632,
"step": 7670
},
{
"epoch": 0.8,
"learning_rate": 9.966198524669896e-06,
"loss": 2.9597,
"step": 7675
},
{
"epoch": 0.8,
"learning_rate": 9.91683796748349e-06,
"loss": 2.8929,
"step": 7680
},
{
"epoch": 0.8,
"learning_rate": 9.86758649033383e-06,
"loss": 2.8624,
"step": 7685
},
{
"epoch": 0.8,
"learning_rate": 9.818444227251089e-06,
"loss": 2.8156,
"step": 7690
},
{
"epoch": 0.8,
"learning_rate": 9.769411311968247e-06,
"loss": 2.8784,
"step": 7695
},
{
"epoch": 0.8,
"learning_rate": 9.720487877920658e-06,
"loss": 2.865,
"step": 7700
},
{
"epoch": 0.8,
"learning_rate": 9.671674058245805e-06,
"loss": 2.891,
"step": 7705
},
{
"epoch": 0.8,
"learning_rate": 9.622969985782803e-06,
"loss": 2.8756,
"step": 7710
},
{
"epoch": 0.8,
"learning_rate": 9.57437579307217e-06,
"loss": 2.8465,
"step": 7715
},
{
"epoch": 0.8,
"learning_rate": 9.525891612355364e-06,
"loss": 2.9065,
"step": 7720
},
{
"epoch": 0.8,
"learning_rate": 9.47751757557449e-06,
"loss": 2.904,
"step": 7725
},
{
"epoch": 0.8,
"learning_rate": 9.429253814371897e-06,
"loss": 2.9035,
"step": 7730
},
{
"epoch": 0.8,
"learning_rate": 9.381100460089826e-06,
"loss": 2.8806,
"step": 7735
},
{
"epoch": 0.8,
"learning_rate": 9.333057643770083e-06,
"loss": 2.8941,
"step": 7740
},
{
"epoch": 0.8,
"learning_rate": 9.285125496153667e-06,
"loss": 2.8788,
"step": 7745
},
{
"epoch": 0.81,
"learning_rate": 9.237304147680375e-06,
"loss": 2.9316,
"step": 7750
},
{
"epoch": 0.81,
"learning_rate": 9.189593728488533e-06,
"loss": 2.8426,
"step": 7755
},
{
"epoch": 0.81,
"learning_rate": 9.141994368414524e-06,
"loss": 2.8847,
"step": 7760
},
{
"epoch": 0.81,
"learning_rate": 9.094506196992564e-06,
"loss": 2.8059,
"step": 7765
},
{
"epoch": 0.81,
"learning_rate": 9.047129343454268e-06,
"loss": 2.9604,
"step": 7770
},
{
"epoch": 0.81,
"learning_rate": 8.999863936728287e-06,
"loss": 2.7716,
"step": 7775
},
{
"epoch": 0.81,
"learning_rate": 8.952710105440027e-06,
"loss": 2.8111,
"step": 7780
},
{
"epoch": 0.81,
"learning_rate": 8.905667977911225e-06,
"loss": 2.8593,
"step": 7785
},
{
"epoch": 0.81,
"learning_rate": 8.858737682159663e-06,
"loss": 2.7311,
"step": 7790
},
{
"epoch": 0.81,
"learning_rate": 8.811919345898772e-06,
"loss": 2.9675,
"step": 7795
},
{
"epoch": 0.81,
"learning_rate": 8.765213096537317e-06,
"loss": 2.9184,
"step": 7800
},
{
"epoch": 0.81,
"learning_rate": 8.718619061179028e-06,
"loss": 2.8842,
"step": 7805
},
{
"epoch": 0.81,
"learning_rate": 8.672137366622252e-06,
"loss": 2.8258,
"step": 7810
},
{
"epoch": 0.81,
"learning_rate": 8.625768139359631e-06,
"loss": 2.8813,
"step": 7815
},
{
"epoch": 0.81,
"learning_rate": 8.579511505577759e-06,
"loss": 2.7782,
"step": 7820
},
{
"epoch": 0.81,
"learning_rate": 8.533367591156778e-06,
"loss": 2.8901,
"step": 7825
},
{
"epoch": 0.81,
"learning_rate": 8.487336521670141e-06,
"loss": 2.8051,
"step": 7830
},
{
"epoch": 0.81,
"learning_rate": 8.441418422384151e-06,
"loss": 2.9374,
"step": 7835
},
{
"epoch": 0.81,
"learning_rate": 8.39561341825773e-06,
"loss": 2.8758,
"step": 7840
},
{
"epoch": 0.82,
"learning_rate": 8.349921633942021e-06,
"loss": 2.8147,
"step": 7845
},
{
"epoch": 0.82,
"learning_rate": 8.304343193780017e-06,
"loss": 2.8288,
"step": 7850
},
{
"epoch": 0.82,
"learning_rate": 8.258878221806326e-06,
"loss": 2.9349,
"step": 7855
},
{
"epoch": 0.82,
"learning_rate": 8.213526841746699e-06,
"loss": 2.7969,
"step": 7860
},
{
"epoch": 0.82,
"learning_rate": 8.168289177017824e-06,
"loss": 2.8592,
"step": 7865
},
{
"epoch": 0.82,
"learning_rate": 8.123165350726919e-06,
"loss": 2.7655,
"step": 7870
},
{
"epoch": 0.82,
"learning_rate": 8.078155485671357e-06,
"loss": 2.9649,
"step": 7875
},
{
"epoch": 0.82,
"learning_rate": 8.033259704338475e-06,
"loss": 2.8298,
"step": 7880
},
{
"epoch": 0.82,
"learning_rate": 7.988478128905063e-06,
"loss": 2.7694,
"step": 7885
},
{
"epoch": 0.82,
"learning_rate": 7.943810881237163e-06,
"loss": 2.8258,
"step": 7890
},
{
"epoch": 0.82,
"learning_rate": 7.899258082889688e-06,
"loss": 2.8975,
"step": 7895
},
{
"epoch": 0.82,
"learning_rate": 7.854819855106078e-06,
"loss": 2.8528,
"step": 7900
},
{
"epoch": 0.82,
"learning_rate": 7.81049631881801e-06,
"loss": 2.9486,
"step": 7905
},
{
"epoch": 0.82,
"learning_rate": 7.766287594645017e-06,
"loss": 2.9073,
"step": 7910
},
{
"epoch": 0.82,
"learning_rate": 7.722193802894217e-06,
"loss": 2.9155,
"step": 7915
},
{
"epoch": 0.82,
"learning_rate": 7.678215063559956e-06,
"loss": 3.0423,
"step": 7920
},
{
"epoch": 0.82,
"learning_rate": 7.63435149632345e-06,
"loss": 2.7638,
"step": 7925
},
{
"epoch": 0.82,
"learning_rate": 7.590603220552539e-06,
"loss": 2.94,
"step": 7930
},
{
"epoch": 0.82,
"learning_rate": 7.546970355301264e-06,
"loss": 2.8597,
"step": 7935
},
{
"epoch": 0.83,
"learning_rate": 7.5034530193096475e-06,
"loss": 3.0005,
"step": 7940
},
{
"epoch": 0.83,
"learning_rate": 7.460051331003287e-06,
"loss": 2.9032,
"step": 7945
},
{
"epoch": 0.83,
"learning_rate": 7.416765408493042e-06,
"loss": 2.8776,
"step": 7950
},
{
"epoch": 0.83,
"learning_rate": 7.373595369574804e-06,
"loss": 2.7591,
"step": 7955
},
{
"epoch": 0.83,
"learning_rate": 7.33054133172903e-06,
"loss": 2.8881,
"step": 7960
},
{
"epoch": 0.83,
"learning_rate": 7.287603412120531e-06,
"loss": 2.9873,
"step": 7965
},
{
"epoch": 0.83,
"learning_rate": 7.244781727598137e-06,
"loss": 2.8301,
"step": 7970
},
{
"epoch": 0.83,
"learning_rate": 7.202076394694324e-06,
"loss": 2.8317,
"step": 7975
},
{
"epoch": 0.83,
"learning_rate": 7.159487529624975e-06,
"loss": 2.8781,
"step": 7980
},
{
"epoch": 0.83,
"learning_rate": 7.117015248288977e-06,
"loss": 2.8637,
"step": 7985
},
{
"epoch": 0.83,
"learning_rate": 7.074659666268002e-06,
"loss": 2.887,
"step": 7990
},
{
"epoch": 0.83,
"learning_rate": 7.03242089882612e-06,
"loss": 2.8215,
"step": 7995
},
{
"epoch": 0.83,
"learning_rate": 6.990299060909494e-06,
"loss": 2.8774,
"step": 8000
},
{
"epoch": 0.83,
"learning_rate": 6.948294267146121e-06,
"loss": 2.9101,
"step": 8005
},
{
"epoch": 0.83,
"learning_rate": 6.906406631845436e-06,
"loss": 2.7999,
"step": 8010
},
{
"epoch": 0.83,
"learning_rate": 6.86463626899807e-06,
"loss": 2.8326,
"step": 8015
},
{
"epoch": 0.83,
"learning_rate": 6.822983292275525e-06,
"loss": 2.7986,
"step": 8020
},
{
"epoch": 0.83,
"learning_rate": 6.781447815029818e-06,
"loss": 2.8634,
"step": 8025
},
{
"epoch": 0.83,
"learning_rate": 6.740029950293269e-06,
"loss": 2.8693,
"step": 8030
},
{
"epoch": 0.84,
"learning_rate": 6.698729810778065e-06,
"loss": 2.8484,
"step": 8035
},
{
"epoch": 0.84,
"learning_rate": 6.657547508876078e-06,
"loss": 2.8413,
"step": 8040
},
{
"epoch": 0.84,
"learning_rate": 6.616483156658476e-06,
"loss": 2.8747,
"step": 8045
},
{
"epoch": 0.84,
"learning_rate": 6.575536865875443e-06,
"loss": 2.8632,
"step": 8050
},
{
"epoch": 0.84,
"learning_rate": 6.534708747955903e-06,
"loss": 2.8095,
"step": 8055
},
{
"epoch": 0.84,
"learning_rate": 6.493998914007149e-06,
"loss": 2.8387,
"step": 8060
},
{
"epoch": 0.84,
"learning_rate": 6.453407474814616e-06,
"loss": 2.8881,
"step": 8065
},
{
"epoch": 0.84,
"learning_rate": 6.412934540841553e-06,
"loss": 2.8793,
"step": 8070
},
{
"epoch": 0.84,
"learning_rate": 6.3725802222286805e-06,
"loss": 2.7694,
"step": 8075
},
{
"epoch": 0.84,
"learning_rate": 6.332344628793957e-06,
"loss": 2.9183,
"step": 8080
},
{
"epoch": 0.84,
"learning_rate": 6.292227870032225e-06,
"loss": 2.8543,
"step": 8085
},
{
"epoch": 0.84,
"learning_rate": 6.2522300551149605e-06,
"loss": 2.8417,
"step": 8090
},
{
"epoch": 0.84,
"learning_rate": 6.212351292889945e-06,
"loss": 2.9838,
"step": 8095
},
{
"epoch": 0.84,
"learning_rate": 6.172591691880947e-06,
"loss": 2.8515,
"step": 8100
},
{
"epoch": 0.84,
"learning_rate": 6.1329513602875195e-06,
"loss": 2.913,
"step": 8105
},
{
"epoch": 0.84,
"learning_rate": 6.093430405984585e-06,
"loss": 2.8196,
"step": 8110
},
{
"epoch": 0.84,
"learning_rate": 6.054028936522216e-06,
"loss": 2.8937,
"step": 8115
},
{
"epoch": 0.84,
"learning_rate": 6.014747059125353e-06,
"loss": 2.8529,
"step": 8120
},
{
"epoch": 0.84,
"learning_rate": 5.975584880693436e-06,
"loss": 2.9127,
"step": 8125
},
{
"epoch": 0.84,
"learning_rate": 5.936542507800208e-06,
"loss": 2.9065,
"step": 8130
},
{
"epoch": 0.85,
"learning_rate": 5.8976200466933475e-06,
"loss": 2.8673,
"step": 8135
},
{
"epoch": 0.85,
"learning_rate": 5.8588176032942255e-06,
"loss": 2.8579,
"step": 8140
},
{
"epoch": 0.85,
"learning_rate": 5.820135283197625e-06,
"loss": 2.9074,
"step": 8145
},
{
"epoch": 0.85,
"learning_rate": 5.781573191671386e-06,
"loss": 2.7958,
"step": 8150
},
{
"epoch": 0.85,
"learning_rate": 5.743131433656207e-06,
"loss": 2.9314,
"step": 8155
},
{
"epoch": 0.85,
"learning_rate": 5.704810113765291e-06,
"loss": 2.9389,
"step": 8160
},
{
"epoch": 0.85,
"learning_rate": 5.666609336284096e-06,
"loss": 2.9184,
"step": 8165
},
{
"epoch": 0.85,
"learning_rate": 5.628529205170063e-06,
"loss": 2.8807,
"step": 8170
},
{
"epoch": 0.85,
"learning_rate": 5.590569824052266e-06,
"loss": 2.8066,
"step": 8175
},
{
"epoch": 0.85,
"learning_rate": 5.552731296231239e-06,
"loss": 2.8473,
"step": 8180
},
{
"epoch": 0.85,
"learning_rate": 5.515013724678569e-06,
"loss": 2.8549,
"step": 8185
},
{
"epoch": 0.85,
"learning_rate": 5.477417212036723e-06,
"loss": 2.8706,
"step": 8190
},
{
"epoch": 0.85,
"learning_rate": 5.4399418606187195e-06,
"loss": 2.7804,
"step": 8195
},
{
"epoch": 0.85,
"learning_rate": 5.402587772407824e-06,
"loss": 2.8332,
"step": 8200
},
{
"epoch": 0.85,
"learning_rate": 5.365355049057358e-06,
"loss": 2.8729,
"step": 8205
},
{
"epoch": 0.85,
"learning_rate": 5.328243791890303e-06,
"loss": 2.9654,
"step": 8210
},
{
"epoch": 0.85,
"learning_rate": 5.291254101899146e-06,
"loss": 2.7877,
"step": 8215
},
{
"epoch": 0.85,
"learning_rate": 5.254386079745527e-06,
"loss": 2.8073,
"step": 8220
},
{
"epoch": 0.85,
"learning_rate": 5.217639825759962e-06,
"loss": 2.8717,
"step": 8225
},
{
"epoch": 0.86,
"learning_rate": 5.181015439941645e-06,
"loss": 2.8088,
"step": 8230
},
{
"epoch": 0.86,
"learning_rate": 5.14451302195807e-06,
"loss": 2.8124,
"step": 8235
},
{
"epoch": 0.86,
"learning_rate": 5.108132671144849e-06,
"loss": 2.913,
"step": 8240
},
{
"epoch": 0.86,
"learning_rate": 5.071874486505412e-06,
"loss": 2.8362,
"step": 8245
},
{
"epoch": 0.86,
"learning_rate": 5.0357385667106906e-06,
"loss": 2.9117,
"step": 8250
},
{
"epoch": 0.86,
"learning_rate": 4.99972501009896e-06,
"loss": 2.8891,
"step": 8255
},
{
"epoch": 0.86,
"learning_rate": 4.963833914675431e-06,
"loss": 2.9069,
"step": 8260
},
{
"epoch": 0.86,
"learning_rate": 4.928065378112106e-06,
"loss": 2.7637,
"step": 8265
},
{
"epoch": 0.86,
"learning_rate": 4.892419497747447e-06,
"loss": 2.8374,
"step": 8270
},
{
"epoch": 0.86,
"learning_rate": 4.8568963705861114e-06,
"loss": 2.8136,
"step": 8275
},
{
"epoch": 0.86,
"learning_rate": 4.821496093298733e-06,
"loss": 2.795,
"step": 8280
},
{
"epoch": 0.86,
"learning_rate": 4.786218762221595e-06,
"loss": 2.9271,
"step": 8285
},
{
"epoch": 0.86,
"learning_rate": 4.751064473356431e-06,
"loss": 2.9181,
"step": 8290
},
{
"epoch": 0.86,
"learning_rate": 4.716033322370123e-06,
"loss": 2.8116,
"step": 8295
},
{
"epoch": 0.86,
"learning_rate": 4.6811254045944445e-06,
"loss": 2.8575,
"step": 8300
},
{
"epoch": 0.86,
"learning_rate": 4.646340815025824e-06,
"loss": 2.8371,
"step": 8305
},
{
"epoch": 0.86,
"learning_rate": 4.611679648325057e-06,
"loss": 2.8703,
"step": 8310
},
{
"epoch": 0.86,
"learning_rate": 4.577141998817075e-06,
"loss": 2.8521,
"step": 8315
},
{
"epoch": 0.86,
"learning_rate": 4.542727960490684e-06,
"loss": 2.8273,
"step": 8320
},
{
"epoch": 0.87,
"learning_rate": 4.508437626998269e-06,
"loss": 2.8532,
"step": 8325
},
{
"epoch": 0.87,
"learning_rate": 4.474271091655602e-06,
"loss": 2.9128,
"step": 8330
},
{
"epoch": 0.87,
"learning_rate": 4.440228447441552e-06,
"loss": 2.8762,
"step": 8335
},
{
"epoch": 0.87,
"learning_rate": 4.406309786997825e-06,
"loss": 2.8786,
"step": 8340
},
{
"epoch": 0.87,
"learning_rate": 4.372515202628758e-06,
"loss": 2.8667,
"step": 8345
},
{
"epoch": 0.87,
"learning_rate": 4.338844786300977e-06,
"loss": 2.7771,
"step": 8350
},
{
"epoch": 0.87,
"learning_rate": 4.3052986296432615e-06,
"loss": 2.8549,
"step": 8355
},
{
"epoch": 0.87,
"learning_rate": 4.271876823946203e-06,
"loss": 2.8453,
"step": 8360
},
{
"epoch": 0.87,
"learning_rate": 4.238579460161996e-06,
"loss": 2.8916,
"step": 8365
},
{
"epoch": 0.87,
"learning_rate": 4.205406628904213e-06,
"loss": 2.7955,
"step": 8370
},
{
"epoch": 0.87,
"learning_rate": 4.172358420447492e-06,
"loss": 2.7991,
"step": 8375
},
{
"epoch": 0.87,
"learning_rate": 4.139434924727359e-06,
"loss": 2.8524,
"step": 8380
},
{
"epoch": 0.87,
"learning_rate": 4.106636231339922e-06,
"loss": 2.9343,
"step": 8385
},
{
"epoch": 0.87,
"learning_rate": 4.0739624295417e-06,
"loss": 2.881,
"step": 8390
},
{
"epoch": 0.87,
"learning_rate": 4.041413608249312e-06,
"loss": 2.8243,
"step": 8395
},
{
"epoch": 0.87,
"learning_rate": 4.008989856039264e-06,
"loss": 2.9094,
"step": 8400
},
{
"epoch": 0.87,
"learning_rate": 3.976691261147714e-06,
"loss": 2.9381,
"step": 8405
},
{
"epoch": 0.87,
"learning_rate": 3.944517911470219e-06,
"loss": 2.8515,
"step": 8410
},
{
"epoch": 0.87,
"learning_rate": 3.9124698945615046e-06,
"loss": 2.8254,
"step": 8415
},
{
"epoch": 0.88,
"learning_rate": 3.880547297635234e-06,
"loss": 2.7336,
"step": 8420
},
{
"epoch": 0.88,
"learning_rate": 3.848750207563717e-06,
"loss": 2.8598,
"step": 8425
},
{
"epoch": 0.88,
"learning_rate": 3.817078710877775e-06,
"loss": 2.8624,
"step": 8430
},
{
"epoch": 0.88,
"learning_rate": 3.7855328937663914e-06,
"loss": 2.8908,
"step": 8435
},
{
"epoch": 0.88,
"learning_rate": 3.7541128420765734e-06,
"loss": 2.8764,
"step": 8440
},
{
"epoch": 0.88,
"learning_rate": 3.7228186413130637e-06,
"loss": 2.9249,
"step": 8445
},
{
"epoch": 0.88,
"learning_rate": 3.6916503766381116e-06,
"loss": 2.9115,
"step": 8450
},
{
"epoch": 0.88,
"learning_rate": 3.6606081328712683e-06,
"loss": 2.8248,
"step": 8455
},
{
"epoch": 0.88,
"learning_rate": 3.629691994489115e-06,
"loss": 2.8825,
"step": 8460
},
{
"epoch": 0.88,
"learning_rate": 3.5989020456250854e-06,
"loss": 2.7977,
"step": 8465
},
{
"epoch": 0.88,
"learning_rate": 3.5682383700691933e-06,
"loss": 2.75,
"step": 8470
},
{
"epoch": 0.88,
"learning_rate": 3.5377010512678167e-06,
"loss": 2.7819,
"step": 8475
},
{
"epoch": 0.88,
"learning_rate": 3.507290172323474e-06,
"loss": 2.8601,
"step": 8480
},
{
"epoch": 0.88,
"learning_rate": 3.4770058159946106e-06,
"loss": 2.9211,
"step": 8485
},
{
"epoch": 0.88,
"learning_rate": 3.4468480646953404e-06,
"loss": 2.9062,
"step": 8490
},
{
"epoch": 0.88,
"learning_rate": 3.4168170004952706e-06,
"loss": 2.8822,
"step": 8495
},
{
"epoch": 0.88,
"learning_rate": 3.3869127051192105e-06,
"loss": 2.7757,
"step": 8500
},
{
"epoch": 0.88,
"learning_rate": 3.3571352599470253e-06,
"loss": 2.9033,
"step": 8505
},
{
"epoch": 0.88,
"learning_rate": 3.3274847460133372e-06,
"loss": 2.8923,
"step": 8510
},
{
"epoch": 0.88,
"learning_rate": 3.2979612440073857e-06,
"loss": 2.8077,
"step": 8515
},
{
"epoch": 0.89,
"learning_rate": 3.2685648342727527e-06,
"loss": 2.9168,
"step": 8520
},
{
"epoch": 0.89,
"learning_rate": 3.2392955968071404e-06,
"loss": 2.8472,
"step": 8525
},
{
"epoch": 0.89,
"learning_rate": 3.2101536112622064e-06,
"loss": 2.8074,
"step": 8530
},
{
"epoch": 0.89,
"learning_rate": 3.1811389569432692e-06,
"loss": 2.8736,
"step": 8535
},
{
"epoch": 0.89,
"learning_rate": 3.1522517128091677e-06,
"loss": 2.8609,
"step": 8540
},
{
"epoch": 0.89,
"learning_rate": 3.1234919574720135e-06,
"loss": 2.8778,
"step": 8545
},
{
"epoch": 0.89,
"learning_rate": 3.094859769196956e-06,
"loss": 2.9457,
"step": 8550
},
{
"epoch": 0.89,
"learning_rate": 3.0663552259020067e-06,
"loss": 2.9051,
"step": 8555
},
{
"epoch": 0.89,
"learning_rate": 3.0379784051578097e-06,
"loss": 2.8775,
"step": 8560
},
{
"epoch": 0.89,
"learning_rate": 3.009729384187432e-06,
"loss": 2.7186,
"step": 8565
},
{
"epoch": 0.89,
"learning_rate": 2.981608239866157e-06,
"loss": 2.8407,
"step": 8570
},
{
"epoch": 0.89,
"learning_rate": 2.953615048721253e-06,
"loss": 2.8583,
"step": 8575
},
{
"epoch": 0.89,
"learning_rate": 2.925749886931817e-06,
"loss": 2.9134,
"step": 8580
},
{
"epoch": 0.89,
"learning_rate": 2.898012830328495e-06,
"loss": 2.8832,
"step": 8585
},
{
"epoch": 0.89,
"learning_rate": 2.8704039543933416e-06,
"loss": 2.8545,
"step": 8590
},
{
"epoch": 0.89,
"learning_rate": 2.8429233342595894e-06,
"loss": 2.8149,
"step": 8595
},
{
"epoch": 0.89,
"learning_rate": 2.8155710447114236e-06,
"loss": 2.8752,
"step": 8600
},
{
"epoch": 0.89,
"learning_rate": 2.788347160183824e-06,
"loss": 2.8725,
"step": 8605
},
{
"epoch": 0.89,
"learning_rate": 2.761251754762295e-06,
"loss": 2.841,
"step": 8610
},
{
"epoch": 0.9,
"learning_rate": 2.7342849021827544e-06,
"loss": 2.8485,
"step": 8615
},
{
"epoch": 0.9,
"learning_rate": 2.70744667583126e-06,
"loss": 2.7597,
"step": 8620
},
{
"epoch": 0.9,
"learning_rate": 2.680737148743817e-06,
"loss": 2.8591,
"step": 8625
},
{
"epoch": 0.9,
"learning_rate": 2.654156393606222e-06,
"loss": 2.8849,
"step": 8630
},
{
"epoch": 0.9,
"learning_rate": 2.6277044827538346e-06,
"loss": 2.7434,
"step": 8635
},
{
"epoch": 0.9,
"learning_rate": 2.6013814881713784e-06,
"loss": 2.7657,
"step": 8640
},
{
"epoch": 0.9,
"learning_rate": 2.5751874814927522e-06,
"loss": 2.9131,
"step": 8645
},
{
"epoch": 0.9,
"learning_rate": 2.5491225340008306e-06,
"loss": 2.7557,
"step": 8650
},
{
"epoch": 0.9,
"learning_rate": 2.5231867166272905e-06,
"loss": 2.9101,
"step": 8655
},
{
"epoch": 0.9,
"learning_rate": 2.4973800999523788e-06,
"loss": 2.8779,
"step": 8660
},
{
"epoch": 0.9,
"learning_rate": 2.471702754204752e-06,
"loss": 2.8746,
"step": 8665
},
{
"epoch": 0.9,
"learning_rate": 2.4461547492612923e-06,
"loss": 2.8075,
"step": 8670
},
{
"epoch": 0.9,
"learning_rate": 2.4207361546468744e-06,
"loss": 2.9346,
"step": 8675
},
{
"epoch": 0.9,
"learning_rate": 2.395447039534221e-06,
"loss": 2.8424,
"step": 8680
},
{
"epoch": 0.9,
"learning_rate": 2.3702874727436876e-06,
"loss": 2.8498,
"step": 8685
},
{
"epoch": 0.9,
"learning_rate": 2.3452575227430885e-06,
"loss": 2.9311,
"step": 8690
},
{
"epoch": 0.9,
"learning_rate": 2.32035725764751e-06,
"loss": 2.9577,
"step": 8695
},
{
"epoch": 0.9,
"learning_rate": 2.295586745219108e-06,
"loss": 2.9036,
"step": 8700
},
{
"epoch": 0.9,
"learning_rate": 2.2709460528669513e-06,
"loss": 2.8055,
"step": 8705
},
{
"epoch": 0.91,
"learning_rate": 2.246435247646822e-06,
"loss": 2.8272,
"step": 8710
},
{
"epoch": 0.91,
"learning_rate": 2.222054396261025e-06,
"loss": 2.9431,
"step": 8715
},
{
"epoch": 0.91,
"learning_rate": 2.197803565058232e-06,
"loss": 2.7475,
"step": 8720
},
{
"epoch": 0.91,
"learning_rate": 2.1736828200332625e-06,
"loss": 2.9193,
"step": 8725
},
{
"epoch": 0.91,
"learning_rate": 2.1496922268269493e-06,
"loss": 2.8035,
"step": 8730
},
{
"epoch": 0.91,
"learning_rate": 2.125831850725918e-06,
"loss": 2.8738,
"step": 8735
},
{
"epoch": 0.91,
"learning_rate": 2.1021017566624447e-06,
"loss": 2.893,
"step": 8740
},
{
"epoch": 0.91,
"learning_rate": 2.0785020092142672e-06,
"loss": 2.9523,
"step": 8745
},
{
"epoch": 0.91,
"learning_rate": 2.0550326726043734e-06,
"loss": 2.8293,
"step": 8750
},
{
"epoch": 0.91,
"learning_rate": 2.031693810700902e-06,
"loss": 2.8159,
"step": 8755
},
{
"epoch": 0.91,
"learning_rate": 2.0084854870168868e-06,
"loss": 2.8093,
"step": 8760
},
{
"epoch": 0.91,
"learning_rate": 1.985407764710151e-06,
"loss": 2.8171,
"step": 8765
},
{
"epoch": 0.91,
"learning_rate": 1.962460706583097e-06,
"loss": 2.9431,
"step": 8770
},
{
"epoch": 0.91,
"learning_rate": 1.939644375082528e-06,
"loss": 2.8466,
"step": 8775
},
{
"epoch": 0.91,
"learning_rate": 1.9169588322995314e-06,
"loss": 2.8888,
"step": 8780
},
{
"epoch": 0.91,
"learning_rate": 1.8944041399692248e-06,
"loss": 2.8591,
"step": 8785
},
{
"epoch": 0.91,
"learning_rate": 1.871980359470693e-06,
"loss": 2.835,
"step": 8790
},
{
"epoch": 0.91,
"learning_rate": 1.8496875518267286e-06,
"loss": 2.9509,
"step": 8795
},
{
"epoch": 0.91,
"learning_rate": 1.8275257777036981e-06,
"loss": 2.9184,
"step": 8800
},
{
"epoch": 0.92,
"learning_rate": 1.8054950974114137e-06,
"loss": 2.8473,
"step": 8805
},
{
"epoch": 0.92,
"learning_rate": 1.7835955709028907e-06,
"loss": 2.8985,
"step": 8810
},
{
"epoch": 0.92,
"learning_rate": 1.7618272577742733e-06,
"loss": 2.9569,
"step": 8815
},
{
"epoch": 0.92,
"learning_rate": 1.7401902172646034e-06,
"loss": 2.8695,
"step": 8820
},
{
"epoch": 0.92,
"learning_rate": 1.7186845082556914e-06,
"loss": 2.8379,
"step": 8825
},
{
"epoch": 0.92,
"learning_rate": 1.6973101892719556e-06,
"loss": 2.9668,
"step": 8830
},
{
"epoch": 0.92,
"learning_rate": 1.6760673184802401e-06,
"loss": 2.8445,
"step": 8835
},
{
"epoch": 0.92,
"learning_rate": 1.6549559536896964e-06,
"loss": 2.835,
"step": 8840
},
{
"epoch": 0.92,
"learning_rate": 1.6339761523515907e-06,
"loss": 2.9824,
"step": 8845
},
{
"epoch": 0.92,
"learning_rate": 1.6131279715591584e-06,
"loss": 2.9564,
"step": 8850
},
{
"epoch": 0.92,
"learning_rate": 1.59241146804745e-06,
"loss": 2.7621,
"step": 8855
},
{
"epoch": 0.92,
"learning_rate": 1.571826698193174e-06,
"loss": 2.8287,
"step": 8860
},
{
"epoch": 0.92,
"learning_rate": 1.5513737180145705e-06,
"loss": 2.8275,
"step": 8865
},
{
"epoch": 0.92,
"learning_rate": 1.5310525831711996e-06,
"loss": 2.9048,
"step": 8870
},
{
"epoch": 0.92,
"learning_rate": 1.510863348963848e-06,
"loss": 2.8928,
"step": 8875
},
{
"epoch": 0.92,
"learning_rate": 1.4908060703343495e-06,
"loss": 2.9125,
"step": 8880
},
{
"epoch": 0.92,
"learning_rate": 1.4708808018654374e-06,
"loss": 2.8363,
"step": 8885
},
{
"epoch": 0.92,
"learning_rate": 1.4510875977806092e-06,
"loss": 2.9201,
"step": 8890
},
{
"epoch": 0.92,
"learning_rate": 1.4314265119439618e-06,
"loss": 2.8847,
"step": 8895
},
{
"epoch": 0.92,
"learning_rate": 1.4118975978600513e-06,
"loss": 2.9236,
"step": 8900
},
{
"epoch": 0.93,
"learning_rate": 1.3925009086737661e-06,
"loss": 2.8667,
"step": 8905
},
{
"epoch": 0.93,
"learning_rate": 1.373236497170133e-06,
"loss": 2.875,
"step": 8910
},
{
"epoch": 0.93,
"learning_rate": 1.3541044157742378e-06,
"loss": 3.0322,
"step": 8915
},
{
"epoch": 0.93,
"learning_rate": 1.3351047165510444e-06,
"loss": 2.835,
"step": 8920
},
{
"epoch": 0.93,
"learning_rate": 1.3162374512052433e-06,
"loss": 2.8812,
"step": 8925
},
{
"epoch": 0.93,
"learning_rate": 1.297502671081141e-06,
"loss": 2.9387,
"step": 8930
},
{
"epoch": 0.93,
"learning_rate": 1.2789004271624884e-06,
"loss": 2.8301,
"step": 8935
},
{
"epoch": 0.93,
"learning_rate": 1.2604307700724017e-06,
"loss": 2.8831,
"step": 8940
},
{
"epoch": 0.93,
"learning_rate": 1.242093750073131e-06,
"loss": 2.935,
"step": 8945
},
{
"epoch": 0.93,
"learning_rate": 1.2238894170660098e-06,
"loss": 2.823,
"step": 8950
},
{
"epoch": 0.93,
"learning_rate": 1.2058178205912762e-06,
"loss": 2.9259,
"step": 8955
},
{
"epoch": 0.93,
"learning_rate": 1.1878790098279358e-06,
"loss": 2.8865,
"step": 8960
},
{
"epoch": 0.93,
"learning_rate": 1.170073033593655e-06,
"loss": 2.8884,
"step": 8965
},
{
"epoch": 0.93,
"learning_rate": 1.1523999403446007e-06,
"loss": 2.8522,
"step": 8970
},
{
"epoch": 0.93,
"learning_rate": 1.134859778175329e-06,
"loss": 2.7615,
"step": 8975
},
{
"epoch": 0.93,
"learning_rate": 1.1174525948186354e-06,
"loss": 2.8425,
"step": 8980
},
{
"epoch": 0.93,
"learning_rate": 1.1001784376454326e-06,
"loss": 2.8441,
"step": 8985
},
{
"epoch": 0.93,
"learning_rate": 1.0830373536646343e-06,
"loss": 2.9573,
"step": 8990
},
{
"epoch": 0.93,
"learning_rate": 1.0660293895230156e-06,
"loss": 2.8495,
"step": 8995
},
{
"epoch": 0.94,
"learning_rate": 1.0491545915050804e-06,
"loss": 2.8729,
"step": 9000
}
],
"logging_steps": 5,
"max_steps": 9622,
"num_train_epochs": 1,
"save_steps": 1000,
"total_flos": 1.348118192848896e+18,
"trial_name": null,
"trial_params": null
}