bloom-7b1-lapt-de / trainer_state.json
atsuki-yamaguchi's picture
Upload folder using huggingface_hub
a435ef3 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.04290021543951941,
"eval_steps": 500,
"global_step": 8000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 5e-06,
"loss": 2.8473,
"step": 5
},
{
"epoch": 0.0,
"learning_rate": 1e-05,
"loss": 2.7726,
"step": 10
},
{
"epoch": 0.0,
"learning_rate": 1.5e-05,
"loss": 2.8703,
"step": 15
},
{
"epoch": 0.0,
"learning_rate": 2e-05,
"loss": 2.7936,
"step": 20
},
{
"epoch": 0.0,
"learning_rate": 2.5e-05,
"loss": 2.8171,
"step": 25
},
{
"epoch": 0.0,
"learning_rate": 3e-05,
"loss": 2.8034,
"step": 30
},
{
"epoch": 0.0,
"learning_rate": 3.5e-05,
"loss": 2.7991,
"step": 35
},
{
"epoch": 0.0,
"learning_rate": 4e-05,
"loss": 2.748,
"step": 40
},
{
"epoch": 0.0,
"learning_rate": 4.5e-05,
"loss": 2.7692,
"step": 45
},
{
"epoch": 0.0,
"learning_rate": 5e-05,
"loss": 2.7435,
"step": 50
},
{
"epoch": 0.0,
"learning_rate": 5.500000000000001e-05,
"loss": 2.7909,
"step": 55
},
{
"epoch": 0.0,
"learning_rate": 6e-05,
"loss": 2.6836,
"step": 60
},
{
"epoch": 0.0,
"learning_rate": 6.500000000000001e-05,
"loss": 2.6582,
"step": 65
},
{
"epoch": 0.0,
"learning_rate": 7e-05,
"loss": 2.695,
"step": 70
},
{
"epoch": 0.0,
"learning_rate": 7.500000000000001e-05,
"loss": 2.6744,
"step": 75
},
{
"epoch": 0.0,
"learning_rate": 8e-05,
"loss": 2.6914,
"step": 80
},
{
"epoch": 0.0,
"learning_rate": 8.5e-05,
"loss": 2.6122,
"step": 85
},
{
"epoch": 0.0,
"learning_rate": 9e-05,
"loss": 2.6094,
"step": 90
},
{
"epoch": 0.0,
"learning_rate": 9.5e-05,
"loss": 2.6107,
"step": 95
},
{
"epoch": 0.0,
"learning_rate": 0.0001,
"loss": 2.5494,
"step": 100
},
{
"epoch": 0.0,
"learning_rate": 9.999999982242337e-05,
"loss": 2.6113,
"step": 105
},
{
"epoch": 0.0,
"learning_rate": 9.999999928969349e-05,
"loss": 2.5314,
"step": 110
},
{
"epoch": 0.0,
"learning_rate": 9.999999840181035e-05,
"loss": 2.5724,
"step": 115
},
{
"epoch": 0.0,
"learning_rate": 9.999999715877396e-05,
"loss": 2.5832,
"step": 120
},
{
"epoch": 0.0,
"learning_rate": 9.999999556058433e-05,
"loss": 2.5706,
"step": 125
},
{
"epoch": 0.0,
"learning_rate": 9.999999360724148e-05,
"loss": 2.5541,
"step": 130
},
{
"epoch": 0.0,
"learning_rate": 9.999999129874542e-05,
"loss": 2.5733,
"step": 135
},
{
"epoch": 0.0,
"learning_rate": 9.999998863509615e-05,
"loss": 2.6335,
"step": 140
},
{
"epoch": 0.0,
"learning_rate": 9.999998561629372e-05,
"loss": 2.5492,
"step": 145
},
{
"epoch": 0.0,
"learning_rate": 9.999998224233811e-05,
"loss": 2.6278,
"step": 150
},
{
"epoch": 0.0,
"learning_rate": 9.99999785132294e-05,
"loss": 2.5097,
"step": 155
},
{
"epoch": 0.0,
"learning_rate": 9.999997442896756e-05,
"loss": 2.5647,
"step": 160
},
{
"epoch": 0.0,
"learning_rate": 9.999996998955264e-05,
"loss": 2.5926,
"step": 165
},
{
"epoch": 0.0,
"learning_rate": 9.999996519498469e-05,
"loss": 2.5379,
"step": 170
},
{
"epoch": 0.0,
"learning_rate": 9.999996004526372e-05,
"loss": 2.527,
"step": 175
},
{
"epoch": 0.0,
"learning_rate": 9.999995454038977e-05,
"loss": 2.5837,
"step": 180
},
{
"epoch": 0.0,
"learning_rate": 9.99999486803629e-05,
"loss": 2.5457,
"step": 185
},
{
"epoch": 0.0,
"learning_rate": 9.999994246518311e-05,
"loss": 2.5282,
"step": 190
},
{
"epoch": 0.0,
"learning_rate": 9.99999358948505e-05,
"loss": 2.4637,
"step": 195
},
{
"epoch": 0.0,
"learning_rate": 9.999992896936507e-05,
"loss": 2.5831,
"step": 200
},
{
"epoch": 0.0,
"learning_rate": 9.999992168872689e-05,
"loss": 2.5234,
"step": 205
},
{
"epoch": 0.0,
"learning_rate": 9.999991405293602e-05,
"loss": 2.5167,
"step": 210
},
{
"epoch": 0.0,
"learning_rate": 9.999990606199247e-05,
"loss": 2.5296,
"step": 215
},
{
"epoch": 0.0,
"learning_rate": 9.999989771589636e-05,
"loss": 2.5526,
"step": 220
},
{
"epoch": 0.0,
"learning_rate": 9.999988901464771e-05,
"loss": 2.5035,
"step": 225
},
{
"epoch": 0.0,
"learning_rate": 9.999987995824658e-05,
"loss": 2.4831,
"step": 230
},
{
"epoch": 0.0,
"learning_rate": 9.999987054669306e-05,
"loss": 2.506,
"step": 235
},
{
"epoch": 0.0,
"learning_rate": 9.999986077998718e-05,
"loss": 2.5331,
"step": 240
},
{
"epoch": 0.0,
"learning_rate": 9.999985065812905e-05,
"loss": 2.5156,
"step": 245
},
{
"epoch": 0.0,
"learning_rate": 9.999984018111872e-05,
"loss": 2.5009,
"step": 250
},
{
"epoch": 0.0,
"learning_rate": 9.999982934895625e-05,
"loss": 2.5374,
"step": 255
},
{
"epoch": 0.0,
"learning_rate": 9.999981816164175e-05,
"loss": 2.5013,
"step": 260
},
{
"epoch": 0.0,
"learning_rate": 9.999980661917528e-05,
"loss": 2.4897,
"step": 265
},
{
"epoch": 0.0,
"learning_rate": 9.999979472155692e-05,
"loss": 2.462,
"step": 270
},
{
"epoch": 0.0,
"learning_rate": 9.999978246878676e-05,
"loss": 2.5021,
"step": 275
},
{
"epoch": 0.0,
"learning_rate": 9.99997698608649e-05,
"loss": 2.4469,
"step": 280
},
{
"epoch": 0.0,
"learning_rate": 9.99997568977914e-05,
"loss": 2.4394,
"step": 285
},
{
"epoch": 0.0,
"learning_rate": 9.999974357956636e-05,
"loss": 2.4699,
"step": 290
},
{
"epoch": 0.0,
"learning_rate": 9.999972990618991e-05,
"loss": 2.4906,
"step": 295
},
{
"epoch": 0.0,
"learning_rate": 9.99997158776621e-05,
"loss": 2.4426,
"step": 300
},
{
"epoch": 0.0,
"learning_rate": 9.999970149398306e-05,
"loss": 2.3726,
"step": 305
},
{
"epoch": 0.0,
"learning_rate": 9.999968675515288e-05,
"loss": 2.4599,
"step": 310
},
{
"epoch": 0.0,
"learning_rate": 9.999967166117166e-05,
"loss": 2.5107,
"step": 315
},
{
"epoch": 0.0,
"learning_rate": 9.999965621203952e-05,
"loss": 2.4459,
"step": 320
},
{
"epoch": 0.0,
"learning_rate": 9.999964040775656e-05,
"loss": 2.4612,
"step": 325
},
{
"epoch": 0.0,
"learning_rate": 9.99996242483229e-05,
"loss": 2.5075,
"step": 330
},
{
"epoch": 0.0,
"learning_rate": 9.999960773373865e-05,
"loss": 2.4152,
"step": 335
},
{
"epoch": 0.0,
"learning_rate": 9.999959086400393e-05,
"loss": 2.4186,
"step": 340
},
{
"epoch": 0.0,
"learning_rate": 9.999957363911885e-05,
"loss": 2.5206,
"step": 345
},
{
"epoch": 0.0,
"learning_rate": 9.999955605908353e-05,
"loss": 2.4124,
"step": 350
},
{
"epoch": 0.0,
"learning_rate": 9.999953812389813e-05,
"loss": 2.4077,
"step": 355
},
{
"epoch": 0.0,
"learning_rate": 9.999951983356273e-05,
"loss": 2.4685,
"step": 360
},
{
"epoch": 0.0,
"learning_rate": 9.999950118807749e-05,
"loss": 2.4559,
"step": 365
},
{
"epoch": 0.0,
"learning_rate": 9.999948218744254e-05,
"loss": 2.3816,
"step": 370
},
{
"epoch": 0.0,
"learning_rate": 9.9999462831658e-05,
"loss": 2.441,
"step": 375
},
{
"epoch": 0.0,
"learning_rate": 9.999944312072402e-05,
"loss": 2.427,
"step": 380
},
{
"epoch": 0.0,
"learning_rate": 9.999942305464075e-05,
"loss": 2.467,
"step": 385
},
{
"epoch": 0.0,
"learning_rate": 9.99994026334083e-05,
"loss": 2.4406,
"step": 390
},
{
"epoch": 0.0,
"learning_rate": 9.999938185702685e-05,
"loss": 2.3956,
"step": 395
},
{
"epoch": 0.0,
"learning_rate": 9.999936072549653e-05,
"loss": 2.4279,
"step": 400
},
{
"epoch": 0.0,
"learning_rate": 9.999933923881747e-05,
"loss": 2.3775,
"step": 405
},
{
"epoch": 0.0,
"learning_rate": 9.999931739698989e-05,
"loss": 2.4536,
"step": 410
},
{
"epoch": 0.0,
"learning_rate": 9.999929520001386e-05,
"loss": 2.4011,
"step": 415
},
{
"epoch": 0.0,
"learning_rate": 9.999927264788959e-05,
"loss": 2.3642,
"step": 420
},
{
"epoch": 0.0,
"learning_rate": 9.999924974061723e-05,
"loss": 2.3429,
"step": 425
},
{
"epoch": 0.0,
"learning_rate": 9.999922647819694e-05,
"loss": 2.3448,
"step": 430
},
{
"epoch": 0.0,
"learning_rate": 9.999920286062889e-05,
"loss": 2.4617,
"step": 435
},
{
"epoch": 0.0,
"learning_rate": 9.999917888791323e-05,
"loss": 2.4442,
"step": 440
},
{
"epoch": 0.0,
"learning_rate": 9.999915456005016e-05,
"loss": 2.454,
"step": 445
},
{
"epoch": 0.0,
"learning_rate": 9.999912987703983e-05,
"loss": 2.4007,
"step": 450
},
{
"epoch": 0.0,
"learning_rate": 9.999910483888242e-05,
"loss": 2.3814,
"step": 455
},
{
"epoch": 0.0,
"learning_rate": 9.999907944557812e-05,
"loss": 2.3584,
"step": 460
},
{
"epoch": 0.0,
"learning_rate": 9.999905369712708e-05,
"loss": 2.434,
"step": 465
},
{
"epoch": 0.0,
"learning_rate": 9.999902759352953e-05,
"loss": 2.398,
"step": 470
},
{
"epoch": 0.0,
"learning_rate": 9.999900113478561e-05,
"loss": 2.4065,
"step": 475
},
{
"epoch": 0.0,
"learning_rate": 9.999897432089553e-05,
"loss": 2.3793,
"step": 480
},
{
"epoch": 0.0,
"learning_rate": 9.999894715185947e-05,
"loss": 2.3103,
"step": 485
},
{
"epoch": 0.0,
"learning_rate": 9.999891962767764e-05,
"loss": 2.4316,
"step": 490
},
{
"epoch": 0.0,
"learning_rate": 9.999889174835023e-05,
"loss": 2.3382,
"step": 495
},
{
"epoch": 0.0,
"learning_rate": 9.999886351387741e-05,
"loss": 2.3045,
"step": 500
},
{
"epoch": 0.0,
"learning_rate": 9.999883492425944e-05,
"loss": 2.3562,
"step": 505
},
{
"epoch": 0.0,
"learning_rate": 9.999880597949645e-05,
"loss": 2.3554,
"step": 510
},
{
"epoch": 0.0,
"learning_rate": 9.99987766795887e-05,
"loss": 2.383,
"step": 515
},
{
"epoch": 0.0,
"learning_rate": 9.999874702453639e-05,
"loss": 2.3416,
"step": 520
},
{
"epoch": 0.0,
"learning_rate": 9.999871701433972e-05,
"loss": 2.3196,
"step": 525
},
{
"epoch": 0.0,
"learning_rate": 9.999868664899889e-05,
"loss": 2.2929,
"step": 530
},
{
"epoch": 0.0,
"learning_rate": 9.999865592851413e-05,
"loss": 2.3593,
"step": 535
},
{
"epoch": 0.0,
"learning_rate": 9.999862485288567e-05,
"loss": 2.3656,
"step": 540
},
{
"epoch": 0.0,
"learning_rate": 9.999859342211371e-05,
"loss": 2.3468,
"step": 545
},
{
"epoch": 0.0,
"learning_rate": 9.99985616361985e-05,
"loss": 2.3337,
"step": 550
},
{
"epoch": 0.0,
"learning_rate": 9.999852949514023e-05,
"loss": 2.3549,
"step": 555
},
{
"epoch": 0.0,
"learning_rate": 9.999849699893916e-05,
"loss": 2.3345,
"step": 560
},
{
"epoch": 0.0,
"learning_rate": 9.999846414759548e-05,
"loss": 2.3254,
"step": 565
},
{
"epoch": 0.0,
"learning_rate": 9.999843094110947e-05,
"loss": 2.3593,
"step": 570
},
{
"epoch": 0.0,
"learning_rate": 9.999839737948136e-05,
"loss": 2.3664,
"step": 575
},
{
"epoch": 0.0,
"learning_rate": 9.999836346271136e-05,
"loss": 2.4034,
"step": 580
},
{
"epoch": 0.0,
"learning_rate": 9.999832919079973e-05,
"loss": 2.3747,
"step": 585
},
{
"epoch": 0.0,
"learning_rate": 9.99982945637467e-05,
"loss": 2.4076,
"step": 590
},
{
"epoch": 0.0,
"learning_rate": 9.999825958155253e-05,
"loss": 2.3365,
"step": 595
},
{
"epoch": 0.0,
"learning_rate": 9.999822424421746e-05,
"loss": 2.2521,
"step": 600
},
{
"epoch": 0.0,
"learning_rate": 9.999818855174175e-05,
"loss": 2.3802,
"step": 605
},
{
"epoch": 0.0,
"learning_rate": 9.999815250412565e-05,
"loss": 2.3265,
"step": 610
},
{
"epoch": 0.0,
"learning_rate": 9.999811610136942e-05,
"loss": 2.3196,
"step": 615
},
{
"epoch": 0.0,
"learning_rate": 9.99980793434733e-05,
"loss": 2.3952,
"step": 620
},
{
"epoch": 0.0,
"learning_rate": 9.999804223043757e-05,
"loss": 2.3279,
"step": 625
},
{
"epoch": 0.0,
"learning_rate": 9.999800476226248e-05,
"loss": 2.2558,
"step": 630
},
{
"epoch": 0.0,
"learning_rate": 9.999796693894831e-05,
"loss": 2.424,
"step": 635
},
{
"epoch": 0.0,
"learning_rate": 9.999792876049534e-05,
"loss": 2.3813,
"step": 640
},
{
"epoch": 0.0,
"learning_rate": 9.99978902269038e-05,
"loss": 2.2896,
"step": 645
},
{
"epoch": 0.0,
"learning_rate": 9.999785133817401e-05,
"loss": 2.3497,
"step": 650
},
{
"epoch": 0.0,
"learning_rate": 9.99978120943062e-05,
"loss": 2.2952,
"step": 655
},
{
"epoch": 0.0,
"learning_rate": 9.999777249530068e-05,
"loss": 2.2497,
"step": 660
},
{
"epoch": 0.0,
"learning_rate": 9.999773254115772e-05,
"loss": 2.3667,
"step": 665
},
{
"epoch": 0.0,
"learning_rate": 9.999769223187761e-05,
"loss": 2.3058,
"step": 670
},
{
"epoch": 0.0,
"learning_rate": 9.999765156746064e-05,
"loss": 2.3563,
"step": 675
},
{
"epoch": 0.0,
"learning_rate": 9.999761054790708e-05,
"loss": 2.3291,
"step": 680
},
{
"epoch": 0.0,
"learning_rate": 9.999756917321725e-05,
"loss": 2.2963,
"step": 685
},
{
"epoch": 0.0,
"learning_rate": 9.999752744339142e-05,
"loss": 2.3033,
"step": 690
},
{
"epoch": 0.0,
"learning_rate": 9.99974853584299e-05,
"loss": 2.3644,
"step": 695
},
{
"epoch": 0.0,
"learning_rate": 9.999744291833298e-05,
"loss": 2.2754,
"step": 700
},
{
"epoch": 0.0,
"learning_rate": 9.999740012310096e-05,
"loss": 2.251,
"step": 705
},
{
"epoch": 0.0,
"learning_rate": 9.999735697273414e-05,
"loss": 2.3208,
"step": 710
},
{
"epoch": 0.0,
"learning_rate": 9.999731346723284e-05,
"loss": 2.2928,
"step": 715
},
{
"epoch": 0.0,
"learning_rate": 9.99972696065974e-05,
"loss": 2.2728,
"step": 720
},
{
"epoch": 0.0,
"learning_rate": 9.999722539082806e-05,
"loss": 2.4298,
"step": 725
},
{
"epoch": 0.0,
"learning_rate": 9.999718081992517e-05,
"loss": 2.3092,
"step": 730
},
{
"epoch": 0.0,
"learning_rate": 9.999713589388904e-05,
"loss": 2.3097,
"step": 735
},
{
"epoch": 0.0,
"learning_rate": 9.999709061272002e-05,
"loss": 2.2793,
"step": 740
},
{
"epoch": 0.0,
"learning_rate": 9.999704497641838e-05,
"loss": 2.2424,
"step": 745
},
{
"epoch": 0.0,
"learning_rate": 9.99969989849845e-05,
"loss": 2.2682,
"step": 750
},
{
"epoch": 0.0,
"learning_rate": 9.999695263841865e-05,
"loss": 2.3454,
"step": 755
},
{
"epoch": 0.0,
"learning_rate": 9.99969059367212e-05,
"loss": 2.3178,
"step": 760
},
{
"epoch": 0.0,
"learning_rate": 9.999685887989247e-05,
"loss": 2.2134,
"step": 765
},
{
"epoch": 0.0,
"learning_rate": 9.999681146793279e-05,
"loss": 2.3036,
"step": 770
},
{
"epoch": 0.0,
"learning_rate": 9.99967637008425e-05,
"loss": 2.2281,
"step": 775
},
{
"epoch": 0.0,
"learning_rate": 9.999671557862195e-05,
"loss": 2.2872,
"step": 780
},
{
"epoch": 0.0,
"learning_rate": 9.999666710127146e-05,
"loss": 2.3222,
"step": 785
},
{
"epoch": 0.0,
"learning_rate": 9.999661826879138e-05,
"loss": 2.3252,
"step": 790
},
{
"epoch": 0.0,
"learning_rate": 9.999656908118206e-05,
"loss": 2.3189,
"step": 795
},
{
"epoch": 0.0,
"learning_rate": 9.999651953844386e-05,
"loss": 2.2981,
"step": 800
},
{
"epoch": 0.0,
"learning_rate": 9.999646964057714e-05,
"loss": 2.2967,
"step": 805
},
{
"epoch": 0.0,
"learning_rate": 9.999641938758222e-05,
"loss": 2.373,
"step": 810
},
{
"epoch": 0.0,
"learning_rate": 9.999636877945948e-05,
"loss": 2.2433,
"step": 815
},
{
"epoch": 0.0,
"learning_rate": 9.999631781620926e-05,
"loss": 2.2853,
"step": 820
},
{
"epoch": 0.0,
"learning_rate": 9.999626649783196e-05,
"loss": 2.2309,
"step": 825
},
{
"epoch": 0.0,
"learning_rate": 9.99962148243279e-05,
"loss": 2.224,
"step": 830
},
{
"epoch": 0.0,
"learning_rate": 9.999616279569748e-05,
"loss": 2.3233,
"step": 835
},
{
"epoch": 0.0,
"learning_rate": 9.999611041194106e-05,
"loss": 2.2581,
"step": 840
},
{
"epoch": 0.0,
"learning_rate": 9.999605767305899e-05,
"loss": 2.2282,
"step": 845
},
{
"epoch": 0.0,
"learning_rate": 9.999600457905167e-05,
"loss": 2.256,
"step": 850
},
{
"epoch": 0.0,
"learning_rate": 9.999595112991949e-05,
"loss": 2.2754,
"step": 855
},
{
"epoch": 0.0,
"learning_rate": 9.99958973256628e-05,
"loss": 2.2858,
"step": 860
},
{
"epoch": 0.0,
"learning_rate": 9.999584316628199e-05,
"loss": 2.3145,
"step": 865
},
{
"epoch": 0.0,
"learning_rate": 9.999578865177745e-05,
"loss": 2.2716,
"step": 870
},
{
"epoch": 0.0,
"learning_rate": 9.999573378214957e-05,
"loss": 2.2737,
"step": 875
},
{
"epoch": 0.0,
"learning_rate": 9.999567855739871e-05,
"loss": 2.2759,
"step": 880
},
{
"epoch": 0.0,
"learning_rate": 9.999562297752531e-05,
"loss": 2.2142,
"step": 885
},
{
"epoch": 0.0,
"learning_rate": 9.999556704252975e-05,
"loss": 2.3048,
"step": 890
},
{
"epoch": 0.0,
"learning_rate": 9.99955107524124e-05,
"loss": 2.2356,
"step": 895
},
{
"epoch": 0.0,
"learning_rate": 9.999545410717369e-05,
"loss": 2.2648,
"step": 900
},
{
"epoch": 0.0,
"learning_rate": 9.9995397106814e-05,
"loss": 2.2718,
"step": 905
},
{
"epoch": 0.0,
"learning_rate": 9.999533975133376e-05,
"loss": 2.1817,
"step": 910
},
{
"epoch": 0.0,
"learning_rate": 9.999528204073336e-05,
"loss": 2.3446,
"step": 915
},
{
"epoch": 0.0,
"learning_rate": 9.999522397501321e-05,
"loss": 2.2839,
"step": 920
},
{
"epoch": 0.0,
"learning_rate": 9.999516555417372e-05,
"loss": 2.2265,
"step": 925
},
{
"epoch": 0.0,
"learning_rate": 9.999510677821532e-05,
"loss": 2.32,
"step": 930
},
{
"epoch": 0.01,
"learning_rate": 9.999504764713841e-05,
"loss": 2.2913,
"step": 935
},
{
"epoch": 0.01,
"learning_rate": 9.999498816094343e-05,
"loss": 2.1976,
"step": 940
},
{
"epoch": 0.01,
"learning_rate": 9.999492831963078e-05,
"loss": 2.2944,
"step": 945
},
{
"epoch": 0.01,
"learning_rate": 9.999486812320091e-05,
"loss": 2.2538,
"step": 950
},
{
"epoch": 0.01,
"learning_rate": 9.999480757165422e-05,
"loss": 2.1969,
"step": 955
},
{
"epoch": 0.01,
"learning_rate": 9.999474666499115e-05,
"loss": 2.2265,
"step": 960
},
{
"epoch": 0.01,
"learning_rate": 9.999468540321215e-05,
"loss": 2.2501,
"step": 965
},
{
"epoch": 0.01,
"learning_rate": 9.999462378631764e-05,
"loss": 2.2723,
"step": 970
},
{
"epoch": 0.01,
"learning_rate": 9.999456181430805e-05,
"loss": 2.2585,
"step": 975
},
{
"epoch": 0.01,
"learning_rate": 9.999449948718384e-05,
"loss": 2.1975,
"step": 980
},
{
"epoch": 0.01,
"learning_rate": 9.999443680494542e-05,
"loss": 2.2807,
"step": 985
},
{
"epoch": 0.01,
"learning_rate": 9.999437376759328e-05,
"loss": 2.2114,
"step": 990
},
{
"epoch": 0.01,
"learning_rate": 9.999431037512784e-05,
"loss": 2.2353,
"step": 995
},
{
"epoch": 0.01,
"learning_rate": 9.999424662754955e-05,
"loss": 2.2853,
"step": 1000
},
{
"epoch": 0.01,
"learning_rate": 9.999418252485888e-05,
"loss": 2.2036,
"step": 1005
},
{
"epoch": 0.01,
"learning_rate": 9.999411806705626e-05,
"loss": 2.2508,
"step": 1010
},
{
"epoch": 0.01,
"learning_rate": 9.999405325414218e-05,
"loss": 2.1966,
"step": 1015
},
{
"epoch": 0.01,
"learning_rate": 9.999398808611705e-05,
"loss": 2.2214,
"step": 1020
},
{
"epoch": 0.01,
"learning_rate": 9.999392256298139e-05,
"loss": 2.233,
"step": 1025
},
{
"epoch": 0.01,
"learning_rate": 9.999385668473562e-05,
"loss": 2.2457,
"step": 1030
},
{
"epoch": 0.01,
"learning_rate": 9.999379045138024e-05,
"loss": 2.2132,
"step": 1035
},
{
"epoch": 0.01,
"learning_rate": 9.999372386291571e-05,
"loss": 2.261,
"step": 1040
},
{
"epoch": 0.01,
"learning_rate": 9.999365691934251e-05,
"loss": 2.1686,
"step": 1045
},
{
"epoch": 0.01,
"learning_rate": 9.999358962066108e-05,
"loss": 2.1927,
"step": 1050
},
{
"epoch": 0.01,
"learning_rate": 9.999352196687195e-05,
"loss": 2.2486,
"step": 1055
},
{
"epoch": 0.01,
"learning_rate": 9.999345395797557e-05,
"loss": 2.281,
"step": 1060
},
{
"epoch": 0.01,
"learning_rate": 9.999338559397242e-05,
"loss": 2.2213,
"step": 1065
},
{
"epoch": 0.01,
"learning_rate": 9.9993316874863e-05,
"loss": 2.2035,
"step": 1070
},
{
"epoch": 0.01,
"learning_rate": 9.999324780064779e-05,
"loss": 2.2247,
"step": 1075
},
{
"epoch": 0.01,
"learning_rate": 9.999317837132729e-05,
"loss": 2.2729,
"step": 1080
},
{
"epoch": 0.01,
"learning_rate": 9.999310858690198e-05,
"loss": 2.1674,
"step": 1085
},
{
"epoch": 0.01,
"learning_rate": 9.999303844737235e-05,
"loss": 2.298,
"step": 1090
},
{
"epoch": 0.01,
"learning_rate": 9.999296795273893e-05,
"loss": 2.1917,
"step": 1095
},
{
"epoch": 0.01,
"learning_rate": 9.99928971030022e-05,
"loss": 2.2396,
"step": 1100
},
{
"epoch": 0.01,
"learning_rate": 9.999282589816265e-05,
"loss": 2.1645,
"step": 1105
},
{
"epoch": 0.01,
"learning_rate": 9.99927543382208e-05,
"loss": 2.2335,
"step": 1110
},
{
"epoch": 0.01,
"learning_rate": 9.999268242317716e-05,
"loss": 2.3187,
"step": 1115
},
{
"epoch": 0.01,
"learning_rate": 9.999261015303225e-05,
"loss": 2.1898,
"step": 1120
},
{
"epoch": 0.01,
"learning_rate": 9.999253752778655e-05,
"loss": 2.1982,
"step": 1125
},
{
"epoch": 0.01,
"learning_rate": 9.999246454744062e-05,
"loss": 2.2065,
"step": 1130
},
{
"epoch": 0.01,
"learning_rate": 9.999239121199496e-05,
"loss": 2.1835,
"step": 1135
},
{
"epoch": 0.01,
"learning_rate": 9.999231752145007e-05,
"loss": 2.297,
"step": 1140
},
{
"epoch": 0.01,
"learning_rate": 9.999224347580649e-05,
"loss": 2.1948,
"step": 1145
},
{
"epoch": 0.01,
"learning_rate": 9.999216907506474e-05,
"loss": 2.2976,
"step": 1150
},
{
"epoch": 0.01,
"learning_rate": 9.999209431922538e-05,
"loss": 2.1546,
"step": 1155
},
{
"epoch": 0.01,
"learning_rate": 9.99920192082889e-05,
"loss": 2.2606,
"step": 1160
},
{
"epoch": 0.01,
"learning_rate": 9.999194374225585e-05,
"loss": 2.1286,
"step": 1165
},
{
"epoch": 0.01,
"learning_rate": 9.999186792112677e-05,
"loss": 2.2137,
"step": 1170
},
{
"epoch": 0.01,
"learning_rate": 9.99917917449022e-05,
"loss": 2.2462,
"step": 1175
},
{
"epoch": 0.01,
"learning_rate": 9.999171521358267e-05,
"loss": 2.194,
"step": 1180
},
{
"epoch": 0.01,
"learning_rate": 9.999163832716871e-05,
"loss": 2.3038,
"step": 1185
},
{
"epoch": 0.01,
"learning_rate": 9.999156108566091e-05,
"loss": 2.2024,
"step": 1190
},
{
"epoch": 0.01,
"learning_rate": 9.999148348905978e-05,
"loss": 2.2281,
"step": 1195
},
{
"epoch": 0.01,
"learning_rate": 9.999140553736591e-05,
"loss": 2.3076,
"step": 1200
},
{
"epoch": 0.01,
"learning_rate": 9.99913272305798e-05,
"loss": 2.1543,
"step": 1205
},
{
"epoch": 0.01,
"learning_rate": 9.999124856870204e-05,
"loss": 2.1762,
"step": 1210
},
{
"epoch": 0.01,
"learning_rate": 9.999116955173318e-05,
"loss": 2.2303,
"step": 1215
},
{
"epoch": 0.01,
"learning_rate": 9.99910901796738e-05,
"loss": 2.2688,
"step": 1220
},
{
"epoch": 0.01,
"learning_rate": 9.999101045252444e-05,
"loss": 2.2159,
"step": 1225
},
{
"epoch": 0.01,
"learning_rate": 9.999093037028568e-05,
"loss": 2.2502,
"step": 1230
},
{
"epoch": 0.01,
"learning_rate": 9.999084993295806e-05,
"loss": 2.2158,
"step": 1235
},
{
"epoch": 0.01,
"learning_rate": 9.99907691405422e-05,
"loss": 2.2183,
"step": 1240
},
{
"epoch": 0.01,
"learning_rate": 9.999068799303865e-05,
"loss": 2.1595,
"step": 1245
},
{
"epoch": 0.01,
"learning_rate": 9.999060649044797e-05,
"loss": 2.2377,
"step": 1250
},
{
"epoch": 0.01,
"learning_rate": 9.999052463277076e-05,
"loss": 2.1848,
"step": 1255
},
{
"epoch": 0.01,
"learning_rate": 9.999044242000761e-05,
"loss": 2.2617,
"step": 1260
},
{
"epoch": 0.01,
"learning_rate": 9.999035985215908e-05,
"loss": 2.2413,
"step": 1265
},
{
"epoch": 0.01,
"learning_rate": 9.999027692922576e-05,
"loss": 2.2558,
"step": 1270
},
{
"epoch": 0.01,
"learning_rate": 9.999019365120824e-05,
"loss": 2.2329,
"step": 1275
},
{
"epoch": 0.01,
"learning_rate": 9.999011001810712e-05,
"loss": 2.1904,
"step": 1280
},
{
"epoch": 0.01,
"learning_rate": 9.9990026029923e-05,
"loss": 2.2001,
"step": 1285
},
{
"epoch": 0.01,
"learning_rate": 9.998994168665648e-05,
"loss": 2.2125,
"step": 1290
},
{
"epoch": 0.01,
"learning_rate": 9.998985698830811e-05,
"loss": 2.2595,
"step": 1295
},
{
"epoch": 0.01,
"learning_rate": 9.998977193487855e-05,
"loss": 2.1524,
"step": 1300
},
{
"epoch": 0.01,
"learning_rate": 9.998968652636838e-05,
"loss": 2.262,
"step": 1305
},
{
"epoch": 0.01,
"learning_rate": 9.998960076277821e-05,
"loss": 2.2323,
"step": 1310
},
{
"epoch": 0.01,
"learning_rate": 9.998951464410864e-05,
"loss": 2.1489,
"step": 1315
},
{
"epoch": 0.01,
"learning_rate": 9.99894281703603e-05,
"loss": 2.2797,
"step": 1320
},
{
"epoch": 0.01,
"learning_rate": 9.99893413415338e-05,
"loss": 2.1403,
"step": 1325
},
{
"epoch": 0.01,
"learning_rate": 9.998925415762972e-05,
"loss": 2.1532,
"step": 1330
},
{
"epoch": 0.01,
"learning_rate": 9.998916661864875e-05,
"loss": 2.1249,
"step": 1335
},
{
"epoch": 0.01,
"learning_rate": 9.998907872459144e-05,
"loss": 2.2107,
"step": 1340
},
{
"epoch": 0.01,
"learning_rate": 9.998899047545847e-05,
"loss": 2.2304,
"step": 1345
},
{
"epoch": 0.01,
"learning_rate": 9.998890187125042e-05,
"loss": 2.1913,
"step": 1350
},
{
"epoch": 0.01,
"learning_rate": 9.998881291196796e-05,
"loss": 2.1502,
"step": 1355
},
{
"epoch": 0.01,
"learning_rate": 9.998872359761169e-05,
"loss": 2.1971,
"step": 1360
},
{
"epoch": 0.01,
"learning_rate": 9.998863392818226e-05,
"loss": 2.1425,
"step": 1365
},
{
"epoch": 0.01,
"learning_rate": 9.99885439036803e-05,
"loss": 2.2467,
"step": 1370
},
{
"epoch": 0.01,
"learning_rate": 9.998845352410649e-05,
"loss": 2.2199,
"step": 1375
},
{
"epoch": 0.01,
"learning_rate": 9.99883627894614e-05,
"loss": 2.2122,
"step": 1380
},
{
"epoch": 0.01,
"learning_rate": 9.998827169974573e-05,
"loss": 2.1418,
"step": 1385
},
{
"epoch": 0.01,
"learning_rate": 9.99881802549601e-05,
"loss": 2.2162,
"step": 1390
},
{
"epoch": 0.01,
"learning_rate": 9.998808845510517e-05,
"loss": 2.2291,
"step": 1395
},
{
"epoch": 0.01,
"learning_rate": 9.998799630018159e-05,
"loss": 2.1994,
"step": 1400
},
{
"epoch": 0.01,
"learning_rate": 9.998790379019e-05,
"loss": 2.2042,
"step": 1405
},
{
"epoch": 0.01,
"learning_rate": 9.99878109251311e-05,
"loss": 2.2801,
"step": 1410
},
{
"epoch": 0.01,
"learning_rate": 9.99877177050055e-05,
"loss": 2.1827,
"step": 1415
},
{
"epoch": 0.01,
"learning_rate": 9.99876241298139e-05,
"loss": 2.1468,
"step": 1420
},
{
"epoch": 0.01,
"learning_rate": 9.998753019955694e-05,
"loss": 2.1695,
"step": 1425
},
{
"epoch": 0.01,
"learning_rate": 9.99874359142353e-05,
"loss": 2.1839,
"step": 1430
},
{
"epoch": 0.01,
"learning_rate": 9.998734127384964e-05,
"loss": 2.1977,
"step": 1435
},
{
"epoch": 0.01,
"learning_rate": 9.998724627840063e-05,
"loss": 2.1427,
"step": 1440
},
{
"epoch": 0.01,
"learning_rate": 9.998715092788895e-05,
"loss": 2.1486,
"step": 1445
},
{
"epoch": 0.01,
"learning_rate": 9.998705522231529e-05,
"loss": 2.2304,
"step": 1450
},
{
"epoch": 0.01,
"learning_rate": 9.998695916168033e-05,
"loss": 2.2171,
"step": 1455
},
{
"epoch": 0.01,
"learning_rate": 9.998686274598472e-05,
"loss": 2.1853,
"step": 1460
},
{
"epoch": 0.01,
"learning_rate": 9.998676597522918e-05,
"loss": 2.1481,
"step": 1465
},
{
"epoch": 0.01,
"learning_rate": 9.998666884941437e-05,
"loss": 2.1574,
"step": 1470
},
{
"epoch": 0.01,
"learning_rate": 9.9986571368541e-05,
"loss": 2.2056,
"step": 1475
},
{
"epoch": 0.01,
"learning_rate": 9.998647353260976e-05,
"loss": 2.2068,
"step": 1480
},
{
"epoch": 0.01,
"learning_rate": 9.998637534162134e-05,
"loss": 2.1437,
"step": 1485
},
{
"epoch": 0.01,
"learning_rate": 9.998627679557644e-05,
"loss": 2.1642,
"step": 1490
},
{
"epoch": 0.01,
"learning_rate": 9.998617789447575e-05,
"loss": 2.1636,
"step": 1495
},
{
"epoch": 0.01,
"learning_rate": 9.998607863831999e-05,
"loss": 2.1649,
"step": 1500
},
{
"epoch": 0.01,
"learning_rate": 9.998597902710984e-05,
"loss": 2.1786,
"step": 1505
},
{
"epoch": 0.01,
"learning_rate": 9.998587906084605e-05,
"loss": 2.1819,
"step": 1510
},
{
"epoch": 0.01,
"learning_rate": 9.998577873952928e-05,
"loss": 2.1562,
"step": 1515
},
{
"epoch": 0.01,
"learning_rate": 9.998567806316027e-05,
"loss": 2.2005,
"step": 1520
},
{
"epoch": 0.01,
"learning_rate": 9.998557703173974e-05,
"loss": 2.1651,
"step": 1525
},
{
"epoch": 0.01,
"learning_rate": 9.998547564526838e-05,
"loss": 2.1955,
"step": 1530
},
{
"epoch": 0.01,
"learning_rate": 9.998537390374697e-05,
"loss": 2.1984,
"step": 1535
},
{
"epoch": 0.01,
"learning_rate": 9.998527180717614e-05,
"loss": 2.2526,
"step": 1540
},
{
"epoch": 0.01,
"learning_rate": 9.99851693555567e-05,
"loss": 2.2263,
"step": 1545
},
{
"epoch": 0.01,
"learning_rate": 9.998506654888934e-05,
"loss": 2.1534,
"step": 1550
},
{
"epoch": 0.01,
"learning_rate": 9.998496338717479e-05,
"loss": 2.2249,
"step": 1555
},
{
"epoch": 0.01,
"learning_rate": 9.998485987041379e-05,
"loss": 2.183,
"step": 1560
},
{
"epoch": 0.01,
"learning_rate": 9.998475599860706e-05,
"loss": 2.1818,
"step": 1565
},
{
"epoch": 0.01,
"learning_rate": 9.998465177175538e-05,
"loss": 2.2019,
"step": 1570
},
{
"epoch": 0.01,
"learning_rate": 9.998454718985943e-05,
"loss": 2.2007,
"step": 1575
},
{
"epoch": 0.01,
"learning_rate": 9.998444225291999e-05,
"loss": 2.2113,
"step": 1580
},
{
"epoch": 0.01,
"learning_rate": 9.998433696093781e-05,
"loss": 2.2125,
"step": 1585
},
{
"epoch": 0.01,
"learning_rate": 9.998423131391363e-05,
"loss": 2.0699,
"step": 1590
},
{
"epoch": 0.01,
"learning_rate": 9.998412531184819e-05,
"loss": 2.1805,
"step": 1595
},
{
"epoch": 0.01,
"learning_rate": 9.998401895474226e-05,
"loss": 2.2101,
"step": 1600
},
{
"epoch": 0.01,
"learning_rate": 9.998391224259656e-05,
"loss": 2.2066,
"step": 1605
},
{
"epoch": 0.01,
"learning_rate": 9.99838051754119e-05,
"loss": 2.2581,
"step": 1610
},
{
"epoch": 0.01,
"learning_rate": 9.9983697753189e-05,
"loss": 2.1581,
"step": 1615
},
{
"epoch": 0.01,
"learning_rate": 9.998358997592865e-05,
"loss": 2.093,
"step": 1620
},
{
"epoch": 0.01,
"learning_rate": 9.998348184363159e-05,
"loss": 2.1358,
"step": 1625
},
{
"epoch": 0.01,
"learning_rate": 9.99833733562986e-05,
"loss": 2.1701,
"step": 1630
},
{
"epoch": 0.01,
"learning_rate": 9.998326451393045e-05,
"loss": 2.1604,
"step": 1635
},
{
"epoch": 0.01,
"learning_rate": 9.998315531652793e-05,
"loss": 2.1761,
"step": 1640
},
{
"epoch": 0.01,
"learning_rate": 9.998304576409178e-05,
"loss": 2.1755,
"step": 1645
},
{
"epoch": 0.01,
"learning_rate": 9.998293585662283e-05,
"loss": 2.2117,
"step": 1650
},
{
"epoch": 0.01,
"learning_rate": 9.998282559412181e-05,
"loss": 2.1516,
"step": 1655
},
{
"epoch": 0.01,
"learning_rate": 9.998271497658952e-05,
"loss": 2.1178,
"step": 1660
},
{
"epoch": 0.01,
"learning_rate": 9.998260400402675e-05,
"loss": 2.193,
"step": 1665
},
{
"epoch": 0.01,
"learning_rate": 9.99824926764343e-05,
"loss": 2.2114,
"step": 1670
},
{
"epoch": 0.01,
"learning_rate": 9.998238099381292e-05,
"loss": 2.1463,
"step": 1675
},
{
"epoch": 0.01,
"learning_rate": 9.998226895616344e-05,
"loss": 2.2288,
"step": 1680
},
{
"epoch": 0.01,
"learning_rate": 9.998215656348667e-05,
"loss": 2.1961,
"step": 1685
},
{
"epoch": 0.01,
"learning_rate": 9.998204381578335e-05,
"loss": 2.1976,
"step": 1690
},
{
"epoch": 0.01,
"learning_rate": 9.998193071305433e-05,
"loss": 2.1581,
"step": 1695
},
{
"epoch": 0.01,
"learning_rate": 9.99818172553004e-05,
"loss": 2.0984,
"step": 1700
},
{
"epoch": 0.01,
"learning_rate": 9.998170344252238e-05,
"loss": 2.1273,
"step": 1705
},
{
"epoch": 0.01,
"learning_rate": 9.998158927472104e-05,
"loss": 2.1292,
"step": 1710
},
{
"epoch": 0.01,
"learning_rate": 9.998147475189722e-05,
"loss": 2.1373,
"step": 1715
},
{
"epoch": 0.01,
"learning_rate": 9.998135987405173e-05,
"loss": 2.1552,
"step": 1720
},
{
"epoch": 0.01,
"learning_rate": 9.998124464118539e-05,
"loss": 2.0913,
"step": 1725
},
{
"epoch": 0.01,
"learning_rate": 9.998112905329899e-05,
"loss": 2.1644,
"step": 1730
},
{
"epoch": 0.01,
"learning_rate": 9.998101311039339e-05,
"loss": 2.1454,
"step": 1735
},
{
"epoch": 0.01,
"learning_rate": 9.998089681246941e-05,
"loss": 2.1481,
"step": 1740
},
{
"epoch": 0.01,
"learning_rate": 9.998078015952783e-05,
"loss": 2.1885,
"step": 1745
},
{
"epoch": 0.01,
"learning_rate": 9.998066315156953e-05,
"loss": 2.1368,
"step": 1750
},
{
"epoch": 0.01,
"learning_rate": 9.998054578859532e-05,
"loss": 2.1375,
"step": 1755
},
{
"epoch": 0.01,
"learning_rate": 9.998042807060604e-05,
"loss": 2.1901,
"step": 1760
},
{
"epoch": 0.01,
"learning_rate": 9.998030999760253e-05,
"loss": 2.1038,
"step": 1765
},
{
"epoch": 0.01,
"learning_rate": 9.99801915695856e-05,
"loss": 2.1938,
"step": 1770
},
{
"epoch": 0.01,
"learning_rate": 9.998007278655612e-05,
"loss": 2.1706,
"step": 1775
},
{
"epoch": 0.01,
"learning_rate": 9.997995364851493e-05,
"loss": 2.1044,
"step": 1780
},
{
"epoch": 0.01,
"learning_rate": 9.997983415546287e-05,
"loss": 2.1951,
"step": 1785
},
{
"epoch": 0.01,
"learning_rate": 9.997971430740079e-05,
"loss": 2.1219,
"step": 1790
},
{
"epoch": 0.01,
"learning_rate": 9.997959410432955e-05,
"loss": 2.118,
"step": 1795
},
{
"epoch": 0.01,
"learning_rate": 9.997947354624998e-05,
"loss": 2.244,
"step": 1800
},
{
"epoch": 0.01,
"learning_rate": 9.997935263316296e-05,
"loss": 2.2016,
"step": 1805
},
{
"epoch": 0.01,
"learning_rate": 9.997923136506934e-05,
"loss": 2.1986,
"step": 1810
},
{
"epoch": 0.01,
"learning_rate": 9.997910974196999e-05,
"loss": 2.1718,
"step": 1815
},
{
"epoch": 0.01,
"learning_rate": 9.997898776386576e-05,
"loss": 2.1469,
"step": 1820
},
{
"epoch": 0.01,
"learning_rate": 9.997886543075752e-05,
"loss": 2.0369,
"step": 1825
},
{
"epoch": 0.01,
"learning_rate": 9.997874274264615e-05,
"loss": 2.0537,
"step": 1830
},
{
"epoch": 0.01,
"learning_rate": 9.99786196995325e-05,
"loss": 2.2287,
"step": 1835
},
{
"epoch": 0.01,
"learning_rate": 9.997849630141747e-05,
"loss": 2.1659,
"step": 1840
},
{
"epoch": 0.01,
"learning_rate": 9.997837254830191e-05,
"loss": 2.2158,
"step": 1845
},
{
"epoch": 0.01,
"learning_rate": 9.997824844018673e-05,
"loss": 2.1484,
"step": 1850
},
{
"epoch": 0.01,
"learning_rate": 9.997812397707277e-05,
"loss": 2.1025,
"step": 1855
},
{
"epoch": 0.01,
"learning_rate": 9.997799915896098e-05,
"loss": 2.1684,
"step": 1860
},
{
"epoch": 0.01,
"learning_rate": 9.997787398585216e-05,
"loss": 2.127,
"step": 1865
},
{
"epoch": 0.01,
"learning_rate": 9.997774845774727e-05,
"loss": 2.184,
"step": 1870
},
{
"epoch": 0.01,
"learning_rate": 9.997762257464717e-05,
"loss": 2.194,
"step": 1875
},
{
"epoch": 0.01,
"learning_rate": 9.997749633655276e-05,
"loss": 2.204,
"step": 1880
},
{
"epoch": 0.01,
"learning_rate": 9.997736974346494e-05,
"loss": 2.11,
"step": 1885
},
{
"epoch": 0.01,
"learning_rate": 9.99772427953846e-05,
"loss": 2.1991,
"step": 1890
},
{
"epoch": 0.01,
"learning_rate": 9.997711549231265e-05,
"loss": 2.1468,
"step": 1895
},
{
"epoch": 0.01,
"learning_rate": 9.997698783424998e-05,
"loss": 2.1537,
"step": 1900
},
{
"epoch": 0.01,
"learning_rate": 9.997685982119752e-05,
"loss": 2.1576,
"step": 1905
},
{
"epoch": 0.01,
"learning_rate": 9.997673145315616e-05,
"loss": 2.1926,
"step": 1910
},
{
"epoch": 0.01,
"learning_rate": 9.997660273012683e-05,
"loss": 2.1583,
"step": 1915
},
{
"epoch": 0.01,
"learning_rate": 9.997647365211042e-05,
"loss": 2.175,
"step": 1920
},
{
"epoch": 0.01,
"learning_rate": 9.997634421910788e-05,
"loss": 2.2242,
"step": 1925
},
{
"epoch": 0.01,
"learning_rate": 9.99762144311201e-05,
"loss": 2.1012,
"step": 1930
},
{
"epoch": 0.01,
"learning_rate": 9.997608428814801e-05,
"loss": 2.1825,
"step": 1935
},
{
"epoch": 0.01,
"learning_rate": 9.997595379019253e-05,
"loss": 2.1936,
"step": 1940
},
{
"epoch": 0.01,
"learning_rate": 9.99758229372546e-05,
"loss": 2.0843,
"step": 1945
},
{
"epoch": 0.01,
"learning_rate": 9.997569172933515e-05,
"loss": 2.1341,
"step": 1950
},
{
"epoch": 0.01,
"learning_rate": 9.99755601664351e-05,
"loss": 2.2056,
"step": 1955
},
{
"epoch": 0.01,
"learning_rate": 9.99754282485554e-05,
"loss": 2.1455,
"step": 1960
},
{
"epoch": 0.01,
"learning_rate": 9.997529597569694e-05,
"loss": 2.2111,
"step": 1965
},
{
"epoch": 0.01,
"learning_rate": 9.997516334786073e-05,
"loss": 2.1462,
"step": 1970
},
{
"epoch": 0.01,
"learning_rate": 9.997503036504768e-05,
"loss": 2.1253,
"step": 1975
},
{
"epoch": 0.01,
"learning_rate": 9.997489702725871e-05,
"loss": 2.1177,
"step": 1980
},
{
"epoch": 0.01,
"learning_rate": 9.99747633344948e-05,
"loss": 2.1866,
"step": 1985
},
{
"epoch": 0.01,
"learning_rate": 9.997462928675689e-05,
"loss": 2.1611,
"step": 1990
},
{
"epoch": 0.01,
"learning_rate": 9.997449488404594e-05,
"loss": 2.1551,
"step": 1995
},
{
"epoch": 0.01,
"learning_rate": 9.997436012636288e-05,
"loss": 2.1757,
"step": 2000
},
{
"epoch": 0.01,
"learning_rate": 9.997422501370868e-05,
"loss": 2.1454,
"step": 2005
},
{
"epoch": 0.01,
"learning_rate": 9.997408954608432e-05,
"loss": 2.2189,
"step": 2010
},
{
"epoch": 0.01,
"learning_rate": 9.997395372349073e-05,
"loss": 2.1664,
"step": 2015
},
{
"epoch": 0.01,
"learning_rate": 9.997381754592889e-05,
"loss": 2.1985,
"step": 2020
},
{
"epoch": 0.01,
"learning_rate": 9.997368101339979e-05,
"loss": 2.1798,
"step": 2025
},
{
"epoch": 0.01,
"learning_rate": 9.997354412590437e-05,
"loss": 2.0335,
"step": 2030
},
{
"epoch": 0.01,
"learning_rate": 9.997340688344359e-05,
"loss": 2.0931,
"step": 2035
},
{
"epoch": 0.01,
"learning_rate": 9.997326928601845e-05,
"loss": 2.1387,
"step": 2040
},
{
"epoch": 0.01,
"learning_rate": 9.997313133362994e-05,
"loss": 2.2468,
"step": 2045
},
{
"epoch": 0.01,
"learning_rate": 9.997299302627899e-05,
"loss": 2.1083,
"step": 2050
},
{
"epoch": 0.01,
"learning_rate": 9.997285436396664e-05,
"loss": 2.1459,
"step": 2055
},
{
"epoch": 0.01,
"learning_rate": 9.997271534669384e-05,
"loss": 2.1215,
"step": 2060
},
{
"epoch": 0.01,
"learning_rate": 9.997257597446158e-05,
"loss": 2.1947,
"step": 2065
},
{
"epoch": 0.01,
"learning_rate": 9.997243624727086e-05,
"loss": 2.1519,
"step": 2070
},
{
"epoch": 0.01,
"learning_rate": 9.997229616512267e-05,
"loss": 2.1879,
"step": 2075
},
{
"epoch": 0.01,
"learning_rate": 9.997215572801802e-05,
"loss": 2.2531,
"step": 2080
},
{
"epoch": 0.01,
"learning_rate": 9.997201493595786e-05,
"loss": 2.1118,
"step": 2085
},
{
"epoch": 0.01,
"learning_rate": 9.997187378894324e-05,
"loss": 2.1684,
"step": 2090
},
{
"epoch": 0.01,
"learning_rate": 9.997173228697514e-05,
"loss": 2.2057,
"step": 2095
},
{
"epoch": 0.01,
"learning_rate": 9.997159043005457e-05,
"loss": 2.1782,
"step": 2100
},
{
"epoch": 0.01,
"learning_rate": 9.997144821818254e-05,
"loss": 2.154,
"step": 2105
},
{
"epoch": 0.01,
"learning_rate": 9.997130565136005e-05,
"loss": 2.1836,
"step": 2110
},
{
"epoch": 0.01,
"learning_rate": 9.997116272958811e-05,
"loss": 2.2081,
"step": 2115
},
{
"epoch": 0.01,
"learning_rate": 9.997101945286777e-05,
"loss": 2.121,
"step": 2120
},
{
"epoch": 0.01,
"learning_rate": 9.997087582120001e-05,
"loss": 2.1661,
"step": 2125
},
{
"epoch": 0.01,
"learning_rate": 9.997073183458585e-05,
"loss": 2.0834,
"step": 2130
},
{
"epoch": 0.01,
"learning_rate": 9.997058749302635e-05,
"loss": 2.1925,
"step": 2135
},
{
"epoch": 0.01,
"learning_rate": 9.997044279652248e-05,
"loss": 2.1203,
"step": 2140
},
{
"epoch": 0.01,
"learning_rate": 9.997029774507532e-05,
"loss": 2.1692,
"step": 2145
},
{
"epoch": 0.01,
"learning_rate": 9.997015233868588e-05,
"loss": 2.0983,
"step": 2150
},
{
"epoch": 0.01,
"learning_rate": 9.997000657735518e-05,
"loss": 2.1571,
"step": 2155
},
{
"epoch": 0.01,
"learning_rate": 9.996986046108428e-05,
"loss": 2.0778,
"step": 2160
},
{
"epoch": 0.01,
"learning_rate": 9.996971398987419e-05,
"loss": 2.146,
"step": 2165
},
{
"epoch": 0.01,
"learning_rate": 9.996956716372597e-05,
"loss": 2.1212,
"step": 2170
},
{
"epoch": 0.01,
"learning_rate": 9.996941998264066e-05,
"loss": 2.1345,
"step": 2175
},
{
"epoch": 0.01,
"learning_rate": 9.99692724466193e-05,
"loss": 2.1075,
"step": 2180
},
{
"epoch": 0.01,
"learning_rate": 9.996912455566293e-05,
"loss": 2.1211,
"step": 2185
},
{
"epoch": 0.01,
"learning_rate": 9.996897630977264e-05,
"loss": 2.1173,
"step": 2190
},
{
"epoch": 0.01,
"learning_rate": 9.996882770894944e-05,
"loss": 2.1163,
"step": 2195
},
{
"epoch": 0.01,
"learning_rate": 9.99686787531944e-05,
"loss": 2.1874,
"step": 2200
},
{
"epoch": 0.01,
"learning_rate": 9.996852944250857e-05,
"loss": 2.1383,
"step": 2205
},
{
"epoch": 0.01,
"learning_rate": 9.996837977689304e-05,
"loss": 2.113,
"step": 2210
},
{
"epoch": 0.01,
"learning_rate": 9.996822975634884e-05,
"loss": 2.0918,
"step": 2215
},
{
"epoch": 0.01,
"learning_rate": 9.996807938087705e-05,
"loss": 2.179,
"step": 2220
},
{
"epoch": 0.01,
"learning_rate": 9.996792865047873e-05,
"loss": 2.1038,
"step": 2225
},
{
"epoch": 0.01,
"learning_rate": 9.996777756515495e-05,
"loss": 2.1506,
"step": 2230
},
{
"epoch": 0.01,
"learning_rate": 9.99676261249068e-05,
"loss": 2.0858,
"step": 2235
},
{
"epoch": 0.01,
"learning_rate": 9.996747432973535e-05,
"loss": 2.0471,
"step": 2240
},
{
"epoch": 0.01,
"learning_rate": 9.996732217964166e-05,
"loss": 2.171,
"step": 2245
},
{
"epoch": 0.01,
"learning_rate": 9.996716967462682e-05,
"loss": 2.1919,
"step": 2250
},
{
"epoch": 0.01,
"learning_rate": 9.996701681469193e-05,
"loss": 2.1703,
"step": 2255
},
{
"epoch": 0.01,
"learning_rate": 9.996686359983807e-05,
"loss": 2.1452,
"step": 2260
},
{
"epoch": 0.01,
"learning_rate": 9.996671003006633e-05,
"loss": 2.1438,
"step": 2265
},
{
"epoch": 0.01,
"learning_rate": 9.996655610537777e-05,
"loss": 2.0694,
"step": 2270
},
{
"epoch": 0.01,
"learning_rate": 9.99664018257735e-05,
"loss": 2.1681,
"step": 2275
},
{
"epoch": 0.01,
"learning_rate": 9.996624719125463e-05,
"loss": 2.1955,
"step": 2280
},
{
"epoch": 0.01,
"learning_rate": 9.996609220182227e-05,
"loss": 2.1705,
"step": 2285
},
{
"epoch": 0.01,
"learning_rate": 9.996593685747747e-05,
"loss": 2.1137,
"step": 2290
},
{
"epoch": 0.01,
"learning_rate": 9.996578115822138e-05,
"loss": 2.1733,
"step": 2295
},
{
"epoch": 0.01,
"learning_rate": 9.996562510405509e-05,
"loss": 2.2201,
"step": 2300
},
{
"epoch": 0.01,
"learning_rate": 9.996546869497971e-05,
"loss": 2.1051,
"step": 2305
},
{
"epoch": 0.01,
"learning_rate": 9.996531193099634e-05,
"loss": 2.2088,
"step": 2310
},
{
"epoch": 0.01,
"learning_rate": 9.996515481210611e-05,
"loss": 2.1296,
"step": 2315
},
{
"epoch": 0.01,
"learning_rate": 9.996499733831013e-05,
"loss": 2.1409,
"step": 2320
},
{
"epoch": 0.01,
"learning_rate": 9.996483950960951e-05,
"loss": 2.1582,
"step": 2325
},
{
"epoch": 0.01,
"learning_rate": 9.996468132600539e-05,
"loss": 2.21,
"step": 2330
},
{
"epoch": 0.01,
"learning_rate": 9.996452278749889e-05,
"loss": 2.0888,
"step": 2335
},
{
"epoch": 0.01,
"learning_rate": 9.99643638940911e-05,
"loss": 2.0855,
"step": 2340
},
{
"epoch": 0.01,
"learning_rate": 9.99642046457832e-05,
"loss": 2.1133,
"step": 2345
},
{
"epoch": 0.01,
"learning_rate": 9.996404504257628e-05,
"loss": 2.1955,
"step": 2350
},
{
"epoch": 0.01,
"learning_rate": 9.996388508447151e-05,
"loss": 2.1283,
"step": 2355
},
{
"epoch": 0.01,
"learning_rate": 9.996372477147e-05,
"loss": 2.1481,
"step": 2360
},
{
"epoch": 0.01,
"learning_rate": 9.99635641035729e-05,
"loss": 2.1208,
"step": 2365
},
{
"epoch": 0.01,
"learning_rate": 9.996340308078134e-05,
"loss": 2.2239,
"step": 2370
},
{
"epoch": 0.01,
"learning_rate": 9.996324170309648e-05,
"loss": 2.1492,
"step": 2375
},
{
"epoch": 0.01,
"learning_rate": 9.996307997051946e-05,
"loss": 2.1751,
"step": 2380
},
{
"epoch": 0.01,
"learning_rate": 9.996291788305142e-05,
"loss": 2.1474,
"step": 2385
},
{
"epoch": 0.01,
"learning_rate": 9.996275544069351e-05,
"loss": 2.1329,
"step": 2390
},
{
"epoch": 0.01,
"learning_rate": 9.996259264344692e-05,
"loss": 2.1967,
"step": 2395
},
{
"epoch": 0.01,
"learning_rate": 9.996242949131276e-05,
"loss": 2.1344,
"step": 2400
},
{
"epoch": 0.01,
"learning_rate": 9.99622659842922e-05,
"loss": 2.1912,
"step": 2405
},
{
"epoch": 0.01,
"learning_rate": 9.996210212238641e-05,
"loss": 2.0982,
"step": 2410
},
{
"epoch": 0.01,
"learning_rate": 9.996193790559656e-05,
"loss": 2.0955,
"step": 2415
},
{
"epoch": 0.01,
"learning_rate": 9.996177333392381e-05,
"loss": 2.0673,
"step": 2420
},
{
"epoch": 0.01,
"learning_rate": 9.996160840736933e-05,
"loss": 2.1148,
"step": 2425
},
{
"epoch": 0.01,
"learning_rate": 9.996144312593429e-05,
"loss": 2.0884,
"step": 2430
},
{
"epoch": 0.01,
"learning_rate": 9.996127748961985e-05,
"loss": 2.1427,
"step": 2435
},
{
"epoch": 0.01,
"learning_rate": 9.996111149842722e-05,
"loss": 2.1604,
"step": 2440
},
{
"epoch": 0.01,
"learning_rate": 9.996094515235754e-05,
"loss": 2.1228,
"step": 2445
},
{
"epoch": 0.01,
"learning_rate": 9.996077845141201e-05,
"loss": 2.1371,
"step": 2450
},
{
"epoch": 0.01,
"learning_rate": 9.996061139559183e-05,
"loss": 2.0901,
"step": 2455
},
{
"epoch": 0.01,
"learning_rate": 9.996044398489816e-05,
"loss": 2.1381,
"step": 2460
},
{
"epoch": 0.01,
"learning_rate": 9.996027621933222e-05,
"loss": 2.1273,
"step": 2465
},
{
"epoch": 0.01,
"learning_rate": 9.996010809889515e-05,
"loss": 2.1295,
"step": 2470
},
{
"epoch": 0.01,
"learning_rate": 9.99599396235882e-05,
"loss": 2.1174,
"step": 2475
},
{
"epoch": 0.01,
"learning_rate": 9.995977079341255e-05,
"loss": 2.0971,
"step": 2480
},
{
"epoch": 0.01,
"learning_rate": 9.995960160836937e-05,
"loss": 2.1277,
"step": 2485
},
{
"epoch": 0.01,
"learning_rate": 9.995943206845989e-05,
"loss": 2.1866,
"step": 2490
},
{
"epoch": 0.01,
"learning_rate": 9.995926217368532e-05,
"loss": 2.0881,
"step": 2495
},
{
"epoch": 0.01,
"learning_rate": 9.995909192404685e-05,
"loss": 2.1177,
"step": 2500
},
{
"epoch": 0.01,
"learning_rate": 9.995892131954567e-05,
"loss": 2.1555,
"step": 2505
},
{
"epoch": 0.01,
"learning_rate": 9.995875036018305e-05,
"loss": 2.1186,
"step": 2510
},
{
"epoch": 0.01,
"learning_rate": 9.995857904596015e-05,
"loss": 2.17,
"step": 2515
},
{
"epoch": 0.01,
"learning_rate": 9.995840737687821e-05,
"loss": 2.0997,
"step": 2520
},
{
"epoch": 0.01,
"learning_rate": 9.995823535293845e-05,
"loss": 2.0572,
"step": 2525
},
{
"epoch": 0.01,
"learning_rate": 9.99580629741421e-05,
"loss": 2.1231,
"step": 2530
},
{
"epoch": 0.01,
"learning_rate": 9.995789024049035e-05,
"loss": 2.0826,
"step": 2535
},
{
"epoch": 0.01,
"learning_rate": 9.995771715198446e-05,
"loss": 2.1464,
"step": 2540
},
{
"epoch": 0.01,
"learning_rate": 9.995754370862565e-05,
"loss": 2.1429,
"step": 2545
},
{
"epoch": 0.01,
"learning_rate": 9.995736991041516e-05,
"loss": 2.1132,
"step": 2550
},
{
"epoch": 0.01,
"learning_rate": 9.995719575735421e-05,
"loss": 2.1707,
"step": 2555
},
{
"epoch": 0.01,
"learning_rate": 9.995702124944404e-05,
"loss": 2.0475,
"step": 2560
},
{
"epoch": 0.01,
"learning_rate": 9.995684638668589e-05,
"loss": 2.1323,
"step": 2565
},
{
"epoch": 0.01,
"learning_rate": 9.995667116908101e-05,
"loss": 2.1117,
"step": 2570
},
{
"epoch": 0.01,
"learning_rate": 9.995649559663063e-05,
"loss": 2.2003,
"step": 2575
},
{
"epoch": 0.01,
"learning_rate": 9.995631966933601e-05,
"loss": 2.1345,
"step": 2580
},
{
"epoch": 0.01,
"learning_rate": 9.995614338719841e-05,
"loss": 2.1317,
"step": 2585
},
{
"epoch": 0.01,
"learning_rate": 9.995596675021905e-05,
"loss": 2.1195,
"step": 2590
},
{
"epoch": 0.01,
"learning_rate": 9.995578975839921e-05,
"loss": 2.1967,
"step": 2595
},
{
"epoch": 0.01,
"learning_rate": 9.995561241174014e-05,
"loss": 2.1783,
"step": 2600
},
{
"epoch": 0.01,
"learning_rate": 9.995543471024309e-05,
"loss": 2.1464,
"step": 2605
},
{
"epoch": 0.01,
"learning_rate": 9.995525665390935e-05,
"loss": 2.1402,
"step": 2610
},
{
"epoch": 0.01,
"learning_rate": 9.995507824274016e-05,
"loss": 2.1201,
"step": 2615
},
{
"epoch": 0.01,
"learning_rate": 9.995489947673677e-05,
"loss": 2.122,
"step": 2620
},
{
"epoch": 0.01,
"learning_rate": 9.99547203559005e-05,
"loss": 2.0298,
"step": 2625
},
{
"epoch": 0.01,
"learning_rate": 9.995454088023258e-05,
"loss": 2.1367,
"step": 2630
},
{
"epoch": 0.01,
"learning_rate": 9.995436104973431e-05,
"loss": 2.1453,
"step": 2635
},
{
"epoch": 0.01,
"learning_rate": 9.995418086440696e-05,
"loss": 2.1443,
"step": 2640
},
{
"epoch": 0.01,
"learning_rate": 9.99540003242518e-05,
"loss": 2.0653,
"step": 2645
},
{
"epoch": 0.01,
"learning_rate": 9.995381942927014e-05,
"loss": 2.1893,
"step": 2650
},
{
"epoch": 0.01,
"learning_rate": 9.995363817946321e-05,
"loss": 2.1107,
"step": 2655
},
{
"epoch": 0.01,
"learning_rate": 9.995345657483235e-05,
"loss": 2.0983,
"step": 2660
},
{
"epoch": 0.01,
"learning_rate": 9.995327461537884e-05,
"loss": 2.1801,
"step": 2665
},
{
"epoch": 0.01,
"learning_rate": 9.995309230110397e-05,
"loss": 2.1235,
"step": 2670
},
{
"epoch": 0.01,
"learning_rate": 9.995290963200902e-05,
"loss": 2.0591,
"step": 2675
},
{
"epoch": 0.01,
"learning_rate": 9.995272660809529e-05,
"loss": 2.0806,
"step": 2680
},
{
"epoch": 0.01,
"learning_rate": 9.995254322936409e-05,
"loss": 2.1917,
"step": 2685
},
{
"epoch": 0.01,
"learning_rate": 9.995235949581674e-05,
"loss": 2.1048,
"step": 2690
},
{
"epoch": 0.01,
"learning_rate": 9.995217540745449e-05,
"loss": 2.0945,
"step": 2695
},
{
"epoch": 0.01,
"learning_rate": 9.995199096427871e-05,
"loss": 2.0783,
"step": 2700
},
{
"epoch": 0.01,
"learning_rate": 9.995180616629067e-05,
"loss": 2.0235,
"step": 2705
},
{
"epoch": 0.01,
"learning_rate": 9.995162101349171e-05,
"loss": 2.1152,
"step": 2710
},
{
"epoch": 0.01,
"learning_rate": 9.995143550588311e-05,
"loss": 2.1279,
"step": 2715
},
{
"epoch": 0.01,
"learning_rate": 9.995124964346622e-05,
"loss": 2.1414,
"step": 2720
},
{
"epoch": 0.01,
"learning_rate": 9.995106342624236e-05,
"loss": 2.0728,
"step": 2725
},
{
"epoch": 0.01,
"learning_rate": 9.995087685421283e-05,
"loss": 2.081,
"step": 2730
},
{
"epoch": 0.01,
"learning_rate": 9.995068992737896e-05,
"loss": 2.158,
"step": 2735
},
{
"epoch": 0.01,
"learning_rate": 9.995050264574209e-05,
"loss": 2.1343,
"step": 2740
},
{
"epoch": 0.01,
"learning_rate": 9.995031500930354e-05,
"loss": 2.0842,
"step": 2745
},
{
"epoch": 0.01,
"learning_rate": 9.995012701806465e-05,
"loss": 2.0863,
"step": 2750
},
{
"epoch": 0.01,
"learning_rate": 9.994993867202676e-05,
"loss": 2.1224,
"step": 2755
},
{
"epoch": 0.01,
"learning_rate": 9.99497499711912e-05,
"loss": 2.1162,
"step": 2760
},
{
"epoch": 0.01,
"learning_rate": 9.99495609155593e-05,
"loss": 2.1158,
"step": 2765
},
{
"epoch": 0.01,
"learning_rate": 9.994937150513242e-05,
"loss": 2.1247,
"step": 2770
},
{
"epoch": 0.01,
"learning_rate": 9.994918173991189e-05,
"loss": 2.1815,
"step": 2775
},
{
"epoch": 0.01,
"learning_rate": 9.994899161989908e-05,
"loss": 2.1047,
"step": 2780
},
{
"epoch": 0.01,
"learning_rate": 9.994880114509532e-05,
"loss": 2.1875,
"step": 2785
},
{
"epoch": 0.01,
"learning_rate": 9.994861031550197e-05,
"loss": 2.0797,
"step": 2790
},
{
"epoch": 0.01,
"learning_rate": 9.994841913112038e-05,
"loss": 2.0576,
"step": 2795
},
{
"epoch": 0.02,
"learning_rate": 9.994822759195193e-05,
"loss": 2.1058,
"step": 2800
},
{
"epoch": 0.02,
"learning_rate": 9.994803569799796e-05,
"loss": 2.1434,
"step": 2805
},
{
"epoch": 0.02,
"learning_rate": 9.994784344925983e-05,
"loss": 2.1208,
"step": 2810
},
{
"epoch": 0.02,
"learning_rate": 9.994765084573891e-05,
"loss": 2.0372,
"step": 2815
},
{
"epoch": 0.02,
"learning_rate": 9.994745788743658e-05,
"loss": 2.1411,
"step": 2820
},
{
"epoch": 0.02,
"learning_rate": 9.994726457435419e-05,
"loss": 2.1772,
"step": 2825
},
{
"epoch": 0.02,
"learning_rate": 9.994707090649313e-05,
"loss": 2.1164,
"step": 2830
},
{
"epoch": 0.02,
"learning_rate": 9.994687688385478e-05,
"loss": 2.0338,
"step": 2835
},
{
"epoch": 0.02,
"learning_rate": 9.99466825064405e-05,
"loss": 2.0496,
"step": 2840
},
{
"epoch": 0.02,
"learning_rate": 9.994648777425169e-05,
"loss": 2.1539,
"step": 2845
},
{
"epoch": 0.02,
"learning_rate": 9.99462926872897e-05,
"loss": 2.1241,
"step": 2850
},
{
"epoch": 0.02,
"learning_rate": 9.994609724555595e-05,
"loss": 2.1269,
"step": 2855
},
{
"epoch": 0.02,
"learning_rate": 9.994590144905183e-05,
"loss": 2.1446,
"step": 2860
},
{
"epoch": 0.02,
"learning_rate": 9.99457052977787e-05,
"loss": 2.0867,
"step": 2865
},
{
"epoch": 0.02,
"learning_rate": 9.994550879173797e-05,
"loss": 2.1193,
"step": 2870
},
{
"epoch": 0.02,
"learning_rate": 9.994531193093104e-05,
"loss": 2.1206,
"step": 2875
},
{
"epoch": 0.02,
"learning_rate": 9.99451147153593e-05,
"loss": 2.0615,
"step": 2880
},
{
"epoch": 0.02,
"learning_rate": 9.994491714502417e-05,
"loss": 2.1501,
"step": 2885
},
{
"epoch": 0.02,
"learning_rate": 9.994471921992703e-05,
"loss": 2.112,
"step": 2890
},
{
"epoch": 0.02,
"learning_rate": 9.994452094006928e-05,
"loss": 2.1012,
"step": 2895
},
{
"epoch": 0.02,
"learning_rate": 9.994432230545236e-05,
"loss": 2.1588,
"step": 2900
},
{
"epoch": 0.02,
"learning_rate": 9.994412331607766e-05,
"loss": 2.1417,
"step": 2905
},
{
"epoch": 0.02,
"learning_rate": 9.99439239719466e-05,
"loss": 2.1224,
"step": 2910
},
{
"epoch": 0.02,
"learning_rate": 9.99437242730606e-05,
"loss": 2.0665,
"step": 2915
},
{
"epoch": 0.02,
"learning_rate": 9.994352421942106e-05,
"loss": 2.0654,
"step": 2920
},
{
"epoch": 0.02,
"learning_rate": 9.99433238110294e-05,
"loss": 2.1563,
"step": 2925
},
{
"epoch": 0.02,
"learning_rate": 9.994312304788708e-05,
"loss": 2.105,
"step": 2930
},
{
"epoch": 0.02,
"learning_rate": 9.994292192999548e-05,
"loss": 2.1558,
"step": 2935
},
{
"epoch": 0.02,
"learning_rate": 9.994272045735606e-05,
"loss": 2.1118,
"step": 2940
},
{
"epoch": 0.02,
"learning_rate": 9.994251862997025e-05,
"loss": 2.1664,
"step": 2945
},
{
"epoch": 0.02,
"learning_rate": 9.994231644783948e-05,
"loss": 2.1915,
"step": 2950
},
{
"epoch": 0.02,
"learning_rate": 9.994211391096517e-05,
"loss": 2.1064,
"step": 2955
},
{
"epoch": 0.02,
"learning_rate": 9.994191101934878e-05,
"loss": 2.1568,
"step": 2960
},
{
"epoch": 0.02,
"learning_rate": 9.994170777299172e-05,
"loss": 2.1486,
"step": 2965
},
{
"epoch": 0.02,
"learning_rate": 9.994150417189547e-05,
"loss": 2.0641,
"step": 2970
},
{
"epoch": 0.02,
"learning_rate": 9.994130021606147e-05,
"loss": 2.1553,
"step": 2975
},
{
"epoch": 0.02,
"learning_rate": 9.994109590549115e-05,
"loss": 2.1309,
"step": 2980
},
{
"epoch": 0.02,
"learning_rate": 9.994089124018596e-05,
"loss": 2.1357,
"step": 2985
},
{
"epoch": 0.02,
"learning_rate": 9.994068622014738e-05,
"loss": 2.0536,
"step": 2990
},
{
"epoch": 0.02,
"learning_rate": 9.994048084537684e-05,
"loss": 2.108,
"step": 2995
},
{
"epoch": 0.02,
"learning_rate": 9.994027511587582e-05,
"loss": 2.1347,
"step": 3000
},
{
"epoch": 0.02,
"learning_rate": 9.994006903164576e-05,
"loss": 2.2014,
"step": 3005
},
{
"epoch": 0.02,
"learning_rate": 9.993986259268815e-05,
"loss": 2.0732,
"step": 3010
},
{
"epoch": 0.02,
"learning_rate": 9.993965579900444e-05,
"loss": 2.0932,
"step": 3015
},
{
"epoch": 0.02,
"learning_rate": 9.993944865059609e-05,
"loss": 2.1291,
"step": 3020
},
{
"epoch": 0.02,
"learning_rate": 9.993924114746458e-05,
"loss": 2.124,
"step": 3025
},
{
"epoch": 0.02,
"learning_rate": 9.99390332896114e-05,
"loss": 2.1174,
"step": 3030
},
{
"epoch": 0.02,
"learning_rate": 9.9938825077038e-05,
"loss": 2.1522,
"step": 3035
},
{
"epoch": 0.02,
"learning_rate": 9.993861650974587e-05,
"loss": 2.0949,
"step": 3040
},
{
"epoch": 0.02,
"learning_rate": 9.99384075877365e-05,
"loss": 2.1321,
"step": 3045
},
{
"epoch": 0.02,
"learning_rate": 9.993819831101136e-05,
"loss": 2.0997,
"step": 3050
},
{
"epoch": 0.02,
"learning_rate": 9.993798867957196e-05,
"loss": 1.9854,
"step": 3055
},
{
"epoch": 0.02,
"learning_rate": 9.993777869341975e-05,
"loss": 2.047,
"step": 3060
},
{
"epoch": 0.02,
"learning_rate": 9.993756835255625e-05,
"loss": 2.1012,
"step": 3065
},
{
"epoch": 0.02,
"learning_rate": 9.993735765698296e-05,
"loss": 2.1487,
"step": 3070
},
{
"epoch": 0.02,
"learning_rate": 9.993714660670135e-05,
"loss": 2.092,
"step": 3075
},
{
"epoch": 0.02,
"learning_rate": 9.993693520171292e-05,
"loss": 2.0648,
"step": 3080
},
{
"epoch": 0.02,
"learning_rate": 9.993672344201921e-05,
"loss": 2.1037,
"step": 3085
},
{
"epoch": 0.02,
"learning_rate": 9.99365113276217e-05,
"loss": 2.0949,
"step": 3090
},
{
"epoch": 0.02,
"learning_rate": 9.993629885852187e-05,
"loss": 2.1528,
"step": 3095
},
{
"epoch": 0.02,
"learning_rate": 9.993608603472127e-05,
"loss": 1.9833,
"step": 3100
},
{
"epoch": 0.02,
"learning_rate": 9.993587285622141e-05,
"loss": 2.0672,
"step": 3105
},
{
"epoch": 0.02,
"learning_rate": 9.993565932302377e-05,
"loss": 2.14,
"step": 3110
},
{
"epoch": 0.02,
"learning_rate": 9.993544543512989e-05,
"loss": 2.0756,
"step": 3115
},
{
"epoch": 0.02,
"learning_rate": 9.99352311925413e-05,
"loss": 2.0531,
"step": 3120
},
{
"epoch": 0.02,
"learning_rate": 9.993501659525951e-05,
"loss": 2.0048,
"step": 3125
},
{
"epoch": 0.02,
"learning_rate": 9.993480164328602e-05,
"loss": 2.0591,
"step": 3130
},
{
"epoch": 0.02,
"learning_rate": 9.99345863366224e-05,
"loss": 2.1044,
"step": 3135
},
{
"epoch": 0.02,
"learning_rate": 9.993437067527015e-05,
"loss": 2.1492,
"step": 3140
},
{
"epoch": 0.02,
"learning_rate": 9.993415465923081e-05,
"loss": 2.0853,
"step": 3145
},
{
"epoch": 0.02,
"learning_rate": 9.993393828850593e-05,
"loss": 2.1198,
"step": 3150
},
{
"epoch": 0.02,
"learning_rate": 9.993372156309701e-05,
"loss": 2.142,
"step": 3155
},
{
"epoch": 0.02,
"learning_rate": 9.993350448300563e-05,
"loss": 1.9885,
"step": 3160
},
{
"epoch": 0.02,
"learning_rate": 9.993328704823331e-05,
"loss": 2.0396,
"step": 3165
},
{
"epoch": 0.02,
"learning_rate": 9.993306925878159e-05,
"loss": 2.1393,
"step": 3170
},
{
"epoch": 0.02,
"learning_rate": 9.993285111465203e-05,
"loss": 2.0735,
"step": 3175
},
{
"epoch": 0.02,
"learning_rate": 9.993263261584616e-05,
"loss": 2.1967,
"step": 3180
},
{
"epoch": 0.02,
"learning_rate": 9.993241376236557e-05,
"loss": 2.1183,
"step": 3185
},
{
"epoch": 0.02,
"learning_rate": 9.993219455421178e-05,
"loss": 2.1021,
"step": 3190
},
{
"epoch": 0.02,
"learning_rate": 9.993197499138636e-05,
"loss": 2.0567,
"step": 3195
},
{
"epoch": 0.02,
"learning_rate": 9.993175507389085e-05,
"loss": 2.05,
"step": 3200
},
{
"epoch": 0.02,
"learning_rate": 9.993153480172686e-05,
"loss": 2.0591,
"step": 3205
},
{
"epoch": 0.02,
"learning_rate": 9.993131417489591e-05,
"loss": 2.141,
"step": 3210
},
{
"epoch": 0.02,
"learning_rate": 9.993109319339957e-05,
"loss": 2.1258,
"step": 3215
},
{
"epoch": 0.02,
"learning_rate": 9.993087185723944e-05,
"loss": 2.0829,
"step": 3220
},
{
"epoch": 0.02,
"learning_rate": 9.993065016641706e-05,
"loss": 2.1085,
"step": 3225
},
{
"epoch": 0.02,
"learning_rate": 9.993042812093402e-05,
"loss": 2.0949,
"step": 3230
},
{
"epoch": 0.02,
"learning_rate": 9.99302057207919e-05,
"loss": 2.1479,
"step": 3235
},
{
"epoch": 0.02,
"learning_rate": 9.992998296599228e-05,
"loss": 2.1102,
"step": 3240
},
{
"epoch": 0.02,
"learning_rate": 9.992975985653673e-05,
"loss": 2.0816,
"step": 3245
},
{
"epoch": 0.02,
"learning_rate": 9.992953639242684e-05,
"loss": 2.1524,
"step": 3250
},
{
"epoch": 0.02,
"learning_rate": 9.99293125736642e-05,
"loss": 2.0424,
"step": 3255
},
{
"epoch": 0.02,
"learning_rate": 9.992908840025039e-05,
"loss": 2.0284,
"step": 3260
},
{
"epoch": 0.02,
"learning_rate": 9.992886387218702e-05,
"loss": 2.1154,
"step": 3265
},
{
"epoch": 0.02,
"learning_rate": 9.992863898947568e-05,
"loss": 2.1435,
"step": 3270
},
{
"epoch": 0.02,
"learning_rate": 9.992841375211797e-05,
"loss": 2.0608,
"step": 3275
},
{
"epoch": 0.02,
"learning_rate": 9.992818816011547e-05,
"loss": 2.042,
"step": 3280
},
{
"epoch": 0.02,
"learning_rate": 9.992796221346981e-05,
"loss": 2.0706,
"step": 3285
},
{
"epoch": 0.02,
"learning_rate": 9.992773591218256e-05,
"loss": 2.101,
"step": 3290
},
{
"epoch": 0.02,
"learning_rate": 9.992750925625536e-05,
"loss": 2.1302,
"step": 3295
},
{
"epoch": 0.02,
"learning_rate": 9.99272822456898e-05,
"loss": 2.0219,
"step": 3300
},
{
"epoch": 0.02,
"learning_rate": 9.99270548804875e-05,
"loss": 2.0402,
"step": 3305
},
{
"epoch": 0.02,
"learning_rate": 9.992682716065009e-05,
"loss": 2.0915,
"step": 3310
},
{
"epoch": 0.02,
"learning_rate": 9.992659908617916e-05,
"loss": 2.1156,
"step": 3315
},
{
"epoch": 0.02,
"learning_rate": 9.992637065707635e-05,
"loss": 2.0644,
"step": 3320
},
{
"epoch": 0.02,
"learning_rate": 9.992614187334328e-05,
"loss": 2.0334,
"step": 3325
},
{
"epoch": 0.02,
"learning_rate": 9.992591273498155e-05,
"loss": 2.0331,
"step": 3330
},
{
"epoch": 0.02,
"learning_rate": 9.992568324199283e-05,
"loss": 2.1095,
"step": 3335
},
{
"epoch": 0.02,
"learning_rate": 9.992545339437873e-05,
"loss": 2.074,
"step": 3340
},
{
"epoch": 0.02,
"learning_rate": 9.992522319214087e-05,
"loss": 2.0836,
"step": 3345
},
{
"epoch": 0.02,
"learning_rate": 9.992499263528088e-05,
"loss": 2.091,
"step": 3350
},
{
"epoch": 0.02,
"learning_rate": 9.992476172380044e-05,
"loss": 2.066,
"step": 3355
},
{
"epoch": 0.02,
"learning_rate": 9.992453045770115e-05,
"loss": 2.1505,
"step": 3360
},
{
"epoch": 0.02,
"learning_rate": 9.992429883698466e-05,
"loss": 2.0739,
"step": 3365
},
{
"epoch": 0.02,
"learning_rate": 9.992406686165265e-05,
"loss": 2.1702,
"step": 3370
},
{
"epoch": 0.02,
"learning_rate": 9.992383453170672e-05,
"loss": 2.1138,
"step": 3375
},
{
"epoch": 0.02,
"learning_rate": 9.992360184714855e-05,
"loss": 2.0859,
"step": 3380
},
{
"epoch": 0.02,
"learning_rate": 9.992336880797977e-05,
"loss": 2.0476,
"step": 3385
},
{
"epoch": 0.02,
"learning_rate": 9.992313541420204e-05,
"loss": 2.138,
"step": 3390
},
{
"epoch": 0.02,
"learning_rate": 9.992290166581704e-05,
"loss": 2.0537,
"step": 3395
},
{
"epoch": 0.02,
"learning_rate": 9.992266756282642e-05,
"loss": 2.0522,
"step": 3400
},
{
"epoch": 0.02,
"learning_rate": 9.992243310523182e-05,
"loss": 2.0737,
"step": 3405
},
{
"epoch": 0.02,
"learning_rate": 9.992219829303496e-05,
"loss": 2.0488,
"step": 3410
},
{
"epoch": 0.02,
"learning_rate": 9.992196312623742e-05,
"loss": 2.0802,
"step": 3415
},
{
"epoch": 0.02,
"learning_rate": 9.992172760484097e-05,
"loss": 1.9796,
"step": 3420
},
{
"epoch": 0.02,
"learning_rate": 9.992149172884721e-05,
"loss": 2.0359,
"step": 3425
},
{
"epoch": 0.02,
"learning_rate": 9.992125549825785e-05,
"loss": 2.0753,
"step": 3430
},
{
"epoch": 0.02,
"learning_rate": 9.992101891307455e-05,
"loss": 2.07,
"step": 3435
},
{
"epoch": 0.02,
"learning_rate": 9.992078197329901e-05,
"loss": 2.0578,
"step": 3440
},
{
"epoch": 0.02,
"learning_rate": 9.99205446789329e-05,
"loss": 2.1023,
"step": 3445
},
{
"epoch": 0.02,
"learning_rate": 9.99203070299779e-05,
"loss": 2.0924,
"step": 3450
},
{
"epoch": 0.02,
"learning_rate": 9.992006902643571e-05,
"loss": 2.0667,
"step": 3455
},
{
"epoch": 0.02,
"learning_rate": 9.991983066830803e-05,
"loss": 1.9825,
"step": 3460
},
{
"epoch": 0.02,
"learning_rate": 9.991959195559652e-05,
"loss": 2.0436,
"step": 3465
},
{
"epoch": 0.02,
"learning_rate": 9.991935288830289e-05,
"loss": 2.0274,
"step": 3470
},
{
"epoch": 0.02,
"learning_rate": 9.991911346642887e-05,
"loss": 2.1177,
"step": 3475
},
{
"epoch": 0.02,
"learning_rate": 9.991887368997612e-05,
"loss": 2.1236,
"step": 3480
},
{
"epoch": 0.02,
"learning_rate": 9.991863355894636e-05,
"loss": 2.0816,
"step": 3485
},
{
"epoch": 0.02,
"learning_rate": 9.991839307334129e-05,
"loss": 2.0726,
"step": 3490
},
{
"epoch": 0.02,
"learning_rate": 9.991815223316261e-05,
"loss": 2.0361,
"step": 3495
},
{
"epoch": 0.02,
"learning_rate": 9.991791103841205e-05,
"loss": 2.0808,
"step": 3500
},
{
"epoch": 0.02,
"learning_rate": 9.991766948909132e-05,
"loss": 2.0804,
"step": 3505
},
{
"epoch": 0.02,
"learning_rate": 9.991742758520213e-05,
"loss": 2.0501,
"step": 3510
},
{
"epoch": 0.02,
"learning_rate": 9.991718532674619e-05,
"loss": 2.0838,
"step": 3515
},
{
"epoch": 0.02,
"learning_rate": 9.991694271372524e-05,
"loss": 2.0909,
"step": 3520
},
{
"epoch": 0.02,
"learning_rate": 9.991669974614098e-05,
"loss": 2.1647,
"step": 3525
},
{
"epoch": 0.02,
"learning_rate": 9.991645642399517e-05,
"loss": 2.0469,
"step": 3530
},
{
"epoch": 0.02,
"learning_rate": 9.99162127472895e-05,
"loss": 2.1149,
"step": 3535
},
{
"epoch": 0.02,
"learning_rate": 9.991596871602571e-05,
"loss": 2.0733,
"step": 3540
},
{
"epoch": 0.02,
"learning_rate": 9.991572433020555e-05,
"loss": 2.059,
"step": 3545
},
{
"epoch": 0.02,
"learning_rate": 9.991547958983076e-05,
"loss": 2.1091,
"step": 3550
},
{
"epoch": 0.02,
"learning_rate": 9.991523449490305e-05,
"loss": 2.085,
"step": 3555
},
{
"epoch": 0.02,
"learning_rate": 9.991498904542418e-05,
"loss": 2.114,
"step": 3560
},
{
"epoch": 0.02,
"learning_rate": 9.99147432413959e-05,
"loss": 2.0382,
"step": 3565
},
{
"epoch": 0.02,
"learning_rate": 9.991449708281993e-05,
"loss": 2.0991,
"step": 3570
},
{
"epoch": 0.02,
"learning_rate": 9.991425056969804e-05,
"loss": 2.068,
"step": 3575
},
{
"epoch": 0.02,
"learning_rate": 9.991400370203197e-05,
"loss": 2.0653,
"step": 3580
},
{
"epoch": 0.02,
"learning_rate": 9.991375647982349e-05,
"loss": 2.1443,
"step": 3585
},
{
"epoch": 0.02,
"learning_rate": 9.991350890307433e-05,
"loss": 2.0975,
"step": 3590
},
{
"epoch": 0.02,
"learning_rate": 9.991326097178629e-05,
"loss": 2.0211,
"step": 3595
},
{
"epoch": 0.02,
"learning_rate": 9.991301268596108e-05,
"loss": 2.113,
"step": 3600
},
{
"epoch": 0.02,
"learning_rate": 9.99127640456005e-05,
"loss": 2.1179,
"step": 3605
},
{
"epoch": 0.02,
"learning_rate": 9.99125150507063e-05,
"loss": 2.1045,
"step": 3610
},
{
"epoch": 0.02,
"learning_rate": 9.991226570128025e-05,
"loss": 2.102,
"step": 3615
},
{
"epoch": 0.02,
"learning_rate": 9.991201599732413e-05,
"loss": 2.0815,
"step": 3620
},
{
"epoch": 0.02,
"learning_rate": 9.991176593883971e-05,
"loss": 2.0296,
"step": 3625
},
{
"epoch": 0.02,
"learning_rate": 9.991151552582874e-05,
"loss": 2.0478,
"step": 3630
},
{
"epoch": 0.02,
"learning_rate": 9.991126475829305e-05,
"loss": 2.1971,
"step": 3635
},
{
"epoch": 0.02,
"learning_rate": 9.991101363623438e-05,
"loss": 2.093,
"step": 3640
},
{
"epoch": 0.02,
"learning_rate": 9.991076215965453e-05,
"loss": 2.0844,
"step": 3645
},
{
"epoch": 0.02,
"learning_rate": 9.991051032855529e-05,
"loss": 1.9985,
"step": 3650
},
{
"epoch": 0.02,
"learning_rate": 9.991025814293845e-05,
"loss": 2.0604,
"step": 3655
},
{
"epoch": 0.02,
"learning_rate": 9.991000560280577e-05,
"loss": 2.0789,
"step": 3660
},
{
"epoch": 0.02,
"learning_rate": 9.990975270815908e-05,
"loss": 2.0277,
"step": 3665
},
{
"epoch": 0.02,
"learning_rate": 9.990949945900015e-05,
"loss": 2.0749,
"step": 3670
},
{
"epoch": 0.02,
"learning_rate": 9.990924585533081e-05,
"loss": 2.162,
"step": 3675
},
{
"epoch": 0.02,
"learning_rate": 9.990899189715284e-05,
"loss": 2.0544,
"step": 3680
},
{
"epoch": 0.02,
"learning_rate": 9.990873758446805e-05,
"loss": 2.1276,
"step": 3685
},
{
"epoch": 0.02,
"learning_rate": 9.990848291727823e-05,
"loss": 2.0133,
"step": 3690
},
{
"epoch": 0.02,
"learning_rate": 9.990822789558522e-05,
"loss": 2.0193,
"step": 3695
},
{
"epoch": 0.02,
"learning_rate": 9.990797251939081e-05,
"loss": 2.1395,
"step": 3700
},
{
"epoch": 0.02,
"learning_rate": 9.990771678869683e-05,
"loss": 2.0069,
"step": 3705
},
{
"epoch": 0.02,
"learning_rate": 9.990746070350508e-05,
"loss": 2.03,
"step": 3710
},
{
"epoch": 0.02,
"learning_rate": 9.990720426381737e-05,
"loss": 2.0554,
"step": 3715
},
{
"epoch": 0.02,
"learning_rate": 9.990694746963553e-05,
"loss": 2.1037,
"step": 3720
},
{
"epoch": 0.02,
"learning_rate": 9.99066903209614e-05,
"loss": 2.1363,
"step": 3725
},
{
"epoch": 0.02,
"learning_rate": 9.990643281779681e-05,
"loss": 2.079,
"step": 3730
},
{
"epoch": 0.02,
"learning_rate": 9.990617496014356e-05,
"loss": 2.0835,
"step": 3735
},
{
"epoch": 0.02,
"learning_rate": 9.99059167480035e-05,
"loss": 2.1401,
"step": 3740
},
{
"epoch": 0.02,
"learning_rate": 9.990565818137847e-05,
"loss": 2.0933,
"step": 3745
},
{
"epoch": 0.02,
"learning_rate": 9.99053992602703e-05,
"loss": 2.033,
"step": 3750
},
{
"epoch": 0.02,
"learning_rate": 9.99051399846808e-05,
"loss": 2.1374,
"step": 3755
},
{
"epoch": 0.02,
"learning_rate": 9.990488035461185e-05,
"loss": 2.1362,
"step": 3760
},
{
"epoch": 0.02,
"learning_rate": 9.99046203700653e-05,
"loss": 2.038,
"step": 3765
},
{
"epoch": 0.02,
"learning_rate": 9.990436003104295e-05,
"loss": 2.0637,
"step": 3770
},
{
"epoch": 0.02,
"learning_rate": 9.99040993375467e-05,
"loss": 2.0728,
"step": 3775
},
{
"epoch": 0.02,
"learning_rate": 9.990383828957839e-05,
"loss": 2.0247,
"step": 3780
},
{
"epoch": 0.02,
"learning_rate": 9.990357688713985e-05,
"loss": 2.014,
"step": 3785
},
{
"epoch": 0.02,
"learning_rate": 9.990331513023294e-05,
"loss": 2.0486,
"step": 3790
},
{
"epoch": 0.02,
"learning_rate": 9.990305301885953e-05,
"loss": 1.9962,
"step": 3795
},
{
"epoch": 0.02,
"learning_rate": 9.99027905530215e-05,
"loss": 2.1115,
"step": 3800
},
{
"epoch": 0.02,
"learning_rate": 9.99025277327207e-05,
"loss": 2.0927,
"step": 3805
},
{
"epoch": 0.02,
"learning_rate": 9.990226455795897e-05,
"loss": 1.9722,
"step": 3810
},
{
"epoch": 0.02,
"learning_rate": 9.990200102873822e-05,
"loss": 2.0852,
"step": 3815
},
{
"epoch": 0.02,
"learning_rate": 9.99017371450603e-05,
"loss": 2.0546,
"step": 3820
},
{
"epoch": 0.02,
"learning_rate": 9.990147290692708e-05,
"loss": 2.0951,
"step": 3825
},
{
"epoch": 0.02,
"learning_rate": 9.990120831434047e-05,
"loss": 2.0867,
"step": 3830
},
{
"epoch": 0.02,
"learning_rate": 9.990094336730231e-05,
"loss": 1.9501,
"step": 3835
},
{
"epoch": 0.02,
"learning_rate": 9.990067806581449e-05,
"loss": 2.1001,
"step": 3840
},
{
"epoch": 0.02,
"learning_rate": 9.990041240987891e-05,
"loss": 2.1187,
"step": 3845
},
{
"epoch": 0.02,
"learning_rate": 9.990014639949745e-05,
"loss": 2.1309,
"step": 3850
},
{
"epoch": 0.02,
"learning_rate": 9.9899880034672e-05,
"loss": 2.1072,
"step": 3855
},
{
"epoch": 0.02,
"learning_rate": 9.989961331540444e-05,
"loss": 2.0686,
"step": 3860
},
{
"epoch": 0.02,
"learning_rate": 9.989934624169668e-05,
"loss": 2.0179,
"step": 3865
},
{
"epoch": 0.02,
"learning_rate": 9.989907881355062e-05,
"loss": 2.078,
"step": 3870
},
{
"epoch": 0.02,
"learning_rate": 9.989881103096814e-05,
"loss": 2.1654,
"step": 3875
},
{
"epoch": 0.02,
"learning_rate": 9.989854289395115e-05,
"loss": 2.0667,
"step": 3880
},
{
"epoch": 0.02,
"learning_rate": 9.989827440250157e-05,
"loss": 2.07,
"step": 3885
},
{
"epoch": 0.02,
"learning_rate": 9.98980055566213e-05,
"loss": 2.1168,
"step": 3890
},
{
"epoch": 0.02,
"learning_rate": 9.989773635631224e-05,
"loss": 2.1341,
"step": 3895
},
{
"epoch": 0.02,
"learning_rate": 9.989746680157628e-05,
"loss": 2.0411,
"step": 3900
},
{
"epoch": 0.02,
"learning_rate": 9.989719689241539e-05,
"loss": 2.072,
"step": 3905
},
{
"epoch": 0.02,
"learning_rate": 9.989692662883145e-05,
"loss": 2.0824,
"step": 3910
},
{
"epoch": 0.02,
"learning_rate": 9.989665601082638e-05,
"loss": 2.0306,
"step": 3915
},
{
"epoch": 0.02,
"learning_rate": 9.989638503840213e-05,
"loss": 2.0073,
"step": 3920
},
{
"epoch": 0.02,
"learning_rate": 9.98961137115606e-05,
"loss": 2.0591,
"step": 3925
},
{
"epoch": 0.02,
"learning_rate": 9.989584203030371e-05,
"loss": 2.0908,
"step": 3930
},
{
"epoch": 0.02,
"learning_rate": 9.98955699946334e-05,
"loss": 2.0463,
"step": 3935
},
{
"epoch": 0.02,
"learning_rate": 9.989529760455163e-05,
"loss": 2.0773,
"step": 3940
},
{
"epoch": 0.02,
"learning_rate": 9.989502486006029e-05,
"loss": 2.0684,
"step": 3945
},
{
"epoch": 0.02,
"learning_rate": 9.989475176116134e-05,
"loss": 2.0368,
"step": 3950
},
{
"epoch": 0.02,
"learning_rate": 9.989447830785672e-05,
"loss": 2.0703,
"step": 3955
},
{
"epoch": 0.02,
"learning_rate": 9.989420450014837e-05,
"loss": 2.1062,
"step": 3960
},
{
"epoch": 0.02,
"learning_rate": 9.989393033803823e-05,
"loss": 2.1135,
"step": 3965
},
{
"epoch": 0.02,
"learning_rate": 9.989365582152826e-05,
"loss": 2.018,
"step": 3970
},
{
"epoch": 0.02,
"learning_rate": 9.989338095062039e-05,
"loss": 2.0451,
"step": 3975
},
{
"epoch": 0.02,
"learning_rate": 9.989310572531658e-05,
"loss": 2.0308,
"step": 3980
},
{
"epoch": 0.02,
"learning_rate": 9.98928301456188e-05,
"loss": 2.0893,
"step": 3985
},
{
"epoch": 0.02,
"learning_rate": 9.989255421152899e-05,
"loss": 2.0603,
"step": 3990
},
{
"epoch": 0.02,
"learning_rate": 9.989227792304912e-05,
"loss": 1.9256,
"step": 3995
},
{
"epoch": 0.02,
"learning_rate": 9.989200128018115e-05,
"loss": 2.1013,
"step": 4000
},
{
"epoch": 0.02,
"learning_rate": 9.989172428292703e-05,
"loss": 2.1088,
"step": 4005
},
{
"epoch": 0.02,
"learning_rate": 9.989144693128874e-05,
"loss": 2.0657,
"step": 4010
},
{
"epoch": 0.02,
"learning_rate": 9.989116922526826e-05,
"loss": 2.0084,
"step": 4015
},
{
"epoch": 0.02,
"learning_rate": 9.989089116486756e-05,
"loss": 2.0377,
"step": 4020
},
{
"epoch": 0.02,
"learning_rate": 9.98906127500886e-05,
"loss": 2.0226,
"step": 4025
},
{
"epoch": 0.02,
"learning_rate": 9.989033398093337e-05,
"loss": 2.1829,
"step": 4030
},
{
"epoch": 0.02,
"learning_rate": 9.989005485740385e-05,
"loss": 2.0284,
"step": 4035
},
{
"epoch": 0.02,
"learning_rate": 9.988977537950201e-05,
"loss": 2.0763,
"step": 4040
},
{
"epoch": 0.02,
"learning_rate": 9.988949554722986e-05,
"loss": 2.0076,
"step": 4045
},
{
"epoch": 0.02,
"learning_rate": 9.988921536058935e-05,
"loss": 2.0946,
"step": 4050
},
{
"epoch": 0.02,
"learning_rate": 9.98889348195825e-05,
"loss": 2.0773,
"step": 4055
},
{
"epoch": 0.02,
"learning_rate": 9.988865392421129e-05,
"loss": 2.0941,
"step": 4060
},
{
"epoch": 0.02,
"learning_rate": 9.988837267447773e-05,
"loss": 2.0708,
"step": 4065
},
{
"epoch": 0.02,
"learning_rate": 9.988809107038382e-05,
"loss": 1.9459,
"step": 4070
},
{
"epoch": 0.02,
"learning_rate": 9.988780911193152e-05,
"loss": 2.1198,
"step": 4075
},
{
"epoch": 0.02,
"learning_rate": 9.988752679912288e-05,
"loss": 2.0402,
"step": 4080
},
{
"epoch": 0.02,
"learning_rate": 9.988724413195987e-05,
"loss": 2.0241,
"step": 4085
},
{
"epoch": 0.02,
"learning_rate": 9.988696111044454e-05,
"loss": 2.0197,
"step": 4090
},
{
"epoch": 0.02,
"learning_rate": 9.988667773457885e-05,
"loss": 2.1362,
"step": 4095
},
{
"epoch": 0.02,
"learning_rate": 9.988639400436483e-05,
"loss": 1.9679,
"step": 4100
},
{
"epoch": 0.02,
"learning_rate": 9.988610991980453e-05,
"loss": 2.0068,
"step": 4105
},
{
"epoch": 0.02,
"learning_rate": 9.988582548089991e-05,
"loss": 2.1016,
"step": 4110
},
{
"epoch": 0.02,
"learning_rate": 9.988554068765305e-05,
"loss": 2.0239,
"step": 4115
},
{
"epoch": 0.02,
"learning_rate": 9.988525554006594e-05,
"loss": 2.0423,
"step": 4120
},
{
"epoch": 0.02,
"learning_rate": 9.988497003814059e-05,
"loss": 2.0547,
"step": 4125
},
{
"epoch": 0.02,
"learning_rate": 9.988468418187905e-05,
"loss": 2.0756,
"step": 4130
},
{
"epoch": 0.02,
"learning_rate": 9.988439797128335e-05,
"loss": 2.0766,
"step": 4135
},
{
"epoch": 0.02,
"learning_rate": 9.988411140635553e-05,
"loss": 2.1552,
"step": 4140
},
{
"epoch": 0.02,
"learning_rate": 9.98838244870976e-05,
"loss": 2.0678,
"step": 4145
},
{
"epoch": 0.02,
"learning_rate": 9.988353721351162e-05,
"loss": 2.0704,
"step": 4150
},
{
"epoch": 0.02,
"learning_rate": 9.988324958559963e-05,
"loss": 2.1182,
"step": 4155
},
{
"epoch": 0.02,
"learning_rate": 9.988296160336369e-05,
"loss": 2.1208,
"step": 4160
},
{
"epoch": 0.02,
"learning_rate": 9.988267326680578e-05,
"loss": 2.068,
"step": 4165
},
{
"epoch": 0.02,
"learning_rate": 9.9882384575928e-05,
"loss": 2.0726,
"step": 4170
},
{
"epoch": 0.02,
"learning_rate": 9.98820955307324e-05,
"loss": 2.0966,
"step": 4175
},
{
"epoch": 0.02,
"learning_rate": 9.988180613122102e-05,
"loss": 2.0723,
"step": 4180
},
{
"epoch": 0.02,
"learning_rate": 9.988151637739594e-05,
"loss": 2.0793,
"step": 4185
},
{
"epoch": 0.02,
"learning_rate": 9.988122626925918e-05,
"loss": 2.0673,
"step": 4190
},
{
"epoch": 0.02,
"learning_rate": 9.988093580681282e-05,
"loss": 2.0706,
"step": 4195
},
{
"epoch": 0.02,
"learning_rate": 9.988064499005892e-05,
"loss": 2.0263,
"step": 4200
},
{
"epoch": 0.02,
"learning_rate": 9.988035381899956e-05,
"loss": 2.0712,
"step": 4205
},
{
"epoch": 0.02,
"learning_rate": 9.988006229363677e-05,
"loss": 2.084,
"step": 4210
},
{
"epoch": 0.02,
"learning_rate": 9.987977041397267e-05,
"loss": 1.9338,
"step": 4215
},
{
"epoch": 0.02,
"learning_rate": 9.987947818000931e-05,
"loss": 2.0734,
"step": 4220
},
{
"epoch": 0.02,
"learning_rate": 9.987918559174875e-05,
"loss": 1.9958,
"step": 4225
},
{
"epoch": 0.02,
"learning_rate": 9.98788926491931e-05,
"loss": 2.0333,
"step": 4230
},
{
"epoch": 0.02,
"learning_rate": 9.987859935234443e-05,
"loss": 2.0458,
"step": 4235
},
{
"epoch": 0.02,
"learning_rate": 9.98783057012048e-05,
"loss": 2.0196,
"step": 4240
},
{
"epoch": 0.02,
"learning_rate": 9.987801169577633e-05,
"loss": 2.0685,
"step": 4245
},
{
"epoch": 0.02,
"learning_rate": 9.987771733606108e-05,
"loss": 1.9578,
"step": 4250
},
{
"epoch": 0.02,
"learning_rate": 9.987742262206116e-05,
"loss": 2.0413,
"step": 4255
},
{
"epoch": 0.02,
"learning_rate": 9.987712755377865e-05,
"loss": 2.0978,
"step": 4260
},
{
"epoch": 0.02,
"learning_rate": 9.987683213121566e-05,
"loss": 2.0103,
"step": 4265
},
{
"epoch": 0.02,
"learning_rate": 9.987653635437428e-05,
"loss": 1.9552,
"step": 4270
},
{
"epoch": 0.02,
"learning_rate": 9.98762402232566e-05,
"loss": 1.991,
"step": 4275
},
{
"epoch": 0.02,
"learning_rate": 9.987594373786475e-05,
"loss": 2.013,
"step": 4280
},
{
"epoch": 0.02,
"learning_rate": 9.987564689820082e-05,
"loss": 2.0177,
"step": 4285
},
{
"epoch": 0.02,
"learning_rate": 9.98753497042669e-05,
"loss": 2.1126,
"step": 4290
},
{
"epoch": 0.02,
"learning_rate": 9.987505215606514e-05,
"loss": 1.9554,
"step": 4295
},
{
"epoch": 0.02,
"learning_rate": 9.987475425359762e-05,
"loss": 2.0723,
"step": 4300
},
{
"epoch": 0.02,
"learning_rate": 9.987445599686649e-05,
"loss": 2.1134,
"step": 4305
},
{
"epoch": 0.02,
"learning_rate": 9.987415738587385e-05,
"loss": 2.1237,
"step": 4310
},
{
"epoch": 0.02,
"learning_rate": 9.98738584206218e-05,
"loss": 2.0193,
"step": 4315
},
{
"epoch": 0.02,
"learning_rate": 9.987355910111249e-05,
"loss": 2.0684,
"step": 4320
},
{
"epoch": 0.02,
"learning_rate": 9.987325942734804e-05,
"loss": 2.0232,
"step": 4325
},
{
"epoch": 0.02,
"learning_rate": 9.987295939933059e-05,
"loss": 2.0774,
"step": 4330
},
{
"epoch": 0.02,
"learning_rate": 9.987265901706224e-05,
"loss": 2.0294,
"step": 4335
},
{
"epoch": 0.02,
"learning_rate": 9.987235828054515e-05,
"loss": 1.9743,
"step": 4340
},
{
"epoch": 0.02,
"learning_rate": 9.987205718978146e-05,
"loss": 2.0636,
"step": 4345
},
{
"epoch": 0.02,
"learning_rate": 9.987175574477327e-05,
"loss": 2.089,
"step": 4350
},
{
"epoch": 0.02,
"learning_rate": 9.987145394552278e-05,
"loss": 2.0946,
"step": 4355
},
{
"epoch": 0.02,
"learning_rate": 9.987115179203208e-05,
"loss": 2.0423,
"step": 4360
},
{
"epoch": 0.02,
"learning_rate": 9.987084928430334e-05,
"loss": 2.0878,
"step": 4365
},
{
"epoch": 0.02,
"learning_rate": 9.987054642233872e-05,
"loss": 2.0948,
"step": 4370
},
{
"epoch": 0.02,
"learning_rate": 9.987024320614036e-05,
"loss": 2.1751,
"step": 4375
},
{
"epoch": 0.02,
"learning_rate": 9.98699396357104e-05,
"loss": 2.0248,
"step": 4380
},
{
"epoch": 0.02,
"learning_rate": 9.986963571105102e-05,
"loss": 2.0241,
"step": 4385
},
{
"epoch": 0.02,
"learning_rate": 9.986933143216438e-05,
"loss": 2.0722,
"step": 4390
},
{
"epoch": 0.02,
"learning_rate": 9.986902679905259e-05,
"loss": 2.05,
"step": 4395
},
{
"epoch": 0.02,
"learning_rate": 9.986872181171789e-05,
"loss": 2.0651,
"step": 4400
},
{
"epoch": 0.02,
"learning_rate": 9.986841647016238e-05,
"loss": 2.0344,
"step": 4405
},
{
"epoch": 0.02,
"learning_rate": 9.986811077438827e-05,
"loss": 2.0755,
"step": 4410
},
{
"epoch": 0.02,
"learning_rate": 9.986780472439771e-05,
"loss": 2.0046,
"step": 4415
},
{
"epoch": 0.02,
"learning_rate": 9.986749832019291e-05,
"loss": 2.019,
"step": 4420
},
{
"epoch": 0.02,
"learning_rate": 9.9867191561776e-05,
"loss": 1.983,
"step": 4425
},
{
"epoch": 0.02,
"learning_rate": 9.986688444914918e-05,
"loss": 2.1184,
"step": 4430
},
{
"epoch": 0.02,
"learning_rate": 9.986657698231462e-05,
"loss": 2.0049,
"step": 4435
},
{
"epoch": 0.02,
"learning_rate": 9.986626916127454e-05,
"loss": 2.0521,
"step": 4440
},
{
"epoch": 0.02,
"learning_rate": 9.986596098603109e-05,
"loss": 2.0542,
"step": 4445
},
{
"epoch": 0.02,
"learning_rate": 9.986565245658646e-05,
"loss": 2.0248,
"step": 4450
},
{
"epoch": 0.02,
"learning_rate": 9.986534357294286e-05,
"loss": 2.0343,
"step": 4455
},
{
"epoch": 0.02,
"learning_rate": 9.986503433510246e-05,
"loss": 1.9742,
"step": 4460
},
{
"epoch": 0.02,
"learning_rate": 9.986472474306748e-05,
"loss": 2.0847,
"step": 4465
},
{
"epoch": 0.02,
"learning_rate": 9.986441479684012e-05,
"loss": 2.0323,
"step": 4470
},
{
"epoch": 0.02,
"learning_rate": 9.986410449642256e-05,
"loss": 2.0379,
"step": 4475
},
{
"epoch": 0.02,
"learning_rate": 9.986379384181702e-05,
"loss": 2.0984,
"step": 4480
},
{
"epoch": 0.02,
"learning_rate": 9.98634828330257e-05,
"loss": 2.1112,
"step": 4485
},
{
"epoch": 0.02,
"learning_rate": 9.986317147005082e-05,
"loss": 2.0416,
"step": 4490
},
{
"epoch": 0.02,
"learning_rate": 9.986285975289458e-05,
"loss": 2.065,
"step": 4495
},
{
"epoch": 0.02,
"learning_rate": 9.986254768155919e-05,
"loss": 2.0597,
"step": 4500
},
{
"epoch": 0.02,
"learning_rate": 9.986223525604688e-05,
"loss": 2.0483,
"step": 4505
},
{
"epoch": 0.02,
"learning_rate": 9.986192247635986e-05,
"loss": 2.0859,
"step": 4510
},
{
"epoch": 0.02,
"learning_rate": 9.986160934250038e-05,
"loss": 2.0388,
"step": 4515
},
{
"epoch": 0.02,
"learning_rate": 9.98612958544706e-05,
"loss": 2.047,
"step": 4520
},
{
"epoch": 0.02,
"learning_rate": 9.98609820122728e-05,
"loss": 2.0379,
"step": 4525
},
{
"epoch": 0.02,
"learning_rate": 9.98606678159092e-05,
"loss": 2.0987,
"step": 4530
},
{
"epoch": 0.02,
"learning_rate": 9.986035326538203e-05,
"loss": 2.0693,
"step": 4535
},
{
"epoch": 0.02,
"learning_rate": 9.986003836069352e-05,
"loss": 2.1115,
"step": 4540
},
{
"epoch": 0.02,
"learning_rate": 9.98597231018459e-05,
"loss": 2.0416,
"step": 4545
},
{
"epoch": 0.02,
"learning_rate": 9.985940748884142e-05,
"loss": 2.0564,
"step": 4550
},
{
"epoch": 0.02,
"learning_rate": 9.985909152168233e-05,
"loss": 2.0218,
"step": 4555
},
{
"epoch": 0.02,
"learning_rate": 9.985877520037086e-05,
"loss": 2.1198,
"step": 4560
},
{
"epoch": 0.02,
"learning_rate": 9.985845852490925e-05,
"loss": 2.1291,
"step": 4565
},
{
"epoch": 0.02,
"learning_rate": 9.985814149529978e-05,
"loss": 2.0943,
"step": 4570
},
{
"epoch": 0.02,
"learning_rate": 9.985782411154465e-05,
"loss": 1.9922,
"step": 4575
},
{
"epoch": 0.02,
"learning_rate": 9.985750637364617e-05,
"loss": 2.1209,
"step": 4580
},
{
"epoch": 0.02,
"learning_rate": 9.985718828160658e-05,
"loss": 2.0295,
"step": 4585
},
{
"epoch": 0.02,
"learning_rate": 9.98568698354281e-05,
"loss": 1.9902,
"step": 4590
},
{
"epoch": 0.02,
"learning_rate": 9.985655103511306e-05,
"loss": 1.9848,
"step": 4595
},
{
"epoch": 0.02,
"learning_rate": 9.985623188066366e-05,
"loss": 2.1046,
"step": 4600
},
{
"epoch": 0.02,
"learning_rate": 9.985591237208222e-05,
"loss": 2.1052,
"step": 4605
},
{
"epoch": 0.02,
"learning_rate": 9.985559250937097e-05,
"loss": 2.0431,
"step": 4610
},
{
"epoch": 0.02,
"learning_rate": 9.98552722925322e-05,
"loss": 2.0371,
"step": 4615
},
{
"epoch": 0.02,
"learning_rate": 9.985495172156818e-05,
"loss": 2.0619,
"step": 4620
},
{
"epoch": 0.02,
"learning_rate": 9.985463079648118e-05,
"loss": 1.9874,
"step": 4625
},
{
"epoch": 0.02,
"learning_rate": 9.985430951727352e-05,
"loss": 2.0278,
"step": 4630
},
{
"epoch": 0.02,
"learning_rate": 9.985398788394742e-05,
"loss": 1.9961,
"step": 4635
},
{
"epoch": 0.02,
"learning_rate": 9.985366589650521e-05,
"loss": 2.0804,
"step": 4640
},
{
"epoch": 0.02,
"learning_rate": 9.985334355494916e-05,
"loss": 2.0509,
"step": 4645
},
{
"epoch": 0.02,
"learning_rate": 9.985302085928157e-05,
"loss": 2.0934,
"step": 4650
},
{
"epoch": 0.02,
"learning_rate": 9.98526978095047e-05,
"loss": 2.0751,
"step": 4655
},
{
"epoch": 0.02,
"learning_rate": 9.985237440562088e-05,
"loss": 2.1077,
"step": 4660
},
{
"epoch": 0.03,
"learning_rate": 9.98520506476324e-05,
"loss": 2.1009,
"step": 4665
},
{
"epoch": 0.03,
"learning_rate": 9.985172653554156e-05,
"loss": 2.0445,
"step": 4670
},
{
"epoch": 0.03,
"learning_rate": 9.985140206935064e-05,
"loss": 2.0885,
"step": 4675
},
{
"epoch": 0.03,
"learning_rate": 9.985107724906198e-05,
"loss": 2.0894,
"step": 4680
},
{
"epoch": 0.03,
"learning_rate": 9.985075207467787e-05,
"loss": 2.036,
"step": 4685
},
{
"epoch": 0.03,
"learning_rate": 9.985042654620061e-05,
"loss": 2.0781,
"step": 4690
},
{
"epoch": 0.03,
"learning_rate": 9.985010066363251e-05,
"loss": 2.1166,
"step": 4695
},
{
"epoch": 0.03,
"learning_rate": 9.984977442697591e-05,
"loss": 2.0854,
"step": 4700
},
{
"epoch": 0.03,
"learning_rate": 9.984944783623312e-05,
"loss": 1.9965,
"step": 4705
},
{
"epoch": 0.03,
"learning_rate": 9.984912089140644e-05,
"loss": 2.0438,
"step": 4710
},
{
"epoch": 0.03,
"learning_rate": 9.984879359249822e-05,
"loss": 2.015,
"step": 4715
},
{
"epoch": 0.03,
"learning_rate": 9.984846593951077e-05,
"loss": 2.1087,
"step": 4720
},
{
"epoch": 0.03,
"learning_rate": 9.98481379324464e-05,
"loss": 2.0686,
"step": 4725
},
{
"epoch": 0.03,
"learning_rate": 9.984780957130746e-05,
"loss": 1.9714,
"step": 4730
},
{
"epoch": 0.03,
"learning_rate": 9.98474808560963e-05,
"loss": 2.0992,
"step": 4735
},
{
"epoch": 0.03,
"learning_rate": 9.984715178681521e-05,
"loss": 2.0685,
"step": 4740
},
{
"epoch": 0.03,
"learning_rate": 9.984682236346657e-05,
"loss": 2.1039,
"step": 4745
},
{
"epoch": 0.03,
"learning_rate": 9.98464925860527e-05,
"loss": 2.0557,
"step": 4750
},
{
"epoch": 0.03,
"learning_rate": 9.984616245457595e-05,
"loss": 2.0004,
"step": 4755
},
{
"epoch": 0.03,
"learning_rate": 9.984583196903864e-05,
"loss": 2.0354,
"step": 4760
},
{
"epoch": 0.03,
"learning_rate": 9.984550112944315e-05,
"loss": 2.0522,
"step": 4765
},
{
"epoch": 0.03,
"learning_rate": 9.98451699357918e-05,
"loss": 1.9686,
"step": 4770
},
{
"epoch": 0.03,
"learning_rate": 9.984483838808698e-05,
"loss": 2.0692,
"step": 4775
},
{
"epoch": 0.03,
"learning_rate": 9.984450648633103e-05,
"loss": 2.0019,
"step": 4780
},
{
"epoch": 0.03,
"learning_rate": 9.984417423052629e-05,
"loss": 2.0711,
"step": 4785
},
{
"epoch": 0.03,
"learning_rate": 9.984384162067513e-05,
"loss": 1.956,
"step": 4790
},
{
"epoch": 0.03,
"learning_rate": 9.984350865677992e-05,
"loss": 2.0397,
"step": 4795
},
{
"epoch": 0.03,
"learning_rate": 9.984317533884303e-05,
"loss": 2.1375,
"step": 4800
},
{
"epoch": 0.03,
"learning_rate": 9.984284166686679e-05,
"loss": 2.1349,
"step": 4805
},
{
"epoch": 0.03,
"learning_rate": 9.984250764085361e-05,
"loss": 2.056,
"step": 4810
},
{
"epoch": 0.03,
"learning_rate": 9.984217326080585e-05,
"loss": 2.0068,
"step": 4815
},
{
"epoch": 0.03,
"learning_rate": 9.98418385267259e-05,
"loss": 2.058,
"step": 4820
},
{
"epoch": 0.03,
"learning_rate": 9.984150343861611e-05,
"loss": 2.0381,
"step": 4825
},
{
"epoch": 0.03,
"learning_rate": 9.984116799647887e-05,
"loss": 1.994,
"step": 4830
},
{
"epoch": 0.03,
"learning_rate": 9.984083220031657e-05,
"loss": 2.1349,
"step": 4835
},
{
"epoch": 0.03,
"learning_rate": 9.984049605013159e-05,
"loss": 2.0791,
"step": 4840
},
{
"epoch": 0.03,
"learning_rate": 9.984015954592631e-05,
"loss": 2.0391,
"step": 4845
},
{
"epoch": 0.03,
"learning_rate": 9.983982268770314e-05,
"loss": 2.0749,
"step": 4850
},
{
"epoch": 0.03,
"learning_rate": 9.983948547546445e-05,
"loss": 2.1045,
"step": 4855
},
{
"epoch": 0.03,
"learning_rate": 9.983914790921266e-05,
"loss": 1.9671,
"step": 4860
},
{
"epoch": 0.03,
"learning_rate": 9.983880998895014e-05,
"loss": 2.007,
"step": 4865
},
{
"epoch": 0.03,
"learning_rate": 9.983847171467932e-05,
"loss": 2.0749,
"step": 4870
},
{
"epoch": 0.03,
"learning_rate": 9.983813308640258e-05,
"loss": 2.1148,
"step": 4875
},
{
"epoch": 0.03,
"learning_rate": 9.983779410412233e-05,
"loss": 1.9765,
"step": 4880
},
{
"epoch": 0.03,
"learning_rate": 9.983745476784099e-05,
"loss": 1.9868,
"step": 4885
},
{
"epoch": 0.03,
"learning_rate": 9.983711507756095e-05,
"loss": 2.0202,
"step": 4890
},
{
"epoch": 0.03,
"learning_rate": 9.983677503328463e-05,
"loss": 2.0758,
"step": 4895
},
{
"epoch": 0.03,
"learning_rate": 9.983643463501446e-05,
"loss": 2.0468,
"step": 4900
},
{
"epoch": 0.03,
"learning_rate": 9.983609388275286e-05,
"loss": 2.1025,
"step": 4905
},
{
"epoch": 0.03,
"learning_rate": 9.983575277650221e-05,
"loss": 2.0673,
"step": 4910
},
{
"epoch": 0.03,
"learning_rate": 9.983541131626497e-05,
"loss": 1.9581,
"step": 4915
},
{
"epoch": 0.03,
"learning_rate": 9.983506950204356e-05,
"loss": 2.0071,
"step": 4920
},
{
"epoch": 0.03,
"learning_rate": 9.98347273338404e-05,
"loss": 1.9986,
"step": 4925
},
{
"epoch": 0.03,
"learning_rate": 9.983438481165793e-05,
"loss": 2.0449,
"step": 4930
},
{
"epoch": 0.03,
"learning_rate": 9.983404193549856e-05,
"loss": 2.0598,
"step": 4935
},
{
"epoch": 0.03,
"learning_rate": 9.983369870536477e-05,
"loss": 2.0549,
"step": 4940
},
{
"epoch": 0.03,
"learning_rate": 9.983335512125896e-05,
"loss": 2.0398,
"step": 4945
},
{
"epoch": 0.03,
"learning_rate": 9.983301118318357e-05,
"loss": 2.0025,
"step": 4950
},
{
"epoch": 0.03,
"learning_rate": 9.983266689114105e-05,
"loss": 2.0668,
"step": 4955
},
{
"epoch": 0.03,
"learning_rate": 9.983232224513387e-05,
"loss": 2.057,
"step": 4960
},
{
"epoch": 0.03,
"learning_rate": 9.983197724516444e-05,
"loss": 2.039,
"step": 4965
},
{
"epoch": 0.03,
"learning_rate": 9.983163189123523e-05,
"loss": 1.998,
"step": 4970
},
{
"epoch": 0.03,
"learning_rate": 9.983128618334871e-05,
"loss": 2.0518,
"step": 4975
},
{
"epoch": 0.03,
"learning_rate": 9.98309401215073e-05,
"loss": 2.0248,
"step": 4980
},
{
"epoch": 0.03,
"learning_rate": 9.983059370571348e-05,
"loss": 2.016,
"step": 4985
},
{
"epoch": 0.03,
"learning_rate": 9.983024693596969e-05,
"loss": 2.0439,
"step": 4990
},
{
"epoch": 0.03,
"learning_rate": 9.982989981227843e-05,
"loss": 2.0758,
"step": 4995
},
{
"epoch": 0.03,
"learning_rate": 9.982955233464213e-05,
"loss": 2.0772,
"step": 5000
},
{
"epoch": 0.03,
"learning_rate": 9.982920450306328e-05,
"loss": 2.0312,
"step": 5005
},
{
"epoch": 0.03,
"learning_rate": 9.982885631754435e-05,
"loss": 2.024,
"step": 5010
},
{
"epoch": 0.03,
"learning_rate": 9.982850777808779e-05,
"loss": 2.0497,
"step": 5015
},
{
"epoch": 0.03,
"learning_rate": 9.98281588846961e-05,
"loss": 2.0138,
"step": 5020
},
{
"epoch": 0.03,
"learning_rate": 9.982780963737174e-05,
"loss": 2.0547,
"step": 5025
},
{
"epoch": 0.03,
"learning_rate": 9.982746003611722e-05,
"loss": 2.0544,
"step": 5030
},
{
"epoch": 0.03,
"learning_rate": 9.9827110080935e-05,
"loss": 2.0458,
"step": 5035
},
{
"epoch": 0.03,
"learning_rate": 9.982675977182755e-05,
"loss": 2.0618,
"step": 5040
},
{
"epoch": 0.03,
"learning_rate": 9.98264091087974e-05,
"loss": 2.0118,
"step": 5045
},
{
"epoch": 0.03,
"learning_rate": 9.982605809184702e-05,
"loss": 2.0365,
"step": 5050
},
{
"epoch": 0.03,
"learning_rate": 9.982570672097888e-05,
"loss": 2.0,
"step": 5055
},
{
"epoch": 0.03,
"learning_rate": 9.982535499619551e-05,
"loss": 1.9969,
"step": 5060
},
{
"epoch": 0.03,
"learning_rate": 9.982500291749941e-05,
"loss": 2.0542,
"step": 5065
},
{
"epoch": 0.03,
"learning_rate": 9.982465048489304e-05,
"loss": 2.1168,
"step": 5070
},
{
"epoch": 0.03,
"learning_rate": 9.982429769837895e-05,
"loss": 2.0345,
"step": 5075
},
{
"epoch": 0.03,
"learning_rate": 9.982394455795962e-05,
"loss": 2.0399,
"step": 5080
},
{
"epoch": 0.03,
"learning_rate": 9.982359106363756e-05,
"loss": 2.0969,
"step": 5085
},
{
"epoch": 0.03,
"learning_rate": 9.982323721541532e-05,
"loss": 1.9996,
"step": 5090
},
{
"epoch": 0.03,
"learning_rate": 9.982288301329534e-05,
"loss": 2.0568,
"step": 5095
},
{
"epoch": 0.03,
"learning_rate": 9.982252845728017e-05,
"loss": 2.0027,
"step": 5100
},
{
"epoch": 0.03,
"learning_rate": 9.982217354737235e-05,
"loss": 2.0737,
"step": 5105
},
{
"epoch": 0.03,
"learning_rate": 9.98218182835744e-05,
"loss": 2.1,
"step": 5110
},
{
"epoch": 0.03,
"learning_rate": 9.982146266588881e-05,
"loss": 2.042,
"step": 5115
},
{
"epoch": 0.03,
"learning_rate": 9.982110669431813e-05,
"loss": 2.0809,
"step": 5120
},
{
"epoch": 0.03,
"learning_rate": 9.982075036886487e-05,
"loss": 2.0066,
"step": 5125
},
{
"epoch": 0.03,
"learning_rate": 9.982039368953158e-05,
"loss": 2.082,
"step": 5130
},
{
"epoch": 0.03,
"learning_rate": 9.982003665632078e-05,
"loss": 2.0075,
"step": 5135
},
{
"epoch": 0.03,
"learning_rate": 9.981967926923503e-05,
"loss": 1.9794,
"step": 5140
},
{
"epoch": 0.03,
"learning_rate": 9.981932152827685e-05,
"loss": 2.1486,
"step": 5145
},
{
"epoch": 0.03,
"learning_rate": 9.981896343344877e-05,
"loss": 2.101,
"step": 5150
},
{
"epoch": 0.03,
"learning_rate": 9.981860498475335e-05,
"loss": 2.03,
"step": 5155
},
{
"epoch": 0.03,
"learning_rate": 9.981824618219313e-05,
"loss": 2.0962,
"step": 5160
},
{
"epoch": 0.03,
"learning_rate": 9.981788702577066e-05,
"loss": 2.005,
"step": 5165
},
{
"epoch": 0.03,
"learning_rate": 9.98175275154885e-05,
"loss": 2.0122,
"step": 5170
},
{
"epoch": 0.03,
"learning_rate": 9.981716765134918e-05,
"loss": 2.0894,
"step": 5175
},
{
"epoch": 0.03,
"learning_rate": 9.981680743335529e-05,
"loss": 2.0297,
"step": 5180
},
{
"epoch": 0.03,
"learning_rate": 9.981644686150936e-05,
"loss": 1.9625,
"step": 5185
},
{
"epoch": 0.03,
"learning_rate": 9.981608593581396e-05,
"loss": 2.0708,
"step": 5190
},
{
"epoch": 0.03,
"learning_rate": 9.981572465627167e-05,
"loss": 2.0373,
"step": 5195
},
{
"epoch": 0.03,
"learning_rate": 9.981536302288502e-05,
"loss": 2.0149,
"step": 5200
},
{
"epoch": 0.03,
"learning_rate": 9.981500103565662e-05,
"loss": 2.0008,
"step": 5205
},
{
"epoch": 0.03,
"learning_rate": 9.9814638694589e-05,
"loss": 1.9993,
"step": 5210
},
{
"epoch": 0.03,
"learning_rate": 9.981427599968477e-05,
"loss": 2.0359,
"step": 5215
},
{
"epoch": 0.03,
"learning_rate": 9.981391295094649e-05,
"loss": 2.0671,
"step": 5220
},
{
"epoch": 0.03,
"learning_rate": 9.981354954837674e-05,
"loss": 2.012,
"step": 5225
},
{
"epoch": 0.03,
"learning_rate": 9.981318579197809e-05,
"loss": 2.077,
"step": 5230
},
{
"epoch": 0.03,
"learning_rate": 9.981282168175314e-05,
"loss": 2.0149,
"step": 5235
},
{
"epoch": 0.03,
"learning_rate": 9.981245721770448e-05,
"loss": 2.0238,
"step": 5240
},
{
"epoch": 0.03,
"learning_rate": 9.981209239983469e-05,
"loss": 2.1072,
"step": 5245
},
{
"epoch": 0.03,
"learning_rate": 9.981172722814635e-05,
"loss": 1.9851,
"step": 5250
},
{
"epoch": 0.03,
"learning_rate": 9.981136170264206e-05,
"loss": 2.0358,
"step": 5255
},
{
"epoch": 0.03,
"learning_rate": 9.981099582332444e-05,
"loss": 2.0128,
"step": 5260
},
{
"epoch": 0.03,
"learning_rate": 9.981062959019607e-05,
"loss": 2.066,
"step": 5265
},
{
"epoch": 0.03,
"learning_rate": 9.981026300325955e-05,
"loss": 2.0088,
"step": 5270
},
{
"epoch": 0.03,
"learning_rate": 9.980989606251747e-05,
"loss": 2.0478,
"step": 5275
},
{
"epoch": 0.03,
"learning_rate": 9.980952876797247e-05,
"loss": 2.107,
"step": 5280
},
{
"epoch": 0.03,
"learning_rate": 9.980916111962713e-05,
"loss": 2.0363,
"step": 5285
},
{
"epoch": 0.03,
"learning_rate": 9.980879311748407e-05,
"loss": 2.0522,
"step": 5290
},
{
"epoch": 0.03,
"learning_rate": 9.980842476154591e-05,
"loss": 2.0539,
"step": 5295
},
{
"epoch": 0.03,
"learning_rate": 9.980805605181527e-05,
"loss": 2.0588,
"step": 5300
},
{
"epoch": 0.03,
"learning_rate": 9.980768698829477e-05,
"loss": 2.1043,
"step": 5305
},
{
"epoch": 0.03,
"learning_rate": 9.9807317570987e-05,
"loss": 2.0878,
"step": 5310
},
{
"epoch": 0.03,
"learning_rate": 9.980694779989461e-05,
"loss": 2.0425,
"step": 5315
},
{
"epoch": 0.03,
"learning_rate": 9.980657767502025e-05,
"loss": 1.9924,
"step": 5320
},
{
"epoch": 0.03,
"learning_rate": 9.98062071963665e-05,
"loss": 2.1004,
"step": 5325
},
{
"epoch": 0.03,
"learning_rate": 9.980583636393602e-05,
"loss": 2.0305,
"step": 5330
},
{
"epoch": 0.03,
"learning_rate": 9.980546517773144e-05,
"loss": 2.1131,
"step": 5335
},
{
"epoch": 0.03,
"learning_rate": 9.980509363775538e-05,
"loss": 2.0027,
"step": 5340
},
{
"epoch": 0.03,
"learning_rate": 9.980472174401053e-05,
"loss": 2.1153,
"step": 5345
},
{
"epoch": 0.03,
"learning_rate": 9.980434949649946e-05,
"loss": 2.077,
"step": 5350
},
{
"epoch": 0.03,
"learning_rate": 9.980397689522486e-05,
"loss": 2.0693,
"step": 5355
},
{
"epoch": 0.03,
"learning_rate": 9.980360394018937e-05,
"loss": 2.0223,
"step": 5360
},
{
"epoch": 0.03,
"learning_rate": 9.980323063139563e-05,
"loss": 2.0687,
"step": 5365
},
{
"epoch": 0.03,
"learning_rate": 9.98028569688463e-05,
"loss": 2.0238,
"step": 5370
},
{
"epoch": 0.03,
"learning_rate": 9.980248295254404e-05,
"loss": 2.0387,
"step": 5375
},
{
"epoch": 0.03,
"learning_rate": 9.980210858249149e-05,
"loss": 1.9431,
"step": 5380
},
{
"epoch": 0.03,
"learning_rate": 9.980173385869132e-05,
"loss": 2.0146,
"step": 5385
},
{
"epoch": 0.03,
"learning_rate": 9.980135878114618e-05,
"loss": 2.0662,
"step": 5390
},
{
"epoch": 0.03,
"learning_rate": 9.980098334985875e-05,
"loss": 2.0441,
"step": 5395
},
{
"epoch": 0.03,
"learning_rate": 9.98006075648317e-05,
"loss": 1.9998,
"step": 5400
},
{
"epoch": 0.03,
"learning_rate": 9.980023142606767e-05,
"loss": 2.1095,
"step": 5405
},
{
"epoch": 0.03,
"learning_rate": 9.979985493356936e-05,
"loss": 2.0766,
"step": 5410
},
{
"epoch": 0.03,
"learning_rate": 9.979947808733942e-05,
"loss": 1.9905,
"step": 5415
},
{
"epoch": 0.03,
"learning_rate": 9.979910088738056e-05,
"loss": 2.014,
"step": 5420
},
{
"epoch": 0.03,
"learning_rate": 9.979872333369544e-05,
"loss": 2.0737,
"step": 5425
},
{
"epoch": 0.03,
"learning_rate": 9.979834542628675e-05,
"loss": 2.0305,
"step": 5430
},
{
"epoch": 0.03,
"learning_rate": 9.979796716515715e-05,
"loss": 2.1095,
"step": 5435
},
{
"epoch": 0.03,
"learning_rate": 9.979758855030934e-05,
"loss": 2.0303,
"step": 5440
},
{
"epoch": 0.03,
"learning_rate": 9.979720958174603e-05,
"loss": 2.0775,
"step": 5445
},
{
"epoch": 0.03,
"learning_rate": 9.979683025946989e-05,
"loss": 1.9659,
"step": 5450
},
{
"epoch": 0.03,
"learning_rate": 9.979645058348362e-05,
"loss": 1.9939,
"step": 5455
},
{
"epoch": 0.03,
"learning_rate": 9.97960705537899e-05,
"loss": 2.0309,
"step": 5460
},
{
"epoch": 0.03,
"learning_rate": 9.979569017039146e-05,
"loss": 2.0113,
"step": 5465
},
{
"epoch": 0.03,
"learning_rate": 9.979530943329099e-05,
"loss": 2.0891,
"step": 5470
},
{
"epoch": 0.03,
"learning_rate": 9.97949283424912e-05,
"loss": 2.068,
"step": 5475
},
{
"epoch": 0.03,
"learning_rate": 9.979454689799477e-05,
"loss": 1.9671,
"step": 5480
},
{
"epoch": 0.03,
"learning_rate": 9.979416509980443e-05,
"loss": 2.0225,
"step": 5485
},
{
"epoch": 0.03,
"learning_rate": 9.97937829479229e-05,
"loss": 2.0431,
"step": 5490
},
{
"epoch": 0.03,
"learning_rate": 9.979340044235289e-05,
"loss": 2.0738,
"step": 5495
},
{
"epoch": 0.03,
"learning_rate": 9.979301758309709e-05,
"loss": 2.0235,
"step": 5500
},
{
"epoch": 0.03,
"learning_rate": 9.979263437015826e-05,
"loss": 2.0222,
"step": 5505
},
{
"epoch": 0.03,
"learning_rate": 9.979225080353909e-05,
"loss": 2.0097,
"step": 5510
},
{
"epoch": 0.03,
"learning_rate": 9.979186688324233e-05,
"loss": 2.0445,
"step": 5515
},
{
"epoch": 0.03,
"learning_rate": 9.97914826092707e-05,
"loss": 2.002,
"step": 5520
},
{
"epoch": 0.03,
"learning_rate": 9.97910979816269e-05,
"loss": 2.083,
"step": 5525
},
{
"epoch": 0.03,
"learning_rate": 9.97907130003137e-05,
"loss": 2.0754,
"step": 5530
},
{
"epoch": 0.03,
"learning_rate": 9.979032766533382e-05,
"loss": 2.0324,
"step": 5535
},
{
"epoch": 0.03,
"learning_rate": 9.978994197669e-05,
"loss": 2.0531,
"step": 5540
},
{
"epoch": 0.03,
"learning_rate": 9.978955593438497e-05,
"loss": 1.9866,
"step": 5545
},
{
"epoch": 0.03,
"learning_rate": 9.978916953842147e-05,
"loss": 1.9406,
"step": 5550
},
{
"epoch": 0.03,
"learning_rate": 9.978878278880228e-05,
"loss": 2.0477,
"step": 5555
},
{
"epoch": 0.03,
"learning_rate": 9.978839568553012e-05,
"loss": 2.0186,
"step": 5560
},
{
"epoch": 0.03,
"learning_rate": 9.978800822860772e-05,
"loss": 2.0568,
"step": 5565
},
{
"epoch": 0.03,
"learning_rate": 9.978762041803787e-05,
"loss": 1.9523,
"step": 5570
},
{
"epoch": 0.03,
"learning_rate": 9.97872322538233e-05,
"loss": 1.9795,
"step": 5575
},
{
"epoch": 0.03,
"learning_rate": 9.978684373596676e-05,
"loss": 1.997,
"step": 5580
},
{
"epoch": 0.03,
"learning_rate": 9.978645486447105e-05,
"loss": 2.0578,
"step": 5585
},
{
"epoch": 0.03,
"learning_rate": 9.97860656393389e-05,
"loss": 2.0888,
"step": 5590
},
{
"epoch": 0.03,
"learning_rate": 9.978567606057308e-05,
"loss": 2.0318,
"step": 5595
},
{
"epoch": 0.03,
"learning_rate": 9.978528612817635e-05,
"loss": 1.9641,
"step": 5600
},
{
"epoch": 0.03,
"learning_rate": 9.97848958421515e-05,
"loss": 2.0621,
"step": 5605
},
{
"epoch": 0.03,
"learning_rate": 9.978450520250127e-05,
"loss": 2.0417,
"step": 5610
},
{
"epoch": 0.03,
"learning_rate": 9.978411420922847e-05,
"loss": 2.0543,
"step": 5615
},
{
"epoch": 0.03,
"learning_rate": 9.978372286233586e-05,
"loss": 2.0508,
"step": 5620
},
{
"epoch": 0.03,
"learning_rate": 9.978333116182624e-05,
"loss": 2.0862,
"step": 5625
},
{
"epoch": 0.03,
"learning_rate": 9.978293910770233e-05,
"loss": 2.0201,
"step": 5630
},
{
"epoch": 0.03,
"learning_rate": 9.978254669996701e-05,
"loss": 2.0025,
"step": 5635
},
{
"epoch": 0.03,
"learning_rate": 9.978215393862299e-05,
"loss": 2.1048,
"step": 5640
},
{
"epoch": 0.03,
"learning_rate": 9.978176082367308e-05,
"loss": 2.0377,
"step": 5645
},
{
"epoch": 0.03,
"learning_rate": 9.978136735512009e-05,
"loss": 1.9783,
"step": 5650
},
{
"epoch": 0.03,
"learning_rate": 9.97809735329668e-05,
"loss": 1.9601,
"step": 5655
},
{
"epoch": 0.03,
"learning_rate": 9.978057935721601e-05,
"loss": 2.012,
"step": 5660
},
{
"epoch": 0.03,
"learning_rate": 9.978018482787052e-05,
"loss": 2.0197,
"step": 5665
},
{
"epoch": 0.03,
"learning_rate": 9.977978994493314e-05,
"loss": 2.0049,
"step": 5670
},
{
"epoch": 0.03,
"learning_rate": 9.977939470840666e-05,
"loss": 2.075,
"step": 5675
},
{
"epoch": 0.03,
"learning_rate": 9.97789991182939e-05,
"loss": 2.0486,
"step": 5680
},
{
"epoch": 0.03,
"learning_rate": 9.977860317459766e-05,
"loss": 2.0422,
"step": 5685
},
{
"epoch": 0.03,
"learning_rate": 9.977820687732075e-05,
"loss": 2.0207,
"step": 5690
},
{
"epoch": 0.03,
"learning_rate": 9.977781022646601e-05,
"loss": 2.0681,
"step": 5695
},
{
"epoch": 0.03,
"learning_rate": 9.977741322203624e-05,
"loss": 2.0077,
"step": 5700
},
{
"epoch": 0.03,
"learning_rate": 9.977701586403425e-05,
"loss": 2.0018,
"step": 5705
},
{
"epoch": 0.03,
"learning_rate": 9.977661815246288e-05,
"loss": 1.9899,
"step": 5710
},
{
"epoch": 0.03,
"learning_rate": 9.977622008732494e-05,
"loss": 1.9763,
"step": 5715
},
{
"epoch": 0.03,
"learning_rate": 9.977582166862328e-05,
"loss": 2.0542,
"step": 5720
},
{
"epoch": 0.03,
"learning_rate": 9.97754228963607e-05,
"loss": 2.0095,
"step": 5725
},
{
"epoch": 0.03,
"learning_rate": 9.977502377054005e-05,
"loss": 1.9889,
"step": 5730
},
{
"epoch": 0.03,
"learning_rate": 9.977462429116416e-05,
"loss": 2.0249,
"step": 5735
},
{
"epoch": 0.03,
"learning_rate": 9.977422445823588e-05,
"loss": 2.0513,
"step": 5740
},
{
"epoch": 0.03,
"learning_rate": 9.977382427175802e-05,
"loss": 1.9523,
"step": 5745
},
{
"epoch": 0.03,
"learning_rate": 9.977342373173347e-05,
"loss": 1.9659,
"step": 5750
},
{
"epoch": 0.03,
"learning_rate": 9.977302283816502e-05,
"loss": 2.0309,
"step": 5755
},
{
"epoch": 0.03,
"learning_rate": 9.977262159105556e-05,
"loss": 1.9858,
"step": 5760
},
{
"epoch": 0.03,
"learning_rate": 9.977221999040791e-05,
"loss": 2.0457,
"step": 5765
},
{
"epoch": 0.03,
"learning_rate": 9.977181803622494e-05,
"loss": 2.0458,
"step": 5770
},
{
"epoch": 0.03,
"learning_rate": 9.977141572850952e-05,
"loss": 2.0099,
"step": 5775
},
{
"epoch": 0.03,
"learning_rate": 9.977101306726448e-05,
"loss": 2.0289,
"step": 5780
},
{
"epoch": 0.03,
"learning_rate": 9.977061005249271e-05,
"loss": 2.0149,
"step": 5785
},
{
"epoch": 0.03,
"learning_rate": 9.977020668419704e-05,
"loss": 2.0387,
"step": 5790
},
{
"epoch": 0.03,
"learning_rate": 9.976980296238034e-05,
"loss": 2.0555,
"step": 5795
},
{
"epoch": 0.03,
"learning_rate": 9.976939888704548e-05,
"loss": 2.042,
"step": 5800
},
{
"epoch": 0.03,
"learning_rate": 9.976899445819537e-05,
"loss": 2.0697,
"step": 5805
},
{
"epoch": 0.03,
"learning_rate": 9.976858967583282e-05,
"loss": 1.9781,
"step": 5810
},
{
"epoch": 0.03,
"learning_rate": 9.976818453996073e-05,
"loss": 2.0167,
"step": 5815
},
{
"epoch": 0.03,
"learning_rate": 9.976777905058199e-05,
"loss": 2.0748,
"step": 5820
},
{
"epoch": 0.03,
"learning_rate": 9.976737320769948e-05,
"loss": 1.9844,
"step": 5825
},
{
"epoch": 0.03,
"learning_rate": 9.976696701131607e-05,
"loss": 2.0306,
"step": 5830
},
{
"epoch": 0.03,
"learning_rate": 9.976656046143465e-05,
"loss": 1.9807,
"step": 5835
},
{
"epoch": 0.03,
"learning_rate": 9.976615355805811e-05,
"loss": 2.0541,
"step": 5840
},
{
"epoch": 0.03,
"learning_rate": 9.976574630118934e-05,
"loss": 1.9824,
"step": 5845
},
{
"epoch": 0.03,
"learning_rate": 9.976533869083122e-05,
"loss": 2.0362,
"step": 5850
},
{
"epoch": 0.03,
"learning_rate": 9.976493072698665e-05,
"loss": 2.0351,
"step": 5855
},
{
"epoch": 0.03,
"learning_rate": 9.976452240965854e-05,
"loss": 2.0557,
"step": 5860
},
{
"epoch": 0.03,
"learning_rate": 9.97641137388498e-05,
"loss": 2.0135,
"step": 5865
},
{
"epoch": 0.03,
"learning_rate": 9.976370471456332e-05,
"loss": 2.02,
"step": 5870
},
{
"epoch": 0.03,
"learning_rate": 9.976329533680197e-05,
"loss": 2.0537,
"step": 5875
},
{
"epoch": 0.03,
"learning_rate": 9.976288560556871e-05,
"loss": 2.1182,
"step": 5880
},
{
"epoch": 0.03,
"learning_rate": 9.976247552086644e-05,
"loss": 2.0621,
"step": 5885
},
{
"epoch": 0.03,
"learning_rate": 9.976206508269806e-05,
"loss": 1.9764,
"step": 5890
},
{
"epoch": 0.03,
"learning_rate": 9.976165429106647e-05,
"loss": 1.9988,
"step": 5895
},
{
"epoch": 0.03,
"learning_rate": 9.976124314597461e-05,
"loss": 2.0085,
"step": 5900
},
{
"epoch": 0.03,
"learning_rate": 9.976083164742541e-05,
"loss": 2.0583,
"step": 5905
},
{
"epoch": 0.03,
"learning_rate": 9.976041979542178e-05,
"loss": 1.9886,
"step": 5910
},
{
"epoch": 0.03,
"learning_rate": 9.976000758996664e-05,
"loss": 2.0606,
"step": 5915
},
{
"epoch": 0.03,
"learning_rate": 9.975959503106292e-05,
"loss": 1.994,
"step": 5920
},
{
"epoch": 0.03,
"learning_rate": 9.975918211871356e-05,
"loss": 2.039,
"step": 5925
},
{
"epoch": 0.03,
"learning_rate": 9.975876885292148e-05,
"loss": 2.0486,
"step": 5930
},
{
"epoch": 0.03,
"learning_rate": 9.975835523368962e-05,
"loss": 2.0669,
"step": 5935
},
{
"epoch": 0.03,
"learning_rate": 9.975794126102094e-05,
"loss": 1.9435,
"step": 5940
},
{
"epoch": 0.03,
"learning_rate": 9.975752693491833e-05,
"loss": 1.9804,
"step": 5945
},
{
"epoch": 0.03,
"learning_rate": 9.975711225538478e-05,
"loss": 2.0441,
"step": 5950
},
{
"epoch": 0.03,
"learning_rate": 9.975669722242323e-05,
"loss": 1.9861,
"step": 5955
},
{
"epoch": 0.03,
"learning_rate": 9.97562818360366e-05,
"loss": 1.9587,
"step": 5960
},
{
"epoch": 0.03,
"learning_rate": 9.975586609622787e-05,
"loss": 1.9777,
"step": 5965
},
{
"epoch": 0.03,
"learning_rate": 9.975545000299997e-05,
"loss": 2.0714,
"step": 5970
},
{
"epoch": 0.03,
"learning_rate": 9.975503355635586e-05,
"loss": 1.9984,
"step": 5975
},
{
"epoch": 0.03,
"learning_rate": 9.975461675629852e-05,
"loss": 2.0411,
"step": 5980
},
{
"epoch": 0.03,
"learning_rate": 9.97541996028309e-05,
"loss": 2.0607,
"step": 5985
},
{
"epoch": 0.03,
"learning_rate": 9.975378209595594e-05,
"loss": 2.0601,
"step": 5990
},
{
"epoch": 0.03,
"learning_rate": 9.975336423567663e-05,
"loss": 2.0539,
"step": 5995
},
{
"epoch": 0.03,
"learning_rate": 9.975294602199592e-05,
"loss": 1.9951,
"step": 6000
},
{
"epoch": 0.03,
"learning_rate": 9.975252745491681e-05,
"loss": 2.0518,
"step": 6005
},
{
"epoch": 0.03,
"learning_rate": 9.975210853444223e-05,
"loss": 2.0102,
"step": 6010
},
{
"epoch": 0.03,
"learning_rate": 9.975168926057521e-05,
"loss": 2.0358,
"step": 6015
},
{
"epoch": 0.03,
"learning_rate": 9.975126963331868e-05,
"loss": 2.1038,
"step": 6020
},
{
"epoch": 0.03,
"learning_rate": 9.975084965267565e-05,
"loss": 2.0379,
"step": 6025
},
{
"epoch": 0.03,
"learning_rate": 9.975042931864908e-05,
"loss": 2.0255,
"step": 6030
},
{
"epoch": 0.03,
"learning_rate": 9.9750008631242e-05,
"loss": 2.0661,
"step": 6035
},
{
"epoch": 0.03,
"learning_rate": 9.974958759045734e-05,
"loss": 2.0662,
"step": 6040
},
{
"epoch": 0.03,
"learning_rate": 9.974916619629812e-05,
"loss": 1.9876,
"step": 6045
},
{
"epoch": 0.03,
"learning_rate": 9.974874444876733e-05,
"loss": 1.9667,
"step": 6050
},
{
"epoch": 0.03,
"learning_rate": 9.974832234786797e-05,
"loss": 1.967,
"step": 6055
},
{
"epoch": 0.03,
"learning_rate": 9.974789989360303e-05,
"loss": 2.0128,
"step": 6060
},
{
"epoch": 0.03,
"learning_rate": 9.974747708597552e-05,
"loss": 2.041,
"step": 6065
},
{
"epoch": 0.03,
"learning_rate": 9.974705392498844e-05,
"loss": 1.9842,
"step": 6070
},
{
"epoch": 0.03,
"learning_rate": 9.974663041064478e-05,
"loss": 1.9994,
"step": 6075
},
{
"epoch": 0.03,
"learning_rate": 9.974620654294757e-05,
"loss": 2.0904,
"step": 6080
},
{
"epoch": 0.03,
"learning_rate": 9.974578232189982e-05,
"loss": 1.9493,
"step": 6085
},
{
"epoch": 0.03,
"learning_rate": 9.974535774750453e-05,
"loss": 2.0644,
"step": 6090
},
{
"epoch": 0.03,
"learning_rate": 9.974493281976471e-05,
"loss": 1.9976,
"step": 6095
},
{
"epoch": 0.03,
"learning_rate": 9.97445075386834e-05,
"loss": 2.0306,
"step": 6100
},
{
"epoch": 0.03,
"learning_rate": 9.974408190426362e-05,
"loss": 1.9883,
"step": 6105
},
{
"epoch": 0.03,
"learning_rate": 9.974365591650839e-05,
"loss": 2.0261,
"step": 6110
},
{
"epoch": 0.03,
"learning_rate": 9.974322957542071e-05,
"loss": 1.9964,
"step": 6115
},
{
"epoch": 0.03,
"learning_rate": 9.974280288100363e-05,
"loss": 2.0024,
"step": 6120
},
{
"epoch": 0.03,
"learning_rate": 9.974237583326019e-05,
"loss": 2.0565,
"step": 6125
},
{
"epoch": 0.03,
"learning_rate": 9.974194843219341e-05,
"loss": 2.0345,
"step": 6130
},
{
"epoch": 0.03,
"learning_rate": 9.974152067780634e-05,
"loss": 2.0108,
"step": 6135
},
{
"epoch": 0.03,
"learning_rate": 9.974109257010198e-05,
"loss": 1.9535,
"step": 6140
},
{
"epoch": 0.03,
"learning_rate": 9.974066410908342e-05,
"loss": 2.001,
"step": 6145
},
{
"epoch": 0.03,
"learning_rate": 9.974023529475367e-05,
"loss": 1.9255,
"step": 6150
},
{
"epoch": 0.03,
"learning_rate": 9.973980612711579e-05,
"loss": 2.0138,
"step": 6155
},
{
"epoch": 0.03,
"learning_rate": 9.973937660617283e-05,
"loss": 2.0027,
"step": 6160
},
{
"epoch": 0.03,
"learning_rate": 9.973894673192782e-05,
"loss": 1.9745,
"step": 6165
},
{
"epoch": 0.03,
"learning_rate": 9.973851650438385e-05,
"loss": 1.9862,
"step": 6170
},
{
"epoch": 0.03,
"learning_rate": 9.973808592354394e-05,
"loss": 2.0503,
"step": 6175
},
{
"epoch": 0.03,
"learning_rate": 9.973765498941118e-05,
"loss": 2.0841,
"step": 6180
},
{
"epoch": 0.03,
"learning_rate": 9.97372237019886e-05,
"loss": 1.9803,
"step": 6185
},
{
"epoch": 0.03,
"learning_rate": 9.97367920612793e-05,
"loss": 2.0016,
"step": 6190
},
{
"epoch": 0.03,
"learning_rate": 9.973636006728631e-05,
"loss": 2.0594,
"step": 6195
},
{
"epoch": 0.03,
"learning_rate": 9.97359277200127e-05,
"loss": 2.0383,
"step": 6200
},
{
"epoch": 0.03,
"learning_rate": 9.973549501946157e-05,
"loss": 2.0488,
"step": 6205
},
{
"epoch": 0.03,
"learning_rate": 9.973506196563598e-05,
"loss": 2.0141,
"step": 6210
},
{
"epoch": 0.03,
"learning_rate": 9.9734628558539e-05,
"loss": 1.9639,
"step": 6215
},
{
"epoch": 0.03,
"learning_rate": 9.97341947981737e-05,
"loss": 1.9677,
"step": 6220
},
{
"epoch": 0.03,
"learning_rate": 9.973376068454319e-05,
"loss": 2.003,
"step": 6225
},
{
"epoch": 0.03,
"learning_rate": 9.973332621765054e-05,
"loss": 1.989,
"step": 6230
},
{
"epoch": 0.03,
"learning_rate": 9.973289139749882e-05,
"loss": 2.0069,
"step": 6235
},
{
"epoch": 0.03,
"learning_rate": 9.973245622409113e-05,
"loss": 2.0557,
"step": 6240
},
{
"epoch": 0.03,
"learning_rate": 9.973202069743057e-05,
"loss": 1.9541,
"step": 6245
},
{
"epoch": 0.03,
"learning_rate": 9.973158481752023e-05,
"loss": 2.041,
"step": 6250
},
{
"epoch": 0.03,
"learning_rate": 9.973114858436318e-05,
"loss": 1.9581,
"step": 6255
},
{
"epoch": 0.03,
"learning_rate": 9.973071199796256e-05,
"loss": 2.0957,
"step": 6260
},
{
"epoch": 0.03,
"learning_rate": 9.973027505832145e-05,
"loss": 2.0173,
"step": 6265
},
{
"epoch": 0.03,
"learning_rate": 9.972983776544296e-05,
"loss": 1.9487,
"step": 6270
},
{
"epoch": 0.03,
"learning_rate": 9.972940011933019e-05,
"loss": 1.9921,
"step": 6275
},
{
"epoch": 0.03,
"learning_rate": 9.972896211998624e-05,
"loss": 2.0187,
"step": 6280
},
{
"epoch": 0.03,
"learning_rate": 9.972852376741424e-05,
"loss": 1.9875,
"step": 6285
},
{
"epoch": 0.03,
"learning_rate": 9.972808506161729e-05,
"loss": 1.9621,
"step": 6290
},
{
"epoch": 0.03,
"learning_rate": 9.972764600259849e-05,
"loss": 2.0505,
"step": 6295
},
{
"epoch": 0.03,
"learning_rate": 9.9727206590361e-05,
"loss": 1.9287,
"step": 6300
},
{
"epoch": 0.03,
"learning_rate": 9.972676682490793e-05,
"loss": 2.0285,
"step": 6305
},
{
"epoch": 0.03,
"learning_rate": 9.97263267062424e-05,
"loss": 2.0691,
"step": 6310
},
{
"epoch": 0.03,
"learning_rate": 9.972588623436752e-05,
"loss": 1.9675,
"step": 6315
},
{
"epoch": 0.03,
"learning_rate": 9.972544540928641e-05,
"loss": 2.0413,
"step": 6320
},
{
"epoch": 0.03,
"learning_rate": 9.972500423100224e-05,
"loss": 2.0125,
"step": 6325
},
{
"epoch": 0.03,
"learning_rate": 9.972456269951813e-05,
"loss": 1.979,
"step": 6330
},
{
"epoch": 0.03,
"learning_rate": 9.97241208148372e-05,
"loss": 2.0495,
"step": 6335
},
{
"epoch": 0.03,
"learning_rate": 9.97236785769626e-05,
"loss": 2.0133,
"step": 6340
},
{
"epoch": 0.03,
"learning_rate": 9.972323598589748e-05,
"loss": 1.991,
"step": 6345
},
{
"epoch": 0.03,
"learning_rate": 9.972279304164496e-05,
"loss": 2.012,
"step": 6350
},
{
"epoch": 0.03,
"learning_rate": 9.97223497442082e-05,
"loss": 2.0136,
"step": 6355
},
{
"epoch": 0.03,
"learning_rate": 9.972190609359035e-05,
"loss": 1.9614,
"step": 6360
},
{
"epoch": 0.03,
"learning_rate": 9.972146208979459e-05,
"loss": 1.9851,
"step": 6365
},
{
"epoch": 0.03,
"learning_rate": 9.9721017732824e-05,
"loss": 2.0537,
"step": 6370
},
{
"epoch": 0.03,
"learning_rate": 9.972057302268181e-05,
"loss": 2.0377,
"step": 6375
},
{
"epoch": 0.03,
"learning_rate": 9.972012795937114e-05,
"loss": 2.0176,
"step": 6380
},
{
"epoch": 0.03,
"learning_rate": 9.971968254289517e-05,
"loss": 1.9863,
"step": 6385
},
{
"epoch": 0.03,
"learning_rate": 9.971923677325705e-05,
"loss": 2.0417,
"step": 6390
},
{
"epoch": 0.03,
"learning_rate": 9.971879065045995e-05,
"loss": 1.9519,
"step": 6395
},
{
"epoch": 0.03,
"learning_rate": 9.971834417450703e-05,
"loss": 2.0531,
"step": 6400
},
{
"epoch": 0.03,
"learning_rate": 9.971789734540148e-05,
"loss": 2.0438,
"step": 6405
},
{
"epoch": 0.03,
"learning_rate": 9.971745016314646e-05,
"loss": 2.01,
"step": 6410
},
{
"epoch": 0.03,
"learning_rate": 9.971700262774516e-05,
"loss": 1.9724,
"step": 6415
},
{
"epoch": 0.03,
"learning_rate": 9.971655473920073e-05,
"loss": 1.9754,
"step": 6420
},
{
"epoch": 0.03,
"learning_rate": 9.971610649751639e-05,
"loss": 2.0196,
"step": 6425
},
{
"epoch": 0.03,
"learning_rate": 9.971565790269531e-05,
"loss": 1.9845,
"step": 6430
},
{
"epoch": 0.03,
"learning_rate": 9.971520895474067e-05,
"loss": 2.0217,
"step": 6435
},
{
"epoch": 0.03,
"learning_rate": 9.971475965365565e-05,
"loss": 2.0258,
"step": 6440
},
{
"epoch": 0.03,
"learning_rate": 9.971430999944346e-05,
"loss": 2.0893,
"step": 6445
},
{
"epoch": 0.03,
"learning_rate": 9.971385999210728e-05,
"loss": 1.9407,
"step": 6450
},
{
"epoch": 0.03,
"learning_rate": 9.971340963165032e-05,
"loss": 2.0937,
"step": 6455
},
{
"epoch": 0.03,
"learning_rate": 9.971295891807577e-05,
"loss": 2.037,
"step": 6460
},
{
"epoch": 0.03,
"learning_rate": 9.971250785138684e-05,
"loss": 1.9684,
"step": 6465
},
{
"epoch": 0.03,
"learning_rate": 9.971205643158671e-05,
"loss": 2.0733,
"step": 6470
},
{
"epoch": 0.03,
"learning_rate": 9.971160465867862e-05,
"loss": 1.9969,
"step": 6475
},
{
"epoch": 0.03,
"learning_rate": 9.971115253266575e-05,
"loss": 1.9802,
"step": 6480
},
{
"epoch": 0.03,
"learning_rate": 9.971070005355132e-05,
"loss": 1.9721,
"step": 6485
},
{
"epoch": 0.03,
"learning_rate": 9.971024722133855e-05,
"loss": 2.015,
"step": 6490
},
{
"epoch": 0.03,
"learning_rate": 9.970979403603067e-05,
"loss": 1.993,
"step": 6495
},
{
"epoch": 0.03,
"learning_rate": 9.970934049763088e-05,
"loss": 1.9833,
"step": 6500
},
{
"epoch": 0.03,
"learning_rate": 9.97088866061424e-05,
"loss": 1.9772,
"step": 6505
},
{
"epoch": 0.03,
"learning_rate": 9.970843236156844e-05,
"loss": 2.0174,
"step": 6510
},
{
"epoch": 0.03,
"learning_rate": 9.970797776391227e-05,
"loss": 1.952,
"step": 6515
},
{
"epoch": 0.03,
"learning_rate": 9.970752281317708e-05,
"loss": 2.0481,
"step": 6520
},
{
"epoch": 0.03,
"learning_rate": 9.970706750936613e-05,
"loss": 1.9975,
"step": 6525
},
{
"epoch": 0.04,
"learning_rate": 9.970661185248262e-05,
"loss": 2.043,
"step": 6530
},
{
"epoch": 0.04,
"learning_rate": 9.970615584252982e-05,
"loss": 1.9594,
"step": 6535
},
{
"epoch": 0.04,
"learning_rate": 9.970569947951096e-05,
"loss": 1.9948,
"step": 6540
},
{
"epoch": 0.04,
"learning_rate": 9.970524276342927e-05,
"loss": 2.0064,
"step": 6545
},
{
"epoch": 0.04,
"learning_rate": 9.970478569428801e-05,
"loss": 1.9888,
"step": 6550
},
{
"epoch": 0.04,
"learning_rate": 9.970432827209043e-05,
"loss": 1.967,
"step": 6555
},
{
"epoch": 0.04,
"learning_rate": 9.970387049683974e-05,
"loss": 2.0812,
"step": 6560
},
{
"epoch": 0.04,
"learning_rate": 9.970341236853922e-05,
"loss": 2.0089,
"step": 6565
},
{
"epoch": 0.04,
"learning_rate": 9.970295388719212e-05,
"loss": 1.9099,
"step": 6570
},
{
"epoch": 0.04,
"learning_rate": 9.970249505280172e-05,
"loss": 2.0363,
"step": 6575
},
{
"epoch": 0.04,
"learning_rate": 9.970203586537125e-05,
"loss": 1.9763,
"step": 6580
},
{
"epoch": 0.04,
"learning_rate": 9.970157632490398e-05,
"loss": 2.1303,
"step": 6585
},
{
"epoch": 0.04,
"learning_rate": 9.970111643140316e-05,
"loss": 2.022,
"step": 6590
},
{
"epoch": 0.04,
"learning_rate": 9.970065618487209e-05,
"loss": 1.924,
"step": 6595
},
{
"epoch": 0.04,
"learning_rate": 9.970019558531401e-05,
"loss": 2.0566,
"step": 6600
},
{
"epoch": 0.04,
"learning_rate": 9.96997346327322e-05,
"loss": 1.9599,
"step": 6605
},
{
"epoch": 0.04,
"learning_rate": 9.969927332712995e-05,
"loss": 2.0337,
"step": 6610
},
{
"epoch": 0.04,
"learning_rate": 9.969881166851051e-05,
"loss": 1.9663,
"step": 6615
},
{
"epoch": 0.04,
"learning_rate": 9.969834965687719e-05,
"loss": 1.9997,
"step": 6620
},
{
"epoch": 0.04,
"learning_rate": 9.969788729223322e-05,
"loss": 2.0046,
"step": 6625
},
{
"epoch": 0.04,
"learning_rate": 9.969742457458194e-05,
"loss": 2.0479,
"step": 6630
},
{
"epoch": 0.04,
"learning_rate": 9.969696150392662e-05,
"loss": 2.0611,
"step": 6635
},
{
"epoch": 0.04,
"learning_rate": 9.969649808027053e-05,
"loss": 1.9539,
"step": 6640
},
{
"epoch": 0.04,
"learning_rate": 9.969603430361698e-05,
"loss": 2.0269,
"step": 6645
},
{
"epoch": 0.04,
"learning_rate": 9.969557017396926e-05,
"loss": 2.0042,
"step": 6650
},
{
"epoch": 0.04,
"learning_rate": 9.969510569133067e-05,
"loss": 1.9818,
"step": 6655
},
{
"epoch": 0.04,
"learning_rate": 9.96946408557045e-05,
"loss": 2.0365,
"step": 6660
},
{
"epoch": 0.04,
"learning_rate": 9.969417566709406e-05,
"loss": 1.9702,
"step": 6665
},
{
"epoch": 0.04,
"learning_rate": 9.969371012550265e-05,
"loss": 2.0564,
"step": 6670
},
{
"epoch": 0.04,
"learning_rate": 9.969324423093358e-05,
"loss": 1.9872,
"step": 6675
},
{
"epoch": 0.04,
"learning_rate": 9.969277798339016e-05,
"loss": 2.0273,
"step": 6680
},
{
"epoch": 0.04,
"learning_rate": 9.96923113828757e-05,
"loss": 1.9812,
"step": 6685
},
{
"epoch": 0.04,
"learning_rate": 9.96918444293935e-05,
"loss": 2.075,
"step": 6690
},
{
"epoch": 0.04,
"learning_rate": 9.96913771229469e-05,
"loss": 1.9764,
"step": 6695
},
{
"epoch": 0.04,
"learning_rate": 9.969090946353922e-05,
"loss": 2.0034,
"step": 6700
},
{
"epoch": 0.04,
"learning_rate": 9.969044145117377e-05,
"loss": 1.9957,
"step": 6705
},
{
"epoch": 0.04,
"learning_rate": 9.968997308585387e-05,
"loss": 2.0319,
"step": 6710
},
{
"epoch": 0.04,
"learning_rate": 9.968950436758284e-05,
"loss": 2.027,
"step": 6715
},
{
"epoch": 0.04,
"learning_rate": 9.968903529636403e-05,
"loss": 2.0571,
"step": 6720
},
{
"epoch": 0.04,
"learning_rate": 9.968856587220077e-05,
"loss": 2.0462,
"step": 6725
},
{
"epoch": 0.04,
"learning_rate": 9.96880960950964e-05,
"loss": 2.0162,
"step": 6730
},
{
"epoch": 0.04,
"learning_rate": 9.968762596505421e-05,
"loss": 2.0274,
"step": 6735
},
{
"epoch": 0.04,
"learning_rate": 9.968715548207761e-05,
"loss": 2.0689,
"step": 6740
},
{
"epoch": 0.04,
"learning_rate": 9.968668464616988e-05,
"loss": 1.9734,
"step": 6745
},
{
"epoch": 0.04,
"learning_rate": 9.968621345733442e-05,
"loss": 2.028,
"step": 6750
},
{
"epoch": 0.04,
"learning_rate": 9.968574191557451e-05,
"loss": 2.0073,
"step": 6755
},
{
"epoch": 0.04,
"learning_rate": 9.968527002089356e-05,
"loss": 1.8531,
"step": 6760
},
{
"epoch": 0.04,
"learning_rate": 9.968479777329491e-05,
"loss": 2.0034,
"step": 6765
},
{
"epoch": 0.04,
"learning_rate": 9.968432517278188e-05,
"loss": 1.9882,
"step": 6770
},
{
"epoch": 0.04,
"learning_rate": 9.968385221935787e-05,
"loss": 2.0418,
"step": 6775
},
{
"epoch": 0.04,
"learning_rate": 9.968337891302621e-05,
"loss": 1.9707,
"step": 6780
},
{
"epoch": 0.04,
"learning_rate": 9.968290525379028e-05,
"loss": 2.0913,
"step": 6785
},
{
"epoch": 0.04,
"learning_rate": 9.968243124165342e-05,
"loss": 2.0141,
"step": 6790
},
{
"epoch": 0.04,
"learning_rate": 9.968195687661902e-05,
"loss": 2.0488,
"step": 6795
},
{
"epoch": 0.04,
"learning_rate": 9.968148215869044e-05,
"loss": 1.9781,
"step": 6800
},
{
"epoch": 0.04,
"learning_rate": 9.968100708787107e-05,
"loss": 1.9892,
"step": 6805
},
{
"epoch": 0.04,
"learning_rate": 9.968053166416424e-05,
"loss": 2.0141,
"step": 6810
},
{
"epoch": 0.04,
"learning_rate": 9.968005588757339e-05,
"loss": 2.0157,
"step": 6815
},
{
"epoch": 0.04,
"learning_rate": 9.967957975810184e-05,
"loss": 1.991,
"step": 6820
},
{
"epoch": 0.04,
"learning_rate": 9.9679103275753e-05,
"loss": 2.0568,
"step": 6825
},
{
"epoch": 0.04,
"learning_rate": 9.967862644053027e-05,
"loss": 2.0411,
"step": 6830
},
{
"epoch": 0.04,
"learning_rate": 9.9678149252437e-05,
"loss": 2.0586,
"step": 6835
},
{
"epoch": 0.04,
"learning_rate": 9.96776717114766e-05,
"loss": 1.9888,
"step": 6840
},
{
"epoch": 0.04,
"learning_rate": 9.967719381765248e-05,
"loss": 2.0177,
"step": 6845
},
{
"epoch": 0.04,
"learning_rate": 9.9676715570968e-05,
"loss": 2.0482,
"step": 6850
},
{
"epoch": 0.04,
"learning_rate": 9.967623697142656e-05,
"loss": 2.0369,
"step": 6855
},
{
"epoch": 0.04,
"learning_rate": 9.96757580190316e-05,
"loss": 2.0377,
"step": 6860
},
{
"epoch": 0.04,
"learning_rate": 9.967527871378646e-05,
"loss": 2.0471,
"step": 6865
},
{
"epoch": 0.04,
"learning_rate": 9.96747990556946e-05,
"loss": 2.0083,
"step": 6870
},
{
"epoch": 0.04,
"learning_rate": 9.96743190447594e-05,
"loss": 1.9945,
"step": 6875
},
{
"epoch": 0.04,
"learning_rate": 9.967383868098427e-05,
"loss": 2.0135,
"step": 6880
},
{
"epoch": 0.04,
"learning_rate": 9.967335796437263e-05,
"loss": 2.07,
"step": 6885
},
{
"epoch": 0.04,
"learning_rate": 9.96728768949279e-05,
"loss": 1.9917,
"step": 6890
},
{
"epoch": 0.04,
"learning_rate": 9.967239547265348e-05,
"loss": 1.9694,
"step": 6895
},
{
"epoch": 0.04,
"learning_rate": 9.96719136975528e-05,
"loss": 2.0603,
"step": 6900
},
{
"epoch": 0.04,
"learning_rate": 9.967143156962926e-05,
"loss": 2.0543,
"step": 6905
},
{
"epoch": 0.04,
"learning_rate": 9.967094908888634e-05,
"loss": 1.9917,
"step": 6910
},
{
"epoch": 0.04,
"learning_rate": 9.967046625532741e-05,
"loss": 2.0115,
"step": 6915
},
{
"epoch": 0.04,
"learning_rate": 9.966998306895592e-05,
"loss": 2.0037,
"step": 6920
},
{
"epoch": 0.04,
"learning_rate": 9.966949952977532e-05,
"loss": 2.0146,
"step": 6925
},
{
"epoch": 0.04,
"learning_rate": 9.966901563778901e-05,
"loss": 1.9123,
"step": 6930
},
{
"epoch": 0.04,
"learning_rate": 9.966853139300045e-05,
"loss": 2.0225,
"step": 6935
},
{
"epoch": 0.04,
"learning_rate": 9.966804679541307e-05,
"loss": 2.0493,
"step": 6940
},
{
"epoch": 0.04,
"learning_rate": 9.966756184503032e-05,
"loss": 1.9251,
"step": 6945
},
{
"epoch": 0.04,
"learning_rate": 9.966707654185565e-05,
"loss": 2.0667,
"step": 6950
},
{
"epoch": 0.04,
"learning_rate": 9.966659088589249e-05,
"loss": 1.9902,
"step": 6955
},
{
"epoch": 0.04,
"learning_rate": 9.96661048771443e-05,
"loss": 2.0103,
"step": 6960
},
{
"epoch": 0.04,
"learning_rate": 9.966561851561453e-05,
"loss": 1.9489,
"step": 6965
},
{
"epoch": 0.04,
"learning_rate": 9.966513180130663e-05,
"loss": 1.9653,
"step": 6970
},
{
"epoch": 0.04,
"learning_rate": 9.966464473422407e-05,
"loss": 2.0667,
"step": 6975
},
{
"epoch": 0.04,
"learning_rate": 9.966415731437031e-05,
"loss": 1.9987,
"step": 6980
},
{
"epoch": 0.04,
"learning_rate": 9.96636695417488e-05,
"loss": 1.9679,
"step": 6985
},
{
"epoch": 0.04,
"learning_rate": 9.9663181416363e-05,
"loss": 2.0341,
"step": 6990
},
{
"epoch": 0.04,
"learning_rate": 9.966269293821638e-05,
"loss": 2.0463,
"step": 6995
},
{
"epoch": 0.04,
"learning_rate": 9.966220410731241e-05,
"loss": 2.018,
"step": 7000
},
{
"epoch": 0.04,
"learning_rate": 9.96617149236546e-05,
"loss": 1.9994,
"step": 7005
},
{
"epoch": 0.04,
"learning_rate": 9.966122538724636e-05,
"loss": 1.9997,
"step": 7010
},
{
"epoch": 0.04,
"learning_rate": 9.966073549809121e-05,
"loss": 2.0147,
"step": 7015
},
{
"epoch": 0.04,
"learning_rate": 9.966024525619262e-05,
"loss": 2.0328,
"step": 7020
},
{
"epoch": 0.04,
"learning_rate": 9.965975466155409e-05,
"loss": 2.1036,
"step": 7025
},
{
"epoch": 0.04,
"learning_rate": 9.965926371417906e-05,
"loss": 1.9785,
"step": 7030
},
{
"epoch": 0.04,
"learning_rate": 9.965877241407107e-05,
"loss": 2.0073,
"step": 7035
},
{
"epoch": 0.04,
"learning_rate": 9.965828076123356e-05,
"loss": 1.9852,
"step": 7040
},
{
"epoch": 0.04,
"learning_rate": 9.965778875567004e-05,
"loss": 2.0214,
"step": 7045
},
{
"epoch": 0.04,
"learning_rate": 9.965729639738404e-05,
"loss": 1.9908,
"step": 7050
},
{
"epoch": 0.04,
"learning_rate": 9.9656803686379e-05,
"loss": 2.0134,
"step": 7055
},
{
"epoch": 0.04,
"learning_rate": 9.965631062265845e-05,
"loss": 1.9897,
"step": 7060
},
{
"epoch": 0.04,
"learning_rate": 9.96558172062259e-05,
"loss": 2.0196,
"step": 7065
},
{
"epoch": 0.04,
"learning_rate": 9.965532343708485e-05,
"loss": 1.9965,
"step": 7070
},
{
"epoch": 0.04,
"learning_rate": 9.96548293152388e-05,
"loss": 1.9686,
"step": 7075
},
{
"epoch": 0.04,
"learning_rate": 9.965433484069124e-05,
"loss": 1.9644,
"step": 7080
},
{
"epoch": 0.04,
"learning_rate": 9.965384001344573e-05,
"loss": 1.9562,
"step": 7085
},
{
"epoch": 0.04,
"learning_rate": 9.965334483350575e-05,
"loss": 2.0009,
"step": 7090
},
{
"epoch": 0.04,
"learning_rate": 9.965284930087481e-05,
"loss": 2.0114,
"step": 7095
},
{
"epoch": 0.04,
"learning_rate": 9.965235341555645e-05,
"loss": 2.008,
"step": 7100
},
{
"epoch": 0.04,
"learning_rate": 9.965185717755421e-05,
"loss": 2.0773,
"step": 7105
},
{
"epoch": 0.04,
"learning_rate": 9.965136058687157e-05,
"loss": 2.0484,
"step": 7110
},
{
"epoch": 0.04,
"learning_rate": 9.965086364351209e-05,
"loss": 1.9974,
"step": 7115
},
{
"epoch": 0.04,
"learning_rate": 9.965036634747929e-05,
"loss": 1.9902,
"step": 7120
},
{
"epoch": 0.04,
"learning_rate": 9.964986869877671e-05,
"loss": 1.9821,
"step": 7125
},
{
"epoch": 0.04,
"learning_rate": 9.964937069740785e-05,
"loss": 1.9923,
"step": 7130
},
{
"epoch": 0.04,
"learning_rate": 9.964887234337629e-05,
"loss": 1.9904,
"step": 7135
},
{
"epoch": 0.04,
"learning_rate": 9.964837363668556e-05,
"loss": 1.9594,
"step": 7140
},
{
"epoch": 0.04,
"learning_rate": 9.96478745773392e-05,
"loss": 1.9911,
"step": 7145
},
{
"epoch": 0.04,
"learning_rate": 9.964737516534073e-05,
"loss": 1.9348,
"step": 7150
},
{
"epoch": 0.04,
"learning_rate": 9.964687540069373e-05,
"loss": 2.0769,
"step": 7155
},
{
"epoch": 0.04,
"learning_rate": 9.964637528340175e-05,
"loss": 2.0249,
"step": 7160
},
{
"epoch": 0.04,
"learning_rate": 9.964587481346831e-05,
"loss": 1.9802,
"step": 7165
},
{
"epoch": 0.04,
"learning_rate": 9.9645373990897e-05,
"loss": 2.0397,
"step": 7170
},
{
"epoch": 0.04,
"learning_rate": 9.964487281569137e-05,
"loss": 2.0419,
"step": 7175
},
{
"epoch": 0.04,
"learning_rate": 9.964437128785495e-05,
"loss": 2.0258,
"step": 7180
},
{
"epoch": 0.04,
"learning_rate": 9.964386940739134e-05,
"loss": 2.0194,
"step": 7185
},
{
"epoch": 0.04,
"learning_rate": 9.96433671743041e-05,
"loss": 2.1051,
"step": 7190
},
{
"epoch": 0.04,
"learning_rate": 9.964286458859678e-05,
"loss": 1.9453,
"step": 7195
},
{
"epoch": 0.04,
"learning_rate": 9.964236165027294e-05,
"loss": 2.0375,
"step": 7200
},
{
"epoch": 0.04,
"learning_rate": 9.96418583593362e-05,
"loss": 1.9935,
"step": 7205
},
{
"epoch": 0.04,
"learning_rate": 9.964135471579008e-05,
"loss": 2.041,
"step": 7210
},
{
"epoch": 0.04,
"learning_rate": 9.96408507196382e-05,
"loss": 2.0225,
"step": 7215
},
{
"epoch": 0.04,
"learning_rate": 9.964034637088411e-05,
"loss": 1.9878,
"step": 7220
},
{
"epoch": 0.04,
"learning_rate": 9.963984166953139e-05,
"loss": 2.0059,
"step": 7225
},
{
"epoch": 0.04,
"learning_rate": 9.963933661558367e-05,
"loss": 2.0556,
"step": 7230
},
{
"epoch": 0.04,
"learning_rate": 9.963883120904449e-05,
"loss": 1.9969,
"step": 7235
},
{
"epoch": 0.04,
"learning_rate": 9.963832544991747e-05,
"loss": 1.9426,
"step": 7240
},
{
"epoch": 0.04,
"learning_rate": 9.963781933820618e-05,
"loss": 1.9196,
"step": 7245
},
{
"epoch": 0.04,
"learning_rate": 9.963731287391422e-05,
"loss": 1.9937,
"step": 7250
},
{
"epoch": 0.04,
"learning_rate": 9.96368060570452e-05,
"loss": 2.0707,
"step": 7255
},
{
"epoch": 0.04,
"learning_rate": 9.96362988876027e-05,
"loss": 1.9294,
"step": 7260
},
{
"epoch": 0.04,
"learning_rate": 9.963579136559034e-05,
"loss": 1.9988,
"step": 7265
},
{
"epoch": 0.04,
"learning_rate": 9.963528349101172e-05,
"loss": 1.9736,
"step": 7270
},
{
"epoch": 0.04,
"learning_rate": 9.963477526387046e-05,
"loss": 2.0282,
"step": 7275
},
{
"epoch": 0.04,
"learning_rate": 9.963426668417014e-05,
"loss": 2.0478,
"step": 7280
},
{
"epoch": 0.04,
"learning_rate": 9.96337577519144e-05,
"loss": 1.9992,
"step": 7285
},
{
"epoch": 0.04,
"learning_rate": 9.963324846710683e-05,
"loss": 2.0065,
"step": 7290
},
{
"epoch": 0.04,
"learning_rate": 9.963273882975108e-05,
"loss": 1.9976,
"step": 7295
},
{
"epoch": 0.04,
"learning_rate": 9.963222883985073e-05,
"loss": 2.0532,
"step": 7300
},
{
"epoch": 0.04,
"learning_rate": 9.963171849740945e-05,
"loss": 2.0331,
"step": 7305
},
{
"epoch": 0.04,
"learning_rate": 9.963120780243082e-05,
"loss": 2.005,
"step": 7310
},
{
"epoch": 0.04,
"learning_rate": 9.963069675491848e-05,
"loss": 2.0824,
"step": 7315
},
{
"epoch": 0.04,
"learning_rate": 9.963018535487608e-05,
"loss": 2.0479,
"step": 7320
},
{
"epoch": 0.04,
"learning_rate": 9.962967360230724e-05,
"loss": 2.0173,
"step": 7325
},
{
"epoch": 0.04,
"learning_rate": 9.962916149721559e-05,
"loss": 2.0553,
"step": 7330
},
{
"epoch": 0.04,
"learning_rate": 9.962864903960476e-05,
"loss": 2.0261,
"step": 7335
},
{
"epoch": 0.04,
"learning_rate": 9.962813622947842e-05,
"loss": 2.005,
"step": 7340
},
{
"epoch": 0.04,
"learning_rate": 9.962762306684019e-05,
"loss": 2.0544,
"step": 7345
},
{
"epoch": 0.04,
"learning_rate": 9.962710955169373e-05,
"loss": 2.0028,
"step": 7350
},
{
"epoch": 0.04,
"learning_rate": 9.962659568404264e-05,
"loss": 1.9812,
"step": 7355
},
{
"epoch": 0.04,
"learning_rate": 9.962608146389064e-05,
"loss": 1.9918,
"step": 7360
},
{
"epoch": 0.04,
"learning_rate": 9.962556689124134e-05,
"loss": 2.0163,
"step": 7365
},
{
"epoch": 0.04,
"learning_rate": 9.96250519660984e-05,
"loss": 2.0619,
"step": 7370
},
{
"epoch": 0.04,
"learning_rate": 9.962453668846549e-05,
"loss": 2.0202,
"step": 7375
},
{
"epoch": 0.04,
"learning_rate": 9.962402105834625e-05,
"loss": 1.957,
"step": 7380
},
{
"epoch": 0.04,
"learning_rate": 9.962350507574436e-05,
"loss": 1.993,
"step": 7385
},
{
"epoch": 0.04,
"learning_rate": 9.96229887406635e-05,
"loss": 2.0448,
"step": 7390
},
{
"epoch": 0.04,
"learning_rate": 9.962247205310729e-05,
"loss": 1.9107,
"step": 7395
},
{
"epoch": 0.04,
"learning_rate": 9.962195501307943e-05,
"loss": 2.0034,
"step": 7400
},
{
"epoch": 0.04,
"learning_rate": 9.96214376205836e-05,
"loss": 1.9806,
"step": 7405
},
{
"epoch": 0.04,
"learning_rate": 9.962091987562345e-05,
"loss": 2.0212,
"step": 7410
},
{
"epoch": 0.04,
"learning_rate": 9.962040177820269e-05,
"loss": 2.0019,
"step": 7415
},
{
"epoch": 0.04,
"learning_rate": 9.961988332832496e-05,
"loss": 2.09,
"step": 7420
},
{
"epoch": 0.04,
"learning_rate": 9.961936452599398e-05,
"loss": 2.0117,
"step": 7425
},
{
"epoch": 0.04,
"learning_rate": 9.961884537121341e-05,
"loss": 2.0284,
"step": 7430
},
{
"epoch": 0.04,
"learning_rate": 9.961832586398698e-05,
"loss": 1.9937,
"step": 7435
},
{
"epoch": 0.04,
"learning_rate": 9.961780600431832e-05,
"loss": 1.9803,
"step": 7440
},
{
"epoch": 0.04,
"learning_rate": 9.961728579221115e-05,
"loss": 1.9942,
"step": 7445
},
{
"epoch": 0.04,
"learning_rate": 9.961676522766917e-05,
"loss": 1.9269,
"step": 7450
},
{
"epoch": 0.04,
"learning_rate": 9.961624431069607e-05,
"loss": 1.9785,
"step": 7455
},
{
"epoch": 0.04,
"learning_rate": 9.961572304129557e-05,
"loss": 2.0498,
"step": 7460
},
{
"epoch": 0.04,
"learning_rate": 9.961520141947134e-05,
"loss": 1.9777,
"step": 7465
},
{
"epoch": 0.04,
"learning_rate": 9.961467944522709e-05,
"loss": 1.9093,
"step": 7470
},
{
"epoch": 0.04,
"learning_rate": 9.961415711856657e-05,
"loss": 2.001,
"step": 7475
},
{
"epoch": 0.04,
"learning_rate": 9.961363443949344e-05,
"loss": 1.9999,
"step": 7480
},
{
"epoch": 0.04,
"learning_rate": 9.961311140801142e-05,
"loss": 1.9976,
"step": 7485
},
{
"epoch": 0.04,
"learning_rate": 9.961258802412425e-05,
"loss": 1.9944,
"step": 7490
},
{
"epoch": 0.04,
"learning_rate": 9.961206428783563e-05,
"loss": 2.0266,
"step": 7495
},
{
"epoch": 0.04,
"learning_rate": 9.96115401991493e-05,
"loss": 2.0224,
"step": 7500
},
{
"epoch": 0.04,
"learning_rate": 9.961101575806893e-05,
"loss": 2.0505,
"step": 7505
},
{
"epoch": 0.04,
"learning_rate": 9.961049096459831e-05,
"loss": 1.9339,
"step": 7510
},
{
"epoch": 0.04,
"learning_rate": 9.960996581874113e-05,
"loss": 1.912,
"step": 7515
},
{
"epoch": 0.04,
"learning_rate": 9.960944032050113e-05,
"loss": 2.0519,
"step": 7520
},
{
"epoch": 0.04,
"learning_rate": 9.960891446988205e-05,
"loss": 2.0559,
"step": 7525
},
{
"epoch": 0.04,
"learning_rate": 9.960838826688761e-05,
"loss": 2.0433,
"step": 7530
},
{
"epoch": 0.04,
"learning_rate": 9.960786171152157e-05,
"loss": 1.9908,
"step": 7535
},
{
"epoch": 0.04,
"learning_rate": 9.960733480378764e-05,
"loss": 2.0542,
"step": 7540
},
{
"epoch": 0.04,
"learning_rate": 9.960680754368959e-05,
"loss": 1.8691,
"step": 7545
},
{
"epoch": 0.04,
"learning_rate": 9.960627993123113e-05,
"loss": 1.9864,
"step": 7550
},
{
"epoch": 0.04,
"learning_rate": 9.960575196641605e-05,
"loss": 2.0064,
"step": 7555
},
{
"epoch": 0.04,
"learning_rate": 9.960522364924807e-05,
"loss": 2.0273,
"step": 7560
},
{
"epoch": 0.04,
"learning_rate": 9.960469497973097e-05,
"loss": 1.8736,
"step": 7565
},
{
"epoch": 0.04,
"learning_rate": 9.960416595786848e-05,
"loss": 1.9411,
"step": 7570
},
{
"epoch": 0.04,
"learning_rate": 9.960363658366436e-05,
"loss": 2.0192,
"step": 7575
},
{
"epoch": 0.04,
"learning_rate": 9.960310685712238e-05,
"loss": 1.9586,
"step": 7580
},
{
"epoch": 0.04,
"learning_rate": 9.96025767782463e-05,
"loss": 1.9872,
"step": 7585
},
{
"epoch": 0.04,
"learning_rate": 9.96020463470399e-05,
"loss": 2.0043,
"step": 7590
},
{
"epoch": 0.04,
"learning_rate": 9.960151556350691e-05,
"loss": 1.9484,
"step": 7595
},
{
"epoch": 0.04,
"learning_rate": 9.960098442765114e-05,
"loss": 2.0329,
"step": 7600
},
{
"epoch": 0.04,
"learning_rate": 9.960045293947633e-05,
"loss": 1.9613,
"step": 7605
},
{
"epoch": 0.04,
"learning_rate": 9.959992109898628e-05,
"loss": 2.0747,
"step": 7610
},
{
"epoch": 0.04,
"learning_rate": 9.959938890618474e-05,
"loss": 1.9774,
"step": 7615
},
{
"epoch": 0.04,
"learning_rate": 9.959885636107553e-05,
"loss": 2.0601,
"step": 7620
},
{
"epoch": 0.04,
"learning_rate": 9.959832346366241e-05,
"loss": 2.0121,
"step": 7625
},
{
"epoch": 0.04,
"learning_rate": 9.959779021394916e-05,
"loss": 1.985,
"step": 7630
},
{
"epoch": 0.04,
"learning_rate": 9.959725661193956e-05,
"loss": 2.067,
"step": 7635
},
{
"epoch": 0.04,
"learning_rate": 9.959672265763743e-05,
"loss": 1.9449,
"step": 7640
},
{
"epoch": 0.04,
"learning_rate": 9.959618835104654e-05,
"loss": 1.9915,
"step": 7645
},
{
"epoch": 0.04,
"learning_rate": 9.95956536921707e-05,
"loss": 2.0822,
"step": 7650
},
{
"epoch": 0.04,
"learning_rate": 9.95951186810137e-05,
"loss": 2.0754,
"step": 7655
},
{
"epoch": 0.04,
"learning_rate": 9.959458331757933e-05,
"loss": 1.8964,
"step": 7660
},
{
"epoch": 0.04,
"learning_rate": 9.959404760187141e-05,
"loss": 1.9996,
"step": 7665
},
{
"epoch": 0.04,
"learning_rate": 9.959351153389374e-05,
"loss": 2.0028,
"step": 7670
},
{
"epoch": 0.04,
"learning_rate": 9.959297511365011e-05,
"loss": 1.936,
"step": 7675
},
{
"epoch": 0.04,
"learning_rate": 9.959243834114435e-05,
"loss": 2.075,
"step": 7680
},
{
"epoch": 0.04,
"learning_rate": 9.959190121638029e-05,
"loss": 2.052,
"step": 7685
},
{
"epoch": 0.04,
"learning_rate": 9.959136373936171e-05,
"loss": 2.0153,
"step": 7690
},
{
"epoch": 0.04,
"learning_rate": 9.959082591009243e-05,
"loss": 1.9949,
"step": 7695
},
{
"epoch": 0.04,
"learning_rate": 9.95902877285763e-05,
"loss": 1.9838,
"step": 7700
},
{
"epoch": 0.04,
"learning_rate": 9.958974919481712e-05,
"loss": 1.9915,
"step": 7705
},
{
"epoch": 0.04,
"learning_rate": 9.958921030881871e-05,
"loss": 1.9786,
"step": 7710
},
{
"epoch": 0.04,
"learning_rate": 9.958867107058492e-05,
"loss": 1.9825,
"step": 7715
},
{
"epoch": 0.04,
"learning_rate": 9.958813148011957e-05,
"loss": 2.0341,
"step": 7720
},
{
"epoch": 0.04,
"learning_rate": 9.958759153742647e-05,
"loss": 2.0045,
"step": 7725
},
{
"epoch": 0.04,
"learning_rate": 9.958705124250948e-05,
"loss": 1.999,
"step": 7730
},
{
"epoch": 0.04,
"learning_rate": 9.958651059537245e-05,
"loss": 2.0145,
"step": 7735
},
{
"epoch": 0.04,
"learning_rate": 9.958596959601919e-05,
"loss": 1.99,
"step": 7740
},
{
"epoch": 0.04,
"learning_rate": 9.958542824445356e-05,
"loss": 2.0717,
"step": 7745
},
{
"epoch": 0.04,
"learning_rate": 9.958488654067941e-05,
"loss": 2.0347,
"step": 7750
},
{
"epoch": 0.04,
"learning_rate": 9.958434448470056e-05,
"loss": 1.9638,
"step": 7755
},
{
"epoch": 0.04,
"learning_rate": 9.95838020765209e-05,
"loss": 2.019,
"step": 7760
},
{
"epoch": 0.04,
"learning_rate": 9.958325931614425e-05,
"loss": 1.9548,
"step": 7765
},
{
"epoch": 0.04,
"learning_rate": 9.958271620357448e-05,
"loss": 1.9133,
"step": 7770
},
{
"epoch": 0.04,
"learning_rate": 9.958217273881545e-05,
"loss": 2.116,
"step": 7775
},
{
"epoch": 0.04,
"learning_rate": 9.958162892187102e-05,
"loss": 2.1317,
"step": 7780
},
{
"epoch": 0.04,
"learning_rate": 9.958108475274503e-05,
"loss": 2.0029,
"step": 7785
},
{
"epoch": 0.04,
"learning_rate": 9.958054023144139e-05,
"loss": 2.0805,
"step": 7790
},
{
"epoch": 0.04,
"learning_rate": 9.957999535796391e-05,
"loss": 1.9678,
"step": 7795
},
{
"epoch": 0.04,
"learning_rate": 9.957945013231652e-05,
"loss": 2.0251,
"step": 7800
},
{
"epoch": 0.04,
"learning_rate": 9.957890455450305e-05,
"loss": 2.0018,
"step": 7805
},
{
"epoch": 0.04,
"learning_rate": 9.95783586245274e-05,
"loss": 1.9396,
"step": 7810
},
{
"epoch": 0.04,
"learning_rate": 9.957781234239342e-05,
"loss": 1.9333,
"step": 7815
},
{
"epoch": 0.04,
"learning_rate": 9.957726570810503e-05,
"loss": 1.9647,
"step": 7820
},
{
"epoch": 0.04,
"learning_rate": 9.957671872166607e-05,
"loss": 1.9504,
"step": 7825
},
{
"epoch": 0.04,
"learning_rate": 9.957617138308047e-05,
"loss": 2.0089,
"step": 7830
},
{
"epoch": 0.04,
"learning_rate": 9.957562369235207e-05,
"loss": 1.9718,
"step": 7835
},
{
"epoch": 0.04,
"learning_rate": 9.95750756494848e-05,
"loss": 1.976,
"step": 7840
},
{
"epoch": 0.04,
"learning_rate": 9.957452725448253e-05,
"loss": 1.9936,
"step": 7845
},
{
"epoch": 0.04,
"learning_rate": 9.957397850734915e-05,
"loss": 1.9434,
"step": 7850
},
{
"epoch": 0.04,
"learning_rate": 9.957342940808859e-05,
"loss": 1.9113,
"step": 7855
},
{
"epoch": 0.04,
"learning_rate": 9.957287995670473e-05,
"loss": 1.9534,
"step": 7860
},
{
"epoch": 0.04,
"learning_rate": 9.957233015320145e-05,
"loss": 1.9051,
"step": 7865
},
{
"epoch": 0.04,
"learning_rate": 9.957177999758269e-05,
"loss": 1.9833,
"step": 7870
},
{
"epoch": 0.04,
"learning_rate": 9.957122948985236e-05,
"loss": 2.0766,
"step": 7875
},
{
"epoch": 0.04,
"learning_rate": 9.957067863001435e-05,
"loss": 1.9527,
"step": 7880
},
{
"epoch": 0.04,
"learning_rate": 9.957012741807256e-05,
"loss": 2.0636,
"step": 7885
},
{
"epoch": 0.04,
"learning_rate": 9.956957585403094e-05,
"loss": 1.9874,
"step": 7890
},
{
"epoch": 0.04,
"learning_rate": 9.95690239378934e-05,
"loss": 2.0283,
"step": 7895
},
{
"epoch": 0.04,
"learning_rate": 9.956847166966385e-05,
"loss": 1.9646,
"step": 7900
},
{
"epoch": 0.04,
"learning_rate": 9.956791904934622e-05,
"loss": 1.9773,
"step": 7905
},
{
"epoch": 0.04,
"learning_rate": 9.956736607694441e-05,
"loss": 2.0773,
"step": 7910
},
{
"epoch": 0.04,
"learning_rate": 9.956681275246238e-05,
"loss": 2.0507,
"step": 7915
},
{
"epoch": 0.04,
"learning_rate": 9.956625907590404e-05,
"loss": 2.117,
"step": 7920
},
{
"epoch": 0.04,
"learning_rate": 9.956570504727334e-05,
"loss": 1.9419,
"step": 7925
},
{
"epoch": 0.04,
"learning_rate": 9.956515066657421e-05,
"loss": 1.9661,
"step": 7930
},
{
"epoch": 0.04,
"learning_rate": 9.956459593381058e-05,
"loss": 1.9915,
"step": 7935
},
{
"epoch": 0.04,
"learning_rate": 9.95640408489864e-05,
"loss": 2.0339,
"step": 7940
},
{
"epoch": 0.04,
"learning_rate": 9.95634854121056e-05,
"loss": 1.9688,
"step": 7945
},
{
"epoch": 0.04,
"learning_rate": 9.956292962317214e-05,
"loss": 1.9662,
"step": 7950
},
{
"epoch": 0.04,
"learning_rate": 9.956237348218995e-05,
"loss": 1.998,
"step": 7955
},
{
"epoch": 0.04,
"learning_rate": 9.9561816989163e-05,
"loss": 2.0069,
"step": 7960
},
{
"epoch": 0.04,
"learning_rate": 9.956126014409523e-05,
"loss": 1.9785,
"step": 7965
},
{
"epoch": 0.04,
"learning_rate": 9.95607029469906e-05,
"loss": 1.9378,
"step": 7970
},
{
"epoch": 0.04,
"learning_rate": 9.956014539785307e-05,
"loss": 1.9604,
"step": 7975
},
{
"epoch": 0.04,
"learning_rate": 9.95595874966866e-05,
"loss": 2.0338,
"step": 7980
},
{
"epoch": 0.04,
"learning_rate": 9.955902924349514e-05,
"loss": 1.9645,
"step": 7985
},
{
"epoch": 0.04,
"learning_rate": 9.955847063828267e-05,
"loss": 2.035,
"step": 7990
},
{
"epoch": 0.04,
"learning_rate": 9.955791168105316e-05,
"loss": 1.9543,
"step": 7995
},
{
"epoch": 0.04,
"learning_rate": 9.955735237181056e-05,
"loss": 1.9499,
"step": 8000
}
],
"logging_steps": 5,
"max_steps": 186479,
"num_train_epochs": 1,
"save_steps": 1000,
"total_flos": 1.905411560256307e+19,
"trial_name": null,
"trial_params": null
}