{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.04290021543951941, "eval_steps": 500, "global_step": 8000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 5e-06, "loss": 2.8473, "step": 5 }, { "epoch": 0.0, "learning_rate": 1e-05, "loss": 2.7726, "step": 10 }, { "epoch": 0.0, "learning_rate": 1.5e-05, "loss": 2.8703, "step": 15 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 2.7936, "step": 20 }, { "epoch": 0.0, "learning_rate": 2.5e-05, "loss": 2.8171, "step": 25 }, { "epoch": 0.0, "learning_rate": 3e-05, "loss": 2.8034, "step": 30 }, { "epoch": 0.0, "learning_rate": 3.5e-05, "loss": 2.7991, "step": 35 }, { "epoch": 0.0, "learning_rate": 4e-05, "loss": 2.748, "step": 40 }, { "epoch": 0.0, "learning_rate": 4.5e-05, "loss": 2.7692, "step": 45 }, { "epoch": 0.0, "learning_rate": 5e-05, "loss": 2.7435, "step": 50 }, { "epoch": 0.0, "learning_rate": 5.500000000000001e-05, "loss": 2.7909, "step": 55 }, { "epoch": 0.0, "learning_rate": 6e-05, "loss": 2.6836, "step": 60 }, { "epoch": 0.0, "learning_rate": 6.500000000000001e-05, "loss": 2.6582, "step": 65 }, { "epoch": 0.0, "learning_rate": 7e-05, "loss": 2.695, "step": 70 }, { "epoch": 0.0, "learning_rate": 7.500000000000001e-05, "loss": 2.6744, "step": 75 }, { "epoch": 0.0, "learning_rate": 8e-05, "loss": 2.6914, "step": 80 }, { "epoch": 0.0, "learning_rate": 8.5e-05, "loss": 2.6122, "step": 85 }, { "epoch": 0.0, "learning_rate": 9e-05, "loss": 2.6094, "step": 90 }, { "epoch": 0.0, "learning_rate": 9.5e-05, "loss": 2.6107, "step": 95 }, { "epoch": 0.0, "learning_rate": 0.0001, "loss": 2.5494, "step": 100 }, { "epoch": 0.0, "learning_rate": 9.999999982242337e-05, "loss": 2.6113, "step": 105 }, { "epoch": 0.0, "learning_rate": 9.999999928969349e-05, "loss": 2.5314, "step": 110 }, { "epoch": 0.0, "learning_rate": 9.999999840181035e-05, "loss": 2.5724, "step": 115 }, { "epoch": 0.0, "learning_rate": 9.999999715877396e-05, "loss": 2.5832, "step": 120 }, { "epoch": 0.0, "learning_rate": 9.999999556058433e-05, "loss": 2.5706, "step": 125 }, { "epoch": 0.0, "learning_rate": 9.999999360724148e-05, "loss": 2.5541, "step": 130 }, { "epoch": 0.0, "learning_rate": 9.999999129874542e-05, "loss": 2.5733, "step": 135 }, { "epoch": 0.0, "learning_rate": 9.999998863509615e-05, "loss": 2.6335, "step": 140 }, { "epoch": 0.0, "learning_rate": 9.999998561629372e-05, "loss": 2.5492, "step": 145 }, { "epoch": 0.0, "learning_rate": 9.999998224233811e-05, "loss": 2.6278, "step": 150 }, { "epoch": 0.0, "learning_rate": 9.99999785132294e-05, "loss": 2.5097, "step": 155 }, { "epoch": 0.0, "learning_rate": 9.999997442896756e-05, "loss": 2.5647, "step": 160 }, { "epoch": 0.0, "learning_rate": 9.999996998955264e-05, "loss": 2.5926, "step": 165 }, { "epoch": 0.0, "learning_rate": 9.999996519498469e-05, "loss": 2.5379, "step": 170 }, { "epoch": 0.0, "learning_rate": 9.999996004526372e-05, "loss": 2.527, "step": 175 }, { "epoch": 0.0, "learning_rate": 9.999995454038977e-05, "loss": 2.5837, "step": 180 }, { "epoch": 0.0, "learning_rate": 9.99999486803629e-05, "loss": 2.5457, "step": 185 }, { "epoch": 0.0, "learning_rate": 9.999994246518311e-05, "loss": 2.5282, "step": 190 }, { "epoch": 0.0, "learning_rate": 9.99999358948505e-05, "loss": 2.4637, "step": 195 }, { "epoch": 0.0, "learning_rate": 9.999992896936507e-05, "loss": 2.5831, "step": 200 }, { "epoch": 0.0, "learning_rate": 9.999992168872689e-05, "loss": 2.5234, "step": 205 }, { "epoch": 0.0, "learning_rate": 9.999991405293602e-05, "loss": 2.5167, "step": 210 }, { "epoch": 0.0, "learning_rate": 9.999990606199247e-05, "loss": 2.5296, "step": 215 }, { "epoch": 0.0, "learning_rate": 9.999989771589636e-05, "loss": 2.5526, "step": 220 }, { "epoch": 0.0, "learning_rate": 9.999988901464771e-05, "loss": 2.5035, "step": 225 }, { "epoch": 0.0, "learning_rate": 9.999987995824658e-05, "loss": 2.4831, "step": 230 }, { "epoch": 0.0, "learning_rate": 9.999987054669306e-05, "loss": 2.506, "step": 235 }, { "epoch": 0.0, "learning_rate": 9.999986077998718e-05, "loss": 2.5331, "step": 240 }, { "epoch": 0.0, "learning_rate": 9.999985065812905e-05, "loss": 2.5156, "step": 245 }, { "epoch": 0.0, "learning_rate": 9.999984018111872e-05, "loss": 2.5009, "step": 250 }, { "epoch": 0.0, "learning_rate": 9.999982934895625e-05, "loss": 2.5374, "step": 255 }, { "epoch": 0.0, "learning_rate": 9.999981816164175e-05, "loss": 2.5013, "step": 260 }, { "epoch": 0.0, "learning_rate": 9.999980661917528e-05, "loss": 2.4897, "step": 265 }, { "epoch": 0.0, "learning_rate": 9.999979472155692e-05, "loss": 2.462, "step": 270 }, { "epoch": 0.0, "learning_rate": 9.999978246878676e-05, "loss": 2.5021, "step": 275 }, { "epoch": 0.0, "learning_rate": 9.99997698608649e-05, "loss": 2.4469, "step": 280 }, { "epoch": 0.0, "learning_rate": 9.99997568977914e-05, "loss": 2.4394, "step": 285 }, { "epoch": 0.0, "learning_rate": 9.999974357956636e-05, "loss": 2.4699, "step": 290 }, { "epoch": 0.0, "learning_rate": 9.999972990618991e-05, "loss": 2.4906, "step": 295 }, { "epoch": 0.0, "learning_rate": 9.99997158776621e-05, "loss": 2.4426, "step": 300 }, { "epoch": 0.0, "learning_rate": 9.999970149398306e-05, "loss": 2.3726, "step": 305 }, { "epoch": 0.0, "learning_rate": 9.999968675515288e-05, "loss": 2.4599, "step": 310 }, { "epoch": 0.0, "learning_rate": 9.999967166117166e-05, "loss": 2.5107, "step": 315 }, { "epoch": 0.0, "learning_rate": 9.999965621203952e-05, "loss": 2.4459, "step": 320 }, { "epoch": 0.0, "learning_rate": 9.999964040775656e-05, "loss": 2.4612, "step": 325 }, { "epoch": 0.0, "learning_rate": 9.99996242483229e-05, "loss": 2.5075, "step": 330 }, { "epoch": 0.0, "learning_rate": 9.999960773373865e-05, "loss": 2.4152, "step": 335 }, { "epoch": 0.0, "learning_rate": 9.999959086400393e-05, "loss": 2.4186, "step": 340 }, { "epoch": 0.0, "learning_rate": 9.999957363911885e-05, "loss": 2.5206, "step": 345 }, { "epoch": 0.0, "learning_rate": 9.999955605908353e-05, "loss": 2.4124, "step": 350 }, { "epoch": 0.0, "learning_rate": 9.999953812389813e-05, "loss": 2.4077, "step": 355 }, { "epoch": 0.0, "learning_rate": 9.999951983356273e-05, "loss": 2.4685, "step": 360 }, { "epoch": 0.0, "learning_rate": 9.999950118807749e-05, "loss": 2.4559, "step": 365 }, { "epoch": 0.0, "learning_rate": 9.999948218744254e-05, "loss": 2.3816, "step": 370 }, { "epoch": 0.0, "learning_rate": 9.9999462831658e-05, "loss": 2.441, "step": 375 }, { "epoch": 0.0, "learning_rate": 9.999944312072402e-05, "loss": 2.427, "step": 380 }, { "epoch": 0.0, "learning_rate": 9.999942305464075e-05, "loss": 2.467, "step": 385 }, { "epoch": 0.0, "learning_rate": 9.99994026334083e-05, "loss": 2.4406, "step": 390 }, { "epoch": 0.0, "learning_rate": 9.999938185702685e-05, "loss": 2.3956, "step": 395 }, { "epoch": 0.0, "learning_rate": 9.999936072549653e-05, "loss": 2.4279, "step": 400 }, { "epoch": 0.0, "learning_rate": 9.999933923881747e-05, "loss": 2.3775, "step": 405 }, { "epoch": 0.0, "learning_rate": 9.999931739698989e-05, "loss": 2.4536, "step": 410 }, { "epoch": 0.0, "learning_rate": 9.999929520001386e-05, "loss": 2.4011, "step": 415 }, { "epoch": 0.0, "learning_rate": 9.999927264788959e-05, "loss": 2.3642, "step": 420 }, { "epoch": 0.0, "learning_rate": 9.999924974061723e-05, "loss": 2.3429, "step": 425 }, { "epoch": 0.0, "learning_rate": 9.999922647819694e-05, "loss": 2.3448, "step": 430 }, { "epoch": 0.0, "learning_rate": 9.999920286062889e-05, "loss": 2.4617, "step": 435 }, { "epoch": 0.0, "learning_rate": 9.999917888791323e-05, "loss": 2.4442, "step": 440 }, { "epoch": 0.0, "learning_rate": 9.999915456005016e-05, "loss": 2.454, "step": 445 }, { "epoch": 0.0, "learning_rate": 9.999912987703983e-05, "loss": 2.4007, "step": 450 }, { "epoch": 0.0, "learning_rate": 9.999910483888242e-05, "loss": 2.3814, "step": 455 }, { "epoch": 0.0, "learning_rate": 9.999907944557812e-05, "loss": 2.3584, "step": 460 }, { "epoch": 0.0, "learning_rate": 9.999905369712708e-05, "loss": 2.434, "step": 465 }, { "epoch": 0.0, "learning_rate": 9.999902759352953e-05, "loss": 2.398, "step": 470 }, { "epoch": 0.0, "learning_rate": 9.999900113478561e-05, "loss": 2.4065, "step": 475 }, { "epoch": 0.0, "learning_rate": 9.999897432089553e-05, "loss": 2.3793, "step": 480 }, { "epoch": 0.0, "learning_rate": 9.999894715185947e-05, "loss": 2.3103, "step": 485 }, { "epoch": 0.0, "learning_rate": 9.999891962767764e-05, "loss": 2.4316, "step": 490 }, { "epoch": 0.0, "learning_rate": 9.999889174835023e-05, "loss": 2.3382, "step": 495 }, { "epoch": 0.0, "learning_rate": 9.999886351387741e-05, "loss": 2.3045, "step": 500 }, { "epoch": 0.0, "learning_rate": 9.999883492425944e-05, "loss": 2.3562, "step": 505 }, { "epoch": 0.0, "learning_rate": 9.999880597949645e-05, "loss": 2.3554, "step": 510 }, { "epoch": 0.0, "learning_rate": 9.99987766795887e-05, "loss": 2.383, "step": 515 }, { "epoch": 0.0, "learning_rate": 9.999874702453639e-05, "loss": 2.3416, "step": 520 }, { "epoch": 0.0, "learning_rate": 9.999871701433972e-05, "loss": 2.3196, "step": 525 }, { "epoch": 0.0, "learning_rate": 9.999868664899889e-05, "loss": 2.2929, "step": 530 }, { "epoch": 0.0, "learning_rate": 9.999865592851413e-05, "loss": 2.3593, "step": 535 }, { "epoch": 0.0, "learning_rate": 9.999862485288567e-05, "loss": 2.3656, "step": 540 }, { "epoch": 0.0, "learning_rate": 9.999859342211371e-05, "loss": 2.3468, "step": 545 }, { "epoch": 0.0, "learning_rate": 9.99985616361985e-05, "loss": 2.3337, "step": 550 }, { "epoch": 0.0, "learning_rate": 9.999852949514023e-05, "loss": 2.3549, "step": 555 }, { "epoch": 0.0, "learning_rate": 9.999849699893916e-05, "loss": 2.3345, "step": 560 }, { "epoch": 0.0, "learning_rate": 9.999846414759548e-05, "loss": 2.3254, "step": 565 }, { "epoch": 0.0, "learning_rate": 9.999843094110947e-05, "loss": 2.3593, "step": 570 }, { "epoch": 0.0, "learning_rate": 9.999839737948136e-05, "loss": 2.3664, "step": 575 }, { "epoch": 0.0, "learning_rate": 9.999836346271136e-05, "loss": 2.4034, "step": 580 }, { "epoch": 0.0, "learning_rate": 9.999832919079973e-05, "loss": 2.3747, "step": 585 }, { "epoch": 0.0, "learning_rate": 9.99982945637467e-05, "loss": 2.4076, "step": 590 }, { "epoch": 0.0, "learning_rate": 9.999825958155253e-05, "loss": 2.3365, "step": 595 }, { "epoch": 0.0, "learning_rate": 9.999822424421746e-05, "loss": 2.2521, "step": 600 }, { "epoch": 0.0, "learning_rate": 9.999818855174175e-05, "loss": 2.3802, "step": 605 }, { "epoch": 0.0, "learning_rate": 9.999815250412565e-05, "loss": 2.3265, "step": 610 }, { "epoch": 0.0, "learning_rate": 9.999811610136942e-05, "loss": 2.3196, "step": 615 }, { "epoch": 0.0, "learning_rate": 9.99980793434733e-05, "loss": 2.3952, "step": 620 }, { "epoch": 0.0, "learning_rate": 9.999804223043757e-05, "loss": 2.3279, "step": 625 }, { "epoch": 0.0, "learning_rate": 9.999800476226248e-05, "loss": 2.2558, "step": 630 }, { "epoch": 0.0, "learning_rate": 9.999796693894831e-05, "loss": 2.424, "step": 635 }, { "epoch": 0.0, "learning_rate": 9.999792876049534e-05, "loss": 2.3813, "step": 640 }, { "epoch": 0.0, "learning_rate": 9.99978902269038e-05, "loss": 2.2896, "step": 645 }, { "epoch": 0.0, "learning_rate": 9.999785133817401e-05, "loss": 2.3497, "step": 650 }, { "epoch": 0.0, "learning_rate": 9.99978120943062e-05, "loss": 2.2952, "step": 655 }, { "epoch": 0.0, "learning_rate": 9.999777249530068e-05, "loss": 2.2497, "step": 660 }, { "epoch": 0.0, "learning_rate": 9.999773254115772e-05, "loss": 2.3667, "step": 665 }, { "epoch": 0.0, "learning_rate": 9.999769223187761e-05, "loss": 2.3058, "step": 670 }, { "epoch": 0.0, "learning_rate": 9.999765156746064e-05, "loss": 2.3563, "step": 675 }, { "epoch": 0.0, "learning_rate": 9.999761054790708e-05, "loss": 2.3291, "step": 680 }, { "epoch": 0.0, "learning_rate": 9.999756917321725e-05, "loss": 2.2963, "step": 685 }, { "epoch": 0.0, "learning_rate": 9.999752744339142e-05, "loss": 2.3033, "step": 690 }, { "epoch": 0.0, "learning_rate": 9.99974853584299e-05, "loss": 2.3644, "step": 695 }, { "epoch": 0.0, "learning_rate": 9.999744291833298e-05, "loss": 2.2754, "step": 700 }, { "epoch": 0.0, "learning_rate": 9.999740012310096e-05, "loss": 2.251, "step": 705 }, { "epoch": 0.0, "learning_rate": 9.999735697273414e-05, "loss": 2.3208, "step": 710 }, { "epoch": 0.0, "learning_rate": 9.999731346723284e-05, "loss": 2.2928, "step": 715 }, { "epoch": 0.0, "learning_rate": 9.99972696065974e-05, "loss": 2.2728, "step": 720 }, { "epoch": 0.0, "learning_rate": 9.999722539082806e-05, "loss": 2.4298, "step": 725 }, { "epoch": 0.0, "learning_rate": 9.999718081992517e-05, "loss": 2.3092, "step": 730 }, { "epoch": 0.0, "learning_rate": 9.999713589388904e-05, "loss": 2.3097, "step": 735 }, { "epoch": 0.0, "learning_rate": 9.999709061272002e-05, "loss": 2.2793, "step": 740 }, { "epoch": 0.0, "learning_rate": 9.999704497641838e-05, "loss": 2.2424, "step": 745 }, { "epoch": 0.0, "learning_rate": 9.99969989849845e-05, "loss": 2.2682, "step": 750 }, { "epoch": 0.0, "learning_rate": 9.999695263841865e-05, "loss": 2.3454, "step": 755 }, { "epoch": 0.0, "learning_rate": 9.99969059367212e-05, "loss": 2.3178, "step": 760 }, { "epoch": 0.0, "learning_rate": 9.999685887989247e-05, "loss": 2.2134, "step": 765 }, { "epoch": 0.0, "learning_rate": 9.999681146793279e-05, "loss": 2.3036, "step": 770 }, { "epoch": 0.0, "learning_rate": 9.99967637008425e-05, "loss": 2.2281, "step": 775 }, { "epoch": 0.0, "learning_rate": 9.999671557862195e-05, "loss": 2.2872, "step": 780 }, { "epoch": 0.0, "learning_rate": 9.999666710127146e-05, "loss": 2.3222, "step": 785 }, { "epoch": 0.0, "learning_rate": 9.999661826879138e-05, "loss": 2.3252, "step": 790 }, { "epoch": 0.0, "learning_rate": 9.999656908118206e-05, "loss": 2.3189, "step": 795 }, { "epoch": 0.0, "learning_rate": 9.999651953844386e-05, "loss": 2.2981, "step": 800 }, { "epoch": 0.0, "learning_rate": 9.999646964057714e-05, "loss": 2.2967, "step": 805 }, { "epoch": 0.0, "learning_rate": 9.999641938758222e-05, "loss": 2.373, "step": 810 }, { "epoch": 0.0, "learning_rate": 9.999636877945948e-05, "loss": 2.2433, "step": 815 }, { "epoch": 0.0, "learning_rate": 9.999631781620926e-05, "loss": 2.2853, "step": 820 }, { "epoch": 0.0, "learning_rate": 9.999626649783196e-05, "loss": 2.2309, "step": 825 }, { "epoch": 0.0, "learning_rate": 9.99962148243279e-05, "loss": 2.224, "step": 830 }, { "epoch": 0.0, "learning_rate": 9.999616279569748e-05, "loss": 2.3233, "step": 835 }, { "epoch": 0.0, "learning_rate": 9.999611041194106e-05, "loss": 2.2581, "step": 840 }, { "epoch": 0.0, "learning_rate": 9.999605767305899e-05, "loss": 2.2282, "step": 845 }, { "epoch": 0.0, "learning_rate": 9.999600457905167e-05, "loss": 2.256, "step": 850 }, { "epoch": 0.0, "learning_rate": 9.999595112991949e-05, "loss": 2.2754, "step": 855 }, { "epoch": 0.0, "learning_rate": 9.99958973256628e-05, "loss": 2.2858, "step": 860 }, { "epoch": 0.0, "learning_rate": 9.999584316628199e-05, "loss": 2.3145, "step": 865 }, { "epoch": 0.0, "learning_rate": 9.999578865177745e-05, "loss": 2.2716, "step": 870 }, { "epoch": 0.0, "learning_rate": 9.999573378214957e-05, "loss": 2.2737, "step": 875 }, { "epoch": 0.0, "learning_rate": 9.999567855739871e-05, "loss": 2.2759, "step": 880 }, { "epoch": 0.0, "learning_rate": 9.999562297752531e-05, "loss": 2.2142, "step": 885 }, { "epoch": 0.0, "learning_rate": 9.999556704252975e-05, "loss": 2.3048, "step": 890 }, { "epoch": 0.0, "learning_rate": 9.99955107524124e-05, "loss": 2.2356, "step": 895 }, { "epoch": 0.0, "learning_rate": 9.999545410717369e-05, "loss": 2.2648, "step": 900 }, { "epoch": 0.0, "learning_rate": 9.9995397106814e-05, "loss": 2.2718, "step": 905 }, { "epoch": 0.0, "learning_rate": 9.999533975133376e-05, "loss": 2.1817, "step": 910 }, { "epoch": 0.0, "learning_rate": 9.999528204073336e-05, "loss": 2.3446, "step": 915 }, { "epoch": 0.0, "learning_rate": 9.999522397501321e-05, "loss": 2.2839, "step": 920 }, { "epoch": 0.0, "learning_rate": 9.999516555417372e-05, "loss": 2.2265, "step": 925 }, { "epoch": 0.0, "learning_rate": 9.999510677821532e-05, "loss": 2.32, "step": 930 }, { "epoch": 0.01, "learning_rate": 9.999504764713841e-05, "loss": 2.2913, "step": 935 }, { "epoch": 0.01, "learning_rate": 9.999498816094343e-05, "loss": 2.1976, "step": 940 }, { "epoch": 0.01, "learning_rate": 9.999492831963078e-05, "loss": 2.2944, "step": 945 }, { "epoch": 0.01, "learning_rate": 9.999486812320091e-05, "loss": 2.2538, "step": 950 }, { "epoch": 0.01, "learning_rate": 9.999480757165422e-05, "loss": 2.1969, "step": 955 }, { "epoch": 0.01, "learning_rate": 9.999474666499115e-05, "loss": 2.2265, "step": 960 }, { "epoch": 0.01, "learning_rate": 9.999468540321215e-05, "loss": 2.2501, "step": 965 }, { "epoch": 0.01, "learning_rate": 9.999462378631764e-05, "loss": 2.2723, "step": 970 }, { "epoch": 0.01, "learning_rate": 9.999456181430805e-05, "loss": 2.2585, "step": 975 }, { "epoch": 0.01, "learning_rate": 9.999449948718384e-05, "loss": 2.1975, "step": 980 }, { "epoch": 0.01, "learning_rate": 9.999443680494542e-05, "loss": 2.2807, "step": 985 }, { "epoch": 0.01, "learning_rate": 9.999437376759328e-05, "loss": 2.2114, "step": 990 }, { "epoch": 0.01, "learning_rate": 9.999431037512784e-05, "loss": 2.2353, "step": 995 }, { "epoch": 0.01, "learning_rate": 9.999424662754955e-05, "loss": 2.2853, "step": 1000 }, { "epoch": 0.01, "learning_rate": 9.999418252485888e-05, "loss": 2.2036, "step": 1005 }, { "epoch": 0.01, "learning_rate": 9.999411806705626e-05, "loss": 2.2508, "step": 1010 }, { "epoch": 0.01, "learning_rate": 9.999405325414218e-05, "loss": 2.1966, "step": 1015 }, { "epoch": 0.01, "learning_rate": 9.999398808611705e-05, "loss": 2.2214, "step": 1020 }, { "epoch": 0.01, "learning_rate": 9.999392256298139e-05, "loss": 2.233, "step": 1025 }, { "epoch": 0.01, "learning_rate": 9.999385668473562e-05, "loss": 2.2457, "step": 1030 }, { "epoch": 0.01, "learning_rate": 9.999379045138024e-05, "loss": 2.2132, "step": 1035 }, { "epoch": 0.01, "learning_rate": 9.999372386291571e-05, "loss": 2.261, "step": 1040 }, { "epoch": 0.01, "learning_rate": 9.999365691934251e-05, "loss": 2.1686, "step": 1045 }, { "epoch": 0.01, "learning_rate": 9.999358962066108e-05, "loss": 2.1927, "step": 1050 }, { "epoch": 0.01, "learning_rate": 9.999352196687195e-05, "loss": 2.2486, "step": 1055 }, { "epoch": 0.01, "learning_rate": 9.999345395797557e-05, "loss": 2.281, "step": 1060 }, { "epoch": 0.01, "learning_rate": 9.999338559397242e-05, "loss": 2.2213, "step": 1065 }, { "epoch": 0.01, "learning_rate": 9.9993316874863e-05, "loss": 2.2035, "step": 1070 }, { "epoch": 0.01, "learning_rate": 9.999324780064779e-05, "loss": 2.2247, "step": 1075 }, { "epoch": 0.01, "learning_rate": 9.999317837132729e-05, "loss": 2.2729, "step": 1080 }, { "epoch": 0.01, "learning_rate": 9.999310858690198e-05, "loss": 2.1674, "step": 1085 }, { "epoch": 0.01, "learning_rate": 9.999303844737235e-05, "loss": 2.298, "step": 1090 }, { "epoch": 0.01, "learning_rate": 9.999296795273893e-05, "loss": 2.1917, "step": 1095 }, { "epoch": 0.01, "learning_rate": 9.99928971030022e-05, "loss": 2.2396, "step": 1100 }, { "epoch": 0.01, "learning_rate": 9.999282589816265e-05, "loss": 2.1645, "step": 1105 }, { "epoch": 0.01, "learning_rate": 9.99927543382208e-05, "loss": 2.2335, "step": 1110 }, { "epoch": 0.01, "learning_rate": 9.999268242317716e-05, "loss": 2.3187, "step": 1115 }, { "epoch": 0.01, "learning_rate": 9.999261015303225e-05, "loss": 2.1898, "step": 1120 }, { "epoch": 0.01, "learning_rate": 9.999253752778655e-05, "loss": 2.1982, "step": 1125 }, { "epoch": 0.01, "learning_rate": 9.999246454744062e-05, "loss": 2.2065, "step": 1130 }, { "epoch": 0.01, "learning_rate": 9.999239121199496e-05, "loss": 2.1835, "step": 1135 }, { "epoch": 0.01, "learning_rate": 9.999231752145007e-05, "loss": 2.297, "step": 1140 }, { "epoch": 0.01, "learning_rate": 9.999224347580649e-05, "loss": 2.1948, "step": 1145 }, { "epoch": 0.01, "learning_rate": 9.999216907506474e-05, "loss": 2.2976, "step": 1150 }, { "epoch": 0.01, "learning_rate": 9.999209431922538e-05, "loss": 2.1546, "step": 1155 }, { "epoch": 0.01, "learning_rate": 9.99920192082889e-05, "loss": 2.2606, "step": 1160 }, { "epoch": 0.01, "learning_rate": 9.999194374225585e-05, "loss": 2.1286, "step": 1165 }, { "epoch": 0.01, "learning_rate": 9.999186792112677e-05, "loss": 2.2137, "step": 1170 }, { "epoch": 0.01, "learning_rate": 9.99917917449022e-05, "loss": 2.2462, "step": 1175 }, { "epoch": 0.01, "learning_rate": 9.999171521358267e-05, "loss": 2.194, "step": 1180 }, { "epoch": 0.01, "learning_rate": 9.999163832716871e-05, "loss": 2.3038, "step": 1185 }, { "epoch": 0.01, "learning_rate": 9.999156108566091e-05, "loss": 2.2024, "step": 1190 }, { "epoch": 0.01, "learning_rate": 9.999148348905978e-05, "loss": 2.2281, "step": 1195 }, { "epoch": 0.01, "learning_rate": 9.999140553736591e-05, "loss": 2.3076, "step": 1200 }, { "epoch": 0.01, "learning_rate": 9.99913272305798e-05, "loss": 2.1543, "step": 1205 }, { "epoch": 0.01, "learning_rate": 9.999124856870204e-05, "loss": 2.1762, "step": 1210 }, { "epoch": 0.01, "learning_rate": 9.999116955173318e-05, "loss": 2.2303, "step": 1215 }, { "epoch": 0.01, "learning_rate": 9.99910901796738e-05, "loss": 2.2688, "step": 1220 }, { "epoch": 0.01, "learning_rate": 9.999101045252444e-05, "loss": 2.2159, "step": 1225 }, { "epoch": 0.01, "learning_rate": 9.999093037028568e-05, "loss": 2.2502, "step": 1230 }, { "epoch": 0.01, "learning_rate": 9.999084993295806e-05, "loss": 2.2158, "step": 1235 }, { "epoch": 0.01, "learning_rate": 9.99907691405422e-05, "loss": 2.2183, "step": 1240 }, { "epoch": 0.01, "learning_rate": 9.999068799303865e-05, "loss": 2.1595, "step": 1245 }, { "epoch": 0.01, "learning_rate": 9.999060649044797e-05, "loss": 2.2377, "step": 1250 }, { "epoch": 0.01, "learning_rate": 9.999052463277076e-05, "loss": 2.1848, "step": 1255 }, { "epoch": 0.01, "learning_rate": 9.999044242000761e-05, "loss": 2.2617, "step": 1260 }, { "epoch": 0.01, "learning_rate": 9.999035985215908e-05, "loss": 2.2413, "step": 1265 }, { "epoch": 0.01, "learning_rate": 9.999027692922576e-05, "loss": 2.2558, "step": 1270 }, { "epoch": 0.01, "learning_rate": 9.999019365120824e-05, "loss": 2.2329, "step": 1275 }, { "epoch": 0.01, "learning_rate": 9.999011001810712e-05, "loss": 2.1904, "step": 1280 }, { "epoch": 0.01, "learning_rate": 9.9990026029923e-05, "loss": 2.2001, "step": 1285 }, { "epoch": 0.01, "learning_rate": 9.998994168665648e-05, "loss": 2.2125, "step": 1290 }, { "epoch": 0.01, "learning_rate": 9.998985698830811e-05, "loss": 2.2595, "step": 1295 }, { "epoch": 0.01, "learning_rate": 9.998977193487855e-05, "loss": 2.1524, "step": 1300 }, { "epoch": 0.01, "learning_rate": 9.998968652636838e-05, "loss": 2.262, "step": 1305 }, { "epoch": 0.01, "learning_rate": 9.998960076277821e-05, "loss": 2.2323, "step": 1310 }, { "epoch": 0.01, "learning_rate": 9.998951464410864e-05, "loss": 2.1489, "step": 1315 }, { "epoch": 0.01, "learning_rate": 9.99894281703603e-05, "loss": 2.2797, "step": 1320 }, { "epoch": 0.01, "learning_rate": 9.99893413415338e-05, "loss": 2.1403, "step": 1325 }, { "epoch": 0.01, "learning_rate": 9.998925415762972e-05, "loss": 2.1532, "step": 1330 }, { "epoch": 0.01, "learning_rate": 9.998916661864875e-05, "loss": 2.1249, "step": 1335 }, { "epoch": 0.01, "learning_rate": 9.998907872459144e-05, "loss": 2.2107, "step": 1340 }, { "epoch": 0.01, "learning_rate": 9.998899047545847e-05, "loss": 2.2304, "step": 1345 }, { "epoch": 0.01, "learning_rate": 9.998890187125042e-05, "loss": 2.1913, "step": 1350 }, { "epoch": 0.01, "learning_rate": 9.998881291196796e-05, "loss": 2.1502, "step": 1355 }, { "epoch": 0.01, "learning_rate": 9.998872359761169e-05, "loss": 2.1971, "step": 1360 }, { "epoch": 0.01, "learning_rate": 9.998863392818226e-05, "loss": 2.1425, "step": 1365 }, { "epoch": 0.01, "learning_rate": 9.99885439036803e-05, "loss": 2.2467, "step": 1370 }, { "epoch": 0.01, "learning_rate": 9.998845352410649e-05, "loss": 2.2199, "step": 1375 }, { "epoch": 0.01, "learning_rate": 9.99883627894614e-05, "loss": 2.2122, "step": 1380 }, { "epoch": 0.01, "learning_rate": 9.998827169974573e-05, "loss": 2.1418, "step": 1385 }, { "epoch": 0.01, "learning_rate": 9.99881802549601e-05, "loss": 2.2162, "step": 1390 }, { "epoch": 0.01, "learning_rate": 9.998808845510517e-05, "loss": 2.2291, "step": 1395 }, { "epoch": 0.01, "learning_rate": 9.998799630018159e-05, "loss": 2.1994, "step": 1400 }, { "epoch": 0.01, "learning_rate": 9.998790379019e-05, "loss": 2.2042, "step": 1405 }, { "epoch": 0.01, "learning_rate": 9.99878109251311e-05, "loss": 2.2801, "step": 1410 }, { "epoch": 0.01, "learning_rate": 9.99877177050055e-05, "loss": 2.1827, "step": 1415 }, { "epoch": 0.01, "learning_rate": 9.99876241298139e-05, "loss": 2.1468, "step": 1420 }, { "epoch": 0.01, "learning_rate": 9.998753019955694e-05, "loss": 2.1695, "step": 1425 }, { "epoch": 0.01, "learning_rate": 9.99874359142353e-05, "loss": 2.1839, "step": 1430 }, { "epoch": 0.01, "learning_rate": 9.998734127384964e-05, "loss": 2.1977, "step": 1435 }, { "epoch": 0.01, "learning_rate": 9.998724627840063e-05, "loss": 2.1427, "step": 1440 }, { "epoch": 0.01, "learning_rate": 9.998715092788895e-05, "loss": 2.1486, "step": 1445 }, { "epoch": 0.01, "learning_rate": 9.998705522231529e-05, "loss": 2.2304, "step": 1450 }, { "epoch": 0.01, "learning_rate": 9.998695916168033e-05, "loss": 2.2171, "step": 1455 }, { "epoch": 0.01, "learning_rate": 9.998686274598472e-05, "loss": 2.1853, "step": 1460 }, { "epoch": 0.01, "learning_rate": 9.998676597522918e-05, "loss": 2.1481, "step": 1465 }, { "epoch": 0.01, "learning_rate": 9.998666884941437e-05, "loss": 2.1574, "step": 1470 }, { "epoch": 0.01, "learning_rate": 9.9986571368541e-05, "loss": 2.2056, "step": 1475 }, { "epoch": 0.01, "learning_rate": 9.998647353260976e-05, "loss": 2.2068, "step": 1480 }, { "epoch": 0.01, "learning_rate": 9.998637534162134e-05, "loss": 2.1437, "step": 1485 }, { "epoch": 0.01, "learning_rate": 9.998627679557644e-05, "loss": 2.1642, "step": 1490 }, { "epoch": 0.01, "learning_rate": 9.998617789447575e-05, "loss": 2.1636, "step": 1495 }, { "epoch": 0.01, "learning_rate": 9.998607863831999e-05, "loss": 2.1649, "step": 1500 }, { "epoch": 0.01, "learning_rate": 9.998597902710984e-05, "loss": 2.1786, "step": 1505 }, { "epoch": 0.01, "learning_rate": 9.998587906084605e-05, "loss": 2.1819, "step": 1510 }, { "epoch": 0.01, "learning_rate": 9.998577873952928e-05, "loss": 2.1562, "step": 1515 }, { "epoch": 0.01, "learning_rate": 9.998567806316027e-05, "loss": 2.2005, "step": 1520 }, { "epoch": 0.01, "learning_rate": 9.998557703173974e-05, "loss": 2.1651, "step": 1525 }, { "epoch": 0.01, "learning_rate": 9.998547564526838e-05, "loss": 2.1955, "step": 1530 }, { "epoch": 0.01, "learning_rate": 9.998537390374697e-05, "loss": 2.1984, "step": 1535 }, { "epoch": 0.01, "learning_rate": 9.998527180717614e-05, "loss": 2.2526, "step": 1540 }, { "epoch": 0.01, "learning_rate": 9.99851693555567e-05, "loss": 2.2263, "step": 1545 }, { "epoch": 0.01, "learning_rate": 9.998506654888934e-05, "loss": 2.1534, "step": 1550 }, { "epoch": 0.01, "learning_rate": 9.998496338717479e-05, "loss": 2.2249, "step": 1555 }, { "epoch": 0.01, "learning_rate": 9.998485987041379e-05, "loss": 2.183, "step": 1560 }, { "epoch": 0.01, "learning_rate": 9.998475599860706e-05, "loss": 2.1818, "step": 1565 }, { "epoch": 0.01, "learning_rate": 9.998465177175538e-05, "loss": 2.2019, "step": 1570 }, { "epoch": 0.01, "learning_rate": 9.998454718985943e-05, "loss": 2.2007, "step": 1575 }, { "epoch": 0.01, "learning_rate": 9.998444225291999e-05, "loss": 2.2113, "step": 1580 }, { "epoch": 0.01, "learning_rate": 9.998433696093781e-05, "loss": 2.2125, "step": 1585 }, { "epoch": 0.01, "learning_rate": 9.998423131391363e-05, "loss": 2.0699, "step": 1590 }, { "epoch": 0.01, "learning_rate": 9.998412531184819e-05, "loss": 2.1805, "step": 1595 }, { "epoch": 0.01, "learning_rate": 9.998401895474226e-05, "loss": 2.2101, "step": 1600 }, { "epoch": 0.01, "learning_rate": 9.998391224259656e-05, "loss": 2.2066, "step": 1605 }, { "epoch": 0.01, "learning_rate": 9.99838051754119e-05, "loss": 2.2581, "step": 1610 }, { "epoch": 0.01, "learning_rate": 9.9983697753189e-05, "loss": 2.1581, "step": 1615 }, { "epoch": 0.01, "learning_rate": 9.998358997592865e-05, "loss": 2.093, "step": 1620 }, { "epoch": 0.01, "learning_rate": 9.998348184363159e-05, "loss": 2.1358, "step": 1625 }, { "epoch": 0.01, "learning_rate": 9.99833733562986e-05, "loss": 2.1701, "step": 1630 }, { "epoch": 0.01, "learning_rate": 9.998326451393045e-05, "loss": 2.1604, "step": 1635 }, { "epoch": 0.01, "learning_rate": 9.998315531652793e-05, "loss": 2.1761, "step": 1640 }, { "epoch": 0.01, "learning_rate": 9.998304576409178e-05, "loss": 2.1755, "step": 1645 }, { "epoch": 0.01, "learning_rate": 9.998293585662283e-05, "loss": 2.2117, "step": 1650 }, { "epoch": 0.01, "learning_rate": 9.998282559412181e-05, "loss": 2.1516, "step": 1655 }, { "epoch": 0.01, "learning_rate": 9.998271497658952e-05, "loss": 2.1178, "step": 1660 }, { "epoch": 0.01, "learning_rate": 9.998260400402675e-05, "loss": 2.193, "step": 1665 }, { "epoch": 0.01, "learning_rate": 9.99824926764343e-05, "loss": 2.2114, "step": 1670 }, { "epoch": 0.01, "learning_rate": 9.998238099381292e-05, "loss": 2.1463, "step": 1675 }, { "epoch": 0.01, "learning_rate": 9.998226895616344e-05, "loss": 2.2288, "step": 1680 }, { "epoch": 0.01, "learning_rate": 9.998215656348667e-05, "loss": 2.1961, "step": 1685 }, { "epoch": 0.01, "learning_rate": 9.998204381578335e-05, "loss": 2.1976, "step": 1690 }, { "epoch": 0.01, "learning_rate": 9.998193071305433e-05, "loss": 2.1581, "step": 1695 }, { "epoch": 0.01, "learning_rate": 9.99818172553004e-05, "loss": 2.0984, "step": 1700 }, { "epoch": 0.01, "learning_rate": 9.998170344252238e-05, "loss": 2.1273, "step": 1705 }, { "epoch": 0.01, "learning_rate": 9.998158927472104e-05, "loss": 2.1292, "step": 1710 }, { "epoch": 0.01, "learning_rate": 9.998147475189722e-05, "loss": 2.1373, "step": 1715 }, { "epoch": 0.01, "learning_rate": 9.998135987405173e-05, "loss": 2.1552, "step": 1720 }, { "epoch": 0.01, "learning_rate": 9.998124464118539e-05, "loss": 2.0913, "step": 1725 }, { "epoch": 0.01, "learning_rate": 9.998112905329899e-05, "loss": 2.1644, "step": 1730 }, { "epoch": 0.01, "learning_rate": 9.998101311039339e-05, "loss": 2.1454, "step": 1735 }, { "epoch": 0.01, "learning_rate": 9.998089681246941e-05, "loss": 2.1481, "step": 1740 }, { "epoch": 0.01, "learning_rate": 9.998078015952783e-05, "loss": 2.1885, "step": 1745 }, { "epoch": 0.01, "learning_rate": 9.998066315156953e-05, "loss": 2.1368, "step": 1750 }, { "epoch": 0.01, "learning_rate": 9.998054578859532e-05, "loss": 2.1375, "step": 1755 }, { "epoch": 0.01, "learning_rate": 9.998042807060604e-05, "loss": 2.1901, "step": 1760 }, { "epoch": 0.01, "learning_rate": 9.998030999760253e-05, "loss": 2.1038, "step": 1765 }, { "epoch": 0.01, "learning_rate": 9.99801915695856e-05, "loss": 2.1938, "step": 1770 }, { "epoch": 0.01, "learning_rate": 9.998007278655612e-05, "loss": 2.1706, "step": 1775 }, { "epoch": 0.01, "learning_rate": 9.997995364851493e-05, "loss": 2.1044, "step": 1780 }, { "epoch": 0.01, "learning_rate": 9.997983415546287e-05, "loss": 2.1951, "step": 1785 }, { "epoch": 0.01, "learning_rate": 9.997971430740079e-05, "loss": 2.1219, "step": 1790 }, { "epoch": 0.01, "learning_rate": 9.997959410432955e-05, "loss": 2.118, "step": 1795 }, { "epoch": 0.01, "learning_rate": 9.997947354624998e-05, "loss": 2.244, "step": 1800 }, { "epoch": 0.01, "learning_rate": 9.997935263316296e-05, "loss": 2.2016, "step": 1805 }, { "epoch": 0.01, "learning_rate": 9.997923136506934e-05, "loss": 2.1986, "step": 1810 }, { "epoch": 0.01, "learning_rate": 9.997910974196999e-05, "loss": 2.1718, "step": 1815 }, { "epoch": 0.01, "learning_rate": 9.997898776386576e-05, "loss": 2.1469, "step": 1820 }, { "epoch": 0.01, "learning_rate": 9.997886543075752e-05, "loss": 2.0369, "step": 1825 }, { "epoch": 0.01, "learning_rate": 9.997874274264615e-05, "loss": 2.0537, "step": 1830 }, { "epoch": 0.01, "learning_rate": 9.99786196995325e-05, "loss": 2.2287, "step": 1835 }, { "epoch": 0.01, "learning_rate": 9.997849630141747e-05, "loss": 2.1659, "step": 1840 }, { "epoch": 0.01, "learning_rate": 9.997837254830191e-05, "loss": 2.2158, "step": 1845 }, { "epoch": 0.01, "learning_rate": 9.997824844018673e-05, "loss": 2.1484, "step": 1850 }, { "epoch": 0.01, "learning_rate": 9.997812397707277e-05, "loss": 2.1025, "step": 1855 }, { "epoch": 0.01, "learning_rate": 9.997799915896098e-05, "loss": 2.1684, "step": 1860 }, { "epoch": 0.01, "learning_rate": 9.997787398585216e-05, "loss": 2.127, "step": 1865 }, { "epoch": 0.01, "learning_rate": 9.997774845774727e-05, "loss": 2.184, "step": 1870 }, { "epoch": 0.01, "learning_rate": 9.997762257464717e-05, "loss": 2.194, "step": 1875 }, { "epoch": 0.01, "learning_rate": 9.997749633655276e-05, "loss": 2.204, "step": 1880 }, { "epoch": 0.01, "learning_rate": 9.997736974346494e-05, "loss": 2.11, "step": 1885 }, { "epoch": 0.01, "learning_rate": 9.99772427953846e-05, "loss": 2.1991, "step": 1890 }, { "epoch": 0.01, "learning_rate": 9.997711549231265e-05, "loss": 2.1468, "step": 1895 }, { "epoch": 0.01, "learning_rate": 9.997698783424998e-05, "loss": 2.1537, "step": 1900 }, { "epoch": 0.01, "learning_rate": 9.997685982119752e-05, "loss": 2.1576, "step": 1905 }, { "epoch": 0.01, "learning_rate": 9.997673145315616e-05, "loss": 2.1926, "step": 1910 }, { "epoch": 0.01, "learning_rate": 9.997660273012683e-05, "loss": 2.1583, "step": 1915 }, { "epoch": 0.01, "learning_rate": 9.997647365211042e-05, "loss": 2.175, "step": 1920 }, { "epoch": 0.01, "learning_rate": 9.997634421910788e-05, "loss": 2.2242, "step": 1925 }, { "epoch": 0.01, "learning_rate": 9.99762144311201e-05, "loss": 2.1012, "step": 1930 }, { "epoch": 0.01, "learning_rate": 9.997608428814801e-05, "loss": 2.1825, "step": 1935 }, { "epoch": 0.01, "learning_rate": 9.997595379019253e-05, "loss": 2.1936, "step": 1940 }, { "epoch": 0.01, "learning_rate": 9.99758229372546e-05, "loss": 2.0843, "step": 1945 }, { "epoch": 0.01, "learning_rate": 9.997569172933515e-05, "loss": 2.1341, "step": 1950 }, { "epoch": 0.01, "learning_rate": 9.99755601664351e-05, "loss": 2.2056, "step": 1955 }, { "epoch": 0.01, "learning_rate": 9.99754282485554e-05, "loss": 2.1455, "step": 1960 }, { "epoch": 0.01, "learning_rate": 9.997529597569694e-05, "loss": 2.2111, "step": 1965 }, { "epoch": 0.01, "learning_rate": 9.997516334786073e-05, "loss": 2.1462, "step": 1970 }, { "epoch": 0.01, "learning_rate": 9.997503036504768e-05, "loss": 2.1253, "step": 1975 }, { "epoch": 0.01, "learning_rate": 9.997489702725871e-05, "loss": 2.1177, "step": 1980 }, { "epoch": 0.01, "learning_rate": 9.99747633344948e-05, "loss": 2.1866, "step": 1985 }, { "epoch": 0.01, "learning_rate": 9.997462928675689e-05, "loss": 2.1611, "step": 1990 }, { "epoch": 0.01, "learning_rate": 9.997449488404594e-05, "loss": 2.1551, "step": 1995 }, { "epoch": 0.01, "learning_rate": 9.997436012636288e-05, "loss": 2.1757, "step": 2000 }, { "epoch": 0.01, "learning_rate": 9.997422501370868e-05, "loss": 2.1454, "step": 2005 }, { "epoch": 0.01, "learning_rate": 9.997408954608432e-05, "loss": 2.2189, "step": 2010 }, { "epoch": 0.01, "learning_rate": 9.997395372349073e-05, "loss": 2.1664, "step": 2015 }, { "epoch": 0.01, "learning_rate": 9.997381754592889e-05, "loss": 2.1985, "step": 2020 }, { "epoch": 0.01, "learning_rate": 9.997368101339979e-05, "loss": 2.1798, "step": 2025 }, { "epoch": 0.01, "learning_rate": 9.997354412590437e-05, "loss": 2.0335, "step": 2030 }, { "epoch": 0.01, "learning_rate": 9.997340688344359e-05, "loss": 2.0931, "step": 2035 }, { "epoch": 0.01, "learning_rate": 9.997326928601845e-05, "loss": 2.1387, "step": 2040 }, { "epoch": 0.01, "learning_rate": 9.997313133362994e-05, "loss": 2.2468, "step": 2045 }, { "epoch": 0.01, "learning_rate": 9.997299302627899e-05, "loss": 2.1083, "step": 2050 }, { "epoch": 0.01, "learning_rate": 9.997285436396664e-05, "loss": 2.1459, "step": 2055 }, { "epoch": 0.01, "learning_rate": 9.997271534669384e-05, "loss": 2.1215, "step": 2060 }, { "epoch": 0.01, "learning_rate": 9.997257597446158e-05, "loss": 2.1947, "step": 2065 }, { "epoch": 0.01, "learning_rate": 9.997243624727086e-05, "loss": 2.1519, "step": 2070 }, { "epoch": 0.01, "learning_rate": 9.997229616512267e-05, "loss": 2.1879, "step": 2075 }, { "epoch": 0.01, "learning_rate": 9.997215572801802e-05, "loss": 2.2531, "step": 2080 }, { "epoch": 0.01, "learning_rate": 9.997201493595786e-05, "loss": 2.1118, "step": 2085 }, { "epoch": 0.01, "learning_rate": 9.997187378894324e-05, "loss": 2.1684, "step": 2090 }, { "epoch": 0.01, "learning_rate": 9.997173228697514e-05, "loss": 2.2057, "step": 2095 }, { "epoch": 0.01, "learning_rate": 9.997159043005457e-05, "loss": 2.1782, "step": 2100 }, { "epoch": 0.01, "learning_rate": 9.997144821818254e-05, "loss": 2.154, "step": 2105 }, { "epoch": 0.01, "learning_rate": 9.997130565136005e-05, "loss": 2.1836, "step": 2110 }, { "epoch": 0.01, "learning_rate": 9.997116272958811e-05, "loss": 2.2081, "step": 2115 }, { "epoch": 0.01, "learning_rate": 9.997101945286777e-05, "loss": 2.121, "step": 2120 }, { "epoch": 0.01, "learning_rate": 9.997087582120001e-05, "loss": 2.1661, "step": 2125 }, { "epoch": 0.01, "learning_rate": 9.997073183458585e-05, "loss": 2.0834, "step": 2130 }, { "epoch": 0.01, "learning_rate": 9.997058749302635e-05, "loss": 2.1925, "step": 2135 }, { "epoch": 0.01, "learning_rate": 9.997044279652248e-05, "loss": 2.1203, "step": 2140 }, { "epoch": 0.01, "learning_rate": 9.997029774507532e-05, "loss": 2.1692, "step": 2145 }, { "epoch": 0.01, "learning_rate": 9.997015233868588e-05, "loss": 2.0983, "step": 2150 }, { "epoch": 0.01, "learning_rate": 9.997000657735518e-05, "loss": 2.1571, "step": 2155 }, { "epoch": 0.01, "learning_rate": 9.996986046108428e-05, "loss": 2.0778, "step": 2160 }, { "epoch": 0.01, "learning_rate": 9.996971398987419e-05, "loss": 2.146, "step": 2165 }, { "epoch": 0.01, "learning_rate": 9.996956716372597e-05, "loss": 2.1212, "step": 2170 }, { "epoch": 0.01, "learning_rate": 9.996941998264066e-05, "loss": 2.1345, "step": 2175 }, { "epoch": 0.01, "learning_rate": 9.99692724466193e-05, "loss": 2.1075, "step": 2180 }, { "epoch": 0.01, "learning_rate": 9.996912455566293e-05, "loss": 2.1211, "step": 2185 }, { "epoch": 0.01, "learning_rate": 9.996897630977264e-05, "loss": 2.1173, "step": 2190 }, { "epoch": 0.01, "learning_rate": 9.996882770894944e-05, "loss": 2.1163, "step": 2195 }, { "epoch": 0.01, "learning_rate": 9.99686787531944e-05, "loss": 2.1874, "step": 2200 }, { "epoch": 0.01, "learning_rate": 9.996852944250857e-05, "loss": 2.1383, "step": 2205 }, { "epoch": 0.01, "learning_rate": 9.996837977689304e-05, "loss": 2.113, "step": 2210 }, { "epoch": 0.01, "learning_rate": 9.996822975634884e-05, "loss": 2.0918, "step": 2215 }, { "epoch": 0.01, "learning_rate": 9.996807938087705e-05, "loss": 2.179, "step": 2220 }, { "epoch": 0.01, "learning_rate": 9.996792865047873e-05, "loss": 2.1038, "step": 2225 }, { "epoch": 0.01, "learning_rate": 9.996777756515495e-05, "loss": 2.1506, "step": 2230 }, { "epoch": 0.01, "learning_rate": 9.99676261249068e-05, "loss": 2.0858, "step": 2235 }, { "epoch": 0.01, "learning_rate": 9.996747432973535e-05, "loss": 2.0471, "step": 2240 }, { "epoch": 0.01, "learning_rate": 9.996732217964166e-05, "loss": 2.171, "step": 2245 }, { "epoch": 0.01, "learning_rate": 9.996716967462682e-05, "loss": 2.1919, "step": 2250 }, { "epoch": 0.01, "learning_rate": 9.996701681469193e-05, "loss": 2.1703, "step": 2255 }, { "epoch": 0.01, "learning_rate": 9.996686359983807e-05, "loss": 2.1452, "step": 2260 }, { "epoch": 0.01, "learning_rate": 9.996671003006633e-05, "loss": 2.1438, "step": 2265 }, { "epoch": 0.01, "learning_rate": 9.996655610537777e-05, "loss": 2.0694, "step": 2270 }, { "epoch": 0.01, "learning_rate": 9.99664018257735e-05, "loss": 2.1681, "step": 2275 }, { "epoch": 0.01, "learning_rate": 9.996624719125463e-05, "loss": 2.1955, "step": 2280 }, { "epoch": 0.01, "learning_rate": 9.996609220182227e-05, "loss": 2.1705, "step": 2285 }, { "epoch": 0.01, "learning_rate": 9.996593685747747e-05, "loss": 2.1137, "step": 2290 }, { "epoch": 0.01, "learning_rate": 9.996578115822138e-05, "loss": 2.1733, "step": 2295 }, { "epoch": 0.01, "learning_rate": 9.996562510405509e-05, "loss": 2.2201, "step": 2300 }, { "epoch": 0.01, "learning_rate": 9.996546869497971e-05, "loss": 2.1051, "step": 2305 }, { "epoch": 0.01, "learning_rate": 9.996531193099634e-05, "loss": 2.2088, "step": 2310 }, { "epoch": 0.01, "learning_rate": 9.996515481210611e-05, "loss": 2.1296, "step": 2315 }, { "epoch": 0.01, "learning_rate": 9.996499733831013e-05, "loss": 2.1409, "step": 2320 }, { "epoch": 0.01, "learning_rate": 9.996483950960951e-05, "loss": 2.1582, "step": 2325 }, { "epoch": 0.01, "learning_rate": 9.996468132600539e-05, "loss": 2.21, "step": 2330 }, { "epoch": 0.01, "learning_rate": 9.996452278749889e-05, "loss": 2.0888, "step": 2335 }, { "epoch": 0.01, "learning_rate": 9.99643638940911e-05, "loss": 2.0855, "step": 2340 }, { "epoch": 0.01, "learning_rate": 9.99642046457832e-05, "loss": 2.1133, "step": 2345 }, { "epoch": 0.01, "learning_rate": 9.996404504257628e-05, "loss": 2.1955, "step": 2350 }, { "epoch": 0.01, "learning_rate": 9.996388508447151e-05, "loss": 2.1283, "step": 2355 }, { "epoch": 0.01, "learning_rate": 9.996372477147e-05, "loss": 2.1481, "step": 2360 }, { "epoch": 0.01, "learning_rate": 9.99635641035729e-05, "loss": 2.1208, "step": 2365 }, { "epoch": 0.01, "learning_rate": 9.996340308078134e-05, "loss": 2.2239, "step": 2370 }, { "epoch": 0.01, "learning_rate": 9.996324170309648e-05, "loss": 2.1492, "step": 2375 }, { "epoch": 0.01, "learning_rate": 9.996307997051946e-05, "loss": 2.1751, "step": 2380 }, { "epoch": 0.01, "learning_rate": 9.996291788305142e-05, "loss": 2.1474, "step": 2385 }, { "epoch": 0.01, "learning_rate": 9.996275544069351e-05, "loss": 2.1329, "step": 2390 }, { "epoch": 0.01, "learning_rate": 9.996259264344692e-05, "loss": 2.1967, "step": 2395 }, { "epoch": 0.01, "learning_rate": 9.996242949131276e-05, "loss": 2.1344, "step": 2400 }, { "epoch": 0.01, "learning_rate": 9.99622659842922e-05, "loss": 2.1912, "step": 2405 }, { "epoch": 0.01, "learning_rate": 9.996210212238641e-05, "loss": 2.0982, "step": 2410 }, { "epoch": 0.01, "learning_rate": 9.996193790559656e-05, "loss": 2.0955, "step": 2415 }, { "epoch": 0.01, "learning_rate": 9.996177333392381e-05, "loss": 2.0673, "step": 2420 }, { "epoch": 0.01, "learning_rate": 9.996160840736933e-05, "loss": 2.1148, "step": 2425 }, { "epoch": 0.01, "learning_rate": 9.996144312593429e-05, "loss": 2.0884, "step": 2430 }, { "epoch": 0.01, "learning_rate": 9.996127748961985e-05, "loss": 2.1427, "step": 2435 }, { "epoch": 0.01, "learning_rate": 9.996111149842722e-05, "loss": 2.1604, "step": 2440 }, { "epoch": 0.01, "learning_rate": 9.996094515235754e-05, "loss": 2.1228, "step": 2445 }, { "epoch": 0.01, "learning_rate": 9.996077845141201e-05, "loss": 2.1371, "step": 2450 }, { "epoch": 0.01, "learning_rate": 9.996061139559183e-05, "loss": 2.0901, "step": 2455 }, { "epoch": 0.01, "learning_rate": 9.996044398489816e-05, "loss": 2.1381, "step": 2460 }, { "epoch": 0.01, "learning_rate": 9.996027621933222e-05, "loss": 2.1273, "step": 2465 }, { "epoch": 0.01, "learning_rate": 9.996010809889515e-05, "loss": 2.1295, "step": 2470 }, { "epoch": 0.01, "learning_rate": 9.99599396235882e-05, "loss": 2.1174, "step": 2475 }, { "epoch": 0.01, "learning_rate": 9.995977079341255e-05, "loss": 2.0971, "step": 2480 }, { "epoch": 0.01, "learning_rate": 9.995960160836937e-05, "loss": 2.1277, "step": 2485 }, { "epoch": 0.01, "learning_rate": 9.995943206845989e-05, "loss": 2.1866, "step": 2490 }, { "epoch": 0.01, "learning_rate": 9.995926217368532e-05, "loss": 2.0881, "step": 2495 }, { "epoch": 0.01, "learning_rate": 9.995909192404685e-05, "loss": 2.1177, "step": 2500 }, { "epoch": 0.01, "learning_rate": 9.995892131954567e-05, "loss": 2.1555, "step": 2505 }, { "epoch": 0.01, "learning_rate": 9.995875036018305e-05, "loss": 2.1186, "step": 2510 }, { "epoch": 0.01, "learning_rate": 9.995857904596015e-05, "loss": 2.17, "step": 2515 }, { "epoch": 0.01, "learning_rate": 9.995840737687821e-05, "loss": 2.0997, "step": 2520 }, { "epoch": 0.01, "learning_rate": 9.995823535293845e-05, "loss": 2.0572, "step": 2525 }, { "epoch": 0.01, "learning_rate": 9.99580629741421e-05, "loss": 2.1231, "step": 2530 }, { "epoch": 0.01, "learning_rate": 9.995789024049035e-05, "loss": 2.0826, "step": 2535 }, { "epoch": 0.01, "learning_rate": 9.995771715198446e-05, "loss": 2.1464, "step": 2540 }, { "epoch": 0.01, "learning_rate": 9.995754370862565e-05, "loss": 2.1429, "step": 2545 }, { "epoch": 0.01, "learning_rate": 9.995736991041516e-05, "loss": 2.1132, "step": 2550 }, { "epoch": 0.01, "learning_rate": 9.995719575735421e-05, "loss": 2.1707, "step": 2555 }, { "epoch": 0.01, "learning_rate": 9.995702124944404e-05, "loss": 2.0475, "step": 2560 }, { "epoch": 0.01, "learning_rate": 9.995684638668589e-05, "loss": 2.1323, "step": 2565 }, { "epoch": 0.01, "learning_rate": 9.995667116908101e-05, "loss": 2.1117, "step": 2570 }, { "epoch": 0.01, "learning_rate": 9.995649559663063e-05, "loss": 2.2003, "step": 2575 }, { "epoch": 0.01, "learning_rate": 9.995631966933601e-05, "loss": 2.1345, "step": 2580 }, { "epoch": 0.01, "learning_rate": 9.995614338719841e-05, "loss": 2.1317, "step": 2585 }, { "epoch": 0.01, "learning_rate": 9.995596675021905e-05, "loss": 2.1195, "step": 2590 }, { "epoch": 0.01, "learning_rate": 9.995578975839921e-05, "loss": 2.1967, "step": 2595 }, { "epoch": 0.01, "learning_rate": 9.995561241174014e-05, "loss": 2.1783, "step": 2600 }, { "epoch": 0.01, "learning_rate": 9.995543471024309e-05, "loss": 2.1464, "step": 2605 }, { "epoch": 0.01, "learning_rate": 9.995525665390935e-05, "loss": 2.1402, "step": 2610 }, { "epoch": 0.01, "learning_rate": 9.995507824274016e-05, "loss": 2.1201, "step": 2615 }, { "epoch": 0.01, "learning_rate": 9.995489947673677e-05, "loss": 2.122, "step": 2620 }, { "epoch": 0.01, "learning_rate": 9.99547203559005e-05, "loss": 2.0298, "step": 2625 }, { "epoch": 0.01, "learning_rate": 9.995454088023258e-05, "loss": 2.1367, "step": 2630 }, { "epoch": 0.01, "learning_rate": 9.995436104973431e-05, "loss": 2.1453, "step": 2635 }, { "epoch": 0.01, "learning_rate": 9.995418086440696e-05, "loss": 2.1443, "step": 2640 }, { "epoch": 0.01, "learning_rate": 9.99540003242518e-05, "loss": 2.0653, "step": 2645 }, { "epoch": 0.01, "learning_rate": 9.995381942927014e-05, "loss": 2.1893, "step": 2650 }, { "epoch": 0.01, "learning_rate": 9.995363817946321e-05, "loss": 2.1107, "step": 2655 }, { "epoch": 0.01, "learning_rate": 9.995345657483235e-05, "loss": 2.0983, "step": 2660 }, { "epoch": 0.01, "learning_rate": 9.995327461537884e-05, "loss": 2.1801, "step": 2665 }, { "epoch": 0.01, "learning_rate": 9.995309230110397e-05, "loss": 2.1235, "step": 2670 }, { "epoch": 0.01, "learning_rate": 9.995290963200902e-05, "loss": 2.0591, "step": 2675 }, { "epoch": 0.01, "learning_rate": 9.995272660809529e-05, "loss": 2.0806, "step": 2680 }, { "epoch": 0.01, "learning_rate": 9.995254322936409e-05, "loss": 2.1917, "step": 2685 }, { "epoch": 0.01, "learning_rate": 9.995235949581674e-05, "loss": 2.1048, "step": 2690 }, { "epoch": 0.01, "learning_rate": 9.995217540745449e-05, "loss": 2.0945, "step": 2695 }, { "epoch": 0.01, "learning_rate": 9.995199096427871e-05, "loss": 2.0783, "step": 2700 }, { "epoch": 0.01, "learning_rate": 9.995180616629067e-05, "loss": 2.0235, "step": 2705 }, { "epoch": 0.01, "learning_rate": 9.995162101349171e-05, "loss": 2.1152, "step": 2710 }, { "epoch": 0.01, "learning_rate": 9.995143550588311e-05, "loss": 2.1279, "step": 2715 }, { "epoch": 0.01, "learning_rate": 9.995124964346622e-05, "loss": 2.1414, "step": 2720 }, { "epoch": 0.01, "learning_rate": 9.995106342624236e-05, "loss": 2.0728, "step": 2725 }, { "epoch": 0.01, "learning_rate": 9.995087685421283e-05, "loss": 2.081, "step": 2730 }, { "epoch": 0.01, "learning_rate": 9.995068992737896e-05, "loss": 2.158, "step": 2735 }, { "epoch": 0.01, "learning_rate": 9.995050264574209e-05, "loss": 2.1343, "step": 2740 }, { "epoch": 0.01, "learning_rate": 9.995031500930354e-05, "loss": 2.0842, "step": 2745 }, { "epoch": 0.01, "learning_rate": 9.995012701806465e-05, "loss": 2.0863, "step": 2750 }, { "epoch": 0.01, "learning_rate": 9.994993867202676e-05, "loss": 2.1224, "step": 2755 }, { "epoch": 0.01, "learning_rate": 9.99497499711912e-05, "loss": 2.1162, "step": 2760 }, { "epoch": 0.01, "learning_rate": 9.99495609155593e-05, "loss": 2.1158, "step": 2765 }, { "epoch": 0.01, "learning_rate": 9.994937150513242e-05, "loss": 2.1247, "step": 2770 }, { "epoch": 0.01, "learning_rate": 9.994918173991189e-05, "loss": 2.1815, "step": 2775 }, { "epoch": 0.01, "learning_rate": 9.994899161989908e-05, "loss": 2.1047, "step": 2780 }, { "epoch": 0.01, "learning_rate": 9.994880114509532e-05, "loss": 2.1875, "step": 2785 }, { "epoch": 0.01, "learning_rate": 9.994861031550197e-05, "loss": 2.0797, "step": 2790 }, { "epoch": 0.01, "learning_rate": 9.994841913112038e-05, "loss": 2.0576, "step": 2795 }, { "epoch": 0.02, "learning_rate": 9.994822759195193e-05, "loss": 2.1058, "step": 2800 }, { "epoch": 0.02, "learning_rate": 9.994803569799796e-05, "loss": 2.1434, "step": 2805 }, { "epoch": 0.02, "learning_rate": 9.994784344925983e-05, "loss": 2.1208, "step": 2810 }, { "epoch": 0.02, "learning_rate": 9.994765084573891e-05, "loss": 2.0372, "step": 2815 }, { "epoch": 0.02, "learning_rate": 9.994745788743658e-05, "loss": 2.1411, "step": 2820 }, { "epoch": 0.02, "learning_rate": 9.994726457435419e-05, "loss": 2.1772, "step": 2825 }, { "epoch": 0.02, "learning_rate": 9.994707090649313e-05, "loss": 2.1164, "step": 2830 }, { "epoch": 0.02, "learning_rate": 9.994687688385478e-05, "loss": 2.0338, "step": 2835 }, { "epoch": 0.02, "learning_rate": 9.99466825064405e-05, "loss": 2.0496, "step": 2840 }, { "epoch": 0.02, "learning_rate": 9.994648777425169e-05, "loss": 2.1539, "step": 2845 }, { "epoch": 0.02, "learning_rate": 9.99462926872897e-05, "loss": 2.1241, "step": 2850 }, { "epoch": 0.02, "learning_rate": 9.994609724555595e-05, "loss": 2.1269, "step": 2855 }, { "epoch": 0.02, "learning_rate": 9.994590144905183e-05, "loss": 2.1446, "step": 2860 }, { "epoch": 0.02, "learning_rate": 9.99457052977787e-05, "loss": 2.0867, "step": 2865 }, { "epoch": 0.02, "learning_rate": 9.994550879173797e-05, "loss": 2.1193, "step": 2870 }, { "epoch": 0.02, "learning_rate": 9.994531193093104e-05, "loss": 2.1206, "step": 2875 }, { "epoch": 0.02, "learning_rate": 9.99451147153593e-05, "loss": 2.0615, "step": 2880 }, { "epoch": 0.02, "learning_rate": 9.994491714502417e-05, "loss": 2.1501, "step": 2885 }, { "epoch": 0.02, "learning_rate": 9.994471921992703e-05, "loss": 2.112, "step": 2890 }, { "epoch": 0.02, "learning_rate": 9.994452094006928e-05, "loss": 2.1012, "step": 2895 }, { "epoch": 0.02, "learning_rate": 9.994432230545236e-05, "loss": 2.1588, "step": 2900 }, { "epoch": 0.02, "learning_rate": 9.994412331607766e-05, "loss": 2.1417, "step": 2905 }, { "epoch": 0.02, "learning_rate": 9.99439239719466e-05, "loss": 2.1224, "step": 2910 }, { "epoch": 0.02, "learning_rate": 9.99437242730606e-05, "loss": 2.0665, "step": 2915 }, { "epoch": 0.02, "learning_rate": 9.994352421942106e-05, "loss": 2.0654, "step": 2920 }, { "epoch": 0.02, "learning_rate": 9.99433238110294e-05, "loss": 2.1563, "step": 2925 }, { "epoch": 0.02, "learning_rate": 9.994312304788708e-05, "loss": 2.105, "step": 2930 }, { "epoch": 0.02, "learning_rate": 9.994292192999548e-05, "loss": 2.1558, "step": 2935 }, { "epoch": 0.02, "learning_rate": 9.994272045735606e-05, "loss": 2.1118, "step": 2940 }, { "epoch": 0.02, "learning_rate": 9.994251862997025e-05, "loss": 2.1664, "step": 2945 }, { "epoch": 0.02, "learning_rate": 9.994231644783948e-05, "loss": 2.1915, "step": 2950 }, { "epoch": 0.02, "learning_rate": 9.994211391096517e-05, "loss": 2.1064, "step": 2955 }, { "epoch": 0.02, "learning_rate": 9.994191101934878e-05, "loss": 2.1568, "step": 2960 }, { "epoch": 0.02, "learning_rate": 9.994170777299172e-05, "loss": 2.1486, "step": 2965 }, { "epoch": 0.02, "learning_rate": 9.994150417189547e-05, "loss": 2.0641, "step": 2970 }, { "epoch": 0.02, "learning_rate": 9.994130021606147e-05, "loss": 2.1553, "step": 2975 }, { "epoch": 0.02, "learning_rate": 9.994109590549115e-05, "loss": 2.1309, "step": 2980 }, { "epoch": 0.02, "learning_rate": 9.994089124018596e-05, "loss": 2.1357, "step": 2985 }, { "epoch": 0.02, "learning_rate": 9.994068622014738e-05, "loss": 2.0536, "step": 2990 }, { "epoch": 0.02, "learning_rate": 9.994048084537684e-05, "loss": 2.108, "step": 2995 }, { "epoch": 0.02, "learning_rate": 9.994027511587582e-05, "loss": 2.1347, "step": 3000 }, { "epoch": 0.02, "learning_rate": 9.994006903164576e-05, "loss": 2.2014, "step": 3005 }, { "epoch": 0.02, "learning_rate": 9.993986259268815e-05, "loss": 2.0732, "step": 3010 }, { "epoch": 0.02, "learning_rate": 9.993965579900444e-05, "loss": 2.0932, "step": 3015 }, { "epoch": 0.02, "learning_rate": 9.993944865059609e-05, "loss": 2.1291, "step": 3020 }, { "epoch": 0.02, "learning_rate": 9.993924114746458e-05, "loss": 2.124, "step": 3025 }, { "epoch": 0.02, "learning_rate": 9.99390332896114e-05, "loss": 2.1174, "step": 3030 }, { "epoch": 0.02, "learning_rate": 9.9938825077038e-05, "loss": 2.1522, "step": 3035 }, { "epoch": 0.02, "learning_rate": 9.993861650974587e-05, "loss": 2.0949, "step": 3040 }, { "epoch": 0.02, "learning_rate": 9.99384075877365e-05, "loss": 2.1321, "step": 3045 }, { "epoch": 0.02, "learning_rate": 9.993819831101136e-05, "loss": 2.0997, "step": 3050 }, { "epoch": 0.02, "learning_rate": 9.993798867957196e-05, "loss": 1.9854, "step": 3055 }, { "epoch": 0.02, "learning_rate": 9.993777869341975e-05, "loss": 2.047, "step": 3060 }, { "epoch": 0.02, "learning_rate": 9.993756835255625e-05, "loss": 2.1012, "step": 3065 }, { "epoch": 0.02, "learning_rate": 9.993735765698296e-05, "loss": 2.1487, "step": 3070 }, { "epoch": 0.02, "learning_rate": 9.993714660670135e-05, "loss": 2.092, "step": 3075 }, { "epoch": 0.02, "learning_rate": 9.993693520171292e-05, "loss": 2.0648, "step": 3080 }, { "epoch": 0.02, "learning_rate": 9.993672344201921e-05, "loss": 2.1037, "step": 3085 }, { "epoch": 0.02, "learning_rate": 9.99365113276217e-05, "loss": 2.0949, "step": 3090 }, { "epoch": 0.02, "learning_rate": 9.993629885852187e-05, "loss": 2.1528, "step": 3095 }, { "epoch": 0.02, "learning_rate": 9.993608603472127e-05, "loss": 1.9833, "step": 3100 }, { "epoch": 0.02, "learning_rate": 9.993587285622141e-05, "loss": 2.0672, "step": 3105 }, { "epoch": 0.02, "learning_rate": 9.993565932302377e-05, "loss": 2.14, "step": 3110 }, { "epoch": 0.02, "learning_rate": 9.993544543512989e-05, "loss": 2.0756, "step": 3115 }, { "epoch": 0.02, "learning_rate": 9.99352311925413e-05, "loss": 2.0531, "step": 3120 }, { "epoch": 0.02, "learning_rate": 9.993501659525951e-05, "loss": 2.0048, "step": 3125 }, { "epoch": 0.02, "learning_rate": 9.993480164328602e-05, "loss": 2.0591, "step": 3130 }, { "epoch": 0.02, "learning_rate": 9.99345863366224e-05, "loss": 2.1044, "step": 3135 }, { "epoch": 0.02, "learning_rate": 9.993437067527015e-05, "loss": 2.1492, "step": 3140 }, { "epoch": 0.02, "learning_rate": 9.993415465923081e-05, "loss": 2.0853, "step": 3145 }, { "epoch": 0.02, "learning_rate": 9.993393828850593e-05, "loss": 2.1198, "step": 3150 }, { "epoch": 0.02, "learning_rate": 9.993372156309701e-05, "loss": 2.142, "step": 3155 }, { "epoch": 0.02, "learning_rate": 9.993350448300563e-05, "loss": 1.9885, "step": 3160 }, { "epoch": 0.02, "learning_rate": 9.993328704823331e-05, "loss": 2.0396, "step": 3165 }, { "epoch": 0.02, "learning_rate": 9.993306925878159e-05, "loss": 2.1393, "step": 3170 }, { "epoch": 0.02, "learning_rate": 9.993285111465203e-05, "loss": 2.0735, "step": 3175 }, { "epoch": 0.02, "learning_rate": 9.993263261584616e-05, "loss": 2.1967, "step": 3180 }, { "epoch": 0.02, "learning_rate": 9.993241376236557e-05, "loss": 2.1183, "step": 3185 }, { "epoch": 0.02, "learning_rate": 9.993219455421178e-05, "loss": 2.1021, "step": 3190 }, { "epoch": 0.02, "learning_rate": 9.993197499138636e-05, "loss": 2.0567, "step": 3195 }, { "epoch": 0.02, "learning_rate": 9.993175507389085e-05, "loss": 2.05, "step": 3200 }, { "epoch": 0.02, "learning_rate": 9.993153480172686e-05, "loss": 2.0591, "step": 3205 }, { "epoch": 0.02, "learning_rate": 9.993131417489591e-05, "loss": 2.141, "step": 3210 }, { "epoch": 0.02, "learning_rate": 9.993109319339957e-05, "loss": 2.1258, "step": 3215 }, { "epoch": 0.02, "learning_rate": 9.993087185723944e-05, "loss": 2.0829, "step": 3220 }, { "epoch": 0.02, "learning_rate": 9.993065016641706e-05, "loss": 2.1085, "step": 3225 }, { "epoch": 0.02, "learning_rate": 9.993042812093402e-05, "loss": 2.0949, "step": 3230 }, { "epoch": 0.02, "learning_rate": 9.99302057207919e-05, "loss": 2.1479, "step": 3235 }, { "epoch": 0.02, "learning_rate": 9.992998296599228e-05, "loss": 2.1102, "step": 3240 }, { "epoch": 0.02, "learning_rate": 9.992975985653673e-05, "loss": 2.0816, "step": 3245 }, { "epoch": 0.02, "learning_rate": 9.992953639242684e-05, "loss": 2.1524, "step": 3250 }, { "epoch": 0.02, "learning_rate": 9.99293125736642e-05, "loss": 2.0424, "step": 3255 }, { "epoch": 0.02, "learning_rate": 9.992908840025039e-05, "loss": 2.0284, "step": 3260 }, { "epoch": 0.02, "learning_rate": 9.992886387218702e-05, "loss": 2.1154, "step": 3265 }, { "epoch": 0.02, "learning_rate": 9.992863898947568e-05, "loss": 2.1435, "step": 3270 }, { "epoch": 0.02, "learning_rate": 9.992841375211797e-05, "loss": 2.0608, "step": 3275 }, { "epoch": 0.02, "learning_rate": 9.992818816011547e-05, "loss": 2.042, "step": 3280 }, { "epoch": 0.02, "learning_rate": 9.992796221346981e-05, "loss": 2.0706, "step": 3285 }, { "epoch": 0.02, "learning_rate": 9.992773591218256e-05, "loss": 2.101, "step": 3290 }, { "epoch": 0.02, "learning_rate": 9.992750925625536e-05, "loss": 2.1302, "step": 3295 }, { "epoch": 0.02, "learning_rate": 9.99272822456898e-05, "loss": 2.0219, "step": 3300 }, { "epoch": 0.02, "learning_rate": 9.99270548804875e-05, "loss": 2.0402, "step": 3305 }, { "epoch": 0.02, "learning_rate": 9.992682716065009e-05, "loss": 2.0915, "step": 3310 }, { "epoch": 0.02, "learning_rate": 9.992659908617916e-05, "loss": 2.1156, "step": 3315 }, { "epoch": 0.02, "learning_rate": 9.992637065707635e-05, "loss": 2.0644, "step": 3320 }, { "epoch": 0.02, "learning_rate": 9.992614187334328e-05, "loss": 2.0334, "step": 3325 }, { "epoch": 0.02, "learning_rate": 9.992591273498155e-05, "loss": 2.0331, "step": 3330 }, { "epoch": 0.02, "learning_rate": 9.992568324199283e-05, "loss": 2.1095, "step": 3335 }, { "epoch": 0.02, "learning_rate": 9.992545339437873e-05, "loss": 2.074, "step": 3340 }, { "epoch": 0.02, "learning_rate": 9.992522319214087e-05, "loss": 2.0836, "step": 3345 }, { "epoch": 0.02, "learning_rate": 9.992499263528088e-05, "loss": 2.091, "step": 3350 }, { "epoch": 0.02, "learning_rate": 9.992476172380044e-05, "loss": 2.066, "step": 3355 }, { "epoch": 0.02, "learning_rate": 9.992453045770115e-05, "loss": 2.1505, "step": 3360 }, { "epoch": 0.02, "learning_rate": 9.992429883698466e-05, "loss": 2.0739, "step": 3365 }, { "epoch": 0.02, "learning_rate": 9.992406686165265e-05, "loss": 2.1702, "step": 3370 }, { "epoch": 0.02, "learning_rate": 9.992383453170672e-05, "loss": 2.1138, "step": 3375 }, { "epoch": 0.02, "learning_rate": 9.992360184714855e-05, "loss": 2.0859, "step": 3380 }, { "epoch": 0.02, "learning_rate": 9.992336880797977e-05, "loss": 2.0476, "step": 3385 }, { "epoch": 0.02, "learning_rate": 9.992313541420204e-05, "loss": 2.138, "step": 3390 }, { "epoch": 0.02, "learning_rate": 9.992290166581704e-05, "loss": 2.0537, "step": 3395 }, { "epoch": 0.02, "learning_rate": 9.992266756282642e-05, "loss": 2.0522, "step": 3400 }, { "epoch": 0.02, "learning_rate": 9.992243310523182e-05, "loss": 2.0737, "step": 3405 }, { "epoch": 0.02, "learning_rate": 9.992219829303496e-05, "loss": 2.0488, "step": 3410 }, { "epoch": 0.02, "learning_rate": 9.992196312623742e-05, "loss": 2.0802, "step": 3415 }, { "epoch": 0.02, "learning_rate": 9.992172760484097e-05, "loss": 1.9796, "step": 3420 }, { "epoch": 0.02, "learning_rate": 9.992149172884721e-05, "loss": 2.0359, "step": 3425 }, { "epoch": 0.02, "learning_rate": 9.992125549825785e-05, "loss": 2.0753, "step": 3430 }, { "epoch": 0.02, "learning_rate": 9.992101891307455e-05, "loss": 2.07, "step": 3435 }, { "epoch": 0.02, "learning_rate": 9.992078197329901e-05, "loss": 2.0578, "step": 3440 }, { "epoch": 0.02, "learning_rate": 9.99205446789329e-05, "loss": 2.1023, "step": 3445 }, { "epoch": 0.02, "learning_rate": 9.99203070299779e-05, "loss": 2.0924, "step": 3450 }, { "epoch": 0.02, "learning_rate": 9.992006902643571e-05, "loss": 2.0667, "step": 3455 }, { "epoch": 0.02, "learning_rate": 9.991983066830803e-05, "loss": 1.9825, "step": 3460 }, { "epoch": 0.02, "learning_rate": 9.991959195559652e-05, "loss": 2.0436, "step": 3465 }, { "epoch": 0.02, "learning_rate": 9.991935288830289e-05, "loss": 2.0274, "step": 3470 }, { "epoch": 0.02, "learning_rate": 9.991911346642887e-05, "loss": 2.1177, "step": 3475 }, { "epoch": 0.02, "learning_rate": 9.991887368997612e-05, "loss": 2.1236, "step": 3480 }, { "epoch": 0.02, "learning_rate": 9.991863355894636e-05, "loss": 2.0816, "step": 3485 }, { "epoch": 0.02, "learning_rate": 9.991839307334129e-05, "loss": 2.0726, "step": 3490 }, { "epoch": 0.02, "learning_rate": 9.991815223316261e-05, "loss": 2.0361, "step": 3495 }, { "epoch": 0.02, "learning_rate": 9.991791103841205e-05, "loss": 2.0808, "step": 3500 }, { "epoch": 0.02, "learning_rate": 9.991766948909132e-05, "loss": 2.0804, "step": 3505 }, { "epoch": 0.02, "learning_rate": 9.991742758520213e-05, "loss": 2.0501, "step": 3510 }, { "epoch": 0.02, "learning_rate": 9.991718532674619e-05, "loss": 2.0838, "step": 3515 }, { "epoch": 0.02, "learning_rate": 9.991694271372524e-05, "loss": 2.0909, "step": 3520 }, { "epoch": 0.02, "learning_rate": 9.991669974614098e-05, "loss": 2.1647, "step": 3525 }, { "epoch": 0.02, "learning_rate": 9.991645642399517e-05, "loss": 2.0469, "step": 3530 }, { "epoch": 0.02, "learning_rate": 9.99162127472895e-05, "loss": 2.1149, "step": 3535 }, { "epoch": 0.02, "learning_rate": 9.991596871602571e-05, "loss": 2.0733, "step": 3540 }, { "epoch": 0.02, "learning_rate": 9.991572433020555e-05, "loss": 2.059, "step": 3545 }, { "epoch": 0.02, "learning_rate": 9.991547958983076e-05, "loss": 2.1091, "step": 3550 }, { "epoch": 0.02, "learning_rate": 9.991523449490305e-05, "loss": 2.085, "step": 3555 }, { "epoch": 0.02, "learning_rate": 9.991498904542418e-05, "loss": 2.114, "step": 3560 }, { "epoch": 0.02, "learning_rate": 9.99147432413959e-05, "loss": 2.0382, "step": 3565 }, { "epoch": 0.02, "learning_rate": 9.991449708281993e-05, "loss": 2.0991, "step": 3570 }, { "epoch": 0.02, "learning_rate": 9.991425056969804e-05, "loss": 2.068, "step": 3575 }, { "epoch": 0.02, "learning_rate": 9.991400370203197e-05, "loss": 2.0653, "step": 3580 }, { "epoch": 0.02, "learning_rate": 9.991375647982349e-05, "loss": 2.1443, "step": 3585 }, { "epoch": 0.02, "learning_rate": 9.991350890307433e-05, "loss": 2.0975, "step": 3590 }, { "epoch": 0.02, "learning_rate": 9.991326097178629e-05, "loss": 2.0211, "step": 3595 }, { "epoch": 0.02, "learning_rate": 9.991301268596108e-05, "loss": 2.113, "step": 3600 }, { "epoch": 0.02, "learning_rate": 9.99127640456005e-05, "loss": 2.1179, "step": 3605 }, { "epoch": 0.02, "learning_rate": 9.99125150507063e-05, "loss": 2.1045, "step": 3610 }, { "epoch": 0.02, "learning_rate": 9.991226570128025e-05, "loss": 2.102, "step": 3615 }, { "epoch": 0.02, "learning_rate": 9.991201599732413e-05, "loss": 2.0815, "step": 3620 }, { "epoch": 0.02, "learning_rate": 9.991176593883971e-05, "loss": 2.0296, "step": 3625 }, { "epoch": 0.02, "learning_rate": 9.991151552582874e-05, "loss": 2.0478, "step": 3630 }, { "epoch": 0.02, "learning_rate": 9.991126475829305e-05, "loss": 2.1971, "step": 3635 }, { "epoch": 0.02, "learning_rate": 9.991101363623438e-05, "loss": 2.093, "step": 3640 }, { "epoch": 0.02, "learning_rate": 9.991076215965453e-05, "loss": 2.0844, "step": 3645 }, { "epoch": 0.02, "learning_rate": 9.991051032855529e-05, "loss": 1.9985, "step": 3650 }, { "epoch": 0.02, "learning_rate": 9.991025814293845e-05, "loss": 2.0604, "step": 3655 }, { "epoch": 0.02, "learning_rate": 9.991000560280577e-05, "loss": 2.0789, "step": 3660 }, { "epoch": 0.02, "learning_rate": 9.990975270815908e-05, "loss": 2.0277, "step": 3665 }, { "epoch": 0.02, "learning_rate": 9.990949945900015e-05, "loss": 2.0749, "step": 3670 }, { "epoch": 0.02, "learning_rate": 9.990924585533081e-05, "loss": 2.162, "step": 3675 }, { "epoch": 0.02, "learning_rate": 9.990899189715284e-05, "loss": 2.0544, "step": 3680 }, { "epoch": 0.02, "learning_rate": 9.990873758446805e-05, "loss": 2.1276, "step": 3685 }, { "epoch": 0.02, "learning_rate": 9.990848291727823e-05, "loss": 2.0133, "step": 3690 }, { "epoch": 0.02, "learning_rate": 9.990822789558522e-05, "loss": 2.0193, "step": 3695 }, { "epoch": 0.02, "learning_rate": 9.990797251939081e-05, "loss": 2.1395, "step": 3700 }, { "epoch": 0.02, "learning_rate": 9.990771678869683e-05, "loss": 2.0069, "step": 3705 }, { "epoch": 0.02, "learning_rate": 9.990746070350508e-05, "loss": 2.03, "step": 3710 }, { "epoch": 0.02, "learning_rate": 9.990720426381737e-05, "loss": 2.0554, "step": 3715 }, { "epoch": 0.02, "learning_rate": 9.990694746963553e-05, "loss": 2.1037, "step": 3720 }, { "epoch": 0.02, "learning_rate": 9.99066903209614e-05, "loss": 2.1363, "step": 3725 }, { "epoch": 0.02, "learning_rate": 9.990643281779681e-05, "loss": 2.079, "step": 3730 }, { "epoch": 0.02, "learning_rate": 9.990617496014356e-05, "loss": 2.0835, "step": 3735 }, { "epoch": 0.02, "learning_rate": 9.99059167480035e-05, "loss": 2.1401, "step": 3740 }, { "epoch": 0.02, "learning_rate": 9.990565818137847e-05, "loss": 2.0933, "step": 3745 }, { "epoch": 0.02, "learning_rate": 9.99053992602703e-05, "loss": 2.033, "step": 3750 }, { "epoch": 0.02, "learning_rate": 9.99051399846808e-05, "loss": 2.1374, "step": 3755 }, { "epoch": 0.02, "learning_rate": 9.990488035461185e-05, "loss": 2.1362, "step": 3760 }, { "epoch": 0.02, "learning_rate": 9.99046203700653e-05, "loss": 2.038, "step": 3765 }, { "epoch": 0.02, "learning_rate": 9.990436003104295e-05, "loss": 2.0637, "step": 3770 }, { "epoch": 0.02, "learning_rate": 9.99040993375467e-05, "loss": 2.0728, "step": 3775 }, { "epoch": 0.02, "learning_rate": 9.990383828957839e-05, "loss": 2.0247, "step": 3780 }, { "epoch": 0.02, "learning_rate": 9.990357688713985e-05, "loss": 2.014, "step": 3785 }, { "epoch": 0.02, "learning_rate": 9.990331513023294e-05, "loss": 2.0486, "step": 3790 }, { "epoch": 0.02, "learning_rate": 9.990305301885953e-05, "loss": 1.9962, "step": 3795 }, { "epoch": 0.02, "learning_rate": 9.99027905530215e-05, "loss": 2.1115, "step": 3800 }, { "epoch": 0.02, "learning_rate": 9.99025277327207e-05, "loss": 2.0927, "step": 3805 }, { "epoch": 0.02, "learning_rate": 9.990226455795897e-05, "loss": 1.9722, "step": 3810 }, { "epoch": 0.02, "learning_rate": 9.990200102873822e-05, "loss": 2.0852, "step": 3815 }, { "epoch": 0.02, "learning_rate": 9.99017371450603e-05, "loss": 2.0546, "step": 3820 }, { "epoch": 0.02, "learning_rate": 9.990147290692708e-05, "loss": 2.0951, "step": 3825 }, { "epoch": 0.02, "learning_rate": 9.990120831434047e-05, "loss": 2.0867, "step": 3830 }, { "epoch": 0.02, "learning_rate": 9.990094336730231e-05, "loss": 1.9501, "step": 3835 }, { "epoch": 0.02, "learning_rate": 9.990067806581449e-05, "loss": 2.1001, "step": 3840 }, { "epoch": 0.02, "learning_rate": 9.990041240987891e-05, "loss": 2.1187, "step": 3845 }, { "epoch": 0.02, "learning_rate": 9.990014639949745e-05, "loss": 2.1309, "step": 3850 }, { "epoch": 0.02, "learning_rate": 9.9899880034672e-05, "loss": 2.1072, "step": 3855 }, { "epoch": 0.02, "learning_rate": 9.989961331540444e-05, "loss": 2.0686, "step": 3860 }, { "epoch": 0.02, "learning_rate": 9.989934624169668e-05, "loss": 2.0179, "step": 3865 }, { "epoch": 0.02, "learning_rate": 9.989907881355062e-05, "loss": 2.078, "step": 3870 }, { "epoch": 0.02, "learning_rate": 9.989881103096814e-05, "loss": 2.1654, "step": 3875 }, { "epoch": 0.02, "learning_rate": 9.989854289395115e-05, "loss": 2.0667, "step": 3880 }, { "epoch": 0.02, "learning_rate": 9.989827440250157e-05, "loss": 2.07, "step": 3885 }, { "epoch": 0.02, "learning_rate": 9.98980055566213e-05, "loss": 2.1168, "step": 3890 }, { "epoch": 0.02, "learning_rate": 9.989773635631224e-05, "loss": 2.1341, "step": 3895 }, { "epoch": 0.02, "learning_rate": 9.989746680157628e-05, "loss": 2.0411, "step": 3900 }, { "epoch": 0.02, "learning_rate": 9.989719689241539e-05, "loss": 2.072, "step": 3905 }, { "epoch": 0.02, "learning_rate": 9.989692662883145e-05, "loss": 2.0824, "step": 3910 }, { "epoch": 0.02, "learning_rate": 9.989665601082638e-05, "loss": 2.0306, "step": 3915 }, { "epoch": 0.02, "learning_rate": 9.989638503840213e-05, "loss": 2.0073, "step": 3920 }, { "epoch": 0.02, "learning_rate": 9.98961137115606e-05, "loss": 2.0591, "step": 3925 }, { "epoch": 0.02, "learning_rate": 9.989584203030371e-05, "loss": 2.0908, "step": 3930 }, { "epoch": 0.02, "learning_rate": 9.98955699946334e-05, "loss": 2.0463, "step": 3935 }, { "epoch": 0.02, "learning_rate": 9.989529760455163e-05, "loss": 2.0773, "step": 3940 }, { "epoch": 0.02, "learning_rate": 9.989502486006029e-05, "loss": 2.0684, "step": 3945 }, { "epoch": 0.02, "learning_rate": 9.989475176116134e-05, "loss": 2.0368, "step": 3950 }, { "epoch": 0.02, "learning_rate": 9.989447830785672e-05, "loss": 2.0703, "step": 3955 }, { "epoch": 0.02, "learning_rate": 9.989420450014837e-05, "loss": 2.1062, "step": 3960 }, { "epoch": 0.02, "learning_rate": 9.989393033803823e-05, "loss": 2.1135, "step": 3965 }, { "epoch": 0.02, "learning_rate": 9.989365582152826e-05, "loss": 2.018, "step": 3970 }, { "epoch": 0.02, "learning_rate": 9.989338095062039e-05, "loss": 2.0451, "step": 3975 }, { "epoch": 0.02, "learning_rate": 9.989310572531658e-05, "loss": 2.0308, "step": 3980 }, { "epoch": 0.02, "learning_rate": 9.98928301456188e-05, "loss": 2.0893, "step": 3985 }, { "epoch": 0.02, "learning_rate": 9.989255421152899e-05, "loss": 2.0603, "step": 3990 }, { "epoch": 0.02, "learning_rate": 9.989227792304912e-05, "loss": 1.9256, "step": 3995 }, { "epoch": 0.02, "learning_rate": 9.989200128018115e-05, "loss": 2.1013, "step": 4000 }, { "epoch": 0.02, "learning_rate": 9.989172428292703e-05, "loss": 2.1088, "step": 4005 }, { "epoch": 0.02, "learning_rate": 9.989144693128874e-05, "loss": 2.0657, "step": 4010 }, { "epoch": 0.02, "learning_rate": 9.989116922526826e-05, "loss": 2.0084, "step": 4015 }, { "epoch": 0.02, "learning_rate": 9.989089116486756e-05, "loss": 2.0377, "step": 4020 }, { "epoch": 0.02, "learning_rate": 9.98906127500886e-05, "loss": 2.0226, "step": 4025 }, { "epoch": 0.02, "learning_rate": 9.989033398093337e-05, "loss": 2.1829, "step": 4030 }, { "epoch": 0.02, "learning_rate": 9.989005485740385e-05, "loss": 2.0284, "step": 4035 }, { "epoch": 0.02, "learning_rate": 9.988977537950201e-05, "loss": 2.0763, "step": 4040 }, { "epoch": 0.02, "learning_rate": 9.988949554722986e-05, "loss": 2.0076, "step": 4045 }, { "epoch": 0.02, "learning_rate": 9.988921536058935e-05, "loss": 2.0946, "step": 4050 }, { "epoch": 0.02, "learning_rate": 9.98889348195825e-05, "loss": 2.0773, "step": 4055 }, { "epoch": 0.02, "learning_rate": 9.988865392421129e-05, "loss": 2.0941, "step": 4060 }, { "epoch": 0.02, "learning_rate": 9.988837267447773e-05, "loss": 2.0708, "step": 4065 }, { "epoch": 0.02, "learning_rate": 9.988809107038382e-05, "loss": 1.9459, "step": 4070 }, { "epoch": 0.02, "learning_rate": 9.988780911193152e-05, "loss": 2.1198, "step": 4075 }, { "epoch": 0.02, "learning_rate": 9.988752679912288e-05, "loss": 2.0402, "step": 4080 }, { "epoch": 0.02, "learning_rate": 9.988724413195987e-05, "loss": 2.0241, "step": 4085 }, { "epoch": 0.02, "learning_rate": 9.988696111044454e-05, "loss": 2.0197, "step": 4090 }, { "epoch": 0.02, "learning_rate": 9.988667773457885e-05, "loss": 2.1362, "step": 4095 }, { "epoch": 0.02, "learning_rate": 9.988639400436483e-05, "loss": 1.9679, "step": 4100 }, { "epoch": 0.02, "learning_rate": 9.988610991980453e-05, "loss": 2.0068, "step": 4105 }, { "epoch": 0.02, "learning_rate": 9.988582548089991e-05, "loss": 2.1016, "step": 4110 }, { "epoch": 0.02, "learning_rate": 9.988554068765305e-05, "loss": 2.0239, "step": 4115 }, { "epoch": 0.02, "learning_rate": 9.988525554006594e-05, "loss": 2.0423, "step": 4120 }, { "epoch": 0.02, "learning_rate": 9.988497003814059e-05, "loss": 2.0547, "step": 4125 }, { "epoch": 0.02, "learning_rate": 9.988468418187905e-05, "loss": 2.0756, "step": 4130 }, { "epoch": 0.02, "learning_rate": 9.988439797128335e-05, "loss": 2.0766, "step": 4135 }, { "epoch": 0.02, "learning_rate": 9.988411140635553e-05, "loss": 2.1552, "step": 4140 }, { "epoch": 0.02, "learning_rate": 9.98838244870976e-05, "loss": 2.0678, "step": 4145 }, { "epoch": 0.02, "learning_rate": 9.988353721351162e-05, "loss": 2.0704, "step": 4150 }, { "epoch": 0.02, "learning_rate": 9.988324958559963e-05, "loss": 2.1182, "step": 4155 }, { "epoch": 0.02, "learning_rate": 9.988296160336369e-05, "loss": 2.1208, "step": 4160 }, { "epoch": 0.02, "learning_rate": 9.988267326680578e-05, "loss": 2.068, "step": 4165 }, { "epoch": 0.02, "learning_rate": 9.9882384575928e-05, "loss": 2.0726, "step": 4170 }, { "epoch": 0.02, "learning_rate": 9.98820955307324e-05, "loss": 2.0966, "step": 4175 }, { "epoch": 0.02, "learning_rate": 9.988180613122102e-05, "loss": 2.0723, "step": 4180 }, { "epoch": 0.02, "learning_rate": 9.988151637739594e-05, "loss": 2.0793, "step": 4185 }, { "epoch": 0.02, "learning_rate": 9.988122626925918e-05, "loss": 2.0673, "step": 4190 }, { "epoch": 0.02, "learning_rate": 9.988093580681282e-05, "loss": 2.0706, "step": 4195 }, { "epoch": 0.02, "learning_rate": 9.988064499005892e-05, "loss": 2.0263, "step": 4200 }, { "epoch": 0.02, "learning_rate": 9.988035381899956e-05, "loss": 2.0712, "step": 4205 }, { "epoch": 0.02, "learning_rate": 9.988006229363677e-05, "loss": 2.084, "step": 4210 }, { "epoch": 0.02, "learning_rate": 9.987977041397267e-05, "loss": 1.9338, "step": 4215 }, { "epoch": 0.02, "learning_rate": 9.987947818000931e-05, "loss": 2.0734, "step": 4220 }, { "epoch": 0.02, "learning_rate": 9.987918559174875e-05, "loss": 1.9958, "step": 4225 }, { "epoch": 0.02, "learning_rate": 9.98788926491931e-05, "loss": 2.0333, "step": 4230 }, { "epoch": 0.02, "learning_rate": 9.987859935234443e-05, "loss": 2.0458, "step": 4235 }, { "epoch": 0.02, "learning_rate": 9.98783057012048e-05, "loss": 2.0196, "step": 4240 }, { "epoch": 0.02, "learning_rate": 9.987801169577633e-05, "loss": 2.0685, "step": 4245 }, { "epoch": 0.02, "learning_rate": 9.987771733606108e-05, "loss": 1.9578, "step": 4250 }, { "epoch": 0.02, "learning_rate": 9.987742262206116e-05, "loss": 2.0413, "step": 4255 }, { "epoch": 0.02, "learning_rate": 9.987712755377865e-05, "loss": 2.0978, "step": 4260 }, { "epoch": 0.02, "learning_rate": 9.987683213121566e-05, "loss": 2.0103, "step": 4265 }, { "epoch": 0.02, "learning_rate": 9.987653635437428e-05, "loss": 1.9552, "step": 4270 }, { "epoch": 0.02, "learning_rate": 9.98762402232566e-05, "loss": 1.991, "step": 4275 }, { "epoch": 0.02, "learning_rate": 9.987594373786475e-05, "loss": 2.013, "step": 4280 }, { "epoch": 0.02, "learning_rate": 9.987564689820082e-05, "loss": 2.0177, "step": 4285 }, { "epoch": 0.02, "learning_rate": 9.98753497042669e-05, "loss": 2.1126, "step": 4290 }, { "epoch": 0.02, "learning_rate": 9.987505215606514e-05, "loss": 1.9554, "step": 4295 }, { "epoch": 0.02, "learning_rate": 9.987475425359762e-05, "loss": 2.0723, "step": 4300 }, { "epoch": 0.02, "learning_rate": 9.987445599686649e-05, "loss": 2.1134, "step": 4305 }, { "epoch": 0.02, "learning_rate": 9.987415738587385e-05, "loss": 2.1237, "step": 4310 }, { "epoch": 0.02, "learning_rate": 9.98738584206218e-05, "loss": 2.0193, "step": 4315 }, { "epoch": 0.02, "learning_rate": 9.987355910111249e-05, "loss": 2.0684, "step": 4320 }, { "epoch": 0.02, "learning_rate": 9.987325942734804e-05, "loss": 2.0232, "step": 4325 }, { "epoch": 0.02, "learning_rate": 9.987295939933059e-05, "loss": 2.0774, "step": 4330 }, { "epoch": 0.02, "learning_rate": 9.987265901706224e-05, "loss": 2.0294, "step": 4335 }, { "epoch": 0.02, "learning_rate": 9.987235828054515e-05, "loss": 1.9743, "step": 4340 }, { "epoch": 0.02, "learning_rate": 9.987205718978146e-05, "loss": 2.0636, "step": 4345 }, { "epoch": 0.02, "learning_rate": 9.987175574477327e-05, "loss": 2.089, "step": 4350 }, { "epoch": 0.02, "learning_rate": 9.987145394552278e-05, "loss": 2.0946, "step": 4355 }, { "epoch": 0.02, "learning_rate": 9.987115179203208e-05, "loss": 2.0423, "step": 4360 }, { "epoch": 0.02, "learning_rate": 9.987084928430334e-05, "loss": 2.0878, "step": 4365 }, { "epoch": 0.02, "learning_rate": 9.987054642233872e-05, "loss": 2.0948, "step": 4370 }, { "epoch": 0.02, "learning_rate": 9.987024320614036e-05, "loss": 2.1751, "step": 4375 }, { "epoch": 0.02, "learning_rate": 9.98699396357104e-05, "loss": 2.0248, "step": 4380 }, { "epoch": 0.02, "learning_rate": 9.986963571105102e-05, "loss": 2.0241, "step": 4385 }, { "epoch": 0.02, "learning_rate": 9.986933143216438e-05, "loss": 2.0722, "step": 4390 }, { "epoch": 0.02, "learning_rate": 9.986902679905259e-05, "loss": 2.05, "step": 4395 }, { "epoch": 0.02, "learning_rate": 9.986872181171789e-05, "loss": 2.0651, "step": 4400 }, { "epoch": 0.02, "learning_rate": 9.986841647016238e-05, "loss": 2.0344, "step": 4405 }, { "epoch": 0.02, "learning_rate": 9.986811077438827e-05, "loss": 2.0755, "step": 4410 }, { "epoch": 0.02, "learning_rate": 9.986780472439771e-05, "loss": 2.0046, "step": 4415 }, { "epoch": 0.02, "learning_rate": 9.986749832019291e-05, "loss": 2.019, "step": 4420 }, { "epoch": 0.02, "learning_rate": 9.9867191561776e-05, "loss": 1.983, "step": 4425 }, { "epoch": 0.02, "learning_rate": 9.986688444914918e-05, "loss": 2.1184, "step": 4430 }, { "epoch": 0.02, "learning_rate": 9.986657698231462e-05, "loss": 2.0049, "step": 4435 }, { "epoch": 0.02, "learning_rate": 9.986626916127454e-05, "loss": 2.0521, "step": 4440 }, { "epoch": 0.02, "learning_rate": 9.986596098603109e-05, "loss": 2.0542, "step": 4445 }, { "epoch": 0.02, "learning_rate": 9.986565245658646e-05, "loss": 2.0248, "step": 4450 }, { "epoch": 0.02, "learning_rate": 9.986534357294286e-05, "loss": 2.0343, "step": 4455 }, { "epoch": 0.02, "learning_rate": 9.986503433510246e-05, "loss": 1.9742, "step": 4460 }, { "epoch": 0.02, "learning_rate": 9.986472474306748e-05, "loss": 2.0847, "step": 4465 }, { "epoch": 0.02, "learning_rate": 9.986441479684012e-05, "loss": 2.0323, "step": 4470 }, { "epoch": 0.02, "learning_rate": 9.986410449642256e-05, "loss": 2.0379, "step": 4475 }, { "epoch": 0.02, "learning_rate": 9.986379384181702e-05, "loss": 2.0984, "step": 4480 }, { "epoch": 0.02, "learning_rate": 9.98634828330257e-05, "loss": 2.1112, "step": 4485 }, { "epoch": 0.02, "learning_rate": 9.986317147005082e-05, "loss": 2.0416, "step": 4490 }, { "epoch": 0.02, "learning_rate": 9.986285975289458e-05, "loss": 2.065, "step": 4495 }, { "epoch": 0.02, "learning_rate": 9.986254768155919e-05, "loss": 2.0597, "step": 4500 }, { "epoch": 0.02, "learning_rate": 9.986223525604688e-05, "loss": 2.0483, "step": 4505 }, { "epoch": 0.02, "learning_rate": 9.986192247635986e-05, "loss": 2.0859, "step": 4510 }, { "epoch": 0.02, "learning_rate": 9.986160934250038e-05, "loss": 2.0388, "step": 4515 }, { "epoch": 0.02, "learning_rate": 9.98612958544706e-05, "loss": 2.047, "step": 4520 }, { "epoch": 0.02, "learning_rate": 9.98609820122728e-05, "loss": 2.0379, "step": 4525 }, { "epoch": 0.02, "learning_rate": 9.98606678159092e-05, "loss": 2.0987, "step": 4530 }, { "epoch": 0.02, "learning_rate": 9.986035326538203e-05, "loss": 2.0693, "step": 4535 }, { "epoch": 0.02, "learning_rate": 9.986003836069352e-05, "loss": 2.1115, "step": 4540 }, { "epoch": 0.02, "learning_rate": 9.98597231018459e-05, "loss": 2.0416, "step": 4545 }, { "epoch": 0.02, "learning_rate": 9.985940748884142e-05, "loss": 2.0564, "step": 4550 }, { "epoch": 0.02, "learning_rate": 9.985909152168233e-05, "loss": 2.0218, "step": 4555 }, { "epoch": 0.02, "learning_rate": 9.985877520037086e-05, "loss": 2.1198, "step": 4560 }, { "epoch": 0.02, "learning_rate": 9.985845852490925e-05, "loss": 2.1291, "step": 4565 }, { "epoch": 0.02, "learning_rate": 9.985814149529978e-05, "loss": 2.0943, "step": 4570 }, { "epoch": 0.02, "learning_rate": 9.985782411154465e-05, "loss": 1.9922, "step": 4575 }, { "epoch": 0.02, "learning_rate": 9.985750637364617e-05, "loss": 2.1209, "step": 4580 }, { "epoch": 0.02, "learning_rate": 9.985718828160658e-05, "loss": 2.0295, "step": 4585 }, { "epoch": 0.02, "learning_rate": 9.98568698354281e-05, "loss": 1.9902, "step": 4590 }, { "epoch": 0.02, "learning_rate": 9.985655103511306e-05, "loss": 1.9848, "step": 4595 }, { "epoch": 0.02, "learning_rate": 9.985623188066366e-05, "loss": 2.1046, "step": 4600 }, { "epoch": 0.02, "learning_rate": 9.985591237208222e-05, "loss": 2.1052, "step": 4605 }, { "epoch": 0.02, "learning_rate": 9.985559250937097e-05, "loss": 2.0431, "step": 4610 }, { "epoch": 0.02, "learning_rate": 9.98552722925322e-05, "loss": 2.0371, "step": 4615 }, { "epoch": 0.02, "learning_rate": 9.985495172156818e-05, "loss": 2.0619, "step": 4620 }, { "epoch": 0.02, "learning_rate": 9.985463079648118e-05, "loss": 1.9874, "step": 4625 }, { "epoch": 0.02, "learning_rate": 9.985430951727352e-05, "loss": 2.0278, "step": 4630 }, { "epoch": 0.02, "learning_rate": 9.985398788394742e-05, "loss": 1.9961, "step": 4635 }, { "epoch": 0.02, "learning_rate": 9.985366589650521e-05, "loss": 2.0804, "step": 4640 }, { "epoch": 0.02, "learning_rate": 9.985334355494916e-05, "loss": 2.0509, "step": 4645 }, { "epoch": 0.02, "learning_rate": 9.985302085928157e-05, "loss": 2.0934, "step": 4650 }, { "epoch": 0.02, "learning_rate": 9.98526978095047e-05, "loss": 2.0751, "step": 4655 }, { "epoch": 0.02, "learning_rate": 9.985237440562088e-05, "loss": 2.1077, "step": 4660 }, { "epoch": 0.03, "learning_rate": 9.98520506476324e-05, "loss": 2.1009, "step": 4665 }, { "epoch": 0.03, "learning_rate": 9.985172653554156e-05, "loss": 2.0445, "step": 4670 }, { "epoch": 0.03, "learning_rate": 9.985140206935064e-05, "loss": 2.0885, "step": 4675 }, { "epoch": 0.03, "learning_rate": 9.985107724906198e-05, "loss": 2.0894, "step": 4680 }, { "epoch": 0.03, "learning_rate": 9.985075207467787e-05, "loss": 2.036, "step": 4685 }, { "epoch": 0.03, "learning_rate": 9.985042654620061e-05, "loss": 2.0781, "step": 4690 }, { "epoch": 0.03, "learning_rate": 9.985010066363251e-05, "loss": 2.1166, "step": 4695 }, { "epoch": 0.03, "learning_rate": 9.984977442697591e-05, "loss": 2.0854, "step": 4700 }, { "epoch": 0.03, "learning_rate": 9.984944783623312e-05, "loss": 1.9965, "step": 4705 }, { "epoch": 0.03, "learning_rate": 9.984912089140644e-05, "loss": 2.0438, "step": 4710 }, { "epoch": 0.03, "learning_rate": 9.984879359249822e-05, "loss": 2.015, "step": 4715 }, { "epoch": 0.03, "learning_rate": 9.984846593951077e-05, "loss": 2.1087, "step": 4720 }, { "epoch": 0.03, "learning_rate": 9.98481379324464e-05, "loss": 2.0686, "step": 4725 }, { "epoch": 0.03, "learning_rate": 9.984780957130746e-05, "loss": 1.9714, "step": 4730 }, { "epoch": 0.03, "learning_rate": 9.98474808560963e-05, "loss": 2.0992, "step": 4735 }, { "epoch": 0.03, "learning_rate": 9.984715178681521e-05, "loss": 2.0685, "step": 4740 }, { "epoch": 0.03, "learning_rate": 9.984682236346657e-05, "loss": 2.1039, "step": 4745 }, { "epoch": 0.03, "learning_rate": 9.98464925860527e-05, "loss": 2.0557, "step": 4750 }, { "epoch": 0.03, "learning_rate": 9.984616245457595e-05, "loss": 2.0004, "step": 4755 }, { "epoch": 0.03, "learning_rate": 9.984583196903864e-05, "loss": 2.0354, "step": 4760 }, { "epoch": 0.03, "learning_rate": 9.984550112944315e-05, "loss": 2.0522, "step": 4765 }, { "epoch": 0.03, "learning_rate": 9.98451699357918e-05, "loss": 1.9686, "step": 4770 }, { "epoch": 0.03, "learning_rate": 9.984483838808698e-05, "loss": 2.0692, "step": 4775 }, { "epoch": 0.03, "learning_rate": 9.984450648633103e-05, "loss": 2.0019, "step": 4780 }, { "epoch": 0.03, "learning_rate": 9.984417423052629e-05, "loss": 2.0711, "step": 4785 }, { "epoch": 0.03, "learning_rate": 9.984384162067513e-05, "loss": 1.956, "step": 4790 }, { "epoch": 0.03, "learning_rate": 9.984350865677992e-05, "loss": 2.0397, "step": 4795 }, { "epoch": 0.03, "learning_rate": 9.984317533884303e-05, "loss": 2.1375, "step": 4800 }, { "epoch": 0.03, "learning_rate": 9.984284166686679e-05, "loss": 2.1349, "step": 4805 }, { "epoch": 0.03, "learning_rate": 9.984250764085361e-05, "loss": 2.056, "step": 4810 }, { "epoch": 0.03, "learning_rate": 9.984217326080585e-05, "loss": 2.0068, "step": 4815 }, { "epoch": 0.03, "learning_rate": 9.98418385267259e-05, "loss": 2.058, "step": 4820 }, { "epoch": 0.03, "learning_rate": 9.984150343861611e-05, "loss": 2.0381, "step": 4825 }, { "epoch": 0.03, "learning_rate": 9.984116799647887e-05, "loss": 1.994, "step": 4830 }, { "epoch": 0.03, "learning_rate": 9.984083220031657e-05, "loss": 2.1349, "step": 4835 }, { "epoch": 0.03, "learning_rate": 9.984049605013159e-05, "loss": 2.0791, "step": 4840 }, { "epoch": 0.03, "learning_rate": 9.984015954592631e-05, "loss": 2.0391, "step": 4845 }, { "epoch": 0.03, "learning_rate": 9.983982268770314e-05, "loss": 2.0749, "step": 4850 }, { "epoch": 0.03, "learning_rate": 9.983948547546445e-05, "loss": 2.1045, "step": 4855 }, { "epoch": 0.03, "learning_rate": 9.983914790921266e-05, "loss": 1.9671, "step": 4860 }, { "epoch": 0.03, "learning_rate": 9.983880998895014e-05, "loss": 2.007, "step": 4865 }, { "epoch": 0.03, "learning_rate": 9.983847171467932e-05, "loss": 2.0749, "step": 4870 }, { "epoch": 0.03, "learning_rate": 9.983813308640258e-05, "loss": 2.1148, "step": 4875 }, { "epoch": 0.03, "learning_rate": 9.983779410412233e-05, "loss": 1.9765, "step": 4880 }, { "epoch": 0.03, "learning_rate": 9.983745476784099e-05, "loss": 1.9868, "step": 4885 }, { "epoch": 0.03, "learning_rate": 9.983711507756095e-05, "loss": 2.0202, "step": 4890 }, { "epoch": 0.03, "learning_rate": 9.983677503328463e-05, "loss": 2.0758, "step": 4895 }, { "epoch": 0.03, "learning_rate": 9.983643463501446e-05, "loss": 2.0468, "step": 4900 }, { "epoch": 0.03, "learning_rate": 9.983609388275286e-05, "loss": 2.1025, "step": 4905 }, { "epoch": 0.03, "learning_rate": 9.983575277650221e-05, "loss": 2.0673, "step": 4910 }, { "epoch": 0.03, "learning_rate": 9.983541131626497e-05, "loss": 1.9581, "step": 4915 }, { "epoch": 0.03, "learning_rate": 9.983506950204356e-05, "loss": 2.0071, "step": 4920 }, { "epoch": 0.03, "learning_rate": 9.98347273338404e-05, "loss": 1.9986, "step": 4925 }, { "epoch": 0.03, "learning_rate": 9.983438481165793e-05, "loss": 2.0449, "step": 4930 }, { "epoch": 0.03, "learning_rate": 9.983404193549856e-05, "loss": 2.0598, "step": 4935 }, { "epoch": 0.03, "learning_rate": 9.983369870536477e-05, "loss": 2.0549, "step": 4940 }, { "epoch": 0.03, "learning_rate": 9.983335512125896e-05, "loss": 2.0398, "step": 4945 }, { "epoch": 0.03, "learning_rate": 9.983301118318357e-05, "loss": 2.0025, "step": 4950 }, { "epoch": 0.03, "learning_rate": 9.983266689114105e-05, "loss": 2.0668, "step": 4955 }, { "epoch": 0.03, "learning_rate": 9.983232224513387e-05, "loss": 2.057, "step": 4960 }, { "epoch": 0.03, "learning_rate": 9.983197724516444e-05, "loss": 2.039, "step": 4965 }, { "epoch": 0.03, "learning_rate": 9.983163189123523e-05, "loss": 1.998, "step": 4970 }, { "epoch": 0.03, "learning_rate": 9.983128618334871e-05, "loss": 2.0518, "step": 4975 }, { "epoch": 0.03, "learning_rate": 9.98309401215073e-05, "loss": 2.0248, "step": 4980 }, { "epoch": 0.03, "learning_rate": 9.983059370571348e-05, "loss": 2.016, "step": 4985 }, { "epoch": 0.03, "learning_rate": 9.983024693596969e-05, "loss": 2.0439, "step": 4990 }, { "epoch": 0.03, "learning_rate": 9.982989981227843e-05, "loss": 2.0758, "step": 4995 }, { "epoch": 0.03, "learning_rate": 9.982955233464213e-05, "loss": 2.0772, "step": 5000 }, { "epoch": 0.03, "learning_rate": 9.982920450306328e-05, "loss": 2.0312, "step": 5005 }, { "epoch": 0.03, "learning_rate": 9.982885631754435e-05, "loss": 2.024, "step": 5010 }, { "epoch": 0.03, "learning_rate": 9.982850777808779e-05, "loss": 2.0497, "step": 5015 }, { "epoch": 0.03, "learning_rate": 9.98281588846961e-05, "loss": 2.0138, "step": 5020 }, { "epoch": 0.03, "learning_rate": 9.982780963737174e-05, "loss": 2.0547, "step": 5025 }, { "epoch": 0.03, "learning_rate": 9.982746003611722e-05, "loss": 2.0544, "step": 5030 }, { "epoch": 0.03, "learning_rate": 9.9827110080935e-05, "loss": 2.0458, "step": 5035 }, { "epoch": 0.03, "learning_rate": 9.982675977182755e-05, "loss": 2.0618, "step": 5040 }, { "epoch": 0.03, "learning_rate": 9.98264091087974e-05, "loss": 2.0118, "step": 5045 }, { "epoch": 0.03, "learning_rate": 9.982605809184702e-05, "loss": 2.0365, "step": 5050 }, { "epoch": 0.03, "learning_rate": 9.982570672097888e-05, "loss": 2.0, "step": 5055 }, { "epoch": 0.03, "learning_rate": 9.982535499619551e-05, "loss": 1.9969, "step": 5060 }, { "epoch": 0.03, "learning_rate": 9.982500291749941e-05, "loss": 2.0542, "step": 5065 }, { "epoch": 0.03, "learning_rate": 9.982465048489304e-05, "loss": 2.1168, "step": 5070 }, { "epoch": 0.03, "learning_rate": 9.982429769837895e-05, "loss": 2.0345, "step": 5075 }, { "epoch": 0.03, "learning_rate": 9.982394455795962e-05, "loss": 2.0399, "step": 5080 }, { "epoch": 0.03, "learning_rate": 9.982359106363756e-05, "loss": 2.0969, "step": 5085 }, { "epoch": 0.03, "learning_rate": 9.982323721541532e-05, "loss": 1.9996, "step": 5090 }, { "epoch": 0.03, "learning_rate": 9.982288301329534e-05, "loss": 2.0568, "step": 5095 }, { "epoch": 0.03, "learning_rate": 9.982252845728017e-05, "loss": 2.0027, "step": 5100 }, { "epoch": 0.03, "learning_rate": 9.982217354737235e-05, "loss": 2.0737, "step": 5105 }, { "epoch": 0.03, "learning_rate": 9.98218182835744e-05, "loss": 2.1, "step": 5110 }, { "epoch": 0.03, "learning_rate": 9.982146266588881e-05, "loss": 2.042, "step": 5115 }, { "epoch": 0.03, "learning_rate": 9.982110669431813e-05, "loss": 2.0809, "step": 5120 }, { "epoch": 0.03, "learning_rate": 9.982075036886487e-05, "loss": 2.0066, "step": 5125 }, { "epoch": 0.03, "learning_rate": 9.982039368953158e-05, "loss": 2.082, "step": 5130 }, { "epoch": 0.03, "learning_rate": 9.982003665632078e-05, "loss": 2.0075, "step": 5135 }, { "epoch": 0.03, "learning_rate": 9.981967926923503e-05, "loss": 1.9794, "step": 5140 }, { "epoch": 0.03, "learning_rate": 9.981932152827685e-05, "loss": 2.1486, "step": 5145 }, { "epoch": 0.03, "learning_rate": 9.981896343344877e-05, "loss": 2.101, "step": 5150 }, { "epoch": 0.03, "learning_rate": 9.981860498475335e-05, "loss": 2.03, "step": 5155 }, { "epoch": 0.03, "learning_rate": 9.981824618219313e-05, "loss": 2.0962, "step": 5160 }, { "epoch": 0.03, "learning_rate": 9.981788702577066e-05, "loss": 2.005, "step": 5165 }, { "epoch": 0.03, "learning_rate": 9.98175275154885e-05, "loss": 2.0122, "step": 5170 }, { "epoch": 0.03, "learning_rate": 9.981716765134918e-05, "loss": 2.0894, "step": 5175 }, { "epoch": 0.03, "learning_rate": 9.981680743335529e-05, "loss": 2.0297, "step": 5180 }, { "epoch": 0.03, "learning_rate": 9.981644686150936e-05, "loss": 1.9625, "step": 5185 }, { "epoch": 0.03, "learning_rate": 9.981608593581396e-05, "loss": 2.0708, "step": 5190 }, { "epoch": 0.03, "learning_rate": 9.981572465627167e-05, "loss": 2.0373, "step": 5195 }, { "epoch": 0.03, "learning_rate": 9.981536302288502e-05, "loss": 2.0149, "step": 5200 }, { "epoch": 0.03, "learning_rate": 9.981500103565662e-05, "loss": 2.0008, "step": 5205 }, { "epoch": 0.03, "learning_rate": 9.9814638694589e-05, "loss": 1.9993, "step": 5210 }, { "epoch": 0.03, "learning_rate": 9.981427599968477e-05, "loss": 2.0359, "step": 5215 }, { "epoch": 0.03, "learning_rate": 9.981391295094649e-05, "loss": 2.0671, "step": 5220 }, { "epoch": 0.03, "learning_rate": 9.981354954837674e-05, "loss": 2.012, "step": 5225 }, { "epoch": 0.03, "learning_rate": 9.981318579197809e-05, "loss": 2.077, "step": 5230 }, { "epoch": 0.03, "learning_rate": 9.981282168175314e-05, "loss": 2.0149, "step": 5235 }, { "epoch": 0.03, "learning_rate": 9.981245721770448e-05, "loss": 2.0238, "step": 5240 }, { "epoch": 0.03, "learning_rate": 9.981209239983469e-05, "loss": 2.1072, "step": 5245 }, { "epoch": 0.03, "learning_rate": 9.981172722814635e-05, "loss": 1.9851, "step": 5250 }, { "epoch": 0.03, "learning_rate": 9.981136170264206e-05, "loss": 2.0358, "step": 5255 }, { "epoch": 0.03, "learning_rate": 9.981099582332444e-05, "loss": 2.0128, "step": 5260 }, { "epoch": 0.03, "learning_rate": 9.981062959019607e-05, "loss": 2.066, "step": 5265 }, { "epoch": 0.03, "learning_rate": 9.981026300325955e-05, "loss": 2.0088, "step": 5270 }, { "epoch": 0.03, "learning_rate": 9.980989606251747e-05, "loss": 2.0478, "step": 5275 }, { "epoch": 0.03, "learning_rate": 9.980952876797247e-05, "loss": 2.107, "step": 5280 }, { "epoch": 0.03, "learning_rate": 9.980916111962713e-05, "loss": 2.0363, "step": 5285 }, { "epoch": 0.03, "learning_rate": 9.980879311748407e-05, "loss": 2.0522, "step": 5290 }, { "epoch": 0.03, "learning_rate": 9.980842476154591e-05, "loss": 2.0539, "step": 5295 }, { "epoch": 0.03, "learning_rate": 9.980805605181527e-05, "loss": 2.0588, "step": 5300 }, { "epoch": 0.03, "learning_rate": 9.980768698829477e-05, "loss": 2.1043, "step": 5305 }, { "epoch": 0.03, "learning_rate": 9.9807317570987e-05, "loss": 2.0878, "step": 5310 }, { "epoch": 0.03, "learning_rate": 9.980694779989461e-05, "loss": 2.0425, "step": 5315 }, { "epoch": 0.03, "learning_rate": 9.980657767502025e-05, "loss": 1.9924, "step": 5320 }, { "epoch": 0.03, "learning_rate": 9.98062071963665e-05, "loss": 2.1004, "step": 5325 }, { "epoch": 0.03, "learning_rate": 9.980583636393602e-05, "loss": 2.0305, "step": 5330 }, { "epoch": 0.03, "learning_rate": 9.980546517773144e-05, "loss": 2.1131, "step": 5335 }, { "epoch": 0.03, "learning_rate": 9.980509363775538e-05, "loss": 2.0027, "step": 5340 }, { "epoch": 0.03, "learning_rate": 9.980472174401053e-05, "loss": 2.1153, "step": 5345 }, { "epoch": 0.03, "learning_rate": 9.980434949649946e-05, "loss": 2.077, "step": 5350 }, { "epoch": 0.03, "learning_rate": 9.980397689522486e-05, "loss": 2.0693, "step": 5355 }, { "epoch": 0.03, "learning_rate": 9.980360394018937e-05, "loss": 2.0223, "step": 5360 }, { "epoch": 0.03, "learning_rate": 9.980323063139563e-05, "loss": 2.0687, "step": 5365 }, { "epoch": 0.03, "learning_rate": 9.98028569688463e-05, "loss": 2.0238, "step": 5370 }, { "epoch": 0.03, "learning_rate": 9.980248295254404e-05, "loss": 2.0387, "step": 5375 }, { "epoch": 0.03, "learning_rate": 9.980210858249149e-05, "loss": 1.9431, "step": 5380 }, { "epoch": 0.03, "learning_rate": 9.980173385869132e-05, "loss": 2.0146, "step": 5385 }, { "epoch": 0.03, "learning_rate": 9.980135878114618e-05, "loss": 2.0662, "step": 5390 }, { "epoch": 0.03, "learning_rate": 9.980098334985875e-05, "loss": 2.0441, "step": 5395 }, { "epoch": 0.03, "learning_rate": 9.98006075648317e-05, "loss": 1.9998, "step": 5400 }, { "epoch": 0.03, "learning_rate": 9.980023142606767e-05, "loss": 2.1095, "step": 5405 }, { "epoch": 0.03, "learning_rate": 9.979985493356936e-05, "loss": 2.0766, "step": 5410 }, { "epoch": 0.03, "learning_rate": 9.979947808733942e-05, "loss": 1.9905, "step": 5415 }, { "epoch": 0.03, "learning_rate": 9.979910088738056e-05, "loss": 2.014, "step": 5420 }, { "epoch": 0.03, "learning_rate": 9.979872333369544e-05, "loss": 2.0737, "step": 5425 }, { "epoch": 0.03, "learning_rate": 9.979834542628675e-05, "loss": 2.0305, "step": 5430 }, { "epoch": 0.03, "learning_rate": 9.979796716515715e-05, "loss": 2.1095, "step": 5435 }, { "epoch": 0.03, "learning_rate": 9.979758855030934e-05, "loss": 2.0303, "step": 5440 }, { "epoch": 0.03, "learning_rate": 9.979720958174603e-05, "loss": 2.0775, "step": 5445 }, { "epoch": 0.03, "learning_rate": 9.979683025946989e-05, "loss": 1.9659, "step": 5450 }, { "epoch": 0.03, "learning_rate": 9.979645058348362e-05, "loss": 1.9939, "step": 5455 }, { "epoch": 0.03, "learning_rate": 9.97960705537899e-05, "loss": 2.0309, "step": 5460 }, { "epoch": 0.03, "learning_rate": 9.979569017039146e-05, "loss": 2.0113, "step": 5465 }, { "epoch": 0.03, "learning_rate": 9.979530943329099e-05, "loss": 2.0891, "step": 5470 }, { "epoch": 0.03, "learning_rate": 9.97949283424912e-05, "loss": 2.068, "step": 5475 }, { "epoch": 0.03, "learning_rate": 9.979454689799477e-05, "loss": 1.9671, "step": 5480 }, { "epoch": 0.03, "learning_rate": 9.979416509980443e-05, "loss": 2.0225, "step": 5485 }, { "epoch": 0.03, "learning_rate": 9.97937829479229e-05, "loss": 2.0431, "step": 5490 }, { "epoch": 0.03, "learning_rate": 9.979340044235289e-05, "loss": 2.0738, "step": 5495 }, { "epoch": 0.03, "learning_rate": 9.979301758309709e-05, "loss": 2.0235, "step": 5500 }, { "epoch": 0.03, "learning_rate": 9.979263437015826e-05, "loss": 2.0222, "step": 5505 }, { "epoch": 0.03, "learning_rate": 9.979225080353909e-05, "loss": 2.0097, "step": 5510 }, { "epoch": 0.03, "learning_rate": 9.979186688324233e-05, "loss": 2.0445, "step": 5515 }, { "epoch": 0.03, "learning_rate": 9.97914826092707e-05, "loss": 2.002, "step": 5520 }, { "epoch": 0.03, "learning_rate": 9.97910979816269e-05, "loss": 2.083, "step": 5525 }, { "epoch": 0.03, "learning_rate": 9.97907130003137e-05, "loss": 2.0754, "step": 5530 }, { "epoch": 0.03, "learning_rate": 9.979032766533382e-05, "loss": 2.0324, "step": 5535 }, { "epoch": 0.03, "learning_rate": 9.978994197669e-05, "loss": 2.0531, "step": 5540 }, { "epoch": 0.03, "learning_rate": 9.978955593438497e-05, "loss": 1.9866, "step": 5545 }, { "epoch": 0.03, "learning_rate": 9.978916953842147e-05, "loss": 1.9406, "step": 5550 }, { "epoch": 0.03, "learning_rate": 9.978878278880228e-05, "loss": 2.0477, "step": 5555 }, { "epoch": 0.03, "learning_rate": 9.978839568553012e-05, "loss": 2.0186, "step": 5560 }, { "epoch": 0.03, "learning_rate": 9.978800822860772e-05, "loss": 2.0568, "step": 5565 }, { "epoch": 0.03, "learning_rate": 9.978762041803787e-05, "loss": 1.9523, "step": 5570 }, { "epoch": 0.03, "learning_rate": 9.97872322538233e-05, "loss": 1.9795, "step": 5575 }, { "epoch": 0.03, "learning_rate": 9.978684373596676e-05, "loss": 1.997, "step": 5580 }, { "epoch": 0.03, "learning_rate": 9.978645486447105e-05, "loss": 2.0578, "step": 5585 }, { "epoch": 0.03, "learning_rate": 9.97860656393389e-05, "loss": 2.0888, "step": 5590 }, { "epoch": 0.03, "learning_rate": 9.978567606057308e-05, "loss": 2.0318, "step": 5595 }, { "epoch": 0.03, "learning_rate": 9.978528612817635e-05, "loss": 1.9641, "step": 5600 }, { "epoch": 0.03, "learning_rate": 9.97848958421515e-05, "loss": 2.0621, "step": 5605 }, { "epoch": 0.03, "learning_rate": 9.978450520250127e-05, "loss": 2.0417, "step": 5610 }, { "epoch": 0.03, "learning_rate": 9.978411420922847e-05, "loss": 2.0543, "step": 5615 }, { "epoch": 0.03, "learning_rate": 9.978372286233586e-05, "loss": 2.0508, "step": 5620 }, { "epoch": 0.03, "learning_rate": 9.978333116182624e-05, "loss": 2.0862, "step": 5625 }, { "epoch": 0.03, "learning_rate": 9.978293910770233e-05, "loss": 2.0201, "step": 5630 }, { "epoch": 0.03, "learning_rate": 9.978254669996701e-05, "loss": 2.0025, "step": 5635 }, { "epoch": 0.03, "learning_rate": 9.978215393862299e-05, "loss": 2.1048, "step": 5640 }, { "epoch": 0.03, "learning_rate": 9.978176082367308e-05, "loss": 2.0377, "step": 5645 }, { "epoch": 0.03, "learning_rate": 9.978136735512009e-05, "loss": 1.9783, "step": 5650 }, { "epoch": 0.03, "learning_rate": 9.97809735329668e-05, "loss": 1.9601, "step": 5655 }, { "epoch": 0.03, "learning_rate": 9.978057935721601e-05, "loss": 2.012, "step": 5660 }, { "epoch": 0.03, "learning_rate": 9.978018482787052e-05, "loss": 2.0197, "step": 5665 }, { "epoch": 0.03, "learning_rate": 9.977978994493314e-05, "loss": 2.0049, "step": 5670 }, { "epoch": 0.03, "learning_rate": 9.977939470840666e-05, "loss": 2.075, "step": 5675 }, { "epoch": 0.03, "learning_rate": 9.97789991182939e-05, "loss": 2.0486, "step": 5680 }, { "epoch": 0.03, "learning_rate": 9.977860317459766e-05, "loss": 2.0422, "step": 5685 }, { "epoch": 0.03, "learning_rate": 9.977820687732075e-05, "loss": 2.0207, "step": 5690 }, { "epoch": 0.03, "learning_rate": 9.977781022646601e-05, "loss": 2.0681, "step": 5695 }, { "epoch": 0.03, "learning_rate": 9.977741322203624e-05, "loss": 2.0077, "step": 5700 }, { "epoch": 0.03, "learning_rate": 9.977701586403425e-05, "loss": 2.0018, "step": 5705 }, { "epoch": 0.03, "learning_rate": 9.977661815246288e-05, "loss": 1.9899, "step": 5710 }, { "epoch": 0.03, "learning_rate": 9.977622008732494e-05, "loss": 1.9763, "step": 5715 }, { "epoch": 0.03, "learning_rate": 9.977582166862328e-05, "loss": 2.0542, "step": 5720 }, { "epoch": 0.03, "learning_rate": 9.97754228963607e-05, "loss": 2.0095, "step": 5725 }, { "epoch": 0.03, "learning_rate": 9.977502377054005e-05, "loss": 1.9889, "step": 5730 }, { "epoch": 0.03, "learning_rate": 9.977462429116416e-05, "loss": 2.0249, "step": 5735 }, { "epoch": 0.03, "learning_rate": 9.977422445823588e-05, "loss": 2.0513, "step": 5740 }, { "epoch": 0.03, "learning_rate": 9.977382427175802e-05, "loss": 1.9523, "step": 5745 }, { "epoch": 0.03, "learning_rate": 9.977342373173347e-05, "loss": 1.9659, "step": 5750 }, { "epoch": 0.03, "learning_rate": 9.977302283816502e-05, "loss": 2.0309, "step": 5755 }, { "epoch": 0.03, "learning_rate": 9.977262159105556e-05, "loss": 1.9858, "step": 5760 }, { "epoch": 0.03, "learning_rate": 9.977221999040791e-05, "loss": 2.0457, "step": 5765 }, { "epoch": 0.03, "learning_rate": 9.977181803622494e-05, "loss": 2.0458, "step": 5770 }, { "epoch": 0.03, "learning_rate": 9.977141572850952e-05, "loss": 2.0099, "step": 5775 }, { "epoch": 0.03, "learning_rate": 9.977101306726448e-05, "loss": 2.0289, "step": 5780 }, { "epoch": 0.03, "learning_rate": 9.977061005249271e-05, "loss": 2.0149, "step": 5785 }, { "epoch": 0.03, "learning_rate": 9.977020668419704e-05, "loss": 2.0387, "step": 5790 }, { "epoch": 0.03, "learning_rate": 9.976980296238034e-05, "loss": 2.0555, "step": 5795 }, { "epoch": 0.03, "learning_rate": 9.976939888704548e-05, "loss": 2.042, "step": 5800 }, { "epoch": 0.03, "learning_rate": 9.976899445819537e-05, "loss": 2.0697, "step": 5805 }, { "epoch": 0.03, "learning_rate": 9.976858967583282e-05, "loss": 1.9781, "step": 5810 }, { "epoch": 0.03, "learning_rate": 9.976818453996073e-05, "loss": 2.0167, "step": 5815 }, { "epoch": 0.03, "learning_rate": 9.976777905058199e-05, "loss": 2.0748, "step": 5820 }, { "epoch": 0.03, "learning_rate": 9.976737320769948e-05, "loss": 1.9844, "step": 5825 }, { "epoch": 0.03, "learning_rate": 9.976696701131607e-05, "loss": 2.0306, "step": 5830 }, { "epoch": 0.03, "learning_rate": 9.976656046143465e-05, "loss": 1.9807, "step": 5835 }, { "epoch": 0.03, "learning_rate": 9.976615355805811e-05, "loss": 2.0541, "step": 5840 }, { "epoch": 0.03, "learning_rate": 9.976574630118934e-05, "loss": 1.9824, "step": 5845 }, { "epoch": 0.03, "learning_rate": 9.976533869083122e-05, "loss": 2.0362, "step": 5850 }, { "epoch": 0.03, "learning_rate": 9.976493072698665e-05, "loss": 2.0351, "step": 5855 }, { "epoch": 0.03, "learning_rate": 9.976452240965854e-05, "loss": 2.0557, "step": 5860 }, { "epoch": 0.03, "learning_rate": 9.97641137388498e-05, "loss": 2.0135, "step": 5865 }, { "epoch": 0.03, "learning_rate": 9.976370471456332e-05, "loss": 2.02, "step": 5870 }, { "epoch": 0.03, "learning_rate": 9.976329533680197e-05, "loss": 2.0537, "step": 5875 }, { "epoch": 0.03, "learning_rate": 9.976288560556871e-05, "loss": 2.1182, "step": 5880 }, { "epoch": 0.03, "learning_rate": 9.976247552086644e-05, "loss": 2.0621, "step": 5885 }, { "epoch": 0.03, "learning_rate": 9.976206508269806e-05, "loss": 1.9764, "step": 5890 }, { "epoch": 0.03, "learning_rate": 9.976165429106647e-05, "loss": 1.9988, "step": 5895 }, { "epoch": 0.03, "learning_rate": 9.976124314597461e-05, "loss": 2.0085, "step": 5900 }, { "epoch": 0.03, "learning_rate": 9.976083164742541e-05, "loss": 2.0583, "step": 5905 }, { "epoch": 0.03, "learning_rate": 9.976041979542178e-05, "loss": 1.9886, "step": 5910 }, { "epoch": 0.03, "learning_rate": 9.976000758996664e-05, "loss": 2.0606, "step": 5915 }, { "epoch": 0.03, "learning_rate": 9.975959503106292e-05, "loss": 1.994, "step": 5920 }, { "epoch": 0.03, "learning_rate": 9.975918211871356e-05, "loss": 2.039, "step": 5925 }, { "epoch": 0.03, "learning_rate": 9.975876885292148e-05, "loss": 2.0486, "step": 5930 }, { "epoch": 0.03, "learning_rate": 9.975835523368962e-05, "loss": 2.0669, "step": 5935 }, { "epoch": 0.03, "learning_rate": 9.975794126102094e-05, "loss": 1.9435, "step": 5940 }, { "epoch": 0.03, "learning_rate": 9.975752693491833e-05, "loss": 1.9804, "step": 5945 }, { "epoch": 0.03, "learning_rate": 9.975711225538478e-05, "loss": 2.0441, "step": 5950 }, { "epoch": 0.03, "learning_rate": 9.975669722242323e-05, "loss": 1.9861, "step": 5955 }, { "epoch": 0.03, "learning_rate": 9.97562818360366e-05, "loss": 1.9587, "step": 5960 }, { "epoch": 0.03, "learning_rate": 9.975586609622787e-05, "loss": 1.9777, "step": 5965 }, { "epoch": 0.03, "learning_rate": 9.975545000299997e-05, "loss": 2.0714, "step": 5970 }, { "epoch": 0.03, "learning_rate": 9.975503355635586e-05, "loss": 1.9984, "step": 5975 }, { "epoch": 0.03, "learning_rate": 9.975461675629852e-05, "loss": 2.0411, "step": 5980 }, { "epoch": 0.03, "learning_rate": 9.97541996028309e-05, "loss": 2.0607, "step": 5985 }, { "epoch": 0.03, "learning_rate": 9.975378209595594e-05, "loss": 2.0601, "step": 5990 }, { "epoch": 0.03, "learning_rate": 9.975336423567663e-05, "loss": 2.0539, "step": 5995 }, { "epoch": 0.03, "learning_rate": 9.975294602199592e-05, "loss": 1.9951, "step": 6000 }, { "epoch": 0.03, "learning_rate": 9.975252745491681e-05, "loss": 2.0518, "step": 6005 }, { "epoch": 0.03, "learning_rate": 9.975210853444223e-05, "loss": 2.0102, "step": 6010 }, { "epoch": 0.03, "learning_rate": 9.975168926057521e-05, "loss": 2.0358, "step": 6015 }, { "epoch": 0.03, "learning_rate": 9.975126963331868e-05, "loss": 2.1038, "step": 6020 }, { "epoch": 0.03, "learning_rate": 9.975084965267565e-05, "loss": 2.0379, "step": 6025 }, { "epoch": 0.03, "learning_rate": 9.975042931864908e-05, "loss": 2.0255, "step": 6030 }, { "epoch": 0.03, "learning_rate": 9.9750008631242e-05, "loss": 2.0661, "step": 6035 }, { "epoch": 0.03, "learning_rate": 9.974958759045734e-05, "loss": 2.0662, "step": 6040 }, { "epoch": 0.03, "learning_rate": 9.974916619629812e-05, "loss": 1.9876, "step": 6045 }, { "epoch": 0.03, "learning_rate": 9.974874444876733e-05, "loss": 1.9667, "step": 6050 }, { "epoch": 0.03, "learning_rate": 9.974832234786797e-05, "loss": 1.967, "step": 6055 }, { "epoch": 0.03, "learning_rate": 9.974789989360303e-05, "loss": 2.0128, "step": 6060 }, { "epoch": 0.03, "learning_rate": 9.974747708597552e-05, "loss": 2.041, "step": 6065 }, { "epoch": 0.03, "learning_rate": 9.974705392498844e-05, "loss": 1.9842, "step": 6070 }, { "epoch": 0.03, "learning_rate": 9.974663041064478e-05, "loss": 1.9994, "step": 6075 }, { "epoch": 0.03, "learning_rate": 9.974620654294757e-05, "loss": 2.0904, "step": 6080 }, { "epoch": 0.03, "learning_rate": 9.974578232189982e-05, "loss": 1.9493, "step": 6085 }, { "epoch": 0.03, "learning_rate": 9.974535774750453e-05, "loss": 2.0644, "step": 6090 }, { "epoch": 0.03, "learning_rate": 9.974493281976471e-05, "loss": 1.9976, "step": 6095 }, { "epoch": 0.03, "learning_rate": 9.97445075386834e-05, "loss": 2.0306, "step": 6100 }, { "epoch": 0.03, "learning_rate": 9.974408190426362e-05, "loss": 1.9883, "step": 6105 }, { "epoch": 0.03, "learning_rate": 9.974365591650839e-05, "loss": 2.0261, "step": 6110 }, { "epoch": 0.03, "learning_rate": 9.974322957542071e-05, "loss": 1.9964, "step": 6115 }, { "epoch": 0.03, "learning_rate": 9.974280288100363e-05, "loss": 2.0024, "step": 6120 }, { "epoch": 0.03, "learning_rate": 9.974237583326019e-05, "loss": 2.0565, "step": 6125 }, { "epoch": 0.03, "learning_rate": 9.974194843219341e-05, "loss": 2.0345, "step": 6130 }, { "epoch": 0.03, "learning_rate": 9.974152067780634e-05, "loss": 2.0108, "step": 6135 }, { "epoch": 0.03, "learning_rate": 9.974109257010198e-05, "loss": 1.9535, "step": 6140 }, { "epoch": 0.03, "learning_rate": 9.974066410908342e-05, "loss": 2.001, "step": 6145 }, { "epoch": 0.03, "learning_rate": 9.974023529475367e-05, "loss": 1.9255, "step": 6150 }, { "epoch": 0.03, "learning_rate": 9.973980612711579e-05, "loss": 2.0138, "step": 6155 }, { "epoch": 0.03, "learning_rate": 9.973937660617283e-05, "loss": 2.0027, "step": 6160 }, { "epoch": 0.03, "learning_rate": 9.973894673192782e-05, "loss": 1.9745, "step": 6165 }, { "epoch": 0.03, "learning_rate": 9.973851650438385e-05, "loss": 1.9862, "step": 6170 }, { "epoch": 0.03, "learning_rate": 9.973808592354394e-05, "loss": 2.0503, "step": 6175 }, { "epoch": 0.03, "learning_rate": 9.973765498941118e-05, "loss": 2.0841, "step": 6180 }, { "epoch": 0.03, "learning_rate": 9.97372237019886e-05, "loss": 1.9803, "step": 6185 }, { "epoch": 0.03, "learning_rate": 9.97367920612793e-05, "loss": 2.0016, "step": 6190 }, { "epoch": 0.03, "learning_rate": 9.973636006728631e-05, "loss": 2.0594, "step": 6195 }, { "epoch": 0.03, "learning_rate": 9.97359277200127e-05, "loss": 2.0383, "step": 6200 }, { "epoch": 0.03, "learning_rate": 9.973549501946157e-05, "loss": 2.0488, "step": 6205 }, { "epoch": 0.03, "learning_rate": 9.973506196563598e-05, "loss": 2.0141, "step": 6210 }, { "epoch": 0.03, "learning_rate": 9.9734628558539e-05, "loss": 1.9639, "step": 6215 }, { "epoch": 0.03, "learning_rate": 9.97341947981737e-05, "loss": 1.9677, "step": 6220 }, { "epoch": 0.03, "learning_rate": 9.973376068454319e-05, "loss": 2.003, "step": 6225 }, { "epoch": 0.03, "learning_rate": 9.973332621765054e-05, "loss": 1.989, "step": 6230 }, { "epoch": 0.03, "learning_rate": 9.973289139749882e-05, "loss": 2.0069, "step": 6235 }, { "epoch": 0.03, "learning_rate": 9.973245622409113e-05, "loss": 2.0557, "step": 6240 }, { "epoch": 0.03, "learning_rate": 9.973202069743057e-05, "loss": 1.9541, "step": 6245 }, { "epoch": 0.03, "learning_rate": 9.973158481752023e-05, "loss": 2.041, "step": 6250 }, { "epoch": 0.03, "learning_rate": 9.973114858436318e-05, "loss": 1.9581, "step": 6255 }, { "epoch": 0.03, "learning_rate": 9.973071199796256e-05, "loss": 2.0957, "step": 6260 }, { "epoch": 0.03, "learning_rate": 9.973027505832145e-05, "loss": 2.0173, "step": 6265 }, { "epoch": 0.03, "learning_rate": 9.972983776544296e-05, "loss": 1.9487, "step": 6270 }, { "epoch": 0.03, "learning_rate": 9.972940011933019e-05, "loss": 1.9921, "step": 6275 }, { "epoch": 0.03, "learning_rate": 9.972896211998624e-05, "loss": 2.0187, "step": 6280 }, { "epoch": 0.03, "learning_rate": 9.972852376741424e-05, "loss": 1.9875, "step": 6285 }, { "epoch": 0.03, "learning_rate": 9.972808506161729e-05, "loss": 1.9621, "step": 6290 }, { "epoch": 0.03, "learning_rate": 9.972764600259849e-05, "loss": 2.0505, "step": 6295 }, { "epoch": 0.03, "learning_rate": 9.9727206590361e-05, "loss": 1.9287, "step": 6300 }, { "epoch": 0.03, "learning_rate": 9.972676682490793e-05, "loss": 2.0285, "step": 6305 }, { "epoch": 0.03, "learning_rate": 9.97263267062424e-05, "loss": 2.0691, "step": 6310 }, { "epoch": 0.03, "learning_rate": 9.972588623436752e-05, "loss": 1.9675, "step": 6315 }, { "epoch": 0.03, "learning_rate": 9.972544540928641e-05, "loss": 2.0413, "step": 6320 }, { "epoch": 0.03, "learning_rate": 9.972500423100224e-05, "loss": 2.0125, "step": 6325 }, { "epoch": 0.03, "learning_rate": 9.972456269951813e-05, "loss": 1.979, "step": 6330 }, { "epoch": 0.03, "learning_rate": 9.97241208148372e-05, "loss": 2.0495, "step": 6335 }, { "epoch": 0.03, "learning_rate": 9.97236785769626e-05, "loss": 2.0133, "step": 6340 }, { "epoch": 0.03, "learning_rate": 9.972323598589748e-05, "loss": 1.991, "step": 6345 }, { "epoch": 0.03, "learning_rate": 9.972279304164496e-05, "loss": 2.012, "step": 6350 }, { "epoch": 0.03, "learning_rate": 9.97223497442082e-05, "loss": 2.0136, "step": 6355 }, { "epoch": 0.03, "learning_rate": 9.972190609359035e-05, "loss": 1.9614, "step": 6360 }, { "epoch": 0.03, "learning_rate": 9.972146208979459e-05, "loss": 1.9851, "step": 6365 }, { "epoch": 0.03, "learning_rate": 9.9721017732824e-05, "loss": 2.0537, "step": 6370 }, { "epoch": 0.03, "learning_rate": 9.972057302268181e-05, "loss": 2.0377, "step": 6375 }, { "epoch": 0.03, "learning_rate": 9.972012795937114e-05, "loss": 2.0176, "step": 6380 }, { "epoch": 0.03, "learning_rate": 9.971968254289517e-05, "loss": 1.9863, "step": 6385 }, { "epoch": 0.03, "learning_rate": 9.971923677325705e-05, "loss": 2.0417, "step": 6390 }, { "epoch": 0.03, "learning_rate": 9.971879065045995e-05, "loss": 1.9519, "step": 6395 }, { "epoch": 0.03, "learning_rate": 9.971834417450703e-05, "loss": 2.0531, "step": 6400 }, { "epoch": 0.03, "learning_rate": 9.971789734540148e-05, "loss": 2.0438, "step": 6405 }, { "epoch": 0.03, "learning_rate": 9.971745016314646e-05, "loss": 2.01, "step": 6410 }, { "epoch": 0.03, "learning_rate": 9.971700262774516e-05, "loss": 1.9724, "step": 6415 }, { "epoch": 0.03, "learning_rate": 9.971655473920073e-05, "loss": 1.9754, "step": 6420 }, { "epoch": 0.03, "learning_rate": 9.971610649751639e-05, "loss": 2.0196, "step": 6425 }, { "epoch": 0.03, "learning_rate": 9.971565790269531e-05, "loss": 1.9845, "step": 6430 }, { "epoch": 0.03, "learning_rate": 9.971520895474067e-05, "loss": 2.0217, "step": 6435 }, { "epoch": 0.03, "learning_rate": 9.971475965365565e-05, "loss": 2.0258, "step": 6440 }, { "epoch": 0.03, "learning_rate": 9.971430999944346e-05, "loss": 2.0893, "step": 6445 }, { "epoch": 0.03, "learning_rate": 9.971385999210728e-05, "loss": 1.9407, "step": 6450 }, { "epoch": 0.03, "learning_rate": 9.971340963165032e-05, "loss": 2.0937, "step": 6455 }, { "epoch": 0.03, "learning_rate": 9.971295891807577e-05, "loss": 2.037, "step": 6460 }, { "epoch": 0.03, "learning_rate": 9.971250785138684e-05, "loss": 1.9684, "step": 6465 }, { "epoch": 0.03, "learning_rate": 9.971205643158671e-05, "loss": 2.0733, "step": 6470 }, { "epoch": 0.03, "learning_rate": 9.971160465867862e-05, "loss": 1.9969, "step": 6475 }, { "epoch": 0.03, "learning_rate": 9.971115253266575e-05, "loss": 1.9802, "step": 6480 }, { "epoch": 0.03, "learning_rate": 9.971070005355132e-05, "loss": 1.9721, "step": 6485 }, { "epoch": 0.03, "learning_rate": 9.971024722133855e-05, "loss": 2.015, "step": 6490 }, { "epoch": 0.03, "learning_rate": 9.970979403603067e-05, "loss": 1.993, "step": 6495 }, { "epoch": 0.03, "learning_rate": 9.970934049763088e-05, "loss": 1.9833, "step": 6500 }, { "epoch": 0.03, "learning_rate": 9.97088866061424e-05, "loss": 1.9772, "step": 6505 }, { "epoch": 0.03, "learning_rate": 9.970843236156844e-05, "loss": 2.0174, "step": 6510 }, { "epoch": 0.03, "learning_rate": 9.970797776391227e-05, "loss": 1.952, "step": 6515 }, { "epoch": 0.03, "learning_rate": 9.970752281317708e-05, "loss": 2.0481, "step": 6520 }, { "epoch": 0.03, "learning_rate": 9.970706750936613e-05, "loss": 1.9975, "step": 6525 }, { "epoch": 0.04, "learning_rate": 9.970661185248262e-05, "loss": 2.043, "step": 6530 }, { "epoch": 0.04, "learning_rate": 9.970615584252982e-05, "loss": 1.9594, "step": 6535 }, { "epoch": 0.04, "learning_rate": 9.970569947951096e-05, "loss": 1.9948, "step": 6540 }, { "epoch": 0.04, "learning_rate": 9.970524276342927e-05, "loss": 2.0064, "step": 6545 }, { "epoch": 0.04, "learning_rate": 9.970478569428801e-05, "loss": 1.9888, "step": 6550 }, { "epoch": 0.04, "learning_rate": 9.970432827209043e-05, "loss": 1.967, "step": 6555 }, { "epoch": 0.04, "learning_rate": 9.970387049683974e-05, "loss": 2.0812, "step": 6560 }, { "epoch": 0.04, "learning_rate": 9.970341236853922e-05, "loss": 2.0089, "step": 6565 }, { "epoch": 0.04, "learning_rate": 9.970295388719212e-05, "loss": 1.9099, "step": 6570 }, { "epoch": 0.04, "learning_rate": 9.970249505280172e-05, "loss": 2.0363, "step": 6575 }, { "epoch": 0.04, "learning_rate": 9.970203586537125e-05, "loss": 1.9763, "step": 6580 }, { "epoch": 0.04, "learning_rate": 9.970157632490398e-05, "loss": 2.1303, "step": 6585 }, { "epoch": 0.04, "learning_rate": 9.970111643140316e-05, "loss": 2.022, "step": 6590 }, { "epoch": 0.04, "learning_rate": 9.970065618487209e-05, "loss": 1.924, "step": 6595 }, { "epoch": 0.04, "learning_rate": 9.970019558531401e-05, "loss": 2.0566, "step": 6600 }, { "epoch": 0.04, "learning_rate": 9.96997346327322e-05, "loss": 1.9599, "step": 6605 }, { "epoch": 0.04, "learning_rate": 9.969927332712995e-05, "loss": 2.0337, "step": 6610 }, { "epoch": 0.04, "learning_rate": 9.969881166851051e-05, "loss": 1.9663, "step": 6615 }, { "epoch": 0.04, "learning_rate": 9.969834965687719e-05, "loss": 1.9997, "step": 6620 }, { "epoch": 0.04, "learning_rate": 9.969788729223322e-05, "loss": 2.0046, "step": 6625 }, { "epoch": 0.04, "learning_rate": 9.969742457458194e-05, "loss": 2.0479, "step": 6630 }, { "epoch": 0.04, "learning_rate": 9.969696150392662e-05, "loss": 2.0611, "step": 6635 }, { "epoch": 0.04, "learning_rate": 9.969649808027053e-05, "loss": 1.9539, "step": 6640 }, { "epoch": 0.04, "learning_rate": 9.969603430361698e-05, "loss": 2.0269, "step": 6645 }, { "epoch": 0.04, "learning_rate": 9.969557017396926e-05, "loss": 2.0042, "step": 6650 }, { "epoch": 0.04, "learning_rate": 9.969510569133067e-05, "loss": 1.9818, "step": 6655 }, { "epoch": 0.04, "learning_rate": 9.96946408557045e-05, "loss": 2.0365, "step": 6660 }, { "epoch": 0.04, "learning_rate": 9.969417566709406e-05, "loss": 1.9702, "step": 6665 }, { "epoch": 0.04, "learning_rate": 9.969371012550265e-05, "loss": 2.0564, "step": 6670 }, { "epoch": 0.04, "learning_rate": 9.969324423093358e-05, "loss": 1.9872, "step": 6675 }, { "epoch": 0.04, "learning_rate": 9.969277798339016e-05, "loss": 2.0273, "step": 6680 }, { "epoch": 0.04, "learning_rate": 9.96923113828757e-05, "loss": 1.9812, "step": 6685 }, { "epoch": 0.04, "learning_rate": 9.96918444293935e-05, "loss": 2.075, "step": 6690 }, { "epoch": 0.04, "learning_rate": 9.96913771229469e-05, "loss": 1.9764, "step": 6695 }, { "epoch": 0.04, "learning_rate": 9.969090946353922e-05, "loss": 2.0034, "step": 6700 }, { "epoch": 0.04, "learning_rate": 9.969044145117377e-05, "loss": 1.9957, "step": 6705 }, { "epoch": 0.04, "learning_rate": 9.968997308585387e-05, "loss": 2.0319, "step": 6710 }, { "epoch": 0.04, "learning_rate": 9.968950436758284e-05, "loss": 2.027, "step": 6715 }, { "epoch": 0.04, "learning_rate": 9.968903529636403e-05, "loss": 2.0571, "step": 6720 }, { "epoch": 0.04, "learning_rate": 9.968856587220077e-05, "loss": 2.0462, "step": 6725 }, { "epoch": 0.04, "learning_rate": 9.96880960950964e-05, "loss": 2.0162, "step": 6730 }, { "epoch": 0.04, "learning_rate": 9.968762596505421e-05, "loss": 2.0274, "step": 6735 }, { "epoch": 0.04, "learning_rate": 9.968715548207761e-05, "loss": 2.0689, "step": 6740 }, { "epoch": 0.04, "learning_rate": 9.968668464616988e-05, "loss": 1.9734, "step": 6745 }, { "epoch": 0.04, "learning_rate": 9.968621345733442e-05, "loss": 2.028, "step": 6750 }, { "epoch": 0.04, "learning_rate": 9.968574191557451e-05, "loss": 2.0073, "step": 6755 }, { "epoch": 0.04, "learning_rate": 9.968527002089356e-05, "loss": 1.8531, "step": 6760 }, { "epoch": 0.04, "learning_rate": 9.968479777329491e-05, "loss": 2.0034, "step": 6765 }, { "epoch": 0.04, "learning_rate": 9.968432517278188e-05, "loss": 1.9882, "step": 6770 }, { "epoch": 0.04, "learning_rate": 9.968385221935787e-05, "loss": 2.0418, "step": 6775 }, { "epoch": 0.04, "learning_rate": 9.968337891302621e-05, "loss": 1.9707, "step": 6780 }, { "epoch": 0.04, "learning_rate": 9.968290525379028e-05, "loss": 2.0913, "step": 6785 }, { "epoch": 0.04, "learning_rate": 9.968243124165342e-05, "loss": 2.0141, "step": 6790 }, { "epoch": 0.04, "learning_rate": 9.968195687661902e-05, "loss": 2.0488, "step": 6795 }, { "epoch": 0.04, "learning_rate": 9.968148215869044e-05, "loss": 1.9781, "step": 6800 }, { "epoch": 0.04, "learning_rate": 9.968100708787107e-05, "loss": 1.9892, "step": 6805 }, { "epoch": 0.04, "learning_rate": 9.968053166416424e-05, "loss": 2.0141, "step": 6810 }, { "epoch": 0.04, "learning_rate": 9.968005588757339e-05, "loss": 2.0157, "step": 6815 }, { "epoch": 0.04, "learning_rate": 9.967957975810184e-05, "loss": 1.991, "step": 6820 }, { "epoch": 0.04, "learning_rate": 9.9679103275753e-05, "loss": 2.0568, "step": 6825 }, { "epoch": 0.04, "learning_rate": 9.967862644053027e-05, "loss": 2.0411, "step": 6830 }, { "epoch": 0.04, "learning_rate": 9.9678149252437e-05, "loss": 2.0586, "step": 6835 }, { "epoch": 0.04, "learning_rate": 9.96776717114766e-05, "loss": 1.9888, "step": 6840 }, { "epoch": 0.04, "learning_rate": 9.967719381765248e-05, "loss": 2.0177, "step": 6845 }, { "epoch": 0.04, "learning_rate": 9.9676715570968e-05, "loss": 2.0482, "step": 6850 }, { "epoch": 0.04, "learning_rate": 9.967623697142656e-05, "loss": 2.0369, "step": 6855 }, { "epoch": 0.04, "learning_rate": 9.96757580190316e-05, "loss": 2.0377, "step": 6860 }, { "epoch": 0.04, "learning_rate": 9.967527871378646e-05, "loss": 2.0471, "step": 6865 }, { "epoch": 0.04, "learning_rate": 9.96747990556946e-05, "loss": 2.0083, "step": 6870 }, { "epoch": 0.04, "learning_rate": 9.96743190447594e-05, "loss": 1.9945, "step": 6875 }, { "epoch": 0.04, "learning_rate": 9.967383868098427e-05, "loss": 2.0135, "step": 6880 }, { "epoch": 0.04, "learning_rate": 9.967335796437263e-05, "loss": 2.07, "step": 6885 }, { "epoch": 0.04, "learning_rate": 9.96728768949279e-05, "loss": 1.9917, "step": 6890 }, { "epoch": 0.04, "learning_rate": 9.967239547265348e-05, "loss": 1.9694, "step": 6895 }, { "epoch": 0.04, "learning_rate": 9.96719136975528e-05, "loss": 2.0603, "step": 6900 }, { "epoch": 0.04, "learning_rate": 9.967143156962926e-05, "loss": 2.0543, "step": 6905 }, { "epoch": 0.04, "learning_rate": 9.967094908888634e-05, "loss": 1.9917, "step": 6910 }, { "epoch": 0.04, "learning_rate": 9.967046625532741e-05, "loss": 2.0115, "step": 6915 }, { "epoch": 0.04, "learning_rate": 9.966998306895592e-05, "loss": 2.0037, "step": 6920 }, { "epoch": 0.04, "learning_rate": 9.966949952977532e-05, "loss": 2.0146, "step": 6925 }, { "epoch": 0.04, "learning_rate": 9.966901563778901e-05, "loss": 1.9123, "step": 6930 }, { "epoch": 0.04, "learning_rate": 9.966853139300045e-05, "loss": 2.0225, "step": 6935 }, { "epoch": 0.04, "learning_rate": 9.966804679541307e-05, "loss": 2.0493, "step": 6940 }, { "epoch": 0.04, "learning_rate": 9.966756184503032e-05, "loss": 1.9251, "step": 6945 }, { "epoch": 0.04, "learning_rate": 9.966707654185565e-05, "loss": 2.0667, "step": 6950 }, { "epoch": 0.04, "learning_rate": 9.966659088589249e-05, "loss": 1.9902, "step": 6955 }, { "epoch": 0.04, "learning_rate": 9.96661048771443e-05, "loss": 2.0103, "step": 6960 }, { "epoch": 0.04, "learning_rate": 9.966561851561453e-05, "loss": 1.9489, "step": 6965 }, { "epoch": 0.04, "learning_rate": 9.966513180130663e-05, "loss": 1.9653, "step": 6970 }, { "epoch": 0.04, "learning_rate": 9.966464473422407e-05, "loss": 2.0667, "step": 6975 }, { "epoch": 0.04, "learning_rate": 9.966415731437031e-05, "loss": 1.9987, "step": 6980 }, { "epoch": 0.04, "learning_rate": 9.96636695417488e-05, "loss": 1.9679, "step": 6985 }, { "epoch": 0.04, "learning_rate": 9.9663181416363e-05, "loss": 2.0341, "step": 6990 }, { "epoch": 0.04, "learning_rate": 9.966269293821638e-05, "loss": 2.0463, "step": 6995 }, { "epoch": 0.04, "learning_rate": 9.966220410731241e-05, "loss": 2.018, "step": 7000 }, { "epoch": 0.04, "learning_rate": 9.96617149236546e-05, "loss": 1.9994, "step": 7005 }, { "epoch": 0.04, "learning_rate": 9.966122538724636e-05, "loss": 1.9997, "step": 7010 }, { "epoch": 0.04, "learning_rate": 9.966073549809121e-05, "loss": 2.0147, "step": 7015 }, { "epoch": 0.04, "learning_rate": 9.966024525619262e-05, "loss": 2.0328, "step": 7020 }, { "epoch": 0.04, "learning_rate": 9.965975466155409e-05, "loss": 2.1036, "step": 7025 }, { "epoch": 0.04, "learning_rate": 9.965926371417906e-05, "loss": 1.9785, "step": 7030 }, { "epoch": 0.04, "learning_rate": 9.965877241407107e-05, "loss": 2.0073, "step": 7035 }, { "epoch": 0.04, "learning_rate": 9.965828076123356e-05, "loss": 1.9852, "step": 7040 }, { "epoch": 0.04, "learning_rate": 9.965778875567004e-05, "loss": 2.0214, "step": 7045 }, { "epoch": 0.04, "learning_rate": 9.965729639738404e-05, "loss": 1.9908, "step": 7050 }, { "epoch": 0.04, "learning_rate": 9.9656803686379e-05, "loss": 2.0134, "step": 7055 }, { "epoch": 0.04, "learning_rate": 9.965631062265845e-05, "loss": 1.9897, "step": 7060 }, { "epoch": 0.04, "learning_rate": 9.96558172062259e-05, "loss": 2.0196, "step": 7065 }, { "epoch": 0.04, "learning_rate": 9.965532343708485e-05, "loss": 1.9965, "step": 7070 }, { "epoch": 0.04, "learning_rate": 9.96548293152388e-05, "loss": 1.9686, "step": 7075 }, { "epoch": 0.04, "learning_rate": 9.965433484069124e-05, "loss": 1.9644, "step": 7080 }, { "epoch": 0.04, "learning_rate": 9.965384001344573e-05, "loss": 1.9562, "step": 7085 }, { "epoch": 0.04, "learning_rate": 9.965334483350575e-05, "loss": 2.0009, "step": 7090 }, { "epoch": 0.04, "learning_rate": 9.965284930087481e-05, "loss": 2.0114, "step": 7095 }, { "epoch": 0.04, "learning_rate": 9.965235341555645e-05, "loss": 2.008, "step": 7100 }, { "epoch": 0.04, "learning_rate": 9.965185717755421e-05, "loss": 2.0773, "step": 7105 }, { "epoch": 0.04, "learning_rate": 9.965136058687157e-05, "loss": 2.0484, "step": 7110 }, { "epoch": 0.04, "learning_rate": 9.965086364351209e-05, "loss": 1.9974, "step": 7115 }, { "epoch": 0.04, "learning_rate": 9.965036634747929e-05, "loss": 1.9902, "step": 7120 }, { "epoch": 0.04, "learning_rate": 9.964986869877671e-05, "loss": 1.9821, "step": 7125 }, { "epoch": 0.04, "learning_rate": 9.964937069740785e-05, "loss": 1.9923, "step": 7130 }, { "epoch": 0.04, "learning_rate": 9.964887234337629e-05, "loss": 1.9904, "step": 7135 }, { "epoch": 0.04, "learning_rate": 9.964837363668556e-05, "loss": 1.9594, "step": 7140 }, { "epoch": 0.04, "learning_rate": 9.96478745773392e-05, "loss": 1.9911, "step": 7145 }, { "epoch": 0.04, "learning_rate": 9.964737516534073e-05, "loss": 1.9348, "step": 7150 }, { "epoch": 0.04, "learning_rate": 9.964687540069373e-05, "loss": 2.0769, "step": 7155 }, { "epoch": 0.04, "learning_rate": 9.964637528340175e-05, "loss": 2.0249, "step": 7160 }, { "epoch": 0.04, "learning_rate": 9.964587481346831e-05, "loss": 1.9802, "step": 7165 }, { "epoch": 0.04, "learning_rate": 9.9645373990897e-05, "loss": 2.0397, "step": 7170 }, { "epoch": 0.04, "learning_rate": 9.964487281569137e-05, "loss": 2.0419, "step": 7175 }, { "epoch": 0.04, "learning_rate": 9.964437128785495e-05, "loss": 2.0258, "step": 7180 }, { "epoch": 0.04, "learning_rate": 9.964386940739134e-05, "loss": 2.0194, "step": 7185 }, { "epoch": 0.04, "learning_rate": 9.96433671743041e-05, "loss": 2.1051, "step": 7190 }, { "epoch": 0.04, "learning_rate": 9.964286458859678e-05, "loss": 1.9453, "step": 7195 }, { "epoch": 0.04, "learning_rate": 9.964236165027294e-05, "loss": 2.0375, "step": 7200 }, { "epoch": 0.04, "learning_rate": 9.96418583593362e-05, "loss": 1.9935, "step": 7205 }, { "epoch": 0.04, "learning_rate": 9.964135471579008e-05, "loss": 2.041, "step": 7210 }, { "epoch": 0.04, "learning_rate": 9.96408507196382e-05, "loss": 2.0225, "step": 7215 }, { "epoch": 0.04, "learning_rate": 9.964034637088411e-05, "loss": 1.9878, "step": 7220 }, { "epoch": 0.04, "learning_rate": 9.963984166953139e-05, "loss": 2.0059, "step": 7225 }, { "epoch": 0.04, "learning_rate": 9.963933661558367e-05, "loss": 2.0556, "step": 7230 }, { "epoch": 0.04, "learning_rate": 9.963883120904449e-05, "loss": 1.9969, "step": 7235 }, { "epoch": 0.04, "learning_rate": 9.963832544991747e-05, "loss": 1.9426, "step": 7240 }, { "epoch": 0.04, "learning_rate": 9.963781933820618e-05, "loss": 1.9196, "step": 7245 }, { "epoch": 0.04, "learning_rate": 9.963731287391422e-05, "loss": 1.9937, "step": 7250 }, { "epoch": 0.04, "learning_rate": 9.96368060570452e-05, "loss": 2.0707, "step": 7255 }, { "epoch": 0.04, "learning_rate": 9.96362988876027e-05, "loss": 1.9294, "step": 7260 }, { "epoch": 0.04, "learning_rate": 9.963579136559034e-05, "loss": 1.9988, "step": 7265 }, { "epoch": 0.04, "learning_rate": 9.963528349101172e-05, "loss": 1.9736, "step": 7270 }, { "epoch": 0.04, "learning_rate": 9.963477526387046e-05, "loss": 2.0282, "step": 7275 }, { "epoch": 0.04, "learning_rate": 9.963426668417014e-05, "loss": 2.0478, "step": 7280 }, { "epoch": 0.04, "learning_rate": 9.96337577519144e-05, "loss": 1.9992, "step": 7285 }, { "epoch": 0.04, "learning_rate": 9.963324846710683e-05, "loss": 2.0065, "step": 7290 }, { "epoch": 0.04, "learning_rate": 9.963273882975108e-05, "loss": 1.9976, "step": 7295 }, { "epoch": 0.04, "learning_rate": 9.963222883985073e-05, "loss": 2.0532, "step": 7300 }, { "epoch": 0.04, "learning_rate": 9.963171849740945e-05, "loss": 2.0331, "step": 7305 }, { "epoch": 0.04, "learning_rate": 9.963120780243082e-05, "loss": 2.005, "step": 7310 }, { "epoch": 0.04, "learning_rate": 9.963069675491848e-05, "loss": 2.0824, "step": 7315 }, { "epoch": 0.04, "learning_rate": 9.963018535487608e-05, "loss": 2.0479, "step": 7320 }, { "epoch": 0.04, "learning_rate": 9.962967360230724e-05, "loss": 2.0173, "step": 7325 }, { "epoch": 0.04, "learning_rate": 9.962916149721559e-05, "loss": 2.0553, "step": 7330 }, { "epoch": 0.04, "learning_rate": 9.962864903960476e-05, "loss": 2.0261, "step": 7335 }, { "epoch": 0.04, "learning_rate": 9.962813622947842e-05, "loss": 2.005, "step": 7340 }, { "epoch": 0.04, "learning_rate": 9.962762306684019e-05, "loss": 2.0544, "step": 7345 }, { "epoch": 0.04, "learning_rate": 9.962710955169373e-05, "loss": 2.0028, "step": 7350 }, { "epoch": 0.04, "learning_rate": 9.962659568404264e-05, "loss": 1.9812, "step": 7355 }, { "epoch": 0.04, "learning_rate": 9.962608146389064e-05, "loss": 1.9918, "step": 7360 }, { "epoch": 0.04, "learning_rate": 9.962556689124134e-05, "loss": 2.0163, "step": 7365 }, { "epoch": 0.04, "learning_rate": 9.96250519660984e-05, "loss": 2.0619, "step": 7370 }, { "epoch": 0.04, "learning_rate": 9.962453668846549e-05, "loss": 2.0202, "step": 7375 }, { "epoch": 0.04, "learning_rate": 9.962402105834625e-05, "loss": 1.957, "step": 7380 }, { "epoch": 0.04, "learning_rate": 9.962350507574436e-05, "loss": 1.993, "step": 7385 }, { "epoch": 0.04, "learning_rate": 9.96229887406635e-05, "loss": 2.0448, "step": 7390 }, { "epoch": 0.04, "learning_rate": 9.962247205310729e-05, "loss": 1.9107, "step": 7395 }, { "epoch": 0.04, "learning_rate": 9.962195501307943e-05, "loss": 2.0034, "step": 7400 }, { "epoch": 0.04, "learning_rate": 9.96214376205836e-05, "loss": 1.9806, "step": 7405 }, { "epoch": 0.04, "learning_rate": 9.962091987562345e-05, "loss": 2.0212, "step": 7410 }, { "epoch": 0.04, "learning_rate": 9.962040177820269e-05, "loss": 2.0019, "step": 7415 }, { "epoch": 0.04, "learning_rate": 9.961988332832496e-05, "loss": 2.09, "step": 7420 }, { "epoch": 0.04, "learning_rate": 9.961936452599398e-05, "loss": 2.0117, "step": 7425 }, { "epoch": 0.04, "learning_rate": 9.961884537121341e-05, "loss": 2.0284, "step": 7430 }, { "epoch": 0.04, "learning_rate": 9.961832586398698e-05, "loss": 1.9937, "step": 7435 }, { "epoch": 0.04, "learning_rate": 9.961780600431832e-05, "loss": 1.9803, "step": 7440 }, { "epoch": 0.04, "learning_rate": 9.961728579221115e-05, "loss": 1.9942, "step": 7445 }, { "epoch": 0.04, "learning_rate": 9.961676522766917e-05, "loss": 1.9269, "step": 7450 }, { "epoch": 0.04, "learning_rate": 9.961624431069607e-05, "loss": 1.9785, "step": 7455 }, { "epoch": 0.04, "learning_rate": 9.961572304129557e-05, "loss": 2.0498, "step": 7460 }, { "epoch": 0.04, "learning_rate": 9.961520141947134e-05, "loss": 1.9777, "step": 7465 }, { "epoch": 0.04, "learning_rate": 9.961467944522709e-05, "loss": 1.9093, "step": 7470 }, { "epoch": 0.04, "learning_rate": 9.961415711856657e-05, "loss": 2.001, "step": 7475 }, { "epoch": 0.04, "learning_rate": 9.961363443949344e-05, "loss": 1.9999, "step": 7480 }, { "epoch": 0.04, "learning_rate": 9.961311140801142e-05, "loss": 1.9976, "step": 7485 }, { "epoch": 0.04, "learning_rate": 9.961258802412425e-05, "loss": 1.9944, "step": 7490 }, { "epoch": 0.04, "learning_rate": 9.961206428783563e-05, "loss": 2.0266, "step": 7495 }, { "epoch": 0.04, "learning_rate": 9.96115401991493e-05, "loss": 2.0224, "step": 7500 }, { "epoch": 0.04, "learning_rate": 9.961101575806893e-05, "loss": 2.0505, "step": 7505 }, { "epoch": 0.04, "learning_rate": 9.961049096459831e-05, "loss": 1.9339, "step": 7510 }, { "epoch": 0.04, "learning_rate": 9.960996581874113e-05, "loss": 1.912, "step": 7515 }, { "epoch": 0.04, "learning_rate": 9.960944032050113e-05, "loss": 2.0519, "step": 7520 }, { "epoch": 0.04, "learning_rate": 9.960891446988205e-05, "loss": 2.0559, "step": 7525 }, { "epoch": 0.04, "learning_rate": 9.960838826688761e-05, "loss": 2.0433, "step": 7530 }, { "epoch": 0.04, "learning_rate": 9.960786171152157e-05, "loss": 1.9908, "step": 7535 }, { "epoch": 0.04, "learning_rate": 9.960733480378764e-05, "loss": 2.0542, "step": 7540 }, { "epoch": 0.04, "learning_rate": 9.960680754368959e-05, "loss": 1.8691, "step": 7545 }, { "epoch": 0.04, "learning_rate": 9.960627993123113e-05, "loss": 1.9864, "step": 7550 }, { "epoch": 0.04, "learning_rate": 9.960575196641605e-05, "loss": 2.0064, "step": 7555 }, { "epoch": 0.04, "learning_rate": 9.960522364924807e-05, "loss": 2.0273, "step": 7560 }, { "epoch": 0.04, "learning_rate": 9.960469497973097e-05, "loss": 1.8736, "step": 7565 }, { "epoch": 0.04, "learning_rate": 9.960416595786848e-05, "loss": 1.9411, "step": 7570 }, { "epoch": 0.04, "learning_rate": 9.960363658366436e-05, "loss": 2.0192, "step": 7575 }, { "epoch": 0.04, "learning_rate": 9.960310685712238e-05, "loss": 1.9586, "step": 7580 }, { "epoch": 0.04, "learning_rate": 9.96025767782463e-05, "loss": 1.9872, "step": 7585 }, { "epoch": 0.04, "learning_rate": 9.96020463470399e-05, "loss": 2.0043, "step": 7590 }, { "epoch": 0.04, "learning_rate": 9.960151556350691e-05, "loss": 1.9484, "step": 7595 }, { "epoch": 0.04, "learning_rate": 9.960098442765114e-05, "loss": 2.0329, "step": 7600 }, { "epoch": 0.04, "learning_rate": 9.960045293947633e-05, "loss": 1.9613, "step": 7605 }, { "epoch": 0.04, "learning_rate": 9.959992109898628e-05, "loss": 2.0747, "step": 7610 }, { "epoch": 0.04, "learning_rate": 9.959938890618474e-05, "loss": 1.9774, "step": 7615 }, { "epoch": 0.04, "learning_rate": 9.959885636107553e-05, "loss": 2.0601, "step": 7620 }, { "epoch": 0.04, "learning_rate": 9.959832346366241e-05, "loss": 2.0121, "step": 7625 }, { "epoch": 0.04, "learning_rate": 9.959779021394916e-05, "loss": 1.985, "step": 7630 }, { "epoch": 0.04, "learning_rate": 9.959725661193956e-05, "loss": 2.067, "step": 7635 }, { "epoch": 0.04, "learning_rate": 9.959672265763743e-05, "loss": 1.9449, "step": 7640 }, { "epoch": 0.04, "learning_rate": 9.959618835104654e-05, "loss": 1.9915, "step": 7645 }, { "epoch": 0.04, "learning_rate": 9.95956536921707e-05, "loss": 2.0822, "step": 7650 }, { "epoch": 0.04, "learning_rate": 9.95951186810137e-05, "loss": 2.0754, "step": 7655 }, { "epoch": 0.04, "learning_rate": 9.959458331757933e-05, "loss": 1.8964, "step": 7660 }, { "epoch": 0.04, "learning_rate": 9.959404760187141e-05, "loss": 1.9996, "step": 7665 }, { "epoch": 0.04, "learning_rate": 9.959351153389374e-05, "loss": 2.0028, "step": 7670 }, { "epoch": 0.04, "learning_rate": 9.959297511365011e-05, "loss": 1.936, "step": 7675 }, { "epoch": 0.04, "learning_rate": 9.959243834114435e-05, "loss": 2.075, "step": 7680 }, { "epoch": 0.04, "learning_rate": 9.959190121638029e-05, "loss": 2.052, "step": 7685 }, { "epoch": 0.04, "learning_rate": 9.959136373936171e-05, "loss": 2.0153, "step": 7690 }, { "epoch": 0.04, "learning_rate": 9.959082591009243e-05, "loss": 1.9949, "step": 7695 }, { "epoch": 0.04, "learning_rate": 9.95902877285763e-05, "loss": 1.9838, "step": 7700 }, { "epoch": 0.04, "learning_rate": 9.958974919481712e-05, "loss": 1.9915, "step": 7705 }, { "epoch": 0.04, "learning_rate": 9.958921030881871e-05, "loss": 1.9786, "step": 7710 }, { "epoch": 0.04, "learning_rate": 9.958867107058492e-05, "loss": 1.9825, "step": 7715 }, { "epoch": 0.04, "learning_rate": 9.958813148011957e-05, "loss": 2.0341, "step": 7720 }, { "epoch": 0.04, "learning_rate": 9.958759153742647e-05, "loss": 2.0045, "step": 7725 }, { "epoch": 0.04, "learning_rate": 9.958705124250948e-05, "loss": 1.999, "step": 7730 }, { "epoch": 0.04, "learning_rate": 9.958651059537245e-05, "loss": 2.0145, "step": 7735 }, { "epoch": 0.04, "learning_rate": 9.958596959601919e-05, "loss": 1.99, "step": 7740 }, { "epoch": 0.04, "learning_rate": 9.958542824445356e-05, "loss": 2.0717, "step": 7745 }, { "epoch": 0.04, "learning_rate": 9.958488654067941e-05, "loss": 2.0347, "step": 7750 }, { "epoch": 0.04, "learning_rate": 9.958434448470056e-05, "loss": 1.9638, "step": 7755 }, { "epoch": 0.04, "learning_rate": 9.95838020765209e-05, "loss": 2.019, "step": 7760 }, { "epoch": 0.04, "learning_rate": 9.958325931614425e-05, "loss": 1.9548, "step": 7765 }, { "epoch": 0.04, "learning_rate": 9.958271620357448e-05, "loss": 1.9133, "step": 7770 }, { "epoch": 0.04, "learning_rate": 9.958217273881545e-05, "loss": 2.116, "step": 7775 }, { "epoch": 0.04, "learning_rate": 9.958162892187102e-05, "loss": 2.1317, "step": 7780 }, { "epoch": 0.04, "learning_rate": 9.958108475274503e-05, "loss": 2.0029, "step": 7785 }, { "epoch": 0.04, "learning_rate": 9.958054023144139e-05, "loss": 2.0805, "step": 7790 }, { "epoch": 0.04, "learning_rate": 9.957999535796391e-05, "loss": 1.9678, "step": 7795 }, { "epoch": 0.04, "learning_rate": 9.957945013231652e-05, "loss": 2.0251, "step": 7800 }, { "epoch": 0.04, "learning_rate": 9.957890455450305e-05, "loss": 2.0018, "step": 7805 }, { "epoch": 0.04, "learning_rate": 9.95783586245274e-05, "loss": 1.9396, "step": 7810 }, { "epoch": 0.04, "learning_rate": 9.957781234239342e-05, "loss": 1.9333, "step": 7815 }, { "epoch": 0.04, "learning_rate": 9.957726570810503e-05, "loss": 1.9647, "step": 7820 }, { "epoch": 0.04, "learning_rate": 9.957671872166607e-05, "loss": 1.9504, "step": 7825 }, { "epoch": 0.04, "learning_rate": 9.957617138308047e-05, "loss": 2.0089, "step": 7830 }, { "epoch": 0.04, "learning_rate": 9.957562369235207e-05, "loss": 1.9718, "step": 7835 }, { "epoch": 0.04, "learning_rate": 9.95750756494848e-05, "loss": 1.976, "step": 7840 }, { "epoch": 0.04, "learning_rate": 9.957452725448253e-05, "loss": 1.9936, "step": 7845 }, { "epoch": 0.04, "learning_rate": 9.957397850734915e-05, "loss": 1.9434, "step": 7850 }, { "epoch": 0.04, "learning_rate": 9.957342940808859e-05, "loss": 1.9113, "step": 7855 }, { "epoch": 0.04, "learning_rate": 9.957287995670473e-05, "loss": 1.9534, "step": 7860 }, { "epoch": 0.04, "learning_rate": 9.957233015320145e-05, "loss": 1.9051, "step": 7865 }, { "epoch": 0.04, "learning_rate": 9.957177999758269e-05, "loss": 1.9833, "step": 7870 }, { "epoch": 0.04, "learning_rate": 9.957122948985236e-05, "loss": 2.0766, "step": 7875 }, { "epoch": 0.04, "learning_rate": 9.957067863001435e-05, "loss": 1.9527, "step": 7880 }, { "epoch": 0.04, "learning_rate": 9.957012741807256e-05, "loss": 2.0636, "step": 7885 }, { "epoch": 0.04, "learning_rate": 9.956957585403094e-05, "loss": 1.9874, "step": 7890 }, { "epoch": 0.04, "learning_rate": 9.95690239378934e-05, "loss": 2.0283, "step": 7895 }, { "epoch": 0.04, "learning_rate": 9.956847166966385e-05, "loss": 1.9646, "step": 7900 }, { "epoch": 0.04, "learning_rate": 9.956791904934622e-05, "loss": 1.9773, "step": 7905 }, { "epoch": 0.04, "learning_rate": 9.956736607694441e-05, "loss": 2.0773, "step": 7910 }, { "epoch": 0.04, "learning_rate": 9.956681275246238e-05, "loss": 2.0507, "step": 7915 }, { "epoch": 0.04, "learning_rate": 9.956625907590404e-05, "loss": 2.117, "step": 7920 }, { "epoch": 0.04, "learning_rate": 9.956570504727334e-05, "loss": 1.9419, "step": 7925 }, { "epoch": 0.04, "learning_rate": 9.956515066657421e-05, "loss": 1.9661, "step": 7930 }, { "epoch": 0.04, "learning_rate": 9.956459593381058e-05, "loss": 1.9915, "step": 7935 }, { "epoch": 0.04, "learning_rate": 9.95640408489864e-05, "loss": 2.0339, "step": 7940 }, { "epoch": 0.04, "learning_rate": 9.95634854121056e-05, "loss": 1.9688, "step": 7945 }, { "epoch": 0.04, "learning_rate": 9.956292962317214e-05, "loss": 1.9662, "step": 7950 }, { "epoch": 0.04, "learning_rate": 9.956237348218995e-05, "loss": 1.998, "step": 7955 }, { "epoch": 0.04, "learning_rate": 9.9561816989163e-05, "loss": 2.0069, "step": 7960 }, { "epoch": 0.04, "learning_rate": 9.956126014409523e-05, "loss": 1.9785, "step": 7965 }, { "epoch": 0.04, "learning_rate": 9.95607029469906e-05, "loss": 1.9378, "step": 7970 }, { "epoch": 0.04, "learning_rate": 9.956014539785307e-05, "loss": 1.9604, "step": 7975 }, { "epoch": 0.04, "learning_rate": 9.95595874966866e-05, "loss": 2.0338, "step": 7980 }, { "epoch": 0.04, "learning_rate": 9.955902924349514e-05, "loss": 1.9645, "step": 7985 }, { "epoch": 0.04, "learning_rate": 9.955847063828267e-05, "loss": 2.035, "step": 7990 }, { "epoch": 0.04, "learning_rate": 9.955791168105316e-05, "loss": 1.9543, "step": 7995 }, { "epoch": 0.04, "learning_rate": 9.955735237181056e-05, "loss": 1.9499, "step": 8000 } ], "logging_steps": 5, "max_steps": 186479, "num_train_epochs": 1, "save_steps": 1000, "total_flos": 1.905411560256307e+19, "trial_name": null, "trial_params": null }