{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.030336661096518617, "eval_steps": 500, "global_step": 6000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 5e-06, "loss": 1.7996, "step": 5 }, { "epoch": 0.0, "learning_rate": 1e-05, "loss": 1.8749, "step": 10 }, { "epoch": 0.0, "learning_rate": 1.5e-05, "loss": 1.7917, "step": 15 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 1.866, "step": 20 }, { "epoch": 0.0, "learning_rate": 2.5e-05, "loss": 1.8179, "step": 25 }, { "epoch": 0.0, "learning_rate": 3e-05, "loss": 1.7424, "step": 30 }, { "epoch": 0.0, "learning_rate": 3.5e-05, "loss": 1.776, "step": 35 }, { "epoch": 0.0, "learning_rate": 4e-05, "loss": 1.7429, "step": 40 }, { "epoch": 0.0, "learning_rate": 4.5e-05, "loss": 1.7392, "step": 45 }, { "epoch": 0.0, "learning_rate": 5e-05, "loss": 1.7274, "step": 50 }, { "epoch": 0.0, "learning_rate": 5.500000000000001e-05, "loss": 1.7122, "step": 55 }, { "epoch": 0.0, "learning_rate": 6e-05, "loss": 1.7711, "step": 60 }, { "epoch": 0.0, "learning_rate": 6.500000000000001e-05, "loss": 1.6894, "step": 65 }, { "epoch": 0.0, "learning_rate": 7e-05, "loss": 1.7153, "step": 70 }, { "epoch": 0.0, "learning_rate": 7.500000000000001e-05, "loss": 1.72, "step": 75 }, { "epoch": 0.0, "learning_rate": 8e-05, "loss": 1.6923, "step": 80 }, { "epoch": 0.0, "learning_rate": 8.5e-05, "loss": 1.6293, "step": 85 }, { "epoch": 0.0, "learning_rate": 9e-05, "loss": 1.6991, "step": 90 }, { "epoch": 0.0, "learning_rate": 9.5e-05, "loss": 1.6649, "step": 95 }, { "epoch": 0.0, "learning_rate": 0.0001, "loss": 1.6809, "step": 100 }, { "epoch": 0.0, "learning_rate": 9.999999984214647e-05, "loss": 1.6536, "step": 105 }, { "epoch": 0.0, "learning_rate": 9.999999936858588e-05, "loss": 1.6798, "step": 110 }, { "epoch": 0.0, "learning_rate": 9.999999857931825e-05, "loss": 1.6354, "step": 115 }, { "epoch": 0.0, "learning_rate": 9.999999747434355e-05, "loss": 1.6531, "step": 120 }, { "epoch": 0.0, "learning_rate": 9.999999605366182e-05, "loss": 1.6346, "step": 125 }, { "epoch": 0.0, "learning_rate": 9.999999431727304e-05, "loss": 1.5923, "step": 130 }, { "epoch": 0.0, "learning_rate": 9.999999226517724e-05, "loss": 1.6328, "step": 135 }, { "epoch": 0.0, "learning_rate": 9.999998989737443e-05, "loss": 1.643, "step": 140 }, { "epoch": 0.0, "learning_rate": 9.999998721386463e-05, "loss": 1.6708, "step": 145 }, { "epoch": 0.0, "learning_rate": 9.999998421464784e-05, "loss": 1.6129, "step": 150 }, { "epoch": 0.0, "learning_rate": 9.99999808997241e-05, "loss": 1.6173, "step": 155 }, { "epoch": 0.0, "learning_rate": 9.999997726909342e-05, "loss": 1.62, "step": 160 }, { "epoch": 0.0, "learning_rate": 9.999997332275582e-05, "loss": 1.6656, "step": 165 }, { "epoch": 0.0, "learning_rate": 9.999996906071134e-05, "loss": 1.5967, "step": 170 }, { "epoch": 0.0, "learning_rate": 9.999996448295999e-05, "loss": 1.6081, "step": 175 }, { "epoch": 0.0, "learning_rate": 9.999995958950179e-05, "loss": 1.6811, "step": 180 }, { "epoch": 0.0, "learning_rate": 9.99999543803368e-05, "loss": 1.6281, "step": 185 }, { "epoch": 0.0, "learning_rate": 9.999994885546504e-05, "loss": 1.6343, "step": 190 }, { "epoch": 0.0, "learning_rate": 9.999994301488653e-05, "loss": 1.6014, "step": 195 }, { "epoch": 0.0, "learning_rate": 9.999993685860133e-05, "loss": 1.6031, "step": 200 }, { "epoch": 0.0, "learning_rate": 9.999993038660947e-05, "loss": 1.6057, "step": 205 }, { "epoch": 0.0, "learning_rate": 9.999992359891099e-05, "loss": 1.6461, "step": 210 }, { "epoch": 0.0, "learning_rate": 9.999991649550593e-05, "loss": 1.5893, "step": 215 }, { "epoch": 0.0, "learning_rate": 9.999990907639434e-05, "loss": 1.585, "step": 220 }, { "epoch": 0.0, "learning_rate": 9.999990134157626e-05, "loss": 1.6501, "step": 225 }, { "epoch": 0.0, "learning_rate": 9.999989329105175e-05, "loss": 1.585, "step": 230 }, { "epoch": 0.0, "learning_rate": 9.999988492482087e-05, "loss": 1.5825, "step": 235 }, { "epoch": 0.0, "learning_rate": 9.999987624288363e-05, "loss": 1.6001, "step": 240 }, { "epoch": 0.0, "learning_rate": 9.999986724524012e-05, "loss": 1.5628, "step": 245 }, { "epoch": 0.0, "learning_rate": 9.999985793189038e-05, "loss": 1.5727, "step": 250 }, { "epoch": 0.0, "learning_rate": 9.999984830283449e-05, "loss": 1.6333, "step": 255 }, { "epoch": 0.0, "learning_rate": 9.99998383580725e-05, "loss": 1.6112, "step": 260 }, { "epoch": 0.0, "learning_rate": 9.999982809760446e-05, "loss": 1.5798, "step": 265 }, { "epoch": 0.0, "learning_rate": 9.999981752143045e-05, "loss": 1.5776, "step": 270 }, { "epoch": 0.0, "learning_rate": 9.999980662955052e-05, "loss": 1.5801, "step": 275 }, { "epoch": 0.0, "learning_rate": 9.999979542196479e-05, "loss": 1.6455, "step": 280 }, { "epoch": 0.0, "learning_rate": 9.999978389867326e-05, "loss": 1.5998, "step": 285 }, { "epoch": 0.0, "learning_rate": 9.999977205967603e-05, "loss": 1.6335, "step": 290 }, { "epoch": 0.0, "learning_rate": 9.999975990497319e-05, "loss": 1.5837, "step": 295 }, { "epoch": 0.0, "learning_rate": 9.999974743456482e-05, "loss": 1.6105, "step": 300 }, { "epoch": 0.0, "learning_rate": 9.999973464845096e-05, "loss": 1.5818, "step": 305 }, { "epoch": 0.0, "learning_rate": 9.999972154663173e-05, "loss": 1.602, "step": 310 }, { "epoch": 0.0, "learning_rate": 9.99997081291072e-05, "loss": 1.5588, "step": 315 }, { "epoch": 0.0, "learning_rate": 9.999969439587746e-05, "loss": 1.596, "step": 320 }, { "epoch": 0.0, "learning_rate": 9.999968034694258e-05, "loss": 1.5618, "step": 325 }, { "epoch": 0.0, "learning_rate": 9.999966598230266e-05, "loss": 1.6504, "step": 330 }, { "epoch": 0.0, "learning_rate": 9.999965130195779e-05, "loss": 1.5724, "step": 335 }, { "epoch": 0.0, "learning_rate": 9.999963630590805e-05, "loss": 1.5604, "step": 340 }, { "epoch": 0.0, "learning_rate": 9.999962099415356e-05, "loss": 1.5777, "step": 345 }, { "epoch": 0.0, "learning_rate": 9.99996053666944e-05, "loss": 1.6312, "step": 350 }, { "epoch": 0.0, "learning_rate": 9.999958942353067e-05, "loss": 1.5696, "step": 355 }, { "epoch": 0.0, "learning_rate": 9.999957316466249e-05, "loss": 1.5721, "step": 360 }, { "epoch": 0.0, "learning_rate": 9.999955659008992e-05, "loss": 1.6266, "step": 365 }, { "epoch": 0.0, "learning_rate": 9.999953969981311e-05, "loss": 1.5786, "step": 370 }, { "epoch": 0.0, "learning_rate": 9.999952249383214e-05, "loss": 1.5544, "step": 375 }, { "epoch": 0.0, "learning_rate": 9.999950497214712e-05, "loss": 1.6046, "step": 380 }, { "epoch": 0.0, "learning_rate": 9.999948713475817e-05, "loss": 1.6058, "step": 385 }, { "epoch": 0.0, "learning_rate": 9.99994689816654e-05, "loss": 1.5973, "step": 390 }, { "epoch": 0.0, "learning_rate": 9.999945051286892e-05, "loss": 1.6181, "step": 395 }, { "epoch": 0.0, "learning_rate": 9.999943172836885e-05, "loss": 1.5921, "step": 400 }, { "epoch": 0.0, "learning_rate": 9.99994126281653e-05, "loss": 1.625, "step": 405 }, { "epoch": 0.0, "learning_rate": 9.999939321225842e-05, "loss": 1.5971, "step": 410 }, { "epoch": 0.0, "learning_rate": 9.999937348064829e-05, "loss": 1.5907, "step": 415 }, { "epoch": 0.0, "learning_rate": 9.999935343333508e-05, "loss": 1.5694, "step": 420 }, { "epoch": 0.0, "learning_rate": 9.999933307031887e-05, "loss": 1.6243, "step": 425 }, { "epoch": 0.0, "learning_rate": 9.999931239159983e-05, "loss": 1.5286, "step": 430 }, { "epoch": 0.0, "learning_rate": 9.999929139717806e-05, "loss": 1.5546, "step": 435 }, { "epoch": 0.0, "learning_rate": 9.999927008705372e-05, "loss": 1.6108, "step": 440 }, { "epoch": 0.0, "learning_rate": 9.99992484612269e-05, "loss": 1.4987, "step": 445 }, { "epoch": 0.0, "learning_rate": 9.999922651969779e-05, "loss": 1.5822, "step": 450 }, { "epoch": 0.0, "learning_rate": 9.99992042624665e-05, "loss": 1.4994, "step": 455 }, { "epoch": 0.0, "learning_rate": 9.999918168953317e-05, "loss": 1.5675, "step": 460 }, { "epoch": 0.0, "learning_rate": 9.999915880089796e-05, "loss": 1.5846, "step": 465 }, { "epoch": 0.0, "learning_rate": 9.999913559656097e-05, "loss": 1.5625, "step": 470 }, { "epoch": 0.0, "learning_rate": 9.999911207652242e-05, "loss": 1.6224, "step": 475 }, { "epoch": 0.0, "learning_rate": 9.999908824078239e-05, "loss": 1.5623, "step": 480 }, { "epoch": 0.0, "learning_rate": 9.999906408934107e-05, "loss": 1.6009, "step": 485 }, { "epoch": 0.0, "learning_rate": 9.999903962219859e-05, "loss": 1.5425, "step": 490 }, { "epoch": 0.0, "learning_rate": 9.999901483935512e-05, "loss": 1.6444, "step": 495 }, { "epoch": 0.0, "learning_rate": 9.99989897408108e-05, "loss": 1.5167, "step": 500 }, { "epoch": 0.0, "learning_rate": 9.999896432656581e-05, "loss": 1.556, "step": 505 }, { "epoch": 0.0, "learning_rate": 9.999893859662031e-05, "loss": 1.5834, "step": 510 }, { "epoch": 0.0, "learning_rate": 9.999891255097444e-05, "loss": 1.5508, "step": 515 }, { "epoch": 0.0, "learning_rate": 9.999888618962838e-05, "loss": 1.5247, "step": 520 }, { "epoch": 0.0, "learning_rate": 9.999885951258228e-05, "loss": 1.5769, "step": 525 }, { "epoch": 0.0, "learning_rate": 9.999883251983634e-05, "loss": 1.5263, "step": 530 }, { "epoch": 0.0, "learning_rate": 9.99988052113907e-05, "loss": 1.608, "step": 535 }, { "epoch": 0.0, "learning_rate": 9.999877758724556e-05, "loss": 1.5593, "step": 540 }, { "epoch": 0.0, "learning_rate": 9.999874964740105e-05, "loss": 1.5406, "step": 545 }, { "epoch": 0.0, "learning_rate": 9.99987213918574e-05, "loss": 1.5401, "step": 550 }, { "epoch": 0.0, "learning_rate": 9.999869282061476e-05, "loss": 1.5681, "step": 555 }, { "epoch": 0.0, "learning_rate": 9.999866393367331e-05, "loss": 1.5728, "step": 560 }, { "epoch": 0.0, "learning_rate": 9.999863473103324e-05, "loss": 1.5974, "step": 565 }, { "epoch": 0.0, "learning_rate": 9.999860521269473e-05, "loss": 1.5599, "step": 570 }, { "epoch": 0.0, "learning_rate": 9.999857537865795e-05, "loss": 1.5956, "step": 575 }, { "epoch": 0.0, "learning_rate": 9.999854522892314e-05, "loss": 1.5796, "step": 580 }, { "epoch": 0.0, "learning_rate": 9.999851476349042e-05, "loss": 1.6238, "step": 585 }, { "epoch": 0.0, "learning_rate": 9.999848398236005e-05, "loss": 1.618, "step": 590 }, { "epoch": 0.0, "learning_rate": 9.999845288553216e-05, "loss": 1.4921, "step": 595 }, { "epoch": 0.0, "learning_rate": 9.9998421473007e-05, "loss": 1.594, "step": 600 }, { "epoch": 0.0, "learning_rate": 9.999838974478475e-05, "loss": 1.561, "step": 605 }, { "epoch": 0.0, "learning_rate": 9.99983577008656e-05, "loss": 1.609, "step": 610 }, { "epoch": 0.0, "learning_rate": 9.999832534124976e-05, "loss": 1.5937, "step": 615 }, { "epoch": 0.0, "learning_rate": 9.999829266593744e-05, "loss": 1.5282, "step": 620 }, { "epoch": 0.0, "learning_rate": 9.999825967492884e-05, "loss": 1.5998, "step": 625 }, { "epoch": 0.0, "learning_rate": 9.999822636822416e-05, "loss": 1.5592, "step": 630 }, { "epoch": 0.0, "learning_rate": 9.999819274582363e-05, "loss": 1.5475, "step": 635 }, { "epoch": 0.0, "learning_rate": 9.999815880772745e-05, "loss": 1.6313, "step": 640 }, { "epoch": 0.0, "learning_rate": 9.999812455393582e-05, "loss": 1.6535, "step": 645 }, { "epoch": 0.0, "learning_rate": 9.9998089984449e-05, "loss": 1.5264, "step": 650 }, { "epoch": 0.0, "learning_rate": 9.999805509926716e-05, "loss": 1.5979, "step": 655 }, { "epoch": 0.0, "learning_rate": 9.999801989839055e-05, "loss": 1.5618, "step": 660 }, { "epoch": 0.0, "learning_rate": 9.999798438181938e-05, "loss": 1.6201, "step": 665 }, { "epoch": 0.0, "learning_rate": 9.999794854955388e-05, "loss": 1.5691, "step": 670 }, { "epoch": 0.0, "learning_rate": 9.999791240159426e-05, "loss": 1.6399, "step": 675 }, { "epoch": 0.0, "learning_rate": 9.999787593794079e-05, "loss": 1.5727, "step": 680 }, { "epoch": 0.0, "learning_rate": 9.999783915859364e-05, "loss": 1.5899, "step": 685 }, { "epoch": 0.0, "learning_rate": 9.999780206355309e-05, "loss": 1.5925, "step": 690 }, { "epoch": 0.0, "learning_rate": 9.999776465281936e-05, "loss": 1.5569, "step": 695 }, { "epoch": 0.0, "learning_rate": 9.999772692639268e-05, "loss": 1.5749, "step": 700 }, { "epoch": 0.0, "learning_rate": 9.99976888842733e-05, "loss": 1.5409, "step": 705 }, { "epoch": 0.0, "learning_rate": 9.999765052646145e-05, "loss": 1.5865, "step": 710 }, { "epoch": 0.0, "learning_rate": 9.999761185295738e-05, "loss": 1.5857, "step": 715 }, { "epoch": 0.0, "learning_rate": 9.999757286376131e-05, "loss": 1.6028, "step": 720 }, { "epoch": 0.0, "learning_rate": 9.999753355887351e-05, "loss": 1.5525, "step": 725 }, { "epoch": 0.0, "learning_rate": 9.999749393829425e-05, "loss": 1.5298, "step": 730 }, { "epoch": 0.0, "learning_rate": 9.999745400202373e-05, "loss": 1.5837, "step": 735 }, { "epoch": 0.0, "learning_rate": 9.999741375006223e-05, "loss": 1.609, "step": 740 }, { "epoch": 0.0, "learning_rate": 9.999737318241001e-05, "loss": 1.5891, "step": 745 }, { "epoch": 0.0, "learning_rate": 9.99973322990673e-05, "loss": 1.5623, "step": 750 }, { "epoch": 0.0, "learning_rate": 9.99972911000344e-05, "loss": 1.5996, "step": 755 }, { "epoch": 0.0, "learning_rate": 9.999724958531151e-05, "loss": 1.5489, "step": 760 }, { "epoch": 0.0, "learning_rate": 9.999720775489896e-05, "loss": 1.6058, "step": 765 }, { "epoch": 0.0, "learning_rate": 9.999716560879696e-05, "loss": 1.5234, "step": 770 }, { "epoch": 0.0, "learning_rate": 9.99971231470058e-05, "loss": 1.5619, "step": 775 }, { "epoch": 0.0, "learning_rate": 9.999708036952576e-05, "loss": 1.5611, "step": 780 }, { "epoch": 0.0, "learning_rate": 9.999703727635708e-05, "loss": 1.5747, "step": 785 }, { "epoch": 0.0, "learning_rate": 9.999699386750005e-05, "loss": 1.5776, "step": 790 }, { "epoch": 0.0, "learning_rate": 9.999695014295494e-05, "loss": 1.5548, "step": 795 }, { "epoch": 0.0, "learning_rate": 9.999690610272204e-05, "loss": 1.5526, "step": 800 }, { "epoch": 0.0, "learning_rate": 9.999686174680161e-05, "loss": 1.5164, "step": 805 }, { "epoch": 0.0, "learning_rate": 9.999681707519393e-05, "loss": 1.4783, "step": 810 }, { "epoch": 0.0, "learning_rate": 9.999677208789928e-05, "loss": 1.5452, "step": 815 }, { "epoch": 0.0, "learning_rate": 9.999672678491796e-05, "loss": 1.5135, "step": 820 }, { "epoch": 0.0, "learning_rate": 9.999668116625025e-05, "loss": 1.5256, "step": 825 }, { "epoch": 0.0, "learning_rate": 9.999663523189644e-05, "loss": 1.5538, "step": 830 }, { "epoch": 0.0, "learning_rate": 9.999658898185681e-05, "loss": 1.6071, "step": 835 }, { "epoch": 0.0, "learning_rate": 9.999654241613166e-05, "loss": 1.5695, "step": 840 }, { "epoch": 0.0, "learning_rate": 9.999649553472128e-05, "loss": 1.5224, "step": 845 }, { "epoch": 0.0, "learning_rate": 9.999644833762599e-05, "loss": 1.5752, "step": 850 }, { "epoch": 0.0, "learning_rate": 9.999640082484602e-05, "loss": 1.5129, "step": 855 }, { "epoch": 0.0, "learning_rate": 9.999635299638174e-05, "loss": 1.569, "step": 860 }, { "epoch": 0.0, "learning_rate": 9.999630485223343e-05, "loss": 1.5718, "step": 865 }, { "epoch": 0.0, "learning_rate": 9.99962563924014e-05, "loss": 1.5711, "step": 870 }, { "epoch": 0.0, "learning_rate": 9.999620761688595e-05, "loss": 1.5173, "step": 875 }, { "epoch": 0.0, "learning_rate": 9.999615852568738e-05, "loss": 1.6075, "step": 880 }, { "epoch": 0.0, "learning_rate": 9.999610911880599e-05, "loss": 1.549, "step": 885 }, { "epoch": 0.0, "learning_rate": 9.999605939624213e-05, "loss": 1.5395, "step": 890 }, { "epoch": 0.0, "learning_rate": 9.999600935799608e-05, "loss": 1.5699, "step": 895 }, { "epoch": 0.0, "learning_rate": 9.999595900406817e-05, "loss": 1.6032, "step": 900 }, { "epoch": 0.0, "learning_rate": 9.999590833445871e-05, "loss": 1.5008, "step": 905 }, { "epoch": 0.0, "learning_rate": 9.999585734916803e-05, "loss": 1.5711, "step": 910 }, { "epoch": 0.0, "learning_rate": 9.999580604819644e-05, "loss": 1.5846, "step": 915 }, { "epoch": 0.0, "learning_rate": 9.99957544315443e-05, "loss": 1.5641, "step": 920 }, { "epoch": 0.0, "learning_rate": 9.999570249921189e-05, "loss": 1.5393, "step": 925 }, { "epoch": 0.0, "learning_rate": 9.999565025119955e-05, "loss": 1.5406, "step": 930 }, { "epoch": 0.0, "learning_rate": 9.999559768750761e-05, "loss": 1.6012, "step": 935 }, { "epoch": 0.0, "learning_rate": 9.999554480813642e-05, "loss": 1.5826, "step": 940 }, { "epoch": 0.0, "learning_rate": 9.99954916130863e-05, "loss": 1.5666, "step": 945 }, { "epoch": 0.0, "learning_rate": 9.999543810235758e-05, "loss": 1.5688, "step": 950 }, { "epoch": 0.0, "learning_rate": 9.999538427595061e-05, "loss": 1.5858, "step": 955 }, { "epoch": 0.0, "learning_rate": 9.999533013386573e-05, "loss": 1.539, "step": 960 }, { "epoch": 0.0, "learning_rate": 9.999527567610328e-05, "loss": 1.5403, "step": 965 }, { "epoch": 0.0, "learning_rate": 9.999522090266357e-05, "loss": 1.5762, "step": 970 }, { "epoch": 0.0, "learning_rate": 9.999516581354701e-05, "loss": 1.5501, "step": 975 }, { "epoch": 0.0, "learning_rate": 9.99951104087539e-05, "loss": 1.5513, "step": 980 }, { "epoch": 0.0, "learning_rate": 9.99950546882846e-05, "loss": 1.5601, "step": 985 }, { "epoch": 0.01, "learning_rate": 9.999499865213948e-05, "loss": 1.5821, "step": 990 }, { "epoch": 0.01, "learning_rate": 9.999494230031887e-05, "loss": 1.55, "step": 995 }, { "epoch": 0.01, "learning_rate": 9.999488563282313e-05, "loss": 1.5428, "step": 1000 }, { "epoch": 0.01, "learning_rate": 9.999482864965264e-05, "loss": 1.6102, "step": 1005 }, { "epoch": 0.01, "learning_rate": 9.999477135080772e-05, "loss": 1.5822, "step": 1010 }, { "epoch": 0.01, "learning_rate": 9.999471373628877e-05, "loss": 1.5751, "step": 1015 }, { "epoch": 0.01, "learning_rate": 9.999465580609615e-05, "loss": 1.5276, "step": 1020 }, { "epoch": 0.01, "learning_rate": 9.99945975602302e-05, "loss": 1.5953, "step": 1025 }, { "epoch": 0.01, "learning_rate": 9.999453899869129e-05, "loss": 1.5923, "step": 1030 }, { "epoch": 0.01, "learning_rate": 9.999448012147982e-05, "loss": 1.4987, "step": 1035 }, { "epoch": 0.01, "learning_rate": 9.999442092859614e-05, "loss": 1.6472, "step": 1040 }, { "epoch": 0.01, "learning_rate": 9.999436142004062e-05, "loss": 1.5915, "step": 1045 }, { "epoch": 0.01, "learning_rate": 9.999430159581365e-05, "loss": 1.566, "step": 1050 }, { "epoch": 0.01, "learning_rate": 9.999424145591561e-05, "loss": 1.5354, "step": 1055 }, { "epoch": 0.01, "learning_rate": 9.999418100034685e-05, "loss": 1.5651, "step": 1060 }, { "epoch": 0.01, "learning_rate": 9.999412022910779e-05, "loss": 1.5543, "step": 1065 }, { "epoch": 0.01, "learning_rate": 9.999405914219878e-05, "loss": 1.5432, "step": 1070 }, { "epoch": 0.01, "learning_rate": 9.999399773962024e-05, "loss": 1.5405, "step": 1075 }, { "epoch": 0.01, "learning_rate": 9.999393602137253e-05, "loss": 1.4827, "step": 1080 }, { "epoch": 0.01, "learning_rate": 9.999387398745605e-05, "loss": 1.567, "step": 1085 }, { "epoch": 0.01, "learning_rate": 9.999381163787119e-05, "loss": 1.5851, "step": 1090 }, { "epoch": 0.01, "learning_rate": 9.999374897261834e-05, "loss": 1.5537, "step": 1095 }, { "epoch": 0.01, "learning_rate": 9.999368599169791e-05, "loss": 1.5684, "step": 1100 }, { "epoch": 0.01, "learning_rate": 9.999362269511028e-05, "loss": 1.6324, "step": 1105 }, { "epoch": 0.01, "learning_rate": 9.999355908285586e-05, "loss": 1.5908, "step": 1110 }, { "epoch": 0.01, "learning_rate": 9.999349515493504e-05, "loss": 1.5818, "step": 1115 }, { "epoch": 0.01, "learning_rate": 9.999343091134825e-05, "loss": 1.5015, "step": 1120 }, { "epoch": 0.01, "learning_rate": 9.999336635209587e-05, "loss": 1.5314, "step": 1125 }, { "epoch": 0.01, "learning_rate": 9.999330147717831e-05, "loss": 1.6139, "step": 1130 }, { "epoch": 0.01, "learning_rate": 9.9993236286596e-05, "loss": 1.5631, "step": 1135 }, { "epoch": 0.01, "learning_rate": 9.999317078034934e-05, "loss": 1.5393, "step": 1140 }, { "epoch": 0.01, "learning_rate": 9.999310495843873e-05, "loss": 1.5802, "step": 1145 }, { "epoch": 0.01, "learning_rate": 9.999303882086459e-05, "loss": 1.5265, "step": 1150 }, { "epoch": 0.01, "learning_rate": 9.999297236762736e-05, "loss": 1.5805, "step": 1155 }, { "epoch": 0.01, "learning_rate": 9.999290559872742e-05, "loss": 1.4904, "step": 1160 }, { "epoch": 0.01, "learning_rate": 9.999283851416525e-05, "loss": 1.522, "step": 1165 }, { "epoch": 0.01, "learning_rate": 9.99927711139412e-05, "loss": 1.5492, "step": 1170 }, { "epoch": 0.01, "learning_rate": 9.999270339805577e-05, "loss": 1.5396, "step": 1175 }, { "epoch": 0.01, "learning_rate": 9.999263536650934e-05, "loss": 1.5411, "step": 1180 }, { "epoch": 0.01, "learning_rate": 9.999256701930235e-05, "loss": 1.581, "step": 1185 }, { "epoch": 0.01, "learning_rate": 9.999249835643522e-05, "loss": 1.6091, "step": 1190 }, { "epoch": 0.01, "learning_rate": 9.999242937790842e-05, "loss": 1.5867, "step": 1195 }, { "epoch": 0.01, "learning_rate": 9.999236008372235e-05, "loss": 1.5334, "step": 1200 }, { "epoch": 0.01, "learning_rate": 9.999229047387746e-05, "loss": 1.5239, "step": 1205 }, { "epoch": 0.01, "learning_rate": 9.999222054837419e-05, "loss": 1.5217, "step": 1210 }, { "epoch": 0.01, "learning_rate": 9.999215030721298e-05, "loss": 1.5833, "step": 1215 }, { "epoch": 0.01, "learning_rate": 9.999207975039429e-05, "loss": 1.5455, "step": 1220 }, { "epoch": 0.01, "learning_rate": 9.999200887791853e-05, "loss": 1.482, "step": 1225 }, { "epoch": 0.01, "learning_rate": 9.999193768978617e-05, "loss": 1.5395, "step": 1230 }, { "epoch": 0.01, "learning_rate": 9.999186618599767e-05, "loss": 1.5382, "step": 1235 }, { "epoch": 0.01, "learning_rate": 9.999179436655346e-05, "loss": 1.5346, "step": 1240 }, { "epoch": 0.01, "learning_rate": 9.999172223145399e-05, "loss": 1.4847, "step": 1245 }, { "epoch": 0.01, "learning_rate": 9.999164978069974e-05, "loss": 1.5358, "step": 1250 }, { "epoch": 0.01, "learning_rate": 9.999157701429116e-05, "loss": 1.5671, "step": 1255 }, { "epoch": 0.01, "learning_rate": 9.99915039322287e-05, "loss": 1.5468, "step": 1260 }, { "epoch": 0.01, "learning_rate": 9.999143053451282e-05, "loss": 1.6142, "step": 1265 }, { "epoch": 0.01, "learning_rate": 9.9991356821144e-05, "loss": 1.544, "step": 1270 }, { "epoch": 0.01, "learning_rate": 9.999128279212268e-05, "loss": 1.5805, "step": 1275 }, { "epoch": 0.01, "learning_rate": 9.999120844744935e-05, "loss": 1.6026, "step": 1280 }, { "epoch": 0.01, "learning_rate": 9.999113378712447e-05, "loss": 1.5879, "step": 1285 }, { "epoch": 0.01, "learning_rate": 9.999105881114852e-05, "loss": 1.5617, "step": 1290 }, { "epoch": 0.01, "learning_rate": 9.999098351952195e-05, "loss": 1.5493, "step": 1295 }, { "epoch": 0.01, "learning_rate": 9.999090791224527e-05, "loss": 1.4812, "step": 1300 }, { "epoch": 0.01, "learning_rate": 9.999083198931893e-05, "loss": 1.5836, "step": 1305 }, { "epoch": 0.01, "learning_rate": 9.999075575074341e-05, "loss": 1.5515, "step": 1310 }, { "epoch": 0.01, "learning_rate": 9.999067919651921e-05, "loss": 1.5362, "step": 1315 }, { "epoch": 0.01, "learning_rate": 9.999060232664681e-05, "loss": 1.6015, "step": 1320 }, { "epoch": 0.01, "learning_rate": 9.999052514112668e-05, "loss": 1.6118, "step": 1325 }, { "epoch": 0.01, "learning_rate": 9.999044763995932e-05, "loss": 1.5226, "step": 1330 }, { "epoch": 0.01, "learning_rate": 9.999036982314521e-05, "loss": 1.5084, "step": 1335 }, { "epoch": 0.01, "learning_rate": 9.999029169068485e-05, "loss": 1.5113, "step": 1340 }, { "epoch": 0.01, "learning_rate": 9.999021324257873e-05, "loss": 1.5519, "step": 1345 }, { "epoch": 0.01, "learning_rate": 9.999013447882735e-05, "loss": 1.5396, "step": 1350 }, { "epoch": 0.01, "learning_rate": 9.999005539943119e-05, "loss": 1.5679, "step": 1355 }, { "epoch": 0.01, "learning_rate": 9.998997600439077e-05, "loss": 1.5439, "step": 1360 }, { "epoch": 0.01, "learning_rate": 9.998989629370659e-05, "loss": 1.52, "step": 1365 }, { "epoch": 0.01, "learning_rate": 9.998981626737914e-05, "loss": 1.558, "step": 1370 }, { "epoch": 0.01, "learning_rate": 9.998973592540892e-05, "loss": 1.4872, "step": 1375 }, { "epoch": 0.01, "learning_rate": 9.998965526779647e-05, "loss": 1.5766, "step": 1380 }, { "epoch": 0.01, "learning_rate": 9.998957429454227e-05, "loss": 1.5765, "step": 1385 }, { "epoch": 0.01, "learning_rate": 9.998949300564684e-05, "loss": 1.5404, "step": 1390 }, { "epoch": 0.01, "learning_rate": 9.998941140111068e-05, "loss": 1.55, "step": 1395 }, { "epoch": 0.01, "learning_rate": 9.998932948093434e-05, "loss": 1.5632, "step": 1400 }, { "epoch": 0.01, "learning_rate": 9.99892472451183e-05, "loss": 1.5557, "step": 1405 }, { "epoch": 0.01, "learning_rate": 9.998916469366311e-05, "loss": 1.5449, "step": 1410 }, { "epoch": 0.01, "learning_rate": 9.998908182656925e-05, "loss": 1.5139, "step": 1415 }, { "epoch": 0.01, "learning_rate": 9.998899864383728e-05, "loss": 1.5281, "step": 1420 }, { "epoch": 0.01, "learning_rate": 9.998891514546773e-05, "loss": 1.5736, "step": 1425 }, { "epoch": 0.01, "learning_rate": 9.998883133146111e-05, "loss": 1.5173, "step": 1430 }, { "epoch": 0.01, "learning_rate": 9.998874720181795e-05, "loss": 1.5877, "step": 1435 }, { "epoch": 0.01, "learning_rate": 9.998866275653877e-05, "loss": 1.5667, "step": 1440 }, { "epoch": 0.01, "learning_rate": 9.99885779956241e-05, "loss": 1.5054, "step": 1445 }, { "epoch": 0.01, "learning_rate": 9.998849291907453e-05, "loss": 1.556, "step": 1450 }, { "epoch": 0.01, "learning_rate": 9.998840752689053e-05, "loss": 1.5338, "step": 1455 }, { "epoch": 0.01, "learning_rate": 9.998832181907267e-05, "loss": 1.5424, "step": 1460 }, { "epoch": 0.01, "learning_rate": 9.99882357956215e-05, "loss": 1.5117, "step": 1465 }, { "epoch": 0.01, "learning_rate": 9.998814945653754e-05, "loss": 1.531, "step": 1470 }, { "epoch": 0.01, "learning_rate": 9.998806280182135e-05, "loss": 1.5302, "step": 1475 }, { "epoch": 0.01, "learning_rate": 9.998797583147348e-05, "loss": 1.5085, "step": 1480 }, { "epoch": 0.01, "learning_rate": 9.998788854549447e-05, "loss": 1.5074, "step": 1485 }, { "epoch": 0.01, "learning_rate": 9.998780094388487e-05, "loss": 1.5279, "step": 1490 }, { "epoch": 0.01, "learning_rate": 9.998771302664524e-05, "loss": 1.5998, "step": 1495 }, { "epoch": 0.01, "learning_rate": 9.998762479377613e-05, "loss": 1.5471, "step": 1500 }, { "epoch": 0.01, "learning_rate": 9.99875362452781e-05, "loss": 1.5421, "step": 1505 }, { "epoch": 0.01, "learning_rate": 9.998744738115171e-05, "loss": 1.5692, "step": 1510 }, { "epoch": 0.01, "learning_rate": 9.99873582013975e-05, "loss": 1.6098, "step": 1515 }, { "epoch": 0.01, "learning_rate": 9.998726870601609e-05, "loss": 1.5027, "step": 1520 }, { "epoch": 0.01, "learning_rate": 9.998717889500798e-05, "loss": 1.5163, "step": 1525 }, { "epoch": 0.01, "learning_rate": 9.998708876837377e-05, "loss": 1.5064, "step": 1530 }, { "epoch": 0.01, "learning_rate": 9.998699832611403e-05, "loss": 1.5468, "step": 1535 }, { "epoch": 0.01, "learning_rate": 9.998690756822931e-05, "loss": 1.5748, "step": 1540 }, { "epoch": 0.01, "learning_rate": 9.998681649472021e-05, "loss": 1.5112, "step": 1545 }, { "epoch": 0.01, "learning_rate": 9.99867251055873e-05, "loss": 1.5103, "step": 1550 }, { "epoch": 0.01, "learning_rate": 9.998663340083115e-05, "loss": 1.5412, "step": 1555 }, { "epoch": 0.01, "learning_rate": 9.998654138045231e-05, "loss": 1.5342, "step": 1560 }, { "epoch": 0.01, "learning_rate": 9.998644904445143e-05, "loss": 1.5451, "step": 1565 }, { "epoch": 0.01, "learning_rate": 9.998635639282903e-05, "loss": 1.5328, "step": 1570 }, { "epoch": 0.01, "learning_rate": 9.998626342558571e-05, "loss": 1.5288, "step": 1575 }, { "epoch": 0.01, "learning_rate": 9.998617014272208e-05, "loss": 1.5191, "step": 1580 }, { "epoch": 0.01, "learning_rate": 9.998607654423871e-05, "loss": 1.518, "step": 1585 }, { "epoch": 0.01, "learning_rate": 9.99859826301362e-05, "loss": 1.5382, "step": 1590 }, { "epoch": 0.01, "learning_rate": 9.998588840041512e-05, "loss": 1.5361, "step": 1595 }, { "epoch": 0.01, "learning_rate": 9.99857938550761e-05, "loss": 1.4716, "step": 1600 }, { "epoch": 0.01, "learning_rate": 9.998569899411972e-05, "loss": 1.5346, "step": 1605 }, { "epoch": 0.01, "learning_rate": 9.998560381754658e-05, "loss": 1.5335, "step": 1610 }, { "epoch": 0.01, "learning_rate": 9.998550832535727e-05, "loss": 1.571, "step": 1615 }, { "epoch": 0.01, "learning_rate": 9.99854125175524e-05, "loss": 1.5688, "step": 1620 }, { "epoch": 0.01, "learning_rate": 9.99853163941326e-05, "loss": 1.5505, "step": 1625 }, { "epoch": 0.01, "learning_rate": 9.998521995509845e-05, "loss": 1.5353, "step": 1630 }, { "epoch": 0.01, "learning_rate": 9.998512320045055e-05, "loss": 1.5067, "step": 1635 }, { "epoch": 0.01, "learning_rate": 9.998502613018952e-05, "loss": 1.5349, "step": 1640 }, { "epoch": 0.01, "learning_rate": 9.998492874431599e-05, "loss": 1.567, "step": 1645 }, { "epoch": 0.01, "learning_rate": 9.998483104283056e-05, "loss": 1.511, "step": 1650 }, { "epoch": 0.01, "learning_rate": 9.998473302573385e-05, "loss": 1.5579, "step": 1655 }, { "epoch": 0.01, "learning_rate": 9.998463469302647e-05, "loss": 1.5335, "step": 1660 }, { "epoch": 0.01, "learning_rate": 9.998453604470905e-05, "loss": 1.5173, "step": 1665 }, { "epoch": 0.01, "learning_rate": 9.998443708078222e-05, "loss": 1.5424, "step": 1670 }, { "epoch": 0.01, "learning_rate": 9.99843378012466e-05, "loss": 1.5325, "step": 1675 }, { "epoch": 0.01, "learning_rate": 9.998423820610282e-05, "loss": 1.5752, "step": 1680 }, { "epoch": 0.01, "learning_rate": 9.998413829535147e-05, "loss": 1.5561, "step": 1685 }, { "epoch": 0.01, "learning_rate": 9.998403806899324e-05, "loss": 1.5617, "step": 1690 }, { "epoch": 0.01, "learning_rate": 9.998393752702873e-05, "loss": 1.5346, "step": 1695 }, { "epoch": 0.01, "learning_rate": 9.998383666945859e-05, "loss": 1.5467, "step": 1700 }, { "epoch": 0.01, "learning_rate": 9.998373549628343e-05, "loss": 1.4466, "step": 1705 }, { "epoch": 0.01, "learning_rate": 9.998363400750392e-05, "loss": 1.5568, "step": 1710 }, { "epoch": 0.01, "learning_rate": 9.998353220312069e-05, "loss": 1.5046, "step": 1715 }, { "epoch": 0.01, "learning_rate": 9.998343008313437e-05, "loss": 1.5542, "step": 1720 }, { "epoch": 0.01, "learning_rate": 9.99833276475456e-05, "loss": 1.5236, "step": 1725 }, { "epoch": 0.01, "learning_rate": 9.998322489635507e-05, "loss": 1.4776, "step": 1730 }, { "epoch": 0.01, "learning_rate": 9.99831218295634e-05, "loss": 1.614, "step": 1735 }, { "epoch": 0.01, "learning_rate": 9.998301844717123e-05, "loss": 1.5401, "step": 1740 }, { "epoch": 0.01, "learning_rate": 9.998291474917923e-05, "loss": 1.515, "step": 1745 }, { "epoch": 0.01, "learning_rate": 9.998281073558804e-05, "loss": 1.5409, "step": 1750 }, { "epoch": 0.01, "learning_rate": 9.998270640639833e-05, "loss": 1.567, "step": 1755 }, { "epoch": 0.01, "learning_rate": 9.998260176161076e-05, "loss": 1.5215, "step": 1760 }, { "epoch": 0.01, "learning_rate": 9.998249680122599e-05, "loss": 1.5309, "step": 1765 }, { "epoch": 0.01, "learning_rate": 9.998239152524467e-05, "loss": 1.4966, "step": 1770 }, { "epoch": 0.01, "learning_rate": 9.998228593366747e-05, "loss": 1.5245, "step": 1775 }, { "epoch": 0.01, "learning_rate": 9.998218002649506e-05, "loss": 1.4797, "step": 1780 }, { "epoch": 0.01, "learning_rate": 9.998207380372812e-05, "loss": 1.5137, "step": 1785 }, { "epoch": 0.01, "learning_rate": 9.99819672653673e-05, "loss": 1.5058, "step": 1790 }, { "epoch": 0.01, "learning_rate": 9.998186041141329e-05, "loss": 1.6036, "step": 1795 }, { "epoch": 0.01, "learning_rate": 9.998175324186674e-05, "loss": 1.5146, "step": 1800 }, { "epoch": 0.01, "learning_rate": 9.998164575672835e-05, "loss": 1.5022, "step": 1805 }, { "epoch": 0.01, "learning_rate": 9.998153795599879e-05, "loss": 1.5059, "step": 1810 }, { "epoch": 0.01, "learning_rate": 9.998142983967875e-05, "loss": 1.5061, "step": 1815 }, { "epoch": 0.01, "learning_rate": 9.99813214077689e-05, "loss": 1.5005, "step": 1820 }, { "epoch": 0.01, "learning_rate": 9.998121266026993e-05, "loss": 1.5239, "step": 1825 }, { "epoch": 0.01, "learning_rate": 9.998110359718253e-05, "loss": 1.5135, "step": 1830 }, { "epoch": 0.01, "learning_rate": 9.998099421850737e-05, "loss": 1.547, "step": 1835 }, { "epoch": 0.01, "learning_rate": 9.998088452424516e-05, "loss": 1.5184, "step": 1840 }, { "epoch": 0.01, "learning_rate": 9.99807745143966e-05, "loss": 1.5437, "step": 1845 }, { "epoch": 0.01, "learning_rate": 9.998066418896238e-05, "loss": 1.5234, "step": 1850 }, { "epoch": 0.01, "learning_rate": 9.998055354794316e-05, "loss": 1.5954, "step": 1855 }, { "epoch": 0.01, "learning_rate": 9.998044259133969e-05, "loss": 1.5129, "step": 1860 }, { "epoch": 0.01, "learning_rate": 9.998033131915266e-05, "loss": 1.5716, "step": 1865 }, { "epoch": 0.01, "learning_rate": 9.998021973138274e-05, "loss": 1.4783, "step": 1870 }, { "epoch": 0.01, "learning_rate": 9.998010782803066e-05, "loss": 1.5515, "step": 1875 }, { "epoch": 0.01, "learning_rate": 9.997999560909712e-05, "loss": 1.543, "step": 1880 }, { "epoch": 0.01, "learning_rate": 9.997988307458283e-05, "loss": 1.4829, "step": 1885 }, { "epoch": 0.01, "learning_rate": 9.99797702244885e-05, "loss": 1.4991, "step": 1890 }, { "epoch": 0.01, "learning_rate": 9.997965705881485e-05, "loss": 1.5513, "step": 1895 }, { "epoch": 0.01, "learning_rate": 9.99795435775626e-05, "loss": 1.4615, "step": 1900 }, { "epoch": 0.01, "learning_rate": 9.997942978073243e-05, "loss": 1.5439, "step": 1905 }, { "epoch": 0.01, "learning_rate": 9.99793156683251e-05, "loss": 1.5779, "step": 1910 }, { "epoch": 0.01, "learning_rate": 9.997920124034133e-05, "loss": 1.5227, "step": 1915 }, { "epoch": 0.01, "learning_rate": 9.99790864967818e-05, "loss": 1.5125, "step": 1920 }, { "epoch": 0.01, "learning_rate": 9.997897143764727e-05, "loss": 1.4916, "step": 1925 }, { "epoch": 0.01, "learning_rate": 9.997885606293844e-05, "loss": 1.544, "step": 1930 }, { "epoch": 0.01, "learning_rate": 9.997874037265608e-05, "loss": 1.5055, "step": 1935 }, { "epoch": 0.01, "learning_rate": 9.99786243668009e-05, "loss": 1.4588, "step": 1940 }, { "epoch": 0.01, "learning_rate": 9.99785080453736e-05, "loss": 1.5419, "step": 1945 }, { "epoch": 0.01, "learning_rate": 9.997839140837497e-05, "loss": 1.5481, "step": 1950 }, { "epoch": 0.01, "learning_rate": 9.997827445580572e-05, "loss": 1.6031, "step": 1955 }, { "epoch": 0.01, "learning_rate": 9.997815718766658e-05, "loss": 1.5483, "step": 1960 }, { "epoch": 0.01, "learning_rate": 9.99780396039583e-05, "loss": 1.5523, "step": 1965 }, { "epoch": 0.01, "learning_rate": 9.997792170468162e-05, "loss": 1.541, "step": 1970 }, { "epoch": 0.01, "learning_rate": 9.997780348983728e-05, "loss": 1.5834, "step": 1975 }, { "epoch": 0.01, "learning_rate": 9.997768495942605e-05, "loss": 1.5626, "step": 1980 }, { "epoch": 0.01, "learning_rate": 9.997756611344864e-05, "loss": 1.5052, "step": 1985 }, { "epoch": 0.01, "learning_rate": 9.997744695190583e-05, "loss": 1.5214, "step": 1990 }, { "epoch": 0.01, "learning_rate": 9.997732747479837e-05, "loss": 1.496, "step": 1995 }, { "epoch": 0.01, "learning_rate": 9.997720768212701e-05, "loss": 1.5063, "step": 2000 }, { "epoch": 0.01, "learning_rate": 9.99770875738925e-05, "loss": 1.5716, "step": 2005 }, { "epoch": 0.01, "learning_rate": 9.997696715009558e-05, "loss": 1.491, "step": 2010 }, { "epoch": 0.01, "learning_rate": 9.997684641073705e-05, "loss": 1.5733, "step": 2015 }, { "epoch": 0.01, "learning_rate": 9.997672535581768e-05, "loss": 1.5072, "step": 2020 }, { "epoch": 0.01, "learning_rate": 9.997660398533818e-05, "loss": 1.5445, "step": 2025 }, { "epoch": 0.01, "learning_rate": 9.997648229929935e-05, "loss": 1.5033, "step": 2030 }, { "epoch": 0.01, "learning_rate": 9.997636029770197e-05, "loss": 1.5877, "step": 2035 }, { "epoch": 0.01, "learning_rate": 9.997623798054679e-05, "loss": 1.5401, "step": 2040 }, { "epoch": 0.01, "learning_rate": 9.997611534783456e-05, "loss": 1.518, "step": 2045 }, { "epoch": 0.01, "learning_rate": 9.99759923995661e-05, "loss": 1.5661, "step": 2050 }, { "epoch": 0.01, "learning_rate": 9.997586913574217e-05, "loss": 1.6024, "step": 2055 }, { "epoch": 0.01, "learning_rate": 9.997574555636356e-05, "loss": 1.5285, "step": 2060 }, { "epoch": 0.01, "learning_rate": 9.997562166143102e-05, "loss": 1.5251, "step": 2065 }, { "epoch": 0.01, "learning_rate": 9.997549745094535e-05, "loss": 1.513, "step": 2070 }, { "epoch": 0.01, "learning_rate": 9.997537292490734e-05, "loss": 1.5422, "step": 2075 }, { "epoch": 0.01, "learning_rate": 9.997524808331775e-05, "loss": 1.5595, "step": 2080 }, { "epoch": 0.01, "learning_rate": 9.99751229261774e-05, "loss": 1.6162, "step": 2085 }, { "epoch": 0.01, "learning_rate": 9.997499745348708e-05, "loss": 1.4848, "step": 2090 }, { "epoch": 0.01, "learning_rate": 9.997487166524755e-05, "loss": 1.5274, "step": 2095 }, { "epoch": 0.01, "learning_rate": 9.997474556145963e-05, "loss": 1.5433, "step": 2100 }, { "epoch": 0.01, "learning_rate": 9.997461914212411e-05, "loss": 1.5322, "step": 2105 }, { "epoch": 0.01, "learning_rate": 9.997449240724179e-05, "loss": 1.5405, "step": 2110 }, { "epoch": 0.01, "learning_rate": 9.997436535681348e-05, "loss": 1.5629, "step": 2115 }, { "epoch": 0.01, "learning_rate": 9.997423799083995e-05, "loss": 1.5265, "step": 2120 }, { "epoch": 0.01, "learning_rate": 9.997411030932205e-05, "loss": 1.5918, "step": 2125 }, { "epoch": 0.01, "learning_rate": 9.997398231226055e-05, "loss": 1.513, "step": 2130 }, { "epoch": 0.01, "learning_rate": 9.997385399965627e-05, "loss": 1.5206, "step": 2135 }, { "epoch": 0.01, "learning_rate": 9.997372537151002e-05, "loss": 1.5626, "step": 2140 }, { "epoch": 0.01, "learning_rate": 9.99735964278226e-05, "loss": 1.5216, "step": 2145 }, { "epoch": 0.01, "learning_rate": 9.997346716859486e-05, "loss": 1.4917, "step": 2150 }, { "epoch": 0.01, "learning_rate": 9.997333759382757e-05, "loss": 1.5318, "step": 2155 }, { "epoch": 0.01, "learning_rate": 9.997320770352159e-05, "loss": 1.5348, "step": 2160 }, { "epoch": 0.01, "learning_rate": 9.997307749767771e-05, "loss": 1.5397, "step": 2165 }, { "epoch": 0.01, "learning_rate": 9.997294697629676e-05, "loss": 1.5519, "step": 2170 }, { "epoch": 0.01, "learning_rate": 9.997281613937956e-05, "loss": 1.5524, "step": 2175 }, { "epoch": 0.01, "learning_rate": 9.997268498692696e-05, "loss": 1.5372, "step": 2180 }, { "epoch": 0.01, "learning_rate": 9.997255351893976e-05, "loss": 1.6459, "step": 2185 }, { "epoch": 0.01, "learning_rate": 9.997242173541882e-05, "loss": 1.5363, "step": 2190 }, { "epoch": 0.01, "learning_rate": 9.997228963636494e-05, "loss": 1.5172, "step": 2195 }, { "epoch": 0.01, "learning_rate": 9.997215722177896e-05, "loss": 1.5398, "step": 2200 }, { "epoch": 0.01, "learning_rate": 9.997202449166172e-05, "loss": 1.5388, "step": 2205 }, { "epoch": 0.01, "learning_rate": 9.997189144601407e-05, "loss": 1.5532, "step": 2210 }, { "epoch": 0.01, "learning_rate": 9.997175808483686e-05, "loss": 1.5264, "step": 2215 }, { "epoch": 0.01, "learning_rate": 9.997162440813088e-05, "loss": 1.5251, "step": 2220 }, { "epoch": 0.01, "learning_rate": 9.997149041589704e-05, "loss": 1.5257, "step": 2225 }, { "epoch": 0.01, "learning_rate": 9.997135610813613e-05, "loss": 1.4777, "step": 2230 }, { "epoch": 0.01, "learning_rate": 9.997122148484902e-05, "loss": 1.5145, "step": 2235 }, { "epoch": 0.01, "learning_rate": 9.99710865460366e-05, "loss": 1.5228, "step": 2240 }, { "epoch": 0.01, "learning_rate": 9.997095129169965e-05, "loss": 1.4485, "step": 2245 }, { "epoch": 0.01, "learning_rate": 9.997081572183907e-05, "loss": 1.5493, "step": 2250 }, { "epoch": 0.01, "learning_rate": 9.997067983645569e-05, "loss": 1.5696, "step": 2255 }, { "epoch": 0.01, "learning_rate": 9.99705436355504e-05, "loss": 1.5178, "step": 2260 }, { "epoch": 0.01, "learning_rate": 9.997040711912402e-05, "loss": 1.502, "step": 2265 }, { "epoch": 0.01, "learning_rate": 9.997027028717745e-05, "loss": 1.5349, "step": 2270 }, { "epoch": 0.01, "learning_rate": 9.997013313971154e-05, "loss": 1.5049, "step": 2275 }, { "epoch": 0.01, "learning_rate": 9.996999567672716e-05, "loss": 1.5232, "step": 2280 }, { "epoch": 0.01, "learning_rate": 9.996985789822515e-05, "loss": 1.5508, "step": 2285 }, { "epoch": 0.01, "learning_rate": 9.996971980420642e-05, "loss": 1.5511, "step": 2290 }, { "epoch": 0.01, "learning_rate": 9.99695813946718e-05, "loss": 1.5355, "step": 2295 }, { "epoch": 0.01, "learning_rate": 9.996944266962222e-05, "loss": 1.5373, "step": 2300 }, { "epoch": 0.01, "learning_rate": 9.99693036290585e-05, "loss": 1.563, "step": 2305 }, { "epoch": 0.01, "learning_rate": 9.996916427298155e-05, "loss": 1.5324, "step": 2310 }, { "epoch": 0.01, "learning_rate": 9.996902460139226e-05, "loss": 1.5672, "step": 2315 }, { "epoch": 0.01, "learning_rate": 9.996888461429148e-05, "loss": 1.5321, "step": 2320 }, { "epoch": 0.01, "learning_rate": 9.996874431168008e-05, "loss": 1.5211, "step": 2325 }, { "epoch": 0.01, "learning_rate": 9.9968603693559e-05, "loss": 1.4997, "step": 2330 }, { "epoch": 0.01, "learning_rate": 9.99684627599291e-05, "loss": 1.571, "step": 2335 }, { "epoch": 0.01, "learning_rate": 9.996832151079127e-05, "loss": 1.5507, "step": 2340 }, { "epoch": 0.01, "learning_rate": 9.99681799461464e-05, "loss": 1.5583, "step": 2345 }, { "epoch": 0.01, "learning_rate": 9.99680380659954e-05, "loss": 1.517, "step": 2350 }, { "epoch": 0.01, "learning_rate": 9.996789587033912e-05, "loss": 1.5097, "step": 2355 }, { "epoch": 0.01, "learning_rate": 9.996775335917852e-05, "loss": 1.5261, "step": 2360 }, { "epoch": 0.01, "learning_rate": 9.996761053251446e-05, "loss": 1.5335, "step": 2365 }, { "epoch": 0.01, "learning_rate": 9.996746739034786e-05, "loss": 1.5407, "step": 2370 }, { "epoch": 0.01, "learning_rate": 9.99673239326796e-05, "loss": 1.5252, "step": 2375 }, { "epoch": 0.01, "learning_rate": 9.996718015951061e-05, "loss": 1.4937, "step": 2380 }, { "epoch": 0.01, "learning_rate": 9.996703607084179e-05, "loss": 1.5121, "step": 2385 }, { "epoch": 0.01, "learning_rate": 9.996689166667406e-05, "loss": 1.4867, "step": 2390 }, { "epoch": 0.01, "learning_rate": 9.99667469470083e-05, "loss": 1.5516, "step": 2395 }, { "epoch": 0.01, "learning_rate": 9.996660191184546e-05, "loss": 1.5321, "step": 2400 }, { "epoch": 0.01, "learning_rate": 9.996645656118644e-05, "loss": 1.5535, "step": 2405 }, { "epoch": 0.01, "learning_rate": 9.996631089503214e-05, "loss": 1.5573, "step": 2410 }, { "epoch": 0.01, "learning_rate": 9.996616491338352e-05, "loss": 1.5682, "step": 2415 }, { "epoch": 0.01, "learning_rate": 9.996601861624147e-05, "loss": 1.5171, "step": 2420 }, { "epoch": 0.01, "learning_rate": 9.996587200360692e-05, "loss": 1.5273, "step": 2425 }, { "epoch": 0.01, "learning_rate": 9.996572507548081e-05, "loss": 1.4945, "step": 2430 }, { "epoch": 0.01, "learning_rate": 9.996557783186406e-05, "loss": 1.5208, "step": 2435 }, { "epoch": 0.01, "learning_rate": 9.996543027275758e-05, "loss": 1.529, "step": 2440 }, { "epoch": 0.01, "learning_rate": 9.996528239816233e-05, "loss": 1.4699, "step": 2445 }, { "epoch": 0.01, "learning_rate": 9.996513420807923e-05, "loss": 1.5693, "step": 2450 }, { "epoch": 0.01, "learning_rate": 9.996498570250922e-05, "loss": 1.477, "step": 2455 }, { "epoch": 0.01, "learning_rate": 9.996483688145324e-05, "loss": 1.454, "step": 2460 }, { "epoch": 0.01, "learning_rate": 9.996468774491222e-05, "loss": 1.507, "step": 2465 }, { "epoch": 0.01, "learning_rate": 9.99645382928871e-05, "loss": 1.5086, "step": 2470 }, { "epoch": 0.01, "learning_rate": 9.996438852537884e-05, "loss": 1.4753, "step": 2475 }, { "epoch": 0.01, "learning_rate": 9.996423844238836e-05, "loss": 1.5156, "step": 2480 }, { "epoch": 0.01, "learning_rate": 9.996408804391663e-05, "loss": 1.4741, "step": 2485 }, { "epoch": 0.01, "learning_rate": 9.996393732996461e-05, "loss": 1.5344, "step": 2490 }, { "epoch": 0.01, "learning_rate": 9.996378630053322e-05, "loss": 1.5078, "step": 2495 }, { "epoch": 0.01, "learning_rate": 9.996363495562344e-05, "loss": 1.5143, "step": 2500 }, { "epoch": 0.01, "learning_rate": 9.996348329523622e-05, "loss": 1.5224, "step": 2505 }, { "epoch": 0.01, "learning_rate": 9.996333131937248e-05, "loss": 1.5095, "step": 2510 }, { "epoch": 0.01, "learning_rate": 9.996317902803323e-05, "loss": 1.451, "step": 2515 }, { "epoch": 0.01, "learning_rate": 9.996302642121942e-05, "loss": 1.5299, "step": 2520 }, { "epoch": 0.01, "learning_rate": 9.9962873498932e-05, "loss": 1.5151, "step": 2525 }, { "epoch": 0.01, "learning_rate": 9.996272026117196e-05, "loss": 1.5189, "step": 2530 }, { "epoch": 0.01, "learning_rate": 9.996256670794022e-05, "loss": 1.5397, "step": 2535 }, { "epoch": 0.01, "learning_rate": 9.99624128392378e-05, "loss": 1.5134, "step": 2540 }, { "epoch": 0.01, "learning_rate": 9.996225865506564e-05, "loss": 1.5024, "step": 2545 }, { "epoch": 0.01, "learning_rate": 9.996210415542473e-05, "loss": 1.5519, "step": 2550 }, { "epoch": 0.01, "learning_rate": 9.996194934031604e-05, "loss": 1.5404, "step": 2555 }, { "epoch": 0.01, "learning_rate": 9.996179420974055e-05, "loss": 1.4881, "step": 2560 }, { "epoch": 0.01, "learning_rate": 9.996163876369924e-05, "loss": 1.5769, "step": 2565 }, { "epoch": 0.01, "learning_rate": 9.996148300219308e-05, "loss": 1.5814, "step": 2570 }, { "epoch": 0.01, "learning_rate": 9.996132692522306e-05, "loss": 1.4895, "step": 2575 }, { "epoch": 0.01, "learning_rate": 9.996117053279017e-05, "loss": 1.5461, "step": 2580 }, { "epoch": 0.01, "learning_rate": 9.99610138248954e-05, "loss": 1.5383, "step": 2585 }, { "epoch": 0.01, "learning_rate": 9.996085680153974e-05, "loss": 1.5121, "step": 2590 }, { "epoch": 0.01, "learning_rate": 9.996069946272416e-05, "loss": 1.5801, "step": 2595 }, { "epoch": 0.01, "learning_rate": 9.996054180844968e-05, "loss": 1.5253, "step": 2600 }, { "epoch": 0.01, "learning_rate": 9.996038383871729e-05, "loss": 1.5258, "step": 2605 }, { "epoch": 0.01, "learning_rate": 9.996022555352797e-05, "loss": 1.5068, "step": 2610 }, { "epoch": 0.01, "learning_rate": 9.996006695288273e-05, "loss": 1.5297, "step": 2615 }, { "epoch": 0.01, "learning_rate": 9.995990803678259e-05, "loss": 1.5454, "step": 2620 }, { "epoch": 0.01, "learning_rate": 9.995974880522853e-05, "loss": 1.5164, "step": 2625 }, { "epoch": 0.01, "learning_rate": 9.995958925822156e-05, "loss": 1.5505, "step": 2630 }, { "epoch": 0.01, "learning_rate": 9.995942939576268e-05, "loss": 1.5168, "step": 2635 }, { "epoch": 0.01, "learning_rate": 9.995926921785292e-05, "loss": 1.5132, "step": 2640 }, { "epoch": 0.01, "learning_rate": 9.995910872449327e-05, "loss": 1.4964, "step": 2645 }, { "epoch": 0.01, "learning_rate": 9.995894791568477e-05, "loss": 1.5126, "step": 2650 }, { "epoch": 0.01, "learning_rate": 9.99587867914284e-05, "loss": 1.4778, "step": 2655 }, { "epoch": 0.01, "learning_rate": 9.995862535172522e-05, "loss": 1.5032, "step": 2660 }, { "epoch": 0.01, "learning_rate": 9.995846359657622e-05, "loss": 1.494, "step": 2665 }, { "epoch": 0.01, "learning_rate": 9.995830152598241e-05, "loss": 1.5445, "step": 2670 }, { "epoch": 0.01, "learning_rate": 9.995813913994484e-05, "loss": 1.5408, "step": 2675 }, { "epoch": 0.01, "learning_rate": 9.995797643846453e-05, "loss": 1.4772, "step": 2680 }, { "epoch": 0.01, "learning_rate": 9.995781342154249e-05, "loss": 1.5315, "step": 2685 }, { "epoch": 0.01, "learning_rate": 9.995765008917977e-05, "loss": 1.5888, "step": 2690 }, { "epoch": 0.01, "learning_rate": 9.99574864413774e-05, "loss": 1.4974, "step": 2695 }, { "epoch": 0.01, "learning_rate": 9.99573224781364e-05, "loss": 1.509, "step": 2700 }, { "epoch": 0.01, "learning_rate": 9.995715819945783e-05, "loss": 1.5193, "step": 2705 }, { "epoch": 0.01, "learning_rate": 9.995699360534269e-05, "loss": 1.528, "step": 2710 }, { "epoch": 0.01, "learning_rate": 9.995682869579203e-05, "loss": 1.5071, "step": 2715 }, { "epoch": 0.01, "learning_rate": 9.995666347080692e-05, "loss": 1.494, "step": 2720 }, { "epoch": 0.01, "learning_rate": 9.995649793038837e-05, "loss": 1.512, "step": 2725 }, { "epoch": 0.01, "learning_rate": 9.995633207453745e-05, "loss": 1.5239, "step": 2730 }, { "epoch": 0.01, "learning_rate": 9.99561659032552e-05, "loss": 1.5525, "step": 2735 }, { "epoch": 0.01, "learning_rate": 9.995599941654266e-05, "loss": 1.5474, "step": 2740 }, { "epoch": 0.01, "learning_rate": 9.995583261440087e-05, "loss": 1.557, "step": 2745 }, { "epoch": 0.01, "learning_rate": 9.99556654968309e-05, "loss": 1.5016, "step": 2750 }, { "epoch": 0.01, "learning_rate": 9.995549806383383e-05, "loss": 1.5156, "step": 2755 }, { "epoch": 0.01, "learning_rate": 9.995533031541067e-05, "loss": 1.4697, "step": 2760 }, { "epoch": 0.01, "learning_rate": 9.995516225156251e-05, "loss": 1.492, "step": 2765 }, { "epoch": 0.01, "learning_rate": 9.995499387229041e-05, "loss": 1.542, "step": 2770 }, { "epoch": 0.01, "learning_rate": 9.995482517759543e-05, "loss": 1.5223, "step": 2775 }, { "epoch": 0.01, "learning_rate": 9.995465616747861e-05, "loss": 1.5568, "step": 2780 }, { "epoch": 0.01, "learning_rate": 9.995448684194104e-05, "loss": 1.5111, "step": 2785 }, { "epoch": 0.01, "learning_rate": 9.99543172009838e-05, "loss": 1.5416, "step": 2790 }, { "epoch": 0.01, "learning_rate": 9.995414724460793e-05, "loss": 1.4793, "step": 2795 }, { "epoch": 0.01, "learning_rate": 9.995397697281453e-05, "loss": 1.4842, "step": 2800 }, { "epoch": 0.01, "learning_rate": 9.995380638560468e-05, "loss": 1.4727, "step": 2805 }, { "epoch": 0.01, "learning_rate": 9.995363548297943e-05, "loss": 1.5372, "step": 2810 }, { "epoch": 0.01, "learning_rate": 9.995346426493987e-05, "loss": 1.486, "step": 2815 }, { "epoch": 0.01, "learning_rate": 9.995329273148708e-05, "loss": 1.5398, "step": 2820 }, { "epoch": 0.01, "learning_rate": 9.995312088262216e-05, "loss": 1.506, "step": 2825 }, { "epoch": 0.01, "learning_rate": 9.995294871834617e-05, "loss": 1.5192, "step": 2830 }, { "epoch": 0.01, "learning_rate": 9.995277623866022e-05, "loss": 1.5395, "step": 2835 }, { "epoch": 0.01, "learning_rate": 9.995260344356539e-05, "loss": 1.5055, "step": 2840 }, { "epoch": 0.01, "learning_rate": 9.995243033306276e-05, "loss": 1.5103, "step": 2845 }, { "epoch": 0.01, "learning_rate": 9.995225690715344e-05, "loss": 1.4474, "step": 2850 }, { "epoch": 0.01, "learning_rate": 9.995208316583851e-05, "loss": 1.5004, "step": 2855 }, { "epoch": 0.01, "learning_rate": 9.995190910911907e-05, "loss": 1.4629, "step": 2860 }, { "epoch": 0.01, "learning_rate": 9.995173473699621e-05, "loss": 1.5233, "step": 2865 }, { "epoch": 0.01, "learning_rate": 9.995156004947107e-05, "loss": 1.4981, "step": 2870 }, { "epoch": 0.01, "learning_rate": 9.995138504654472e-05, "loss": 1.4929, "step": 2875 }, { "epoch": 0.01, "learning_rate": 9.995120972821828e-05, "loss": 1.505, "step": 2880 }, { "epoch": 0.01, "learning_rate": 9.995103409449282e-05, "loss": 1.5656, "step": 2885 }, { "epoch": 0.01, "learning_rate": 9.99508581453695e-05, "loss": 1.5047, "step": 2890 }, { "epoch": 0.01, "learning_rate": 9.995068188084942e-05, "loss": 1.4868, "step": 2895 }, { "epoch": 0.01, "learning_rate": 9.995050530093367e-05, "loss": 1.5294, "step": 2900 }, { "epoch": 0.01, "learning_rate": 9.995032840562338e-05, "loss": 1.5434, "step": 2905 }, { "epoch": 0.01, "learning_rate": 9.995015119491965e-05, "loss": 1.533, "step": 2910 }, { "epoch": 0.01, "learning_rate": 9.994997366882361e-05, "loss": 1.5523, "step": 2915 }, { "epoch": 0.01, "learning_rate": 9.994979582733642e-05, "loss": 1.5377, "step": 2920 }, { "epoch": 0.01, "learning_rate": 9.994961767045913e-05, "loss": 1.4969, "step": 2925 }, { "epoch": 0.01, "learning_rate": 9.994943919819291e-05, "loss": 1.5979, "step": 2930 }, { "epoch": 0.01, "learning_rate": 9.994926041053889e-05, "loss": 1.5196, "step": 2935 }, { "epoch": 0.01, "learning_rate": 9.994908130749818e-05, "loss": 1.5421, "step": 2940 }, { "epoch": 0.01, "learning_rate": 9.994890188907191e-05, "loss": 1.5627, "step": 2945 }, { "epoch": 0.01, "learning_rate": 9.994872215526124e-05, "loss": 1.5439, "step": 2950 }, { "epoch": 0.01, "learning_rate": 9.994854210606728e-05, "loss": 1.5637, "step": 2955 }, { "epoch": 0.01, "learning_rate": 9.994836174149116e-05, "loss": 1.465, "step": 2960 }, { "epoch": 0.01, "learning_rate": 9.994818106153402e-05, "loss": 1.503, "step": 2965 }, { "epoch": 0.02, "learning_rate": 9.994800006619705e-05, "loss": 1.5463, "step": 2970 }, { "epoch": 0.02, "learning_rate": 9.994781875548134e-05, "loss": 1.5887, "step": 2975 }, { "epoch": 0.02, "learning_rate": 9.994763712938804e-05, "loss": 1.538, "step": 2980 }, { "epoch": 0.02, "learning_rate": 9.994745518791832e-05, "loss": 1.4573, "step": 2985 }, { "epoch": 0.02, "learning_rate": 9.994727293107331e-05, "loss": 1.5372, "step": 2990 }, { "epoch": 0.02, "learning_rate": 9.994709035885417e-05, "loss": 1.5234, "step": 2995 }, { "epoch": 0.02, "learning_rate": 9.994690747126204e-05, "loss": 1.4949, "step": 3000 }, { "epoch": 0.02, "learning_rate": 9.99467242682981e-05, "loss": 1.5092, "step": 3005 }, { "epoch": 0.02, "learning_rate": 9.994654074996348e-05, "loss": 1.5308, "step": 3010 }, { "epoch": 0.02, "learning_rate": 9.994635691625935e-05, "loss": 1.5072, "step": 3015 }, { "epoch": 0.02, "learning_rate": 9.994617276718686e-05, "loss": 1.4754, "step": 3020 }, { "epoch": 0.02, "learning_rate": 9.99459883027472e-05, "loss": 1.4705, "step": 3025 }, { "epoch": 0.02, "learning_rate": 9.994580352294152e-05, "loss": 1.5202, "step": 3030 }, { "epoch": 0.02, "learning_rate": 9.994561842777097e-05, "loss": 1.5326, "step": 3035 }, { "epoch": 0.02, "learning_rate": 9.994543301723674e-05, "loss": 1.4853, "step": 3040 }, { "epoch": 0.02, "learning_rate": 9.994524729134e-05, "loss": 1.5118, "step": 3045 }, { "epoch": 0.02, "learning_rate": 9.994506125008189e-05, "loss": 1.4941, "step": 3050 }, { "epoch": 0.02, "learning_rate": 9.994487489346364e-05, "loss": 1.5047, "step": 3055 }, { "epoch": 0.02, "learning_rate": 9.99446882214864e-05, "loss": 1.5304, "step": 3060 }, { "epoch": 0.02, "learning_rate": 9.994450123415133e-05, "loss": 1.494, "step": 3065 }, { "epoch": 0.02, "learning_rate": 9.994431393145965e-05, "loss": 1.4773, "step": 3070 }, { "epoch": 0.02, "learning_rate": 9.994412631341249e-05, "loss": 1.5026, "step": 3075 }, { "epoch": 0.02, "learning_rate": 9.994393838001108e-05, "loss": 1.5637, "step": 3080 }, { "epoch": 0.02, "learning_rate": 9.994375013125659e-05, "loss": 1.5263, "step": 3085 }, { "epoch": 0.02, "learning_rate": 9.994356156715022e-05, "loss": 1.5137, "step": 3090 }, { "epoch": 0.02, "learning_rate": 9.994337268769314e-05, "loss": 1.5198, "step": 3095 }, { "epoch": 0.02, "learning_rate": 9.994318349288655e-05, "loss": 1.518, "step": 3100 }, { "epoch": 0.02, "learning_rate": 9.994299398273165e-05, "loss": 1.5231, "step": 3105 }, { "epoch": 0.02, "learning_rate": 9.994280415722963e-05, "loss": 1.5613, "step": 3110 }, { "epoch": 0.02, "learning_rate": 9.994261401638171e-05, "loss": 1.4401, "step": 3115 }, { "epoch": 0.02, "learning_rate": 9.994242356018905e-05, "loss": 1.4348, "step": 3120 }, { "epoch": 0.02, "learning_rate": 9.99422327886529e-05, "loss": 1.5746, "step": 3125 }, { "epoch": 0.02, "learning_rate": 9.994204170177444e-05, "loss": 1.5122, "step": 3130 }, { "epoch": 0.02, "learning_rate": 9.994185029955486e-05, "loss": 1.518, "step": 3135 }, { "epoch": 0.02, "learning_rate": 9.994165858199539e-05, "loss": 1.4871, "step": 3140 }, { "epoch": 0.02, "learning_rate": 9.994146654909725e-05, "loss": 1.4953, "step": 3145 }, { "epoch": 0.02, "learning_rate": 9.994127420086161e-05, "loss": 1.5215, "step": 3150 }, { "epoch": 0.02, "learning_rate": 9.994108153728973e-05, "loss": 1.4941, "step": 3155 }, { "epoch": 0.02, "learning_rate": 9.994088855838282e-05, "loss": 1.5802, "step": 3160 }, { "epoch": 0.02, "learning_rate": 9.994069526414208e-05, "loss": 1.5252, "step": 3165 }, { "epoch": 0.02, "learning_rate": 9.994050165456874e-05, "loss": 1.559, "step": 3170 }, { "epoch": 0.02, "learning_rate": 9.994030772966403e-05, "loss": 1.5166, "step": 3175 }, { "epoch": 0.02, "learning_rate": 9.994011348942915e-05, "loss": 1.4929, "step": 3180 }, { "epoch": 0.02, "learning_rate": 9.993991893386534e-05, "loss": 1.5256, "step": 3185 }, { "epoch": 0.02, "learning_rate": 9.993972406297385e-05, "loss": 1.49, "step": 3190 }, { "epoch": 0.02, "learning_rate": 9.993952887675587e-05, "loss": 1.555, "step": 3195 }, { "epoch": 0.02, "learning_rate": 9.993933337521267e-05, "loss": 1.4504, "step": 3200 }, { "epoch": 0.02, "learning_rate": 9.993913755834546e-05, "loss": 1.5377, "step": 3205 }, { "epoch": 0.02, "learning_rate": 9.993894142615549e-05, "loss": 1.5529, "step": 3210 }, { "epoch": 0.02, "learning_rate": 9.993874497864399e-05, "loss": 1.4826, "step": 3215 }, { "epoch": 0.02, "learning_rate": 9.99385482158122e-05, "loss": 1.5276, "step": 3220 }, { "epoch": 0.02, "learning_rate": 9.993835113766136e-05, "loss": 1.4329, "step": 3225 }, { "epoch": 0.02, "learning_rate": 9.993815374419273e-05, "loss": 1.5641, "step": 3230 }, { "epoch": 0.02, "learning_rate": 9.993795603540754e-05, "loss": 1.5104, "step": 3235 }, { "epoch": 0.02, "learning_rate": 9.993775801130706e-05, "loss": 1.5004, "step": 3240 }, { "epoch": 0.02, "learning_rate": 9.99375596718925e-05, "loss": 1.4776, "step": 3245 }, { "epoch": 0.02, "learning_rate": 9.993736101716517e-05, "loss": 1.4893, "step": 3250 }, { "epoch": 0.02, "learning_rate": 9.993716204712626e-05, "loss": 1.5128, "step": 3255 }, { "epoch": 0.02, "learning_rate": 9.993696276177708e-05, "loss": 1.5456, "step": 3260 }, { "epoch": 0.02, "learning_rate": 9.993676316111887e-05, "loss": 1.4825, "step": 3265 }, { "epoch": 0.02, "learning_rate": 9.993656324515286e-05, "loss": 1.5172, "step": 3270 }, { "epoch": 0.02, "learning_rate": 9.993636301388036e-05, "loss": 1.5193, "step": 3275 }, { "epoch": 0.02, "learning_rate": 9.993616246730259e-05, "loss": 1.5517, "step": 3280 }, { "epoch": 0.02, "learning_rate": 9.993596160542087e-05, "loss": 1.5099, "step": 3285 }, { "epoch": 0.02, "learning_rate": 9.993576042823642e-05, "loss": 1.4761, "step": 3290 }, { "epoch": 0.02, "learning_rate": 9.993555893575055e-05, "loss": 1.5321, "step": 3295 }, { "epoch": 0.02, "learning_rate": 9.993535712796447e-05, "loss": 1.5451, "step": 3300 }, { "epoch": 0.02, "learning_rate": 9.993515500487954e-05, "loss": 1.5722, "step": 3305 }, { "epoch": 0.02, "learning_rate": 9.993495256649695e-05, "loss": 1.4602, "step": 3310 }, { "epoch": 0.02, "learning_rate": 9.993474981281805e-05, "loss": 1.5073, "step": 3315 }, { "epoch": 0.02, "learning_rate": 9.993454674384407e-05, "loss": 1.5392, "step": 3320 }, { "epoch": 0.02, "learning_rate": 9.993434335957632e-05, "loss": 1.5624, "step": 3325 }, { "epoch": 0.02, "learning_rate": 9.993413966001608e-05, "loss": 1.5157, "step": 3330 }, { "epoch": 0.02, "learning_rate": 9.993393564516462e-05, "loss": 1.5243, "step": 3335 }, { "epoch": 0.02, "learning_rate": 9.993373131502324e-05, "loss": 1.5236, "step": 3340 }, { "epoch": 0.02, "learning_rate": 9.993352666959324e-05, "loss": 1.5148, "step": 3345 }, { "epoch": 0.02, "learning_rate": 9.993332170887591e-05, "loss": 1.5247, "step": 3350 }, { "epoch": 0.02, "learning_rate": 9.99331164328725e-05, "loss": 1.4501, "step": 3355 }, { "epoch": 0.02, "learning_rate": 9.993291084158438e-05, "loss": 1.5403, "step": 3360 }, { "epoch": 0.02, "learning_rate": 9.99327049350128e-05, "loss": 1.4707, "step": 3365 }, { "epoch": 0.02, "learning_rate": 9.993249871315906e-05, "loss": 1.4824, "step": 3370 }, { "epoch": 0.02, "learning_rate": 9.99322921760245e-05, "loss": 1.4943, "step": 3375 }, { "epoch": 0.02, "learning_rate": 9.993208532361036e-05, "loss": 1.5315, "step": 3380 }, { "epoch": 0.02, "learning_rate": 9.993187815591801e-05, "loss": 1.5344, "step": 3385 }, { "epoch": 0.02, "learning_rate": 9.993167067294873e-05, "loss": 1.5218, "step": 3390 }, { "epoch": 0.02, "learning_rate": 9.993146287470383e-05, "loss": 1.5238, "step": 3395 }, { "epoch": 0.02, "learning_rate": 9.993125476118462e-05, "loss": 1.5105, "step": 3400 }, { "epoch": 0.02, "learning_rate": 9.99310463323924e-05, "loss": 1.5031, "step": 3405 }, { "epoch": 0.02, "learning_rate": 9.993083758832853e-05, "loss": 1.5065, "step": 3410 }, { "epoch": 0.02, "learning_rate": 9.993062852899429e-05, "loss": 1.4841, "step": 3415 }, { "epoch": 0.02, "learning_rate": 9.9930419154391e-05, "loss": 1.5589, "step": 3420 }, { "epoch": 0.02, "learning_rate": 9.993020946452002e-05, "loss": 1.4913, "step": 3425 }, { "epoch": 0.02, "learning_rate": 9.992999945938264e-05, "loss": 1.516, "step": 3430 }, { "epoch": 0.02, "learning_rate": 9.99297891389802e-05, "loss": 1.5157, "step": 3435 }, { "epoch": 0.02, "learning_rate": 9.992957850331399e-05, "loss": 1.5128, "step": 3440 }, { "epoch": 0.02, "learning_rate": 9.99293675523854e-05, "loss": 1.5151, "step": 3445 }, { "epoch": 0.02, "learning_rate": 9.992915628619572e-05, "loss": 1.4946, "step": 3450 }, { "epoch": 0.02, "learning_rate": 9.99289447047463e-05, "loss": 1.4794, "step": 3455 }, { "epoch": 0.02, "learning_rate": 9.992873280803849e-05, "loss": 1.4567, "step": 3460 }, { "epoch": 0.02, "learning_rate": 9.992852059607358e-05, "loss": 1.5259, "step": 3465 }, { "epoch": 0.02, "learning_rate": 9.992830806885296e-05, "loss": 1.5298, "step": 3470 }, { "epoch": 0.02, "learning_rate": 9.992809522637795e-05, "loss": 1.5511, "step": 3475 }, { "epoch": 0.02, "learning_rate": 9.992788206864991e-05, "loss": 1.5268, "step": 3480 }, { "epoch": 0.02, "learning_rate": 9.992766859567017e-05, "loss": 1.4739, "step": 3485 }, { "epoch": 0.02, "learning_rate": 9.992745480744008e-05, "loss": 1.4906, "step": 3490 }, { "epoch": 0.02, "learning_rate": 9.992724070396098e-05, "loss": 1.5345, "step": 3495 }, { "epoch": 0.02, "learning_rate": 9.992702628523422e-05, "loss": 1.4536, "step": 3500 }, { "epoch": 0.02, "learning_rate": 9.99268115512612e-05, "loss": 1.4975, "step": 3505 }, { "epoch": 0.02, "learning_rate": 9.992659650204323e-05, "loss": 1.5348, "step": 3510 }, { "epoch": 0.02, "learning_rate": 9.992638113758168e-05, "loss": 1.5162, "step": 3515 }, { "epoch": 0.02, "learning_rate": 9.99261654578779e-05, "loss": 1.5277, "step": 3520 }, { "epoch": 0.02, "learning_rate": 9.992594946293327e-05, "loss": 1.5186, "step": 3525 }, { "epoch": 0.02, "learning_rate": 9.992573315274914e-05, "loss": 1.4776, "step": 3530 }, { "epoch": 0.02, "learning_rate": 9.99255165273269e-05, "loss": 1.502, "step": 3535 }, { "epoch": 0.02, "learning_rate": 9.992529958666788e-05, "loss": 1.5049, "step": 3540 }, { "epoch": 0.02, "learning_rate": 9.992508233077348e-05, "loss": 1.512, "step": 3545 }, { "epoch": 0.02, "learning_rate": 9.992486475964506e-05, "loss": 1.5675, "step": 3550 }, { "epoch": 0.02, "learning_rate": 9.9924646873284e-05, "loss": 1.5572, "step": 3555 }, { "epoch": 0.02, "learning_rate": 9.992442867169165e-05, "loss": 1.5045, "step": 3560 }, { "epoch": 0.02, "learning_rate": 9.992421015486943e-05, "loss": 1.5, "step": 3565 }, { "epoch": 0.02, "learning_rate": 9.992399132281869e-05, "loss": 1.5228, "step": 3570 }, { "epoch": 0.02, "learning_rate": 9.992377217554082e-05, "loss": 1.516, "step": 3575 }, { "epoch": 0.02, "learning_rate": 9.992355271303719e-05, "loss": 1.4356, "step": 3580 }, { "epoch": 0.02, "learning_rate": 9.992333293530922e-05, "loss": 1.532, "step": 3585 }, { "epoch": 0.02, "learning_rate": 9.992311284235827e-05, "loss": 1.5011, "step": 3590 }, { "epoch": 0.02, "learning_rate": 9.992289243418574e-05, "loss": 1.4729, "step": 3595 }, { "epoch": 0.02, "learning_rate": 9.992267171079301e-05, "loss": 1.4935, "step": 3600 }, { "epoch": 0.02, "learning_rate": 9.99224506721815e-05, "loss": 1.5192, "step": 3605 }, { "epoch": 0.02, "learning_rate": 9.992222931835258e-05, "loss": 1.5306, "step": 3610 }, { "epoch": 0.02, "learning_rate": 9.992200764930763e-05, "loss": 1.5288, "step": 3615 }, { "epoch": 0.02, "learning_rate": 9.992178566504811e-05, "loss": 1.5358, "step": 3620 }, { "epoch": 0.02, "learning_rate": 9.992156336557536e-05, "loss": 1.5034, "step": 3625 }, { "epoch": 0.02, "learning_rate": 9.992134075089084e-05, "loss": 1.4965, "step": 3630 }, { "epoch": 0.02, "learning_rate": 9.99211178209959e-05, "loss": 1.5002, "step": 3635 }, { "epoch": 0.02, "learning_rate": 9.992089457589198e-05, "loss": 1.4796, "step": 3640 }, { "epoch": 0.02, "learning_rate": 9.99206710155805e-05, "loss": 1.5193, "step": 3645 }, { "epoch": 0.02, "learning_rate": 9.992044714006283e-05, "loss": 1.5129, "step": 3650 }, { "epoch": 0.02, "learning_rate": 9.992022294934042e-05, "loss": 1.4832, "step": 3655 }, { "epoch": 0.02, "learning_rate": 9.991999844341467e-05, "loss": 1.4397, "step": 3660 }, { "epoch": 0.02, "learning_rate": 9.9919773622287e-05, "loss": 1.4875, "step": 3665 }, { "epoch": 0.02, "learning_rate": 9.991954848595883e-05, "loss": 1.5253, "step": 3670 }, { "epoch": 0.02, "learning_rate": 9.99193230344316e-05, "loss": 1.4675, "step": 3675 }, { "epoch": 0.02, "learning_rate": 9.991909726770671e-05, "loss": 1.5533, "step": 3680 }, { "epoch": 0.02, "learning_rate": 9.991887118578558e-05, "loss": 1.5269, "step": 3685 }, { "epoch": 0.02, "learning_rate": 9.991864478866966e-05, "loss": 1.4913, "step": 3690 }, { "epoch": 0.02, "learning_rate": 9.991841807636036e-05, "loss": 1.4657, "step": 3695 }, { "epoch": 0.02, "learning_rate": 9.991819104885912e-05, "loss": 1.5898, "step": 3700 }, { "epoch": 0.02, "learning_rate": 9.991796370616738e-05, "loss": 1.471, "step": 3705 }, { "epoch": 0.02, "learning_rate": 9.991773604828657e-05, "loss": 1.5591, "step": 3710 }, { "epoch": 0.02, "learning_rate": 9.991750807521811e-05, "loss": 1.5143, "step": 3715 }, { "epoch": 0.02, "learning_rate": 9.991727978696348e-05, "loss": 1.5713, "step": 3720 }, { "epoch": 0.02, "learning_rate": 9.991705118352408e-05, "loss": 1.4789, "step": 3725 }, { "epoch": 0.02, "learning_rate": 9.991682226490137e-05, "loss": 1.4906, "step": 3730 }, { "epoch": 0.02, "learning_rate": 9.991659303109681e-05, "loss": 1.5641, "step": 3735 }, { "epoch": 0.02, "learning_rate": 9.991636348211184e-05, "loss": 1.5029, "step": 3740 }, { "epoch": 0.02, "learning_rate": 9.99161336179479e-05, "loss": 1.4743, "step": 3745 }, { "epoch": 0.02, "learning_rate": 9.991590343860642e-05, "loss": 1.5302, "step": 3750 }, { "epoch": 0.02, "learning_rate": 9.99156729440889e-05, "loss": 1.5319, "step": 3755 }, { "epoch": 0.02, "learning_rate": 9.991544213439677e-05, "loss": 1.5088, "step": 3760 }, { "epoch": 0.02, "learning_rate": 9.991521100953148e-05, "loss": 1.4649, "step": 3765 }, { "epoch": 0.02, "learning_rate": 9.991497956949452e-05, "loss": 1.5334, "step": 3770 }, { "epoch": 0.02, "learning_rate": 9.99147478142873e-05, "loss": 1.5279, "step": 3775 }, { "epoch": 0.02, "learning_rate": 9.991451574391134e-05, "loss": 1.516, "step": 3780 }, { "epoch": 0.02, "learning_rate": 9.991428335836808e-05, "loss": 1.4928, "step": 3785 }, { "epoch": 0.02, "learning_rate": 9.991405065765898e-05, "loss": 1.4405, "step": 3790 }, { "epoch": 0.02, "learning_rate": 9.991381764178551e-05, "loss": 1.5142, "step": 3795 }, { "epoch": 0.02, "learning_rate": 9.991358431074915e-05, "loss": 1.4963, "step": 3800 }, { "epoch": 0.02, "learning_rate": 9.991335066455138e-05, "loss": 1.5076, "step": 3805 }, { "epoch": 0.02, "learning_rate": 9.991311670319368e-05, "loss": 1.5718, "step": 3810 }, { "epoch": 0.02, "learning_rate": 9.991288242667749e-05, "loss": 1.4916, "step": 3815 }, { "epoch": 0.02, "learning_rate": 9.991264783500431e-05, "loss": 1.4561, "step": 3820 }, { "epoch": 0.02, "learning_rate": 9.991241292817564e-05, "loss": 1.5167, "step": 3825 }, { "epoch": 0.02, "learning_rate": 9.991217770619294e-05, "loss": 1.5802, "step": 3830 }, { "epoch": 0.02, "learning_rate": 9.991194216905771e-05, "loss": 1.542, "step": 3835 }, { "epoch": 0.02, "learning_rate": 9.991170631677143e-05, "loss": 1.4965, "step": 3840 }, { "epoch": 0.02, "learning_rate": 9.991147014933557e-05, "loss": 1.5709, "step": 3845 }, { "epoch": 0.02, "learning_rate": 9.991123366675166e-05, "loss": 1.5383, "step": 3850 }, { "epoch": 0.02, "learning_rate": 9.991099686902117e-05, "loss": 1.4743, "step": 3855 }, { "epoch": 0.02, "learning_rate": 9.99107597561456e-05, "loss": 1.5117, "step": 3860 }, { "epoch": 0.02, "learning_rate": 9.991052232812644e-05, "loss": 1.4882, "step": 3865 }, { "epoch": 0.02, "learning_rate": 9.99102845849652e-05, "loss": 1.5421, "step": 3870 }, { "epoch": 0.02, "learning_rate": 9.991004652666338e-05, "loss": 1.499, "step": 3875 }, { "epoch": 0.02, "learning_rate": 9.990980815322247e-05, "loss": 1.5019, "step": 3880 }, { "epoch": 0.02, "learning_rate": 9.990956946464399e-05, "loss": 1.5189, "step": 3885 }, { "epoch": 0.02, "learning_rate": 9.990933046092944e-05, "loss": 1.5, "step": 3890 }, { "epoch": 0.02, "learning_rate": 9.990909114208033e-05, "loss": 1.504, "step": 3895 }, { "epoch": 0.02, "learning_rate": 9.990885150809817e-05, "loss": 1.493, "step": 3900 }, { "epoch": 0.02, "learning_rate": 9.990861155898448e-05, "loss": 1.5292, "step": 3905 }, { "epoch": 0.02, "learning_rate": 9.990837129474075e-05, "loss": 1.4811, "step": 3910 }, { "epoch": 0.02, "learning_rate": 9.990813071536855e-05, "loss": 1.5442, "step": 3915 }, { "epoch": 0.02, "learning_rate": 9.990788982086934e-05, "loss": 1.5521, "step": 3920 }, { "epoch": 0.02, "learning_rate": 9.990764861124467e-05, "loss": 1.5455, "step": 3925 }, { "epoch": 0.02, "learning_rate": 9.990740708649607e-05, "loss": 1.5175, "step": 3930 }, { "epoch": 0.02, "learning_rate": 9.990716524662506e-05, "loss": 1.5023, "step": 3935 }, { "epoch": 0.02, "learning_rate": 9.990692309163314e-05, "loss": 1.5238, "step": 3940 }, { "epoch": 0.02, "learning_rate": 9.990668062152189e-05, "loss": 1.4671, "step": 3945 }, { "epoch": 0.02, "learning_rate": 9.990643783629279e-05, "loss": 1.4789, "step": 3950 }, { "epoch": 0.02, "learning_rate": 9.99061947359474e-05, "loss": 1.5161, "step": 3955 }, { "epoch": 0.02, "learning_rate": 9.990595132048726e-05, "loss": 1.4652, "step": 3960 }, { "epoch": 0.02, "learning_rate": 9.99057075899139e-05, "loss": 1.4732, "step": 3965 }, { "epoch": 0.02, "learning_rate": 9.990546354422883e-05, "loss": 1.5079, "step": 3970 }, { "epoch": 0.02, "learning_rate": 9.990521918343362e-05, "loss": 1.5139, "step": 3975 }, { "epoch": 0.02, "learning_rate": 9.990497450752983e-05, "loss": 1.4872, "step": 3980 }, { "epoch": 0.02, "learning_rate": 9.990472951651898e-05, "loss": 1.494, "step": 3985 }, { "epoch": 0.02, "learning_rate": 9.990448421040262e-05, "loss": 1.5164, "step": 3990 }, { "epoch": 0.02, "learning_rate": 9.99042385891823e-05, "loss": 1.4692, "step": 3995 }, { "epoch": 0.02, "learning_rate": 9.990399265285956e-05, "loss": 1.4927, "step": 4000 }, { "epoch": 0.02, "learning_rate": 9.990374640143599e-05, "loss": 1.5416, "step": 4005 }, { "epoch": 0.02, "learning_rate": 9.990349983491309e-05, "loss": 1.5582, "step": 4010 }, { "epoch": 0.02, "learning_rate": 9.990325295329246e-05, "loss": 1.4653, "step": 4015 }, { "epoch": 0.02, "learning_rate": 9.990300575657565e-05, "loss": 1.5256, "step": 4020 }, { "epoch": 0.02, "learning_rate": 9.990275824476421e-05, "loss": 1.5464, "step": 4025 }, { "epoch": 0.02, "learning_rate": 9.99025104178597e-05, "loss": 1.5305, "step": 4030 }, { "epoch": 0.02, "learning_rate": 9.990226227586371e-05, "loss": 1.5468, "step": 4035 }, { "epoch": 0.02, "learning_rate": 9.990201381877778e-05, "loss": 1.486, "step": 4040 }, { "epoch": 0.02, "learning_rate": 9.990176504660349e-05, "loss": 1.5599, "step": 4045 }, { "epoch": 0.02, "learning_rate": 9.990151595934242e-05, "loss": 1.4714, "step": 4050 }, { "epoch": 0.02, "learning_rate": 9.990126655699613e-05, "loss": 1.4766, "step": 4055 }, { "epoch": 0.02, "learning_rate": 9.990101683956619e-05, "loss": 1.544, "step": 4060 }, { "epoch": 0.02, "learning_rate": 9.990076680705418e-05, "loss": 1.5168, "step": 4065 }, { "epoch": 0.02, "learning_rate": 9.990051645946168e-05, "loss": 1.5307, "step": 4070 }, { "epoch": 0.02, "learning_rate": 9.990026579679029e-05, "loss": 1.5073, "step": 4075 }, { "epoch": 0.02, "learning_rate": 9.990001481904157e-05, "loss": 1.4967, "step": 4080 }, { "epoch": 0.02, "learning_rate": 9.98997635262171e-05, "loss": 1.5607, "step": 4085 }, { "epoch": 0.02, "learning_rate": 9.989951191831849e-05, "loss": 1.4891, "step": 4090 }, { "epoch": 0.02, "learning_rate": 9.98992599953473e-05, "loss": 1.4783, "step": 4095 }, { "epoch": 0.02, "learning_rate": 9.989900775730516e-05, "loss": 1.4935, "step": 4100 }, { "epoch": 0.02, "learning_rate": 9.989875520419363e-05, "loss": 1.4895, "step": 4105 }, { "epoch": 0.02, "learning_rate": 9.989850233601432e-05, "loss": 1.4632, "step": 4110 }, { "epoch": 0.02, "learning_rate": 9.989824915276881e-05, "loss": 1.4752, "step": 4115 }, { "epoch": 0.02, "learning_rate": 9.989799565445872e-05, "loss": 1.5499, "step": 4120 }, { "epoch": 0.02, "learning_rate": 9.989774184108563e-05, "loss": 1.428, "step": 4125 }, { "epoch": 0.02, "learning_rate": 9.989748771265114e-05, "loss": 1.537, "step": 4130 }, { "epoch": 0.02, "learning_rate": 9.98972332691569e-05, "loss": 1.5157, "step": 4135 }, { "epoch": 0.02, "learning_rate": 9.989697851060446e-05, "loss": 1.4997, "step": 4140 }, { "epoch": 0.02, "learning_rate": 9.989672343699547e-05, "loss": 1.4946, "step": 4145 }, { "epoch": 0.02, "learning_rate": 9.989646804833152e-05, "loss": 1.5161, "step": 4150 }, { "epoch": 0.02, "learning_rate": 9.989621234461422e-05, "loss": 1.5054, "step": 4155 }, { "epoch": 0.02, "learning_rate": 9.989595632584519e-05, "loss": 1.5207, "step": 4160 }, { "epoch": 0.02, "learning_rate": 9.989569999202603e-05, "loss": 1.4765, "step": 4165 }, { "epoch": 0.02, "learning_rate": 9.989544334315841e-05, "loss": 1.5545, "step": 4170 }, { "epoch": 0.02, "learning_rate": 9.989518637924388e-05, "loss": 1.5858, "step": 4175 }, { "epoch": 0.02, "learning_rate": 9.989492910028412e-05, "loss": 1.4902, "step": 4180 }, { "epoch": 0.02, "learning_rate": 9.989467150628073e-05, "loss": 1.4587, "step": 4185 }, { "epoch": 0.02, "learning_rate": 9.989441359723535e-05, "loss": 1.4603, "step": 4190 }, { "epoch": 0.02, "learning_rate": 9.989415537314958e-05, "loss": 1.507, "step": 4195 }, { "epoch": 0.02, "learning_rate": 9.989389683402508e-05, "loss": 1.4852, "step": 4200 }, { "epoch": 0.02, "learning_rate": 9.989363797986346e-05, "loss": 1.5239, "step": 4205 }, { "epoch": 0.02, "learning_rate": 9.989337881066639e-05, "loss": 1.4857, "step": 4210 }, { "epoch": 0.02, "learning_rate": 9.989311932643545e-05, "loss": 1.5302, "step": 4215 }, { "epoch": 0.02, "learning_rate": 9.989285952717234e-05, "loss": 1.5101, "step": 4220 }, { "epoch": 0.02, "learning_rate": 9.989259941287864e-05, "loss": 1.5009, "step": 4225 }, { "epoch": 0.02, "learning_rate": 9.989233898355603e-05, "loss": 1.4918, "step": 4230 }, { "epoch": 0.02, "learning_rate": 9.989207823920617e-05, "loss": 1.5294, "step": 4235 }, { "epoch": 0.02, "learning_rate": 9.989181717983066e-05, "loss": 1.525, "step": 4240 }, { "epoch": 0.02, "learning_rate": 9.989155580543118e-05, "loss": 1.4571, "step": 4245 }, { "epoch": 0.02, "learning_rate": 9.989129411600938e-05, "loss": 1.4515, "step": 4250 }, { "epoch": 0.02, "learning_rate": 9.989103211156689e-05, "loss": 1.4865, "step": 4255 }, { "epoch": 0.02, "learning_rate": 9.989076979210539e-05, "loss": 1.5006, "step": 4260 }, { "epoch": 0.02, "learning_rate": 9.989050715762652e-05, "loss": 1.529, "step": 4265 }, { "epoch": 0.02, "learning_rate": 9.989024420813193e-05, "loss": 1.5761, "step": 4270 }, { "epoch": 0.02, "learning_rate": 9.988998094362332e-05, "loss": 1.5111, "step": 4275 }, { "epoch": 0.02, "learning_rate": 9.98897173641023e-05, "loss": 1.5537, "step": 4280 }, { "epoch": 0.02, "learning_rate": 9.988945346957059e-05, "loss": 1.5933, "step": 4285 }, { "epoch": 0.02, "learning_rate": 9.988918926002981e-05, "loss": 1.581, "step": 4290 }, { "epoch": 0.02, "learning_rate": 9.988892473548163e-05, "loss": 1.4913, "step": 4295 }, { "epoch": 0.02, "learning_rate": 9.988865989592777e-05, "loss": 1.5118, "step": 4300 }, { "epoch": 0.02, "learning_rate": 9.988839474136984e-05, "loss": 1.4788, "step": 4305 }, { "epoch": 0.02, "learning_rate": 9.988812927180956e-05, "loss": 1.4853, "step": 4310 }, { "epoch": 0.02, "learning_rate": 9.988786348724858e-05, "loss": 1.456, "step": 4315 }, { "epoch": 0.02, "learning_rate": 9.988759738768858e-05, "loss": 1.5449, "step": 4320 }, { "epoch": 0.02, "learning_rate": 9.988733097313125e-05, "loss": 1.4819, "step": 4325 }, { "epoch": 0.02, "learning_rate": 9.988706424357827e-05, "loss": 1.5171, "step": 4330 }, { "epoch": 0.02, "learning_rate": 9.988679719903133e-05, "loss": 1.5015, "step": 4335 }, { "epoch": 0.02, "learning_rate": 9.988652983949209e-05, "loss": 1.5576, "step": 4340 }, { "epoch": 0.02, "learning_rate": 9.988626216496229e-05, "loss": 1.5293, "step": 4345 }, { "epoch": 0.02, "learning_rate": 9.988599417544356e-05, "loss": 1.4875, "step": 4350 }, { "epoch": 0.02, "learning_rate": 9.988572587093761e-05, "loss": 1.497, "step": 4355 }, { "epoch": 0.02, "learning_rate": 9.988545725144616e-05, "loss": 1.4931, "step": 4360 }, { "epoch": 0.02, "learning_rate": 9.988518831697089e-05, "loss": 1.4963, "step": 4365 }, { "epoch": 0.02, "learning_rate": 9.988491906751351e-05, "loss": 1.5466, "step": 4370 }, { "epoch": 0.02, "learning_rate": 9.988464950307568e-05, "loss": 1.5052, "step": 4375 }, { "epoch": 0.02, "learning_rate": 9.988437962365915e-05, "loss": 1.505, "step": 4380 }, { "epoch": 0.02, "learning_rate": 9.988410942926561e-05, "loss": 1.5476, "step": 4385 }, { "epoch": 0.02, "learning_rate": 9.988383891989673e-05, "loss": 1.5194, "step": 4390 }, { "epoch": 0.02, "learning_rate": 9.988356809555427e-05, "loss": 1.479, "step": 4395 }, { "epoch": 0.02, "learning_rate": 9.98832969562399e-05, "loss": 1.5269, "step": 4400 }, { "epoch": 0.02, "learning_rate": 9.988302550195537e-05, "loss": 1.5161, "step": 4405 }, { "epoch": 0.02, "learning_rate": 9.988275373270237e-05, "loss": 1.5332, "step": 4410 }, { "epoch": 0.02, "learning_rate": 9.988248164848262e-05, "loss": 1.4943, "step": 4415 }, { "epoch": 0.02, "learning_rate": 9.988220924929784e-05, "loss": 1.5402, "step": 4420 }, { "epoch": 0.02, "learning_rate": 9.988193653514973e-05, "loss": 1.5296, "step": 4425 }, { "epoch": 0.02, "learning_rate": 9.988166350604004e-05, "loss": 1.5358, "step": 4430 }, { "epoch": 0.02, "learning_rate": 9.98813901619705e-05, "loss": 1.4855, "step": 4435 }, { "epoch": 0.02, "learning_rate": 9.988111650294278e-05, "loss": 1.4435, "step": 4440 }, { "epoch": 0.02, "learning_rate": 9.988084252895868e-05, "loss": 1.4716, "step": 4445 }, { "epoch": 0.02, "learning_rate": 9.98805682400199e-05, "loss": 1.4794, "step": 4450 }, { "epoch": 0.02, "learning_rate": 9.988029363612815e-05, "loss": 1.5067, "step": 4455 }, { "epoch": 0.02, "learning_rate": 9.98800187172852e-05, "loss": 1.5073, "step": 4460 }, { "epoch": 0.02, "learning_rate": 9.987974348349276e-05, "loss": 1.5499, "step": 4465 }, { "epoch": 0.02, "learning_rate": 9.987946793475257e-05, "loss": 1.4601, "step": 4470 }, { "epoch": 0.02, "learning_rate": 9.987919207106639e-05, "loss": 1.5021, "step": 4475 }, { "epoch": 0.02, "learning_rate": 9.987891589243594e-05, "loss": 1.5213, "step": 4480 }, { "epoch": 0.02, "learning_rate": 9.987863939886298e-05, "loss": 1.5026, "step": 4485 }, { "epoch": 0.02, "learning_rate": 9.987836259034925e-05, "loss": 1.5198, "step": 4490 }, { "epoch": 0.02, "learning_rate": 9.98780854668965e-05, "loss": 1.477, "step": 4495 }, { "epoch": 0.02, "learning_rate": 9.987780802850646e-05, "loss": 1.4909, "step": 4500 }, { "epoch": 0.02, "learning_rate": 9.98775302751809e-05, "loss": 1.5267, "step": 4505 }, { "epoch": 0.02, "learning_rate": 9.987725220692157e-05, "loss": 1.4857, "step": 4510 }, { "epoch": 0.02, "learning_rate": 9.987697382373025e-05, "loss": 1.5284, "step": 4515 }, { "epoch": 0.02, "learning_rate": 9.987669512560865e-05, "loss": 1.4978, "step": 4520 }, { "epoch": 0.02, "learning_rate": 9.987641611255857e-05, "loss": 1.5073, "step": 4525 }, { "epoch": 0.02, "learning_rate": 9.987613678458174e-05, "loss": 1.4503, "step": 4530 }, { "epoch": 0.02, "learning_rate": 9.987585714167995e-05, "loss": 1.4891, "step": 4535 }, { "epoch": 0.02, "learning_rate": 9.987557718385497e-05, "loss": 1.4578, "step": 4540 }, { "epoch": 0.02, "learning_rate": 9.987529691110853e-05, "loss": 1.4722, "step": 4545 }, { "epoch": 0.02, "learning_rate": 9.987501632344244e-05, "loss": 1.4837, "step": 4550 }, { "epoch": 0.02, "learning_rate": 9.987473542085845e-05, "loss": 1.5406, "step": 4555 }, { "epoch": 0.02, "learning_rate": 9.987445420335833e-05, "loss": 1.5159, "step": 4560 }, { "epoch": 0.02, "learning_rate": 9.987417267094388e-05, "loss": 1.5105, "step": 4565 }, { "epoch": 0.02, "learning_rate": 9.987389082361684e-05, "loss": 1.5373, "step": 4570 }, { "epoch": 0.02, "learning_rate": 9.987360866137903e-05, "loss": 1.5489, "step": 4575 }, { "epoch": 0.02, "learning_rate": 9.987332618423221e-05, "loss": 1.5048, "step": 4580 }, { "epoch": 0.02, "learning_rate": 9.987304339217815e-05, "loss": 1.4216, "step": 4585 }, { "epoch": 0.02, "learning_rate": 9.987276028521867e-05, "loss": 1.4706, "step": 4590 }, { "epoch": 0.02, "learning_rate": 9.987247686335555e-05, "loss": 1.4979, "step": 4595 }, { "epoch": 0.02, "learning_rate": 9.987219312659055e-05, "loss": 1.507, "step": 4600 }, { "epoch": 0.02, "learning_rate": 9.987190907492549e-05, "loss": 1.5173, "step": 4605 }, { "epoch": 0.02, "learning_rate": 9.987162470836215e-05, "loss": 1.4988, "step": 4610 }, { "epoch": 0.02, "learning_rate": 9.987134002690233e-05, "loss": 1.4874, "step": 4615 }, { "epoch": 0.02, "learning_rate": 9.987105503054783e-05, "loss": 1.4652, "step": 4620 }, { "epoch": 0.02, "learning_rate": 9.987076971930045e-05, "loss": 1.4972, "step": 4625 }, { "epoch": 0.02, "learning_rate": 9.987048409316199e-05, "loss": 1.52, "step": 4630 }, { "epoch": 0.02, "learning_rate": 9.987019815213425e-05, "loss": 1.4863, "step": 4635 }, { "epoch": 0.02, "learning_rate": 9.986991189621902e-05, "loss": 1.4899, "step": 4640 }, { "epoch": 0.02, "learning_rate": 9.986962532541814e-05, "loss": 1.457, "step": 4645 }, { "epoch": 0.02, "learning_rate": 9.986933843973341e-05, "loss": 1.5102, "step": 4650 }, { "epoch": 0.02, "learning_rate": 9.986905123916664e-05, "loss": 1.4892, "step": 4655 }, { "epoch": 0.02, "learning_rate": 9.986876372371963e-05, "loss": 1.5298, "step": 4660 }, { "epoch": 0.02, "learning_rate": 9.98684758933942e-05, "loss": 1.5681, "step": 4665 }, { "epoch": 0.02, "learning_rate": 9.986818774819218e-05, "loss": 1.4885, "step": 4670 }, { "epoch": 0.02, "learning_rate": 9.986789928811538e-05, "loss": 1.4887, "step": 4675 }, { "epoch": 0.02, "learning_rate": 9.986761051316563e-05, "loss": 1.5291, "step": 4680 }, { "epoch": 0.02, "learning_rate": 9.986732142334471e-05, "loss": 1.5125, "step": 4685 }, { "epoch": 0.02, "learning_rate": 9.986703201865453e-05, "loss": 1.5467, "step": 4690 }, { "epoch": 0.02, "learning_rate": 9.986674229909683e-05, "loss": 1.5131, "step": 4695 }, { "epoch": 0.02, "learning_rate": 9.98664522646735e-05, "loss": 1.474, "step": 4700 }, { "epoch": 0.02, "learning_rate": 9.986616191538635e-05, "loss": 1.5111, "step": 4705 }, { "epoch": 0.02, "learning_rate": 9.98658712512372e-05, "loss": 1.5524, "step": 4710 }, { "epoch": 0.02, "learning_rate": 9.986558027222793e-05, "loss": 1.5388, "step": 4715 }, { "epoch": 0.02, "learning_rate": 9.986528897836032e-05, "loss": 1.528, "step": 4720 }, { "epoch": 0.02, "learning_rate": 9.986499736963624e-05, "loss": 1.4662, "step": 4725 }, { "epoch": 0.02, "learning_rate": 9.98647054460575e-05, "loss": 1.5473, "step": 4730 }, { "epoch": 0.02, "learning_rate": 9.986441320762601e-05, "loss": 1.5541, "step": 4735 }, { "epoch": 0.02, "learning_rate": 9.986412065434355e-05, "loss": 1.5309, "step": 4740 }, { "epoch": 0.02, "learning_rate": 9.9863827786212e-05, "loss": 1.4741, "step": 4745 }, { "epoch": 0.02, "learning_rate": 9.986353460323321e-05, "loss": 1.442, "step": 4750 }, { "epoch": 0.02, "learning_rate": 9.986324110540901e-05, "loss": 1.4524, "step": 4755 }, { "epoch": 0.02, "learning_rate": 9.986294729274127e-05, "loss": 1.4878, "step": 4760 }, { "epoch": 0.02, "learning_rate": 9.986265316523184e-05, "loss": 1.5478, "step": 4765 }, { "epoch": 0.02, "learning_rate": 9.986235872288256e-05, "loss": 1.4916, "step": 4770 }, { "epoch": 0.02, "learning_rate": 9.986206396569533e-05, "loss": 1.4551, "step": 4775 }, { "epoch": 0.02, "learning_rate": 9.986176889367198e-05, "loss": 1.5169, "step": 4780 }, { "epoch": 0.02, "learning_rate": 9.986147350681439e-05, "loss": 1.5001, "step": 4785 }, { "epoch": 0.02, "learning_rate": 9.986117780512441e-05, "loss": 1.4939, "step": 4790 }, { "epoch": 0.02, "learning_rate": 9.986088178860391e-05, "loss": 1.4576, "step": 4795 }, { "epoch": 0.02, "learning_rate": 9.986058545725476e-05, "loss": 1.5747, "step": 4800 }, { "epoch": 0.02, "learning_rate": 9.986028881107882e-05, "loss": 1.4927, "step": 4805 }, { "epoch": 0.02, "learning_rate": 9.985999185007802e-05, "loss": 1.5061, "step": 4810 }, { "epoch": 0.02, "learning_rate": 9.985969457425414e-05, "loss": 1.4841, "step": 4815 }, { "epoch": 0.02, "learning_rate": 9.985939698360916e-05, "loss": 1.5298, "step": 4820 }, { "epoch": 0.02, "learning_rate": 9.985909907814487e-05, "loss": 1.4187, "step": 4825 }, { "epoch": 0.02, "learning_rate": 9.98588008578632e-05, "loss": 1.4835, "step": 4830 }, { "epoch": 0.02, "learning_rate": 9.985850232276603e-05, "loss": 1.5325, "step": 4835 }, { "epoch": 0.02, "learning_rate": 9.985820347285521e-05, "loss": 1.5165, "step": 4840 }, { "epoch": 0.02, "learning_rate": 9.985790430813269e-05, "loss": 1.5617, "step": 4845 }, { "epoch": 0.02, "learning_rate": 9.98576048286003e-05, "loss": 1.4625, "step": 4850 }, { "epoch": 0.02, "learning_rate": 9.985730503425997e-05, "loss": 1.4876, "step": 4855 }, { "epoch": 0.02, "learning_rate": 9.985700492511356e-05, "loss": 1.515, "step": 4860 }, { "epoch": 0.02, "learning_rate": 9.985670450116297e-05, "loss": 1.5216, "step": 4865 }, { "epoch": 0.02, "learning_rate": 9.985640376241014e-05, "loss": 1.4706, "step": 4870 }, { "epoch": 0.02, "learning_rate": 9.985610270885692e-05, "loss": 1.4992, "step": 4875 }, { "epoch": 0.02, "learning_rate": 9.985580134050522e-05, "loss": 1.4712, "step": 4880 }, { "epoch": 0.02, "learning_rate": 9.985549965735698e-05, "loss": 1.5588, "step": 4885 }, { "epoch": 0.02, "learning_rate": 9.985519765941405e-05, "loss": 1.4993, "step": 4890 }, { "epoch": 0.02, "learning_rate": 9.985489534667837e-05, "loss": 1.5217, "step": 4895 }, { "epoch": 0.02, "learning_rate": 9.985459271915185e-05, "loss": 1.5097, "step": 4900 }, { "epoch": 0.02, "learning_rate": 9.985428977683638e-05, "loss": 1.478, "step": 4905 }, { "epoch": 0.02, "learning_rate": 9.985398651973389e-05, "loss": 1.4965, "step": 4910 }, { "epoch": 0.02, "learning_rate": 9.98536829478463e-05, "loss": 1.527, "step": 4915 }, { "epoch": 0.02, "learning_rate": 9.98533790611755e-05, "loss": 1.5296, "step": 4920 }, { "epoch": 0.02, "learning_rate": 9.985307485972344e-05, "loss": 1.5321, "step": 4925 }, { "epoch": 0.02, "learning_rate": 9.9852770343492e-05, "loss": 1.4849, "step": 4930 }, { "epoch": 0.02, "learning_rate": 9.985246551248317e-05, "loss": 1.4683, "step": 4935 }, { "epoch": 0.02, "learning_rate": 9.98521603666988e-05, "loss": 1.5257, "step": 4940 }, { "epoch": 0.03, "learning_rate": 9.985185490614086e-05, "loss": 1.5138, "step": 4945 }, { "epoch": 0.03, "learning_rate": 9.985154913081127e-05, "loss": 1.5458, "step": 4950 }, { "epoch": 0.03, "learning_rate": 9.985124304071198e-05, "loss": 1.5363, "step": 4955 }, { "epoch": 0.03, "learning_rate": 9.985093663584488e-05, "loss": 1.5001, "step": 4960 }, { "epoch": 0.03, "learning_rate": 9.985062991621193e-05, "loss": 1.4756, "step": 4965 }, { "epoch": 0.03, "learning_rate": 9.985032288181506e-05, "loss": 1.5139, "step": 4970 }, { "epoch": 0.03, "learning_rate": 9.985001553265623e-05, "loss": 1.5024, "step": 4975 }, { "epoch": 0.03, "learning_rate": 9.984970786873735e-05, "loss": 1.5113, "step": 4980 }, { "epoch": 0.03, "learning_rate": 9.984939989006038e-05, "loss": 1.4691, "step": 4985 }, { "epoch": 0.03, "learning_rate": 9.984909159662727e-05, "loss": 1.5188, "step": 4990 }, { "epoch": 0.03, "learning_rate": 9.984878298843994e-05, "loss": 1.4678, "step": 4995 }, { "epoch": 0.03, "learning_rate": 9.984847406550037e-05, "loss": 1.4888, "step": 5000 }, { "epoch": 0.03, "learning_rate": 9.984816482781048e-05, "loss": 1.4616, "step": 5005 }, { "epoch": 0.03, "learning_rate": 9.984785527537226e-05, "loss": 1.5665, "step": 5010 }, { "epoch": 0.03, "learning_rate": 9.984754540818763e-05, "loss": 1.4752, "step": 5015 }, { "epoch": 0.03, "learning_rate": 9.984723522625856e-05, "loss": 1.4979, "step": 5020 }, { "epoch": 0.03, "learning_rate": 9.9846924729587e-05, "loss": 1.5286, "step": 5025 }, { "epoch": 0.03, "learning_rate": 9.984661391817494e-05, "loss": 1.4881, "step": 5030 }, { "epoch": 0.03, "learning_rate": 9.984630279202432e-05, "loss": 1.4904, "step": 5035 }, { "epoch": 0.03, "learning_rate": 9.98459913511371e-05, "loss": 1.4129, "step": 5040 }, { "epoch": 0.03, "learning_rate": 9.984567959551526e-05, "loss": 1.555, "step": 5045 }, { "epoch": 0.03, "learning_rate": 9.984536752516074e-05, "loss": 1.495, "step": 5050 }, { "epoch": 0.03, "learning_rate": 9.984505514007555e-05, "loss": 1.4924, "step": 5055 }, { "epoch": 0.03, "learning_rate": 9.984474244026163e-05, "loss": 1.5308, "step": 5060 }, { "epoch": 0.03, "learning_rate": 9.984442942572099e-05, "loss": 1.4795, "step": 5065 }, { "epoch": 0.03, "learning_rate": 9.984411609645557e-05, "loss": 1.5113, "step": 5070 }, { "epoch": 0.03, "learning_rate": 9.984380245246736e-05, "loss": 1.4893, "step": 5075 }, { "epoch": 0.03, "learning_rate": 9.984348849375836e-05, "loss": 1.5274, "step": 5080 }, { "epoch": 0.03, "learning_rate": 9.984317422033052e-05, "loss": 1.5588, "step": 5085 }, { "epoch": 0.03, "learning_rate": 9.984285963218585e-05, "loss": 1.5325, "step": 5090 }, { "epoch": 0.03, "learning_rate": 9.984254472932633e-05, "loss": 1.5095, "step": 5095 }, { "epoch": 0.03, "learning_rate": 9.984222951175393e-05, "loss": 1.5129, "step": 5100 }, { "epoch": 0.03, "learning_rate": 9.984191397947067e-05, "loss": 1.4927, "step": 5105 }, { "epoch": 0.03, "learning_rate": 9.98415981324785e-05, "loss": 1.5628, "step": 5110 }, { "epoch": 0.03, "learning_rate": 9.984128197077947e-05, "loss": 1.4961, "step": 5115 }, { "epoch": 0.03, "learning_rate": 9.984096549437555e-05, "loss": 1.481, "step": 5120 }, { "epoch": 0.03, "learning_rate": 9.984064870326872e-05, "loss": 1.5379, "step": 5125 }, { "epoch": 0.03, "learning_rate": 9.984033159746102e-05, "loss": 1.5377, "step": 5130 }, { "epoch": 0.03, "learning_rate": 9.984001417695442e-05, "loss": 1.4382, "step": 5135 }, { "epoch": 0.03, "learning_rate": 9.983969644175091e-05, "loss": 1.5358, "step": 5140 }, { "epoch": 0.03, "learning_rate": 9.983937839185255e-05, "loss": 1.5165, "step": 5145 }, { "epoch": 0.03, "learning_rate": 9.983906002726131e-05, "loss": 1.5192, "step": 5150 }, { "epoch": 0.03, "learning_rate": 9.98387413479792e-05, "loss": 1.5313, "step": 5155 }, { "epoch": 0.03, "learning_rate": 9.983842235400824e-05, "loss": 1.4434, "step": 5160 }, { "epoch": 0.03, "learning_rate": 9.983810304535047e-05, "loss": 1.5003, "step": 5165 }, { "epoch": 0.03, "learning_rate": 9.983778342200785e-05, "loss": 1.5131, "step": 5170 }, { "epoch": 0.03, "learning_rate": 9.983746348398244e-05, "loss": 1.487, "step": 5175 }, { "epoch": 0.03, "learning_rate": 9.983714323127625e-05, "loss": 1.4931, "step": 5180 }, { "epoch": 0.03, "learning_rate": 9.98368226638913e-05, "loss": 1.4297, "step": 5185 }, { "epoch": 0.03, "learning_rate": 9.98365017818296e-05, "loss": 1.5028, "step": 5190 }, { "epoch": 0.03, "learning_rate": 9.983618058509321e-05, "loss": 1.473, "step": 5195 }, { "epoch": 0.03, "learning_rate": 9.983585907368413e-05, "loss": 1.5346, "step": 5200 }, { "epoch": 0.03, "learning_rate": 9.98355372476044e-05, "loss": 1.5364, "step": 5205 }, { "epoch": 0.03, "learning_rate": 9.983521510685606e-05, "loss": 1.5042, "step": 5210 }, { "epoch": 0.03, "learning_rate": 9.983489265144112e-05, "loss": 1.5007, "step": 5215 }, { "epoch": 0.03, "learning_rate": 9.983456988136164e-05, "loss": 1.5043, "step": 5220 }, { "epoch": 0.03, "learning_rate": 9.983424679661966e-05, "loss": 1.4609, "step": 5225 }, { "epoch": 0.03, "learning_rate": 9.98339233972172e-05, "loss": 1.5366, "step": 5230 }, { "epoch": 0.03, "learning_rate": 9.983359968315631e-05, "loss": 1.5686, "step": 5235 }, { "epoch": 0.03, "learning_rate": 9.983327565443906e-05, "loss": 1.4633, "step": 5240 }, { "epoch": 0.03, "learning_rate": 9.983295131106744e-05, "loss": 1.4903, "step": 5245 }, { "epoch": 0.03, "learning_rate": 9.983262665304353e-05, "loss": 1.4585, "step": 5250 }, { "epoch": 0.03, "learning_rate": 9.98323016803694e-05, "loss": 1.5265, "step": 5255 }, { "epoch": 0.03, "learning_rate": 9.983197639304706e-05, "loss": 1.482, "step": 5260 }, { "epoch": 0.03, "learning_rate": 9.983165079107859e-05, "loss": 1.5468, "step": 5265 }, { "epoch": 0.03, "learning_rate": 9.983132487446605e-05, "loss": 1.4755, "step": 5270 }, { "epoch": 0.03, "learning_rate": 9.983099864321149e-05, "loss": 1.51, "step": 5275 }, { "epoch": 0.03, "learning_rate": 9.983067209731695e-05, "loss": 1.5016, "step": 5280 }, { "epoch": 0.03, "learning_rate": 9.983034523678454e-05, "loss": 1.4736, "step": 5285 }, { "epoch": 0.03, "learning_rate": 9.983001806161627e-05, "loss": 1.4539, "step": 5290 }, { "epoch": 0.03, "learning_rate": 9.982969057181423e-05, "loss": 1.5463, "step": 5295 }, { "epoch": 0.03, "learning_rate": 9.98293627673805e-05, "loss": 1.4902, "step": 5300 }, { "epoch": 0.03, "learning_rate": 9.982903464831714e-05, "loss": 1.5275, "step": 5305 }, { "epoch": 0.03, "learning_rate": 9.982870621462621e-05, "loss": 1.5566, "step": 5310 }, { "epoch": 0.03, "learning_rate": 9.98283774663098e-05, "loss": 1.4929, "step": 5315 }, { "epoch": 0.03, "learning_rate": 9.982804840336998e-05, "loss": 1.4763, "step": 5320 }, { "epoch": 0.03, "learning_rate": 9.982771902580883e-05, "loss": 1.5196, "step": 5325 }, { "epoch": 0.03, "learning_rate": 9.982738933362842e-05, "loss": 1.5326, "step": 5330 }, { "epoch": 0.03, "learning_rate": 9.982705932683085e-05, "loss": 1.5162, "step": 5335 }, { "epoch": 0.03, "learning_rate": 9.982672900541817e-05, "loss": 1.5219, "step": 5340 }, { "epoch": 0.03, "learning_rate": 9.98263983693925e-05, "loss": 1.5499, "step": 5345 }, { "epoch": 0.03, "learning_rate": 9.982606741875592e-05, "loss": 1.5681, "step": 5350 }, { "epoch": 0.03, "learning_rate": 9.98257361535105e-05, "loss": 1.5491, "step": 5355 }, { "epoch": 0.03, "learning_rate": 9.982540457365836e-05, "loss": 1.5337, "step": 5360 }, { "epoch": 0.03, "learning_rate": 9.982507267920158e-05, "loss": 1.5557, "step": 5365 }, { "epoch": 0.03, "learning_rate": 9.982474047014226e-05, "loss": 1.5216, "step": 5370 }, { "epoch": 0.03, "learning_rate": 9.982440794648249e-05, "loss": 1.506, "step": 5375 }, { "epoch": 0.03, "learning_rate": 9.982407510822437e-05, "loss": 1.4518, "step": 5380 }, { "epoch": 0.03, "learning_rate": 9.982374195537001e-05, "loss": 1.4318, "step": 5385 }, { "epoch": 0.03, "learning_rate": 9.98234084879215e-05, "loss": 1.4863, "step": 5390 }, { "epoch": 0.03, "learning_rate": 9.982307470588098e-05, "loss": 1.5002, "step": 5395 }, { "epoch": 0.03, "learning_rate": 9.98227406092505e-05, "loss": 1.4875, "step": 5400 }, { "epoch": 0.03, "learning_rate": 9.982240619803221e-05, "loss": 1.5092, "step": 5405 }, { "epoch": 0.03, "learning_rate": 9.982207147222822e-05, "loss": 1.5387, "step": 5410 }, { "epoch": 0.03, "learning_rate": 9.982173643184063e-05, "loss": 1.5063, "step": 5415 }, { "epoch": 0.03, "learning_rate": 9.982140107687156e-05, "loss": 1.5373, "step": 5420 }, { "epoch": 0.03, "learning_rate": 9.982106540732312e-05, "loss": 1.4991, "step": 5425 }, { "epoch": 0.03, "learning_rate": 9.982072942319745e-05, "loss": 1.5448, "step": 5430 }, { "epoch": 0.03, "learning_rate": 9.982039312449666e-05, "loss": 1.5134, "step": 5435 }, { "epoch": 0.03, "learning_rate": 9.982005651122288e-05, "loss": 1.5076, "step": 5440 }, { "epoch": 0.03, "learning_rate": 9.981971958337824e-05, "loss": 1.5199, "step": 5445 }, { "epoch": 0.03, "learning_rate": 9.981938234096482e-05, "loss": 1.4118, "step": 5450 }, { "epoch": 0.03, "learning_rate": 9.981904478398482e-05, "loss": 1.5121, "step": 5455 }, { "epoch": 0.03, "learning_rate": 9.981870691244031e-05, "loss": 1.4717, "step": 5460 }, { "epoch": 0.03, "learning_rate": 9.981836872633348e-05, "loss": 1.4902, "step": 5465 }, { "epoch": 0.03, "learning_rate": 9.981803022566641e-05, "loss": 1.5429, "step": 5470 }, { "epoch": 0.03, "learning_rate": 9.981769141044127e-05, "loss": 1.5351, "step": 5475 }, { "epoch": 0.03, "learning_rate": 9.98173522806602e-05, "loss": 1.4532, "step": 5480 }, { "epoch": 0.03, "learning_rate": 9.981701283632532e-05, "loss": 1.4673, "step": 5485 }, { "epoch": 0.03, "learning_rate": 9.98166730774388e-05, "loss": 1.4805, "step": 5490 }, { "epoch": 0.03, "learning_rate": 9.981633300400277e-05, "loss": 1.5073, "step": 5495 }, { "epoch": 0.03, "learning_rate": 9.981599261601939e-05, "loss": 1.4875, "step": 5500 }, { "epoch": 0.03, "learning_rate": 9.981565191349078e-05, "loss": 1.4679, "step": 5505 }, { "epoch": 0.03, "learning_rate": 9.981531089641912e-05, "loss": 1.5476, "step": 5510 }, { "epoch": 0.03, "learning_rate": 9.981496956480655e-05, "loss": 1.4786, "step": 5515 }, { "epoch": 0.03, "learning_rate": 9.981462791865524e-05, "loss": 1.5164, "step": 5520 }, { "epoch": 0.03, "learning_rate": 9.981428595796731e-05, "loss": 1.5029, "step": 5525 }, { "epoch": 0.03, "learning_rate": 9.981394368274497e-05, "loss": 1.4879, "step": 5530 }, { "epoch": 0.03, "learning_rate": 9.981360109299034e-05, "loss": 1.4773, "step": 5535 }, { "epoch": 0.03, "learning_rate": 9.98132581887056e-05, "loss": 1.4871, "step": 5540 }, { "epoch": 0.03, "learning_rate": 9.981291496989294e-05, "loss": 1.5306, "step": 5545 }, { "epoch": 0.03, "learning_rate": 9.981257143655447e-05, "loss": 1.4711, "step": 5550 }, { "epoch": 0.03, "learning_rate": 9.98122275886924e-05, "loss": 1.5054, "step": 5555 }, { "epoch": 0.03, "learning_rate": 9.98118834263089e-05, "loss": 1.4881, "step": 5560 }, { "epoch": 0.03, "learning_rate": 9.981153894940614e-05, "loss": 1.5215, "step": 5565 }, { "epoch": 0.03, "learning_rate": 9.981119415798628e-05, "loss": 1.5511, "step": 5570 }, { "epoch": 0.03, "learning_rate": 9.981084905205149e-05, "loss": 1.5274, "step": 5575 }, { "epoch": 0.03, "learning_rate": 9.981050363160399e-05, "loss": 1.5059, "step": 5580 }, { "epoch": 0.03, "learning_rate": 9.981015789664593e-05, "loss": 1.5123, "step": 5585 }, { "epoch": 0.03, "learning_rate": 9.98098118471795e-05, "loss": 1.4691, "step": 5590 }, { "epoch": 0.03, "learning_rate": 9.980946548320689e-05, "loss": 1.4681, "step": 5595 }, { "epoch": 0.03, "learning_rate": 9.980911880473027e-05, "loss": 1.401, "step": 5600 }, { "epoch": 0.03, "learning_rate": 9.980877181175186e-05, "loss": 1.5356, "step": 5605 }, { "epoch": 0.03, "learning_rate": 9.980842450427382e-05, "loss": 1.5111, "step": 5610 }, { "epoch": 0.03, "learning_rate": 9.980807688229836e-05, "loss": 1.51, "step": 5615 }, { "epoch": 0.03, "learning_rate": 9.980772894582766e-05, "loss": 1.4631, "step": 5620 }, { "epoch": 0.03, "learning_rate": 9.980738069486394e-05, "loss": 1.4893, "step": 5625 }, { "epoch": 0.03, "learning_rate": 9.980703212940938e-05, "loss": 1.4753, "step": 5630 }, { "epoch": 0.03, "learning_rate": 9.980668324946619e-05, "loss": 1.4929, "step": 5635 }, { "epoch": 0.03, "learning_rate": 9.980633405503656e-05, "loss": 1.4866, "step": 5640 }, { "epoch": 0.03, "learning_rate": 9.980598454612271e-05, "loss": 1.5493, "step": 5645 }, { "epoch": 0.03, "learning_rate": 9.980563472272684e-05, "loss": 1.4831, "step": 5650 }, { "epoch": 0.03, "learning_rate": 9.980528458485117e-05, "loss": 1.4877, "step": 5655 }, { "epoch": 0.03, "learning_rate": 9.98049341324979e-05, "loss": 1.4614, "step": 5660 }, { "epoch": 0.03, "learning_rate": 9.980458336566923e-05, "loss": 1.4293, "step": 5665 }, { "epoch": 0.03, "learning_rate": 9.98042322843674e-05, "loss": 1.5266, "step": 5670 }, { "epoch": 0.03, "learning_rate": 9.98038808885946e-05, "loss": 1.4845, "step": 5675 }, { "epoch": 0.03, "learning_rate": 9.980352917835308e-05, "loss": 1.4924, "step": 5680 }, { "epoch": 0.03, "learning_rate": 9.980317715364505e-05, "loss": 1.4654, "step": 5685 }, { "epoch": 0.03, "learning_rate": 9.980282481447272e-05, "loss": 1.3969, "step": 5690 }, { "epoch": 0.03, "learning_rate": 9.980247216083832e-05, "loss": 1.5392, "step": 5695 }, { "epoch": 0.03, "learning_rate": 9.980211919274407e-05, "loss": 1.4587, "step": 5700 }, { "epoch": 0.03, "learning_rate": 9.980176591019222e-05, "loss": 1.5106, "step": 5705 }, { "epoch": 0.03, "learning_rate": 9.980141231318498e-05, "loss": 1.5285, "step": 5710 }, { "epoch": 0.03, "learning_rate": 9.980105840172461e-05, "loss": 1.5293, "step": 5715 }, { "epoch": 0.03, "learning_rate": 9.980070417581331e-05, "loss": 1.4841, "step": 5720 }, { "epoch": 0.03, "learning_rate": 9.980034963545333e-05, "loss": 1.4763, "step": 5725 }, { "epoch": 0.03, "learning_rate": 9.97999947806469e-05, "loss": 1.4851, "step": 5730 }, { "epoch": 0.03, "learning_rate": 9.97996396113963e-05, "loss": 1.5057, "step": 5735 }, { "epoch": 0.03, "learning_rate": 9.979928412770373e-05, "loss": 1.5023, "step": 5740 }, { "epoch": 0.03, "learning_rate": 9.979892832957145e-05, "loss": 1.4776, "step": 5745 }, { "epoch": 0.03, "learning_rate": 9.979857221700171e-05, "loss": 1.5119, "step": 5750 }, { "epoch": 0.03, "learning_rate": 9.979821578999675e-05, "loss": 1.5539, "step": 5755 }, { "epoch": 0.03, "learning_rate": 9.979785904855882e-05, "loss": 1.4454, "step": 5760 }, { "epoch": 0.03, "learning_rate": 9.979750199269018e-05, "loss": 1.4928, "step": 5765 }, { "epoch": 0.03, "learning_rate": 9.979714462239309e-05, "loss": 1.4733, "step": 5770 }, { "epoch": 0.03, "learning_rate": 9.979678693766979e-05, "loss": 1.469, "step": 5775 }, { "epoch": 0.03, "learning_rate": 9.979642893852255e-05, "loss": 1.5397, "step": 5780 }, { "epoch": 0.03, "learning_rate": 9.979607062495362e-05, "loss": 1.5132, "step": 5785 }, { "epoch": 0.03, "learning_rate": 9.979571199696527e-05, "loss": 1.5642, "step": 5790 }, { "epoch": 0.03, "learning_rate": 9.979535305455977e-05, "loss": 1.5103, "step": 5795 }, { "epoch": 0.03, "learning_rate": 9.979499379773936e-05, "loss": 1.5058, "step": 5800 }, { "epoch": 0.03, "learning_rate": 9.979463422650634e-05, "loss": 1.5033, "step": 5805 }, { "epoch": 0.03, "learning_rate": 9.979427434086299e-05, "loss": 1.5078, "step": 5810 }, { "epoch": 0.03, "learning_rate": 9.979391414081153e-05, "loss": 1.4872, "step": 5815 }, { "epoch": 0.03, "learning_rate": 9.97935536263543e-05, "loss": 1.5118, "step": 5820 }, { "epoch": 0.03, "learning_rate": 9.97931927974935e-05, "loss": 1.4501, "step": 5825 }, { "epoch": 0.03, "learning_rate": 9.979283165423148e-05, "loss": 1.4908, "step": 5830 }, { "epoch": 0.03, "learning_rate": 9.979247019657049e-05, "loss": 1.4134, "step": 5835 }, { "epoch": 0.03, "learning_rate": 9.979210842451281e-05, "loss": 1.4692, "step": 5840 }, { "epoch": 0.03, "learning_rate": 9.979174633806072e-05, "loss": 1.4497, "step": 5845 }, { "epoch": 0.03, "learning_rate": 9.979138393721653e-05, "loss": 1.4799, "step": 5850 }, { "epoch": 0.03, "learning_rate": 9.979102122198248e-05, "loss": 1.4355, "step": 5855 }, { "epoch": 0.03, "learning_rate": 9.97906581923609e-05, "loss": 1.4799, "step": 5860 }, { "epoch": 0.03, "learning_rate": 9.97902948483541e-05, "loss": 1.4916, "step": 5865 }, { "epoch": 0.03, "learning_rate": 9.978993118996432e-05, "loss": 1.5522, "step": 5870 }, { "epoch": 0.03, "learning_rate": 9.97895672171939e-05, "loss": 1.4886, "step": 5875 }, { "epoch": 0.03, "learning_rate": 9.978920293004513e-05, "loss": 1.5207, "step": 5880 }, { "epoch": 0.03, "learning_rate": 9.978883832852028e-05, "loss": 1.5016, "step": 5885 }, { "epoch": 0.03, "learning_rate": 9.97884734126217e-05, "loss": 1.5223, "step": 5890 }, { "epoch": 0.03, "learning_rate": 9.978810818235164e-05, "loss": 1.5014, "step": 5895 }, { "epoch": 0.03, "learning_rate": 9.978774263771247e-05, "loss": 1.437, "step": 5900 }, { "epoch": 0.03, "learning_rate": 9.978737677870645e-05, "loss": 1.4876, "step": 5905 }, { "epoch": 0.03, "learning_rate": 9.978701060533589e-05, "loss": 1.5068, "step": 5910 }, { "epoch": 0.03, "learning_rate": 9.978664411760312e-05, "loss": 1.5635, "step": 5915 }, { "epoch": 0.03, "learning_rate": 9.978627731551046e-05, "loss": 1.5147, "step": 5920 }, { "epoch": 0.03, "learning_rate": 9.97859101990602e-05, "loss": 1.5375, "step": 5925 }, { "epoch": 0.03, "learning_rate": 9.978554276825469e-05, "loss": 1.5361, "step": 5930 }, { "epoch": 0.03, "learning_rate": 9.978517502309622e-05, "loss": 1.4982, "step": 5935 }, { "epoch": 0.03, "learning_rate": 9.978480696358714e-05, "loss": 1.4991, "step": 5940 }, { "epoch": 0.03, "learning_rate": 9.978443858972974e-05, "loss": 1.5021, "step": 5945 }, { "epoch": 0.03, "learning_rate": 9.978406990152637e-05, "loss": 1.4616, "step": 5950 }, { "epoch": 0.03, "learning_rate": 9.978370089897938e-05, "loss": 1.5133, "step": 5955 }, { "epoch": 0.03, "learning_rate": 9.978333158209105e-05, "loss": 1.4992, "step": 5960 }, { "epoch": 0.03, "learning_rate": 9.978296195086375e-05, "loss": 1.4619, "step": 5965 }, { "epoch": 0.03, "learning_rate": 9.978259200529978e-05, "loss": 1.4875, "step": 5970 }, { "epoch": 0.03, "learning_rate": 9.97822217454015e-05, "loss": 1.4738, "step": 5975 }, { "epoch": 0.03, "learning_rate": 9.978185117117125e-05, "loss": 1.5233, "step": 5980 }, { "epoch": 0.03, "learning_rate": 9.978148028261136e-05, "loss": 1.4409, "step": 5985 }, { "epoch": 0.03, "learning_rate": 9.978110907972417e-05, "loss": 1.5485, "step": 5990 }, { "epoch": 0.03, "learning_rate": 9.978073756251204e-05, "loss": 1.5185, "step": 5995 }, { "epoch": 0.03, "learning_rate": 9.978036573097729e-05, "loss": 1.515, "step": 6000 } ], "logging_steps": 5, "max_steps": 197780, "num_train_epochs": 1, "save_steps": 1000, "total_flos": 1.6825629945102336e+19, "trial_name": null, "trial_params": null }