{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.4999160839160839, "eval_steps": 500, "global_step": 17874, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.999999034602362e-05, "loss": 0.6504, "step": 5 }, { "epoch": 0.0, "learning_rate": 4.999996138410193e-05, "loss": 0.7955, "step": 10 }, { "epoch": 0.0, "learning_rate": 4.9999913114257315e-05, "loss": 0.7155, "step": 15 }, { "epoch": 0.0, "learning_rate": 4.999984553652703e-05, "loss": 0.6366, "step": 20 }, { "epoch": 0.0, "learning_rate": 4.99997586509633e-05, "loss": 0.6721, "step": 25 }, { "epoch": 0.0, "learning_rate": 4.999965245763319e-05, "loss": 0.7246, "step": 30 }, { "epoch": 0.0, "learning_rate": 4.9999526956618735e-05, "loss": 0.6349, "step": 35 }, { "epoch": 0.0, "learning_rate": 4.9999382148016865e-05, "loss": 0.6734, "step": 40 }, { "epoch": 0.0, "learning_rate": 4.999921803193941e-05, "loss": 0.6138, "step": 45 }, { "epoch": 0.0, "learning_rate": 4.9999034608513124e-05, "loss": 0.6627, "step": 50 }, { "epoch": 0.0, "learning_rate": 4.999883187787966e-05, "loss": 0.6896, "step": 55 }, { "epoch": 0.01, "learning_rate": 4.9998609840195606e-05, "loss": 0.6527, "step": 60 }, { "epoch": 0.01, "learning_rate": 4.999836849563244e-05, "loss": 0.5694, "step": 65 }, { "epoch": 0.01, "learning_rate": 4.999810784437655e-05, "loss": 0.5552, "step": 70 }, { "epoch": 0.01, "learning_rate": 4.999782788662924e-05, "loss": 0.6926, "step": 75 }, { "epoch": 0.01, "learning_rate": 4.999752862260674e-05, "loss": 0.6534, "step": 80 }, { "epoch": 0.01, "learning_rate": 4.999721005254016e-05, "loss": 0.5961, "step": 85 }, { "epoch": 0.01, "learning_rate": 4.999687217667556e-05, "loss": 0.6483, "step": 90 }, { "epoch": 0.01, "learning_rate": 4.9996514995273864e-05, "loss": 0.6096, "step": 95 }, { "epoch": 0.01, "learning_rate": 4.9996138508610946e-05, "loss": 0.6331, "step": 100 }, { "epoch": 0.01, "learning_rate": 4.999574271697756e-05, "loss": 0.6005, "step": 105 }, { "epoch": 0.01, "learning_rate": 4.99953276206794e-05, "loss": 0.6452, "step": 110 }, { "epoch": 0.01, "learning_rate": 4.9994893220037034e-05, "loss": 0.6405, "step": 115 }, { "epoch": 0.01, "learning_rate": 4.999443951538597e-05, "loss": 0.5947, "step": 120 }, { "epoch": 0.01, "learning_rate": 4.999396650707661e-05, "loss": 0.5957, "step": 125 }, { "epoch": 0.01, "learning_rate": 4.999347419547426e-05, "loss": 0.6519, "step": 130 }, { "epoch": 0.01, "learning_rate": 4.999296258095916e-05, "loss": 0.6068, "step": 135 }, { "epoch": 0.01, "learning_rate": 4.999243166392641e-05, "loss": 0.5602, "step": 140 }, { "epoch": 0.01, "learning_rate": 4.9991881444786074e-05, "loss": 0.5403, "step": 145 }, { "epoch": 0.01, "learning_rate": 4.999131192396308e-05, "loss": 0.593, "step": 150 }, { "epoch": 0.01, "learning_rate": 4.999072310189728e-05, "loss": 0.5597, "step": 155 }, { "epoch": 0.01, "learning_rate": 4.9990114979043445e-05, "loss": 0.6004, "step": 160 }, { "epoch": 0.01, "learning_rate": 4.9989487555871224e-05, "loss": 0.6527, "step": 165 }, { "epoch": 0.01, "learning_rate": 4.998884083286519e-05, "loss": 0.6302, "step": 170 }, { "epoch": 0.01, "learning_rate": 4.998817481052484e-05, "loss": 0.7051, "step": 175 }, { "epoch": 0.02, "learning_rate": 4.998748948936452e-05, "loss": 0.5861, "step": 180 }, { "epoch": 0.02, "learning_rate": 4.998678486991354e-05, "loss": 0.5961, "step": 185 }, { "epoch": 0.02, "learning_rate": 4.9986060952716086e-05, "loss": 0.664, "step": 190 }, { "epoch": 0.02, "learning_rate": 4.998531773833124e-05, "loss": 0.5733, "step": 195 }, { "epoch": 0.02, "learning_rate": 4.998455522733302e-05, "loss": 0.5057, "step": 200 }, { "epoch": 0.02, "learning_rate": 4.998377342031032e-05, "loss": 0.6486, "step": 205 }, { "epoch": 0.02, "learning_rate": 4.998297231786694e-05, "loss": 0.6395, "step": 210 }, { "epoch": 0.02, "learning_rate": 4.998215192062158e-05, "loss": 0.6239, "step": 215 }, { "epoch": 0.02, "learning_rate": 4.9981312229207856e-05, "loss": 0.6463, "step": 220 }, { "epoch": 0.02, "learning_rate": 4.998045324427428e-05, "loss": 0.5789, "step": 225 }, { "epoch": 0.02, "learning_rate": 4.997957496648426e-05, "loss": 0.576, "step": 230 }, { "epoch": 0.02, "learning_rate": 4.997867739651609e-05, "loss": 0.6222, "step": 235 }, { "epoch": 0.02, "learning_rate": 4.9977760535063e-05, "loss": 0.5979, "step": 240 }, { "epoch": 0.02, "learning_rate": 4.997682438283309e-05, "loss": 0.5998, "step": 245 }, { "epoch": 0.02, "learning_rate": 4.997586894054937e-05, "loss": 0.6217, "step": 250 }, { "epoch": 0.02, "learning_rate": 4.997489420894975e-05, "loss": 0.5838, "step": 255 }, { "epoch": 0.02, "learning_rate": 4.997390018878702e-05, "loss": 0.6061, "step": 260 }, { "epoch": 0.02, "learning_rate": 4.997288688082888e-05, "loss": 0.6262, "step": 265 }, { "epoch": 0.02, "learning_rate": 4.997185428585794e-05, "loss": 0.6078, "step": 270 }, { "epoch": 0.02, "learning_rate": 4.997080240467169e-05, "loss": 0.594, "step": 275 }, { "epoch": 0.02, "learning_rate": 4.9969731238082505e-05, "loss": 0.6341, "step": 280 }, { "epoch": 0.02, "learning_rate": 4.9968640786917675e-05, "loss": 0.6799, "step": 285 }, { "epoch": 0.02, "learning_rate": 4.9967531052019373e-05, "loss": 0.6324, "step": 290 }, { "epoch": 0.02, "learning_rate": 4.996640203424466e-05, "loss": 0.624, "step": 295 }, { "epoch": 0.03, "learning_rate": 4.996525373446552e-05, "loss": 0.6142, "step": 300 }, { "epoch": 0.03, "learning_rate": 4.996408615356878e-05, "loss": 0.6386, "step": 305 }, { "epoch": 0.03, "learning_rate": 4.996289929245619e-05, "loss": 0.5813, "step": 310 }, { "epoch": 0.03, "learning_rate": 4.9961693152044387e-05, "loss": 0.6369, "step": 315 }, { "epoch": 0.03, "learning_rate": 4.99604677332649e-05, "loss": 0.6076, "step": 320 }, { "epoch": 0.03, "learning_rate": 4.995922303706414e-05, "loss": 0.5091, "step": 325 }, { "epoch": 0.03, "learning_rate": 4.995795906440341e-05, "loss": 0.5826, "step": 330 }, { "epoch": 0.03, "learning_rate": 4.995667581625889e-05, "loss": 0.6323, "step": 335 }, { "epoch": 0.03, "learning_rate": 4.9955373293621666e-05, "loss": 0.6139, "step": 340 }, { "epoch": 0.03, "learning_rate": 4.995405149749769e-05, "loss": 0.6334, "step": 345 }, { "epoch": 0.03, "learning_rate": 4.995271042890781e-05, "loss": 0.5882, "step": 350 }, { "epoch": 0.03, "learning_rate": 4.9951350088887774e-05, "loss": 0.6379, "step": 355 }, { "epoch": 0.03, "learning_rate": 4.994997047848818e-05, "loss": 0.5653, "step": 360 }, { "epoch": 0.03, "learning_rate": 4.994857159877453e-05, "loss": 0.6074, "step": 365 }, { "epoch": 0.03, "learning_rate": 4.99471534508272e-05, "loss": 0.6077, "step": 370 }, { "epoch": 0.03, "learning_rate": 4.9945716035741466e-05, "loss": 0.6527, "step": 375 }, { "epoch": 0.03, "learning_rate": 4.994425935462746e-05, "loss": 0.6891, "step": 380 }, { "epoch": 0.03, "learning_rate": 4.99427834086102e-05, "loss": 0.6489, "step": 385 }, { "epoch": 0.03, "learning_rate": 4.994128819882958e-05, "loss": 0.5694, "step": 390 }, { "epoch": 0.03, "learning_rate": 4.9939773726440404e-05, "loss": 0.5975, "step": 395 }, { "epoch": 0.03, "learning_rate": 4.99382399926123e-05, "loss": 0.6449, "step": 400 }, { "epoch": 0.03, "learning_rate": 4.993668699852981e-05, "loss": 0.6319, "step": 405 }, { "epoch": 0.03, "learning_rate": 4.993511474539234e-05, "loss": 0.6056, "step": 410 }, { "epoch": 0.03, "learning_rate": 4.993352323441417e-05, "loss": 0.5135, "step": 415 }, { "epoch": 0.04, "learning_rate": 4.9931912466824436e-05, "loss": 0.6588, "step": 420 }, { "epoch": 0.04, "learning_rate": 4.993028244386719e-05, "loss": 0.582, "step": 425 }, { "epoch": 0.04, "learning_rate": 4.992863316680131e-05, "loss": 0.5621, "step": 430 }, { "epoch": 0.04, "learning_rate": 4.9926964636900566e-05, "loss": 0.5864, "step": 435 }, { "epoch": 0.04, "learning_rate": 4.9925276855453595e-05, "loss": 0.6512, "step": 440 }, { "epoch": 0.04, "learning_rate": 4.992356982376391e-05, "loss": 0.5598, "step": 445 }, { "epoch": 0.04, "learning_rate": 4.992184354314986e-05, "loss": 0.5462, "step": 450 }, { "epoch": 0.04, "learning_rate": 4.992009801494471e-05, "loss": 0.6273, "step": 455 }, { "epoch": 0.04, "learning_rate": 4.991833324049654e-05, "loss": 0.6377, "step": 460 }, { "epoch": 0.04, "learning_rate": 4.991654922116833e-05, "loss": 0.5433, "step": 465 }, { "epoch": 0.04, "learning_rate": 4.99147459583379e-05, "loss": 0.6562, "step": 470 }, { "epoch": 0.04, "learning_rate": 4.9912923453397964e-05, "loss": 0.6092, "step": 475 }, { "epoch": 0.04, "learning_rate": 4.991108170775606e-05, "loss": 0.5924, "step": 480 }, { "epoch": 0.04, "learning_rate": 4.9909220722834594e-05, "loss": 0.6312, "step": 485 }, { "epoch": 0.04, "learning_rate": 4.9907340500070856e-05, "loss": 0.5965, "step": 490 }, { "epoch": 0.04, "learning_rate": 4.990544104091696e-05, "loss": 0.5901, "step": 495 }, { "epoch": 0.04, "learning_rate": 4.99035223468399e-05, "loss": 0.5852, "step": 500 }, { "epoch": 0.04, "learning_rate": 4.9901584419321526e-05, "loss": 0.6511, "step": 505 }, { "epoch": 0.04, "learning_rate": 4.989962725985853e-05, "loss": 0.6199, "step": 510 }, { "epoch": 0.04, "learning_rate": 4.989765086996245e-05, "loss": 0.5874, "step": 515 }, { "epoch": 0.04, "learning_rate": 4.98956552511597e-05, "loss": 0.6341, "step": 520 }, { "epoch": 0.04, "learning_rate": 4.9893640404991527e-05, "loss": 0.5513, "step": 525 }, { "epoch": 0.04, "learning_rate": 4.989160633301404e-05, "loss": 0.5422, "step": 530 }, { "epoch": 0.04, "learning_rate": 4.9889553036798176e-05, "loss": 0.5794, "step": 535 }, { "epoch": 0.05, "learning_rate": 4.9887480517929746e-05, "loss": 0.5728, "step": 540 }, { "epoch": 0.05, "learning_rate": 4.988538877800939e-05, "loss": 0.6341, "step": 545 }, { "epoch": 0.05, "learning_rate": 4.9883277818652594e-05, "loss": 0.5612, "step": 550 }, { "epoch": 0.05, "learning_rate": 4.98811476414897e-05, "loss": 0.5872, "step": 555 }, { "epoch": 0.05, "learning_rate": 4.9878998248165864e-05, "loss": 0.5999, "step": 560 }, { "epoch": 0.05, "learning_rate": 4.9876829640341115e-05, "loss": 0.5782, "step": 565 }, { "epoch": 0.05, "learning_rate": 4.987464181969031e-05, "loss": 0.6016, "step": 570 }, { "epoch": 0.05, "learning_rate": 4.9872434787903136e-05, "loss": 0.6247, "step": 575 }, { "epoch": 0.05, "learning_rate": 4.987020854668412e-05, "loss": 0.5908, "step": 580 }, { "epoch": 0.05, "learning_rate": 4.986796309775264e-05, "loss": 0.613, "step": 585 }, { "epoch": 0.05, "learning_rate": 4.986569844284289e-05, "loss": 0.5631, "step": 590 }, { "epoch": 0.05, "learning_rate": 4.98634145837039e-05, "loss": 0.5453, "step": 595 }, { "epoch": 0.05, "learning_rate": 4.986111152209955e-05, "loss": 0.6656, "step": 600 }, { "epoch": 0.05, "learning_rate": 4.985878925980851e-05, "loss": 0.6388, "step": 605 }, { "epoch": 0.05, "learning_rate": 4.985644779862433e-05, "loss": 0.544, "step": 610 }, { "epoch": 0.05, "learning_rate": 4.985408714035536e-05, "loss": 0.6059, "step": 615 }, { "epoch": 0.05, "learning_rate": 4.9851707286824765e-05, "loss": 0.6051, "step": 620 }, { "epoch": 0.05, "learning_rate": 4.984930823987056e-05, "loss": 0.6517, "step": 625 }, { "epoch": 0.05, "learning_rate": 4.984689000134558e-05, "loss": 0.6093, "step": 630 }, { "epoch": 0.05, "learning_rate": 4.9844452573117456e-05, "loss": 0.6927, "step": 635 }, { "epoch": 0.05, "learning_rate": 4.984199595706867e-05, "loss": 0.5607, "step": 640 }, { "epoch": 0.05, "learning_rate": 4.98395201550965e-05, "loss": 0.6466, "step": 645 }, { "epoch": 0.05, "learning_rate": 4.983702516911307e-05, "loss": 0.5483, "step": 650 }, { "epoch": 0.05, "learning_rate": 4.983451100104529e-05, "loss": 0.6007, "step": 655 }, { "epoch": 0.06, "learning_rate": 4.983197765283489e-05, "loss": 0.6001, "step": 660 }, { "epoch": 0.06, "learning_rate": 4.982942512643845e-05, "loss": 0.6067, "step": 665 }, { "epoch": 0.06, "learning_rate": 4.98268534238273e-05, "loss": 0.59, "step": 670 }, { "epoch": 0.06, "learning_rate": 4.982426254698763e-05, "loss": 0.6584, "step": 675 }, { "epoch": 0.06, "learning_rate": 4.982165249792042e-05, "loss": 0.6062, "step": 680 }, { "epoch": 0.06, "learning_rate": 4.981902327864145e-05, "loss": 0.6054, "step": 685 }, { "epoch": 0.06, "learning_rate": 4.981637489118132e-05, "loss": 0.5694, "step": 690 }, { "epoch": 0.06, "learning_rate": 4.981370733758543e-05, "loss": 0.5368, "step": 695 }, { "epoch": 0.06, "learning_rate": 4.981102061991398e-05, "loss": 0.5957, "step": 700 }, { "epoch": 0.06, "learning_rate": 4.9808314740241954e-05, "loss": 0.6305, "step": 705 }, { "epoch": 0.06, "learning_rate": 4.980558970065918e-05, "loss": 0.6074, "step": 710 }, { "epoch": 0.06, "learning_rate": 4.9802845503270236e-05, "loss": 0.675, "step": 715 }, { "epoch": 0.06, "learning_rate": 4.9800082150194515e-05, "loss": 0.5721, "step": 720 }, { "epoch": 0.06, "learning_rate": 4.979729964356622e-05, "loss": 0.5454, "step": 725 }, { "epoch": 0.06, "learning_rate": 4.979449798553432e-05, "loss": 0.6237, "step": 730 }, { "epoch": 0.06, "learning_rate": 4.979167717826257e-05, "loss": 0.5768, "step": 735 }, { "epoch": 0.06, "learning_rate": 4.9788837223929566e-05, "loss": 0.4998, "step": 740 }, { "epoch": 0.06, "learning_rate": 4.9785978124728624e-05, "loss": 0.6423, "step": 745 }, { "epoch": 0.06, "learning_rate": 4.978309988286791e-05, "loss": 0.5497, "step": 750 }, { "epoch": 0.06, "learning_rate": 4.978020250057031e-05, "loss": 0.5957, "step": 755 }, { "epoch": 0.06, "learning_rate": 4.977728598007354e-05, "loss": 0.5852, "step": 760 }, { "epoch": 0.06, "learning_rate": 4.9774350323630085e-05, "loss": 0.568, "step": 765 }, { "epoch": 0.06, "learning_rate": 4.97713955335072e-05, "loss": 0.6519, "step": 770 }, { "epoch": 0.07, "learning_rate": 4.976842161198693e-05, "loss": 0.5923, "step": 775 }, { "epoch": 0.07, "learning_rate": 4.976542856136608e-05, "loss": 0.5341, "step": 780 }, { "epoch": 0.07, "learning_rate": 4.9762416383956234e-05, "loss": 0.6549, "step": 785 }, { "epoch": 0.07, "learning_rate": 4.9759385082083756e-05, "loss": 0.6181, "step": 790 }, { "epoch": 0.07, "learning_rate": 4.975633465808979e-05, "loss": 0.6261, "step": 795 }, { "epoch": 0.07, "learning_rate": 4.9753265114330204e-05, "loss": 0.6395, "step": 800 }, { "epoch": 0.07, "learning_rate": 4.975017645317568e-05, "loss": 0.6497, "step": 805 }, { "epoch": 0.07, "learning_rate": 4.974706867701165e-05, "loss": 0.5932, "step": 810 }, { "epoch": 0.07, "learning_rate": 4.9743941788238305e-05, "loss": 0.5843, "step": 815 }, { "epoch": 0.07, "learning_rate": 4.9740795789270595e-05, "loss": 0.6297, "step": 820 }, { "epoch": 0.07, "learning_rate": 4.9737630682538226e-05, "loss": 0.5772, "step": 825 }, { "epoch": 0.07, "learning_rate": 4.973444647048567e-05, "loss": 0.5967, "step": 830 }, { "epoch": 0.07, "learning_rate": 4.973124315557216e-05, "loss": 0.5725, "step": 835 }, { "epoch": 0.07, "learning_rate": 4.9728020740271665e-05, "loss": 0.6361, "step": 840 }, { "epoch": 0.07, "learning_rate": 4.9724779227072916e-05, "loss": 0.5876, "step": 845 }, { "epoch": 0.07, "learning_rate": 4.972151861847939e-05, "loss": 0.5685, "step": 850 }, { "epoch": 0.07, "learning_rate": 4.971823891700933e-05, "loss": 0.6042, "step": 855 }, { "epoch": 0.07, "learning_rate": 4.971494012519569e-05, "loss": 0.6192, "step": 860 }, { "epoch": 0.07, "learning_rate": 4.9711622245586185e-05, "loss": 0.5785, "step": 865 }, { "epoch": 0.07, "learning_rate": 4.9708285280743296e-05, "loss": 0.6173, "step": 870 }, { "epoch": 0.07, "learning_rate": 4.97049292332442e-05, "loss": 0.584, "step": 875 }, { "epoch": 0.07, "learning_rate": 4.9701554105680844e-05, "loss": 0.6294, "step": 880 }, { "epoch": 0.07, "learning_rate": 4.9698159900659897e-05, "loss": 0.5792, "step": 885 }, { "epoch": 0.07, "learning_rate": 4.969474662080276e-05, "loss": 0.6264, "step": 890 }, { "epoch": 0.08, "learning_rate": 4.969131426874558e-05, "loss": 0.5478, "step": 895 }, { "epoch": 0.08, "learning_rate": 4.9687862847139225e-05, "loss": 0.6203, "step": 900 }, { "epoch": 0.08, "learning_rate": 4.968439235864928e-05, "loss": 0.6316, "step": 905 }, { "epoch": 0.08, "learning_rate": 4.968090280595607e-05, "loss": 0.5815, "step": 910 }, { "epoch": 0.08, "learning_rate": 4.967739419175464e-05, "loss": 0.5401, "step": 915 }, { "epoch": 0.08, "learning_rate": 4.9673866518754764e-05, "loss": 0.6107, "step": 920 }, { "epoch": 0.08, "learning_rate": 4.967031978968092e-05, "loss": 0.5564, "step": 925 }, { "epoch": 0.08, "learning_rate": 4.966675400727232e-05, "loss": 0.5569, "step": 930 }, { "epoch": 0.08, "learning_rate": 4.966316917428286e-05, "loss": 0.5833, "step": 935 }, { "epoch": 0.08, "learning_rate": 4.9659565293481194e-05, "loss": 0.6003, "step": 940 }, { "epoch": 0.08, "learning_rate": 4.965594236765066e-05, "loss": 0.6348, "step": 945 }, { "epoch": 0.08, "learning_rate": 4.96523003995893e-05, "loss": 0.6352, "step": 950 }, { "epoch": 0.08, "learning_rate": 4.9648639392109885e-05, "loss": 0.6722, "step": 955 }, { "epoch": 0.08, "learning_rate": 4.9644959348039866e-05, "loss": 0.5833, "step": 960 }, { "epoch": 0.08, "learning_rate": 4.964126027022141e-05, "loss": 0.5869, "step": 965 }, { "epoch": 0.08, "learning_rate": 4.9637542161511384e-05, "loss": 0.5798, "step": 970 }, { "epoch": 0.08, "learning_rate": 4.963380502478136e-05, "loss": 0.6732, "step": 975 }, { "epoch": 0.08, "learning_rate": 4.9630048862917575e-05, "loss": 0.5773, "step": 980 }, { "epoch": 0.08, "learning_rate": 4.9626273678821e-05, "loss": 0.6227, "step": 985 }, { "epoch": 0.08, "learning_rate": 4.962247947540727e-05, "loss": 0.5666, "step": 990 }, { "epoch": 0.08, "learning_rate": 4.9618666255606713e-05, "loss": 0.6271, "step": 995 }, { "epoch": 0.08, "learning_rate": 4.961483402236436e-05, "loss": 0.6118, "step": 1000 }, { "epoch": 0.08, "learning_rate": 4.96109827786399e-05, "loss": 0.5996, "step": 1005 }, { "epoch": 0.08, "learning_rate": 4.960711252740773e-05, "loss": 0.619, "step": 1010 }, { "epoch": 0.09, "learning_rate": 4.9603223271656906e-05, "loss": 0.5923, "step": 1015 }, { "epoch": 0.09, "learning_rate": 4.959931501439118e-05, "loss": 0.5953, "step": 1020 }, { "epoch": 0.09, "learning_rate": 4.9595387758628966e-05, "loss": 0.6505, "step": 1025 }, { "epoch": 0.09, "learning_rate": 4.9591441507403344e-05, "loss": 0.5733, "step": 1030 }, { "epoch": 0.09, "learning_rate": 4.958747626376209e-05, "loss": 0.6075, "step": 1035 }, { "epoch": 0.09, "learning_rate": 4.9583492030767634e-05, "loss": 0.5886, "step": 1040 }, { "epoch": 0.09, "learning_rate": 4.957948881149706e-05, "loss": 0.578, "step": 1045 }, { "epoch": 0.09, "learning_rate": 4.957546660904213e-05, "loss": 0.5945, "step": 1050 }, { "epoch": 0.09, "learning_rate": 4.9571425426509264e-05, "loss": 0.6201, "step": 1055 }, { "epoch": 0.09, "learning_rate": 4.956736526701955e-05, "loss": 0.6172, "step": 1060 }, { "epoch": 0.09, "learning_rate": 4.9563286133708706e-05, "loss": 0.5493, "step": 1065 }, { "epoch": 0.09, "learning_rate": 4.9559188029727135e-05, "loss": 0.6024, "step": 1070 }, { "epoch": 0.09, "learning_rate": 4.9555070958239876e-05, "loss": 0.6688, "step": 1075 }, { "epoch": 0.09, "learning_rate": 4.9550934922426604e-05, "loss": 0.5913, "step": 1080 }, { "epoch": 0.09, "learning_rate": 4.954677992548167e-05, "loss": 0.5638, "step": 1085 }, { "epoch": 0.09, "learning_rate": 4.954260597061405e-05, "loss": 0.5874, "step": 1090 }, { "epoch": 0.09, "learning_rate": 4.9538413061047354e-05, "loss": 0.5431, "step": 1095 }, { "epoch": 0.09, "learning_rate": 4.953420120001986e-05, "loss": 0.5853, "step": 1100 }, { "epoch": 0.09, "learning_rate": 4.9529970390784444e-05, "loss": 0.588, "step": 1105 }, { "epoch": 0.09, "learning_rate": 4.952572063660865e-05, "loss": 0.6036, "step": 1110 }, { "epoch": 0.09, "learning_rate": 4.952145194077464e-05, "loss": 0.5383, "step": 1115 }, { "epoch": 0.09, "learning_rate": 4.9517164306579214e-05, "loss": 0.5515, "step": 1120 }, { "epoch": 0.09, "learning_rate": 4.9512857737333765e-05, "loss": 0.6066, "step": 1125 }, { "epoch": 0.09, "learning_rate": 4.9508532236364344e-05, "loss": 0.5458, "step": 1130 }, { "epoch": 0.1, "learning_rate": 4.950418780701163e-05, "loss": 0.6027, "step": 1135 }, { "epoch": 0.1, "learning_rate": 4.9499824452630875e-05, "loss": 0.5872, "step": 1140 }, { "epoch": 0.1, "learning_rate": 4.9495442176592006e-05, "loss": 0.5907, "step": 1145 }, { "epoch": 0.1, "learning_rate": 4.949104098227951e-05, "loss": 0.5825, "step": 1150 }, { "epoch": 0.1, "learning_rate": 4.9486620873092535e-05, "loss": 0.5622, "step": 1155 }, { "epoch": 0.1, "learning_rate": 4.948218185244479e-05, "loss": 0.5751, "step": 1160 }, { "epoch": 0.1, "learning_rate": 4.947772392376462e-05, "loss": 0.6429, "step": 1165 }, { "epoch": 0.1, "learning_rate": 4.9473247090494955e-05, "loss": 0.595, "step": 1170 }, { "epoch": 0.1, "learning_rate": 4.946875135609335e-05, "loss": 0.6295, "step": 1175 }, { "epoch": 0.1, "learning_rate": 4.946423672403193e-05, "loss": 0.6165, "step": 1180 }, { "epoch": 0.1, "learning_rate": 4.945970319779743e-05, "loss": 0.6048, "step": 1185 }, { "epoch": 0.1, "learning_rate": 4.945515078089118e-05, "loss": 0.5636, "step": 1190 }, { "epoch": 0.1, "learning_rate": 4.9450579476829086e-05, "loss": 0.5793, "step": 1195 }, { "epoch": 0.1, "learning_rate": 4.9445989289141646e-05, "loss": 0.6232, "step": 1200 }, { "epoch": 0.1, "learning_rate": 4.9441380221373954e-05, "loss": 0.6257, "step": 1205 }, { "epoch": 0.1, "learning_rate": 4.943675227708568e-05, "loss": 0.618, "step": 1210 }, { "epoch": 0.1, "learning_rate": 4.9432105459851054e-05, "loss": 0.5607, "step": 1215 }, { "epoch": 0.1, "learning_rate": 4.942743977325891e-05, "loss": 0.5854, "step": 1220 }, { "epoch": 0.1, "learning_rate": 4.9422755220912644e-05, "loss": 0.5583, "step": 1225 }, { "epoch": 0.1, "learning_rate": 4.9418051806430206e-05, "loss": 0.579, "step": 1230 }, { "epoch": 0.1, "learning_rate": 4.941332953344414e-05, "loss": 0.6019, "step": 1235 }, { "epoch": 0.1, "learning_rate": 4.9408588405601544e-05, "loss": 0.6068, "step": 1240 }, { "epoch": 0.1, "learning_rate": 4.9403828426564064e-05, "loss": 0.605, "step": 1245 }, { "epoch": 0.1, "learning_rate": 4.939904960000793e-05, "loss": 0.5902, "step": 1250 }, { "epoch": 0.11, "learning_rate": 4.939425192962391e-05, "loss": 0.5861, "step": 1255 }, { "epoch": 0.11, "learning_rate": 4.9389435419117345e-05, "loss": 0.5608, "step": 1260 }, { "epoch": 0.11, "learning_rate": 4.9384600072208097e-05, "loss": 0.5501, "step": 1265 }, { "epoch": 0.11, "learning_rate": 4.9379745892630595e-05, "loss": 0.6286, "step": 1270 }, { "epoch": 0.11, "learning_rate": 4.9374872884133814e-05, "loss": 0.6151, "step": 1275 }, { "epoch": 0.11, "learning_rate": 4.936998105048127e-05, "loss": 0.6417, "step": 1280 }, { "epoch": 0.11, "learning_rate": 4.936507039545101e-05, "loss": 0.5879, "step": 1285 }, { "epoch": 0.11, "learning_rate": 4.936014092283562e-05, "loss": 0.5491, "step": 1290 }, { "epoch": 0.11, "learning_rate": 4.935519263644223e-05, "loss": 0.6091, "step": 1295 }, { "epoch": 0.11, "learning_rate": 4.935022554009247e-05, "loss": 0.5963, "step": 1300 }, { "epoch": 0.11, "learning_rate": 4.934523963762254e-05, "loss": 0.6173, "step": 1305 }, { "epoch": 0.11, "learning_rate": 4.934023493288314e-05, "loss": 0.5565, "step": 1310 }, { "epoch": 0.11, "learning_rate": 4.933521142973948e-05, "loss": 0.5992, "step": 1315 }, { "epoch": 0.11, "learning_rate": 4.933016913207132e-05, "loss": 0.6348, "step": 1320 }, { "epoch": 0.11, "learning_rate": 4.93251080437729e-05, "loss": 0.544, "step": 1325 }, { "epoch": 0.11, "learning_rate": 4.932002816875302e-05, "loss": 0.5248, "step": 1330 }, { "epoch": 0.11, "learning_rate": 4.9314929510934926e-05, "loss": 0.6025, "step": 1335 }, { "epoch": 0.11, "learning_rate": 4.930981207425641e-05, "loss": 0.5929, "step": 1340 }, { "epoch": 0.11, "learning_rate": 4.930467586266978e-05, "loss": 0.614, "step": 1345 }, { "epoch": 0.11, "learning_rate": 4.929952088014181e-05, "loss": 0.5888, "step": 1350 }, { "epoch": 0.11, "learning_rate": 4.92943471306538e-05, "loss": 0.6059, "step": 1355 }, { "epoch": 0.11, "learning_rate": 4.9289154618201504e-05, "loss": 0.5965, "step": 1360 }, { "epoch": 0.11, "learning_rate": 4.928394334679521e-05, "loss": 0.565, "step": 1365 }, { "epoch": 0.11, "learning_rate": 4.927871332045969e-05, "loss": 0.6975, "step": 1370 }, { "epoch": 0.12, "learning_rate": 4.927346454323417e-05, "loss": 0.5413, "step": 1375 }, { "epoch": 0.12, "learning_rate": 4.926819701917238e-05, "loss": 0.6134, "step": 1380 }, { "epoch": 0.12, "learning_rate": 4.926291075234252e-05, "loss": 0.6444, "step": 1385 }, { "epoch": 0.12, "learning_rate": 4.925760574682728e-05, "loss": 0.5752, "step": 1390 }, { "epoch": 0.12, "learning_rate": 4.92522820067238e-05, "loss": 0.591, "step": 1395 }, { "epoch": 0.12, "learning_rate": 4.924693953614372e-05, "loss": 0.56, "step": 1400 }, { "epoch": 0.12, "learning_rate": 4.92415783392131e-05, "loss": 0.6254, "step": 1405 }, { "epoch": 0.12, "learning_rate": 4.9236198420072515e-05, "loss": 0.5653, "step": 1410 }, { "epoch": 0.12, "learning_rate": 4.923079978287696e-05, "loss": 0.5769, "step": 1415 }, { "epoch": 0.12, "learning_rate": 4.92253824317959e-05, "loss": 0.5821, "step": 1420 }, { "epoch": 0.12, "learning_rate": 4.921994637101326e-05, "loss": 0.5865, "step": 1425 }, { "epoch": 0.12, "learning_rate": 4.92144916047274e-05, "loss": 0.6418, "step": 1430 }, { "epoch": 0.12, "learning_rate": 4.920901813715114e-05, "loss": 0.57, "step": 1435 }, { "epoch": 0.12, "learning_rate": 4.9203525972511736e-05, "loss": 0.6147, "step": 1440 }, { "epoch": 0.12, "learning_rate": 4.9198015115050886e-05, "loss": 0.5311, "step": 1445 }, { "epoch": 0.12, "learning_rate": 4.919248556902474e-05, "loss": 0.5699, "step": 1450 }, { "epoch": 0.12, "learning_rate": 4.918693733870384e-05, "loss": 0.5865, "step": 1455 }, { "epoch": 0.12, "learning_rate": 4.9181370428373206e-05, "loss": 0.5913, "step": 1460 }, { "epoch": 0.12, "learning_rate": 4.9175784842332254e-05, "loss": 0.6451, "step": 1465 }, { "epoch": 0.12, "learning_rate": 4.917018058489483e-05, "loss": 0.7359, "step": 1470 }, { "epoch": 0.12, "learning_rate": 4.916455766038921e-05, "loss": 0.5967, "step": 1475 }, { "epoch": 0.12, "learning_rate": 4.915891607315808e-05, "loss": 0.6231, "step": 1480 }, { "epoch": 0.12, "learning_rate": 4.915325582755853e-05, "loss": 0.6062, "step": 1485 }, { "epoch": 0.13, "learning_rate": 4.914757692796209e-05, "loss": 0.6125, "step": 1490 }, { "epoch": 0.13, "learning_rate": 4.9141879378754666e-05, "loss": 0.6036, "step": 1495 }, { "epoch": 0.13, "learning_rate": 4.913616318433657e-05, "loss": 0.6082, "step": 1500 }, { "epoch": 0.13, "learning_rate": 4.9130428349122535e-05, "loss": 0.5709, "step": 1505 }, { "epoch": 0.13, "learning_rate": 4.9124674877541674e-05, "loss": 0.6207, "step": 1510 }, { "epoch": 0.13, "learning_rate": 4.91189027740375e-05, "loss": 0.5841, "step": 1515 }, { "epoch": 0.13, "learning_rate": 4.911311204306791e-05, "loss": 0.588, "step": 1520 }, { "epoch": 0.13, "learning_rate": 4.9107302689105195e-05, "loss": 0.6204, "step": 1525 }, { "epoch": 0.13, "learning_rate": 4.9101474716636006e-05, "loss": 0.6137, "step": 1530 }, { "epoch": 0.13, "learning_rate": 4.909562813016142e-05, "loss": 0.6478, "step": 1535 }, { "epoch": 0.13, "learning_rate": 4.908976293419684e-05, "loss": 0.5473, "step": 1540 }, { "epoch": 0.13, "learning_rate": 4.908387913327207e-05, "loss": 0.5791, "step": 1545 }, { "epoch": 0.13, "learning_rate": 4.907797673193129e-05, "loss": 0.6064, "step": 1550 }, { "epoch": 0.13, "learning_rate": 4.9072055734733e-05, "loss": 0.5585, "step": 1555 }, { "epoch": 0.13, "learning_rate": 4.9066116146250116e-05, "loss": 0.5816, "step": 1560 }, { "epoch": 0.13, "learning_rate": 4.906015797106989e-05, "loss": 0.602, "step": 1565 }, { "epoch": 0.13, "learning_rate": 4.905418121379392e-05, "loss": 0.5541, "step": 1570 }, { "epoch": 0.13, "learning_rate": 4.904818587903817e-05, "loss": 0.5935, "step": 1575 }, { "epoch": 0.13, "learning_rate": 4.904217197143294e-05, "loss": 0.6165, "step": 1580 }, { "epoch": 0.13, "learning_rate": 4.903613949562288e-05, "loss": 0.5821, "step": 1585 }, { "epoch": 0.13, "learning_rate": 4.9030088456266986e-05, "loss": 0.5618, "step": 1590 }, { "epoch": 0.13, "learning_rate": 4.902401885803858e-05, "loss": 0.5734, "step": 1595 }, { "epoch": 0.13, "learning_rate": 4.901793070562532e-05, "loss": 0.5707, "step": 1600 }, { "epoch": 0.13, "learning_rate": 4.901182400372922e-05, "loss": 0.6397, "step": 1605 }, { "epoch": 0.14, "learning_rate": 4.900569875706657e-05, "loss": 0.5447, "step": 1610 }, { "epoch": 0.14, "learning_rate": 4.899955497036801e-05, "loss": 0.5752, "step": 1615 }, { "epoch": 0.14, "learning_rate": 4.89933926483785e-05, "loss": 0.6205, "step": 1620 }, { "epoch": 0.14, "learning_rate": 4.898721179585734e-05, "loss": 0.5643, "step": 1625 }, { "epoch": 0.14, "learning_rate": 4.898101241757807e-05, "loss": 0.6612, "step": 1630 }, { "epoch": 0.14, "learning_rate": 4.897479451832861e-05, "loss": 0.5241, "step": 1635 }, { "epoch": 0.14, "learning_rate": 4.896855810291116e-05, "loss": 0.5854, "step": 1640 }, { "epoch": 0.14, "learning_rate": 4.896230317614219e-05, "loss": 0.6105, "step": 1645 }, { "epoch": 0.14, "learning_rate": 4.895602974285252e-05, "loss": 0.6263, "step": 1650 }, { "epoch": 0.14, "learning_rate": 4.894973780788722e-05, "loss": 0.5958, "step": 1655 }, { "epoch": 0.14, "learning_rate": 4.894342737610568e-05, "loss": 0.5952, "step": 1660 }, { "epoch": 0.14, "learning_rate": 4.8937098452381544e-05, "loss": 0.5917, "step": 1665 }, { "epoch": 0.14, "learning_rate": 4.893075104160277e-05, "loss": 0.6102, "step": 1670 }, { "epoch": 0.14, "learning_rate": 4.892438514867157e-05, "loss": 0.5238, "step": 1675 }, { "epoch": 0.14, "learning_rate": 4.891800077850443e-05, "loss": 0.55, "step": 1680 }, { "epoch": 0.14, "learning_rate": 4.8911597936032135e-05, "loss": 0.6109, "step": 1685 }, { "epoch": 0.14, "learning_rate": 4.89051766261997e-05, "loss": 0.5782, "step": 1690 }, { "epoch": 0.14, "learning_rate": 4.889873685396642e-05, "loss": 0.6195, "step": 1695 }, { "epoch": 0.14, "learning_rate": 4.889227862430586e-05, "loss": 0.5849, "step": 1700 }, { "epoch": 0.14, "learning_rate": 4.888580194220581e-05, "loss": 0.5929, "step": 1705 }, { "epoch": 0.14, "learning_rate": 4.887930681266835e-05, "loss": 0.6375, "step": 1710 }, { "epoch": 0.14, "learning_rate": 4.887279324070977e-05, "loss": 0.4814, "step": 1715 }, { "epoch": 0.14, "learning_rate": 4.8866261231360615e-05, "loss": 0.5723, "step": 1720 }, { "epoch": 0.14, "learning_rate": 4.885971078966569e-05, "loss": 0.581, "step": 1725 }, { "epoch": 0.15, "learning_rate": 4.8853141920684014e-05, "loss": 0.6084, "step": 1730 }, { "epoch": 0.15, "learning_rate": 4.884655462948885e-05, "loss": 0.534, "step": 1735 }, { "epoch": 0.15, "learning_rate": 4.883994892116766e-05, "loss": 0.5843, "step": 1740 }, { "epoch": 0.15, "learning_rate": 4.8833324800822175e-05, "loss": 0.5861, "step": 1745 }, { "epoch": 0.15, "learning_rate": 4.882668227356831e-05, "loss": 0.5652, "step": 1750 }, { "epoch": 0.15, "learning_rate": 4.8820021344536216e-05, "loss": 0.5883, "step": 1755 }, { "epoch": 0.15, "learning_rate": 4.881334201887025e-05, "loss": 0.5622, "step": 1760 }, { "epoch": 0.15, "learning_rate": 4.8806644301728974e-05, "loss": 0.5309, "step": 1765 }, { "epoch": 0.15, "learning_rate": 4.879992819828515e-05, "loss": 0.5657, "step": 1770 }, { "epoch": 0.15, "learning_rate": 4.879319371372576e-05, "loss": 0.6398, "step": 1775 }, { "epoch": 0.15, "learning_rate": 4.878644085325196e-05, "loss": 0.5516, "step": 1780 }, { "epoch": 0.15, "learning_rate": 4.8779669622079094e-05, "loss": 0.6412, "step": 1785 }, { "epoch": 0.15, "learning_rate": 4.877288002543673e-05, "loss": 0.6131, "step": 1790 }, { "epoch": 0.15, "learning_rate": 4.876607206856859e-05, "loss": 0.5685, "step": 1795 }, { "epoch": 0.15, "learning_rate": 4.875924575673256e-05, "loss": 0.5452, "step": 1800 }, { "epoch": 0.15, "learning_rate": 4.8752401095200764e-05, "loss": 0.5797, "step": 1805 }, { "epoch": 0.15, "learning_rate": 4.8745538089259424e-05, "loss": 0.5459, "step": 1810 }, { "epoch": 0.15, "learning_rate": 4.8738656744208976e-05, "loss": 0.5219, "step": 1815 }, { "epoch": 0.15, "learning_rate": 4.873175706536402e-05, "loss": 0.5795, "step": 1820 }, { "epoch": 0.15, "learning_rate": 4.872483905805327e-05, "loss": 0.6416, "step": 1825 }, { "epoch": 0.15, "learning_rate": 4.871790272761966e-05, "loss": 0.6168, "step": 1830 }, { "epoch": 0.15, "learning_rate": 4.8710948079420234e-05, "loss": 0.5931, "step": 1835 }, { "epoch": 0.15, "learning_rate": 4.870397511882619e-05, "loss": 0.5106, "step": 1840 }, { "epoch": 0.15, "learning_rate": 4.869698385122287e-05, "loss": 0.5702, "step": 1845 }, { "epoch": 0.16, "learning_rate": 4.8689974282009766e-05, "loss": 0.5853, "step": 1850 }, { "epoch": 0.16, "learning_rate": 4.8682946416600484e-05, "loss": 0.5907, "step": 1855 }, { "epoch": 0.16, "learning_rate": 4.867590026042277e-05, "loss": 0.5875, "step": 1860 }, { "epoch": 0.16, "learning_rate": 4.8668835818918514e-05, "loss": 0.5455, "step": 1865 }, { "epoch": 0.16, "learning_rate": 4.8661753097543705e-05, "loss": 0.6147, "step": 1870 }, { "epoch": 0.16, "learning_rate": 4.865465210176845e-05, "loss": 0.5697, "step": 1875 }, { "epoch": 0.16, "learning_rate": 4.864753283707698e-05, "loss": 0.5854, "step": 1880 }, { "epoch": 0.16, "learning_rate": 4.8640395308967637e-05, "loss": 0.6098, "step": 1885 }, { "epoch": 0.16, "learning_rate": 4.863323952295286e-05, "loss": 0.563, "step": 1890 }, { "epoch": 0.16, "learning_rate": 4.862606548455918e-05, "loss": 0.5581, "step": 1895 }, { "epoch": 0.16, "learning_rate": 4.861887319932726e-05, "loss": 0.5632, "step": 1900 }, { "epoch": 0.16, "learning_rate": 4.861166267281182e-05, "loss": 0.6175, "step": 1905 }, { "epoch": 0.16, "learning_rate": 4.860443391058168e-05, "loss": 0.6339, "step": 1910 }, { "epoch": 0.16, "learning_rate": 4.859718691821974e-05, "loss": 0.54, "step": 1915 }, { "epoch": 0.16, "learning_rate": 4.8589921701323e-05, "loss": 0.5693, "step": 1920 }, { "epoch": 0.16, "learning_rate": 4.8582638265502486e-05, "loss": 0.4845, "step": 1925 }, { "epoch": 0.16, "learning_rate": 4.857533661638336e-05, "loss": 0.5726, "step": 1930 }, { "epoch": 0.16, "learning_rate": 4.856801675960482e-05, "loss": 0.638, "step": 1935 }, { "epoch": 0.16, "learning_rate": 4.856067870082009e-05, "loss": 0.5177, "step": 1940 }, { "epoch": 0.16, "learning_rate": 4.8553322445696514e-05, "loss": 0.5159, "step": 1945 }, { "epoch": 0.16, "learning_rate": 4.8545947999915456e-05, "loss": 0.5795, "step": 1950 }, { "epoch": 0.16, "learning_rate": 4.8538555369172324e-05, "loss": 0.5711, "step": 1955 }, { "epoch": 0.16, "learning_rate": 4.853114455917659e-05, "loss": 0.6815, "step": 1960 }, { "epoch": 0.16, "learning_rate": 4.852371557565175e-05, "loss": 0.6084, "step": 1965 }, { "epoch": 0.17, "learning_rate": 4.851626842433535e-05, "loss": 0.539, "step": 1970 }, { "epoch": 0.17, "learning_rate": 4.850880311097896e-05, "loss": 0.6391, "step": 1975 }, { "epoch": 0.17, "learning_rate": 4.8501319641348175e-05, "loss": 0.5398, "step": 1980 }, { "epoch": 0.17, "learning_rate": 4.84938180212226e-05, "loss": 0.5772, "step": 1985 }, { "epoch": 0.17, "learning_rate": 4.84862982563959e-05, "loss": 0.5479, "step": 1990 }, { "epoch": 0.17, "learning_rate": 4.84787603526757e-05, "loss": 0.5754, "step": 1995 }, { "epoch": 0.17, "learning_rate": 4.8471204315883674e-05, "loss": 0.5648, "step": 2000 }, { "epoch": 0.17, "learning_rate": 4.8463630151855474e-05, "loss": 0.5819, "step": 2005 }, { "epoch": 0.17, "learning_rate": 4.845603786644078e-05, "loss": 0.5746, "step": 2010 }, { "epoch": 0.17, "learning_rate": 4.844842746550324e-05, "loss": 0.5681, "step": 2015 }, { "epoch": 0.17, "learning_rate": 4.8440798954920516e-05, "loss": 0.5662, "step": 2020 }, { "epoch": 0.17, "learning_rate": 4.843315234058423e-05, "loss": 0.5267, "step": 2025 }, { "epoch": 0.17, "learning_rate": 4.84254876284e-05, "loss": 0.5887, "step": 2030 }, { "epoch": 0.17, "learning_rate": 4.8417804824287436e-05, "loss": 0.6591, "step": 2035 }, { "epoch": 0.17, "learning_rate": 4.841010393418011e-05, "loss": 0.5858, "step": 2040 }, { "epoch": 0.17, "learning_rate": 4.840238496402553e-05, "loss": 0.5662, "step": 2045 }, { "epoch": 0.17, "learning_rate": 4.839464791978523e-05, "loss": 0.522, "step": 2050 }, { "epoch": 0.17, "learning_rate": 4.8386892807434645e-05, "loss": 0.5964, "step": 2055 }, { "epoch": 0.17, "learning_rate": 4.83791196329632e-05, "loss": 0.5513, "step": 2060 }, { "epoch": 0.17, "learning_rate": 4.837132840237426e-05, "loss": 0.5916, "step": 2065 }, { "epoch": 0.17, "learning_rate": 4.836351912168513e-05, "loss": 0.6027, "step": 2070 }, { "epoch": 0.17, "learning_rate": 4.835569179692706e-05, "loss": 0.6657, "step": 2075 }, { "epoch": 0.17, "learning_rate": 4.834784643414524e-05, "loss": 0.6027, "step": 2080 }, { "epoch": 0.17, "learning_rate": 4.833998303939877e-05, "loss": 0.645, "step": 2085 }, { "epoch": 0.18, "learning_rate": 4.8332101618760706e-05, "loss": 0.6074, "step": 2090 }, { "epoch": 0.18, "learning_rate": 4.8324202178318006e-05, "loss": 0.5946, "step": 2095 }, { "epoch": 0.18, "learning_rate": 4.831628472417156e-05, "loss": 0.5997, "step": 2100 }, { "epoch": 0.18, "learning_rate": 4.8308349262436144e-05, "loss": 0.5561, "step": 2105 }, { "epoch": 0.18, "learning_rate": 4.830039579924047e-05, "loss": 0.6488, "step": 2110 }, { "epoch": 0.18, "learning_rate": 4.8292424340727146e-05, "loss": 0.571, "step": 2115 }, { "epoch": 0.18, "learning_rate": 4.828443489305267e-05, "loss": 0.5753, "step": 2120 }, { "epoch": 0.18, "learning_rate": 4.8276427462387423e-05, "loss": 0.5748, "step": 2125 }, { "epoch": 0.18, "learning_rate": 4.826840205491571e-05, "loss": 0.6434, "step": 2130 }, { "epoch": 0.18, "learning_rate": 4.826035867683569e-05, "loss": 0.6227, "step": 2135 }, { "epoch": 0.18, "learning_rate": 4.825229733435941e-05, "loss": 0.6716, "step": 2140 }, { "epoch": 0.18, "learning_rate": 4.824421803371278e-05, "loss": 0.6283, "step": 2145 }, { "epoch": 0.18, "learning_rate": 4.823612078113561e-05, "loss": 0.5593, "step": 2150 }, { "epoch": 0.18, "learning_rate": 4.822800558288155e-05, "loss": 0.5473, "step": 2155 }, { "epoch": 0.18, "learning_rate": 4.82198724452181e-05, "loss": 0.5341, "step": 2160 }, { "epoch": 0.18, "learning_rate": 4.821172137442664e-05, "loss": 0.6007, "step": 2165 }, { "epoch": 0.18, "learning_rate": 4.820355237680239e-05, "loss": 0.6022, "step": 2170 }, { "epoch": 0.18, "learning_rate": 4.8195365458654416e-05, "loss": 0.5633, "step": 2175 }, { "epoch": 0.18, "learning_rate": 4.8187160626305616e-05, "loss": 0.5429, "step": 2180 }, { "epoch": 0.18, "learning_rate": 4.817893788609274e-05, "loss": 0.5799, "step": 2185 }, { "epoch": 0.18, "learning_rate": 4.817069724436635e-05, "loss": 0.5799, "step": 2190 }, { "epoch": 0.18, "learning_rate": 4.816243870749085e-05, "loss": 0.5723, "step": 2195 }, { "epoch": 0.18, "learning_rate": 4.815416228184446e-05, "loss": 0.5797, "step": 2200 }, { "epoch": 0.19, "learning_rate": 4.8145867973819196e-05, "loss": 0.5586, "step": 2205 }, { "epoch": 0.19, "learning_rate": 4.813755578982092e-05, "loss": 0.5532, "step": 2210 }, { "epoch": 0.19, "learning_rate": 4.8129225736269276e-05, "loss": 0.5198, "step": 2215 }, { "epoch": 0.19, "learning_rate": 4.812087781959772e-05, "loss": 0.5716, "step": 2220 }, { "epoch": 0.19, "learning_rate": 4.81125120462535e-05, "loss": 0.5795, "step": 2225 }, { "epoch": 0.19, "learning_rate": 4.810412842269764e-05, "loss": 0.5563, "step": 2230 }, { "epoch": 0.19, "learning_rate": 4.809572695540498e-05, "loss": 0.5467, "step": 2235 }, { "epoch": 0.19, "learning_rate": 4.808730765086411e-05, "loss": 0.6153, "step": 2240 }, { "epoch": 0.19, "learning_rate": 4.807887051557743e-05, "loss": 0.6471, "step": 2245 }, { "epoch": 0.19, "learning_rate": 4.807041555606108e-05, "loss": 0.5962, "step": 2250 }, { "epoch": 0.19, "learning_rate": 4.806194277884498e-05, "loss": 0.5767, "step": 2255 }, { "epoch": 0.19, "learning_rate": 4.805345219047281e-05, "loss": 0.555, "step": 2260 }, { "epoch": 0.19, "learning_rate": 4.804494379750201e-05, "loss": 0.603, "step": 2265 }, { "epoch": 0.19, "learning_rate": 4.803641760650376e-05, "loss": 0.5732, "step": 2270 }, { "epoch": 0.19, "learning_rate": 4.802787362406299e-05, "loss": 0.5694, "step": 2275 }, { "epoch": 0.19, "learning_rate": 4.801931185677838e-05, "loss": 0.5468, "step": 2280 }, { "epoch": 0.19, "learning_rate": 4.801073231126233e-05, "loss": 0.5763, "step": 2285 }, { "epoch": 0.19, "learning_rate": 4.800213499414099e-05, "loss": 0.5805, "step": 2290 }, { "epoch": 0.19, "learning_rate": 4.799351991205421e-05, "loss": 0.5688, "step": 2295 }, { "epoch": 0.19, "learning_rate": 4.7984887071655585e-05, "loss": 0.5902, "step": 2300 }, { "epoch": 0.19, "learning_rate": 4.79762364796124e-05, "loss": 0.5741, "step": 2305 }, { "epoch": 0.19, "learning_rate": 4.796756814260567e-05, "loss": 0.5738, "step": 2310 }, { "epoch": 0.19, "learning_rate": 4.7958882067330124e-05, "loss": 0.5833, "step": 2315 }, { "epoch": 0.19, "learning_rate": 4.795017826049415e-05, "loss": 0.6099, "step": 2320 }, { "epoch": 0.2, "learning_rate": 4.794145672881988e-05, "loss": 0.66, "step": 2325 }, { "epoch": 0.2, "learning_rate": 4.793271747904309e-05, "loss": 0.5844, "step": 2330 }, { "epoch": 0.2, "learning_rate": 4.792396051791328e-05, "loss": 0.5614, "step": 2335 }, { "epoch": 0.2, "learning_rate": 4.791518585219359e-05, "loss": 0.593, "step": 2340 }, { "epoch": 0.2, "learning_rate": 4.790639348866087e-05, "loss": 0.6045, "step": 2345 }, { "epoch": 0.2, "learning_rate": 4.7897583434105606e-05, "loss": 0.6102, "step": 2350 }, { "epoch": 0.2, "learning_rate": 4.7888755695331974e-05, "loss": 0.6076, "step": 2355 }, { "epoch": 0.2, "learning_rate": 4.7879910279157796e-05, "loss": 0.6183, "step": 2360 }, { "epoch": 0.2, "learning_rate": 4.7871047192414544e-05, "loss": 0.5834, "step": 2365 }, { "epoch": 0.2, "learning_rate": 4.7862166441947346e-05, "loss": 0.6126, "step": 2370 }, { "epoch": 0.2, "learning_rate": 4.7853268034614954e-05, "loss": 0.6164, "step": 2375 }, { "epoch": 0.2, "learning_rate": 4.784435197728978e-05, "loss": 0.6034, "step": 2380 }, { "epoch": 0.2, "learning_rate": 4.783541827685786e-05, "loss": 0.5698, "step": 2385 }, { "epoch": 0.2, "learning_rate": 4.782646694021883e-05, "loss": 0.6112, "step": 2390 }, { "epoch": 0.2, "learning_rate": 4.7817497974286e-05, "loss": 0.5672, "step": 2395 }, { "epoch": 0.2, "learning_rate": 4.7808511385986244e-05, "loss": 0.5739, "step": 2400 }, { "epoch": 0.2, "learning_rate": 4.779950718226007e-05, "loss": 0.5614, "step": 2405 }, { "epoch": 0.2, "learning_rate": 4.77904853700616e-05, "loss": 0.523, "step": 2410 }, { "epoch": 0.2, "learning_rate": 4.778144595635853e-05, "loss": 0.5636, "step": 2415 }, { "epoch": 0.2, "learning_rate": 4.777238894813216e-05, "loss": 0.5497, "step": 2420 }, { "epoch": 0.2, "learning_rate": 4.776331435237739e-05, "loss": 0.5368, "step": 2425 }, { "epoch": 0.2, "learning_rate": 4.7754222176102694e-05, "loss": 0.695, "step": 2430 }, { "epoch": 0.2, "learning_rate": 4.774511242633013e-05, "loss": 0.5958, "step": 2435 }, { "epoch": 0.2, "learning_rate": 4.773598511009531e-05, "loss": 0.5833, "step": 2440 }, { "epoch": 0.21, "learning_rate": 4.772684023444743e-05, "loss": 0.6197, "step": 2445 }, { "epoch": 0.21, "learning_rate": 4.771767780644924e-05, "loss": 0.6145, "step": 2450 }, { "epoch": 0.21, "learning_rate": 4.770849783317707e-05, "loss": 0.6244, "step": 2455 }, { "epoch": 0.21, "learning_rate": 4.7699300321720744e-05, "loss": 0.5686, "step": 2460 }, { "epoch": 0.21, "learning_rate": 4.76900852791837e-05, "loss": 0.6017, "step": 2465 }, { "epoch": 0.21, "learning_rate": 4.768085271268286e-05, "loss": 0.5655, "step": 2470 }, { "epoch": 0.21, "learning_rate": 4.767160262934871e-05, "loss": 0.5447, "step": 2475 }, { "epoch": 0.21, "learning_rate": 4.766233503632527e-05, "loss": 0.5565, "step": 2480 }, { "epoch": 0.21, "learning_rate": 4.7653049940770045e-05, "loss": 0.5493, "step": 2485 }, { "epoch": 0.21, "learning_rate": 4.76437473498541e-05, "loss": 0.5641, "step": 2490 }, { "epoch": 0.21, "learning_rate": 4.763442727076198e-05, "loss": 0.5845, "step": 2495 }, { "epoch": 0.21, "learning_rate": 4.762508971069177e-05, "loss": 0.6336, "step": 2500 }, { "epoch": 0.21, "learning_rate": 4.7615734676855014e-05, "loss": 0.7, "step": 2505 }, { "epoch": 0.21, "learning_rate": 4.76063621764768e-05, "loss": 0.6915, "step": 2510 }, { "epoch": 0.21, "learning_rate": 4.7596972216795666e-05, "loss": 0.5862, "step": 2515 }, { "epoch": 0.21, "learning_rate": 4.7587564805063644e-05, "loss": 0.5456, "step": 2520 }, { "epoch": 0.21, "learning_rate": 4.757813994854625e-05, "loss": 0.6012, "step": 2525 }, { "epoch": 0.21, "learning_rate": 4.756869765452248e-05, "loss": 0.5866, "step": 2530 }, { "epoch": 0.21, "learning_rate": 4.7559237930284785e-05, "loss": 0.5467, "step": 2535 }, { "epoch": 0.21, "learning_rate": 4.754976078313908e-05, "loss": 0.6425, "step": 2540 }, { "epoch": 0.21, "learning_rate": 4.7540266220404725e-05, "loss": 0.588, "step": 2545 }, { "epoch": 0.21, "learning_rate": 4.753075424941456e-05, "loss": 0.5663, "step": 2550 }, { "epoch": 0.21, "learning_rate": 4.7521224877514845e-05, "loss": 0.6102, "step": 2555 }, { "epoch": 0.21, "learning_rate": 4.751167811206529e-05, "loss": 0.5809, "step": 2560 }, { "epoch": 0.22, "learning_rate": 4.750211396043904e-05, "loss": 0.5155, "step": 2565 }, { "epoch": 0.22, "learning_rate": 4.749253243002265e-05, "loss": 0.6118, "step": 2570 }, { "epoch": 0.22, "learning_rate": 4.7482933528216125e-05, "loss": 0.6111, "step": 2575 }, { "epoch": 0.22, "learning_rate": 4.747331726243285e-05, "loss": 0.5567, "step": 2580 }, { "epoch": 0.22, "learning_rate": 4.746368364009965e-05, "loss": 0.5892, "step": 2585 }, { "epoch": 0.22, "learning_rate": 4.745403266865676e-05, "loss": 0.561, "step": 2590 }, { "epoch": 0.22, "learning_rate": 4.744436435555778e-05, "loss": 0.5461, "step": 2595 }, { "epoch": 0.22, "learning_rate": 4.743467870826973e-05, "loss": 0.6151, "step": 2600 }, { "epoch": 0.22, "learning_rate": 4.742497573427301e-05, "loss": 0.7443, "step": 2605 }, { "epoch": 0.22, "learning_rate": 4.741525544106141e-05, "loss": 0.5787, "step": 2610 }, { "epoch": 0.22, "learning_rate": 4.740551783614209e-05, "loss": 0.6291, "step": 2615 }, { "epoch": 0.22, "learning_rate": 4.7395762927035555e-05, "loss": 0.6189, "step": 2620 }, { "epoch": 0.22, "learning_rate": 4.738599072127573e-05, "loss": 0.6303, "step": 2625 }, { "epoch": 0.22, "learning_rate": 4.7376201226409845e-05, "loss": 0.5905, "step": 2630 }, { "epoch": 0.22, "learning_rate": 4.7366394449998516e-05, "loss": 0.5584, "step": 2635 }, { "epoch": 0.22, "learning_rate": 4.735657039961568e-05, "loss": 0.6332, "step": 2640 }, { "epoch": 0.22, "learning_rate": 4.734672908284864e-05, "loss": 0.6577, "step": 2645 }, { "epoch": 0.22, "learning_rate": 4.733687050729802e-05, "loss": 0.5848, "step": 2650 }, { "epoch": 0.22, "learning_rate": 4.7326994680577786e-05, "loss": 0.5799, "step": 2655 }, { "epoch": 0.22, "learning_rate": 4.731710161031521e-05, "loss": 0.5513, "step": 2660 }, { "epoch": 0.22, "learning_rate": 4.730719130415088e-05, "loss": 0.5827, "step": 2665 }, { "epoch": 0.22, "learning_rate": 4.729726376973871e-05, "loss": 0.5616, "step": 2670 }, { "epoch": 0.22, "learning_rate": 4.7287319014745924e-05, "loss": 0.5749, "step": 2675 }, { "epoch": 0.22, "learning_rate": 4.727735704685303e-05, "loss": 0.6076, "step": 2680 }, { "epoch": 0.23, "learning_rate": 4.726737787375384e-05, "loss": 0.5346, "step": 2685 }, { "epoch": 0.23, "learning_rate": 4.725738150315544e-05, "loss": 0.5866, "step": 2690 }, { "epoch": 0.23, "learning_rate": 4.724736794277822e-05, "loss": 0.6, "step": 2695 }, { "epoch": 0.23, "learning_rate": 4.723733720035582e-05, "loss": 0.6019, "step": 2700 }, { "epoch": 0.23, "learning_rate": 4.722728928363519e-05, "loss": 0.5912, "step": 2705 }, { "epoch": 0.23, "learning_rate": 4.7217224200376484e-05, "loss": 0.5544, "step": 2710 }, { "epoch": 0.23, "learning_rate": 4.720714195835317e-05, "loss": 0.5817, "step": 2715 }, { "epoch": 0.23, "learning_rate": 4.719704256535195e-05, "loss": 0.5942, "step": 2720 }, { "epoch": 0.23, "learning_rate": 4.718692602917275e-05, "loss": 0.591, "step": 2725 }, { "epoch": 0.23, "learning_rate": 4.7176792357628766e-05, "loss": 0.558, "step": 2730 }, { "epoch": 0.23, "learning_rate": 4.716664155854641e-05, "loss": 0.5631, "step": 2735 }, { "epoch": 0.23, "learning_rate": 4.715647363976533e-05, "loss": 0.5911, "step": 2740 }, { "epoch": 0.23, "learning_rate": 4.71462886091384e-05, "loss": 0.5577, "step": 2745 }, { "epoch": 0.23, "learning_rate": 4.7136086474531695e-05, "loss": 0.5561, "step": 2750 }, { "epoch": 0.23, "learning_rate": 4.712586724382452e-05, "loss": 0.542, "step": 2755 }, { "epoch": 0.23, "learning_rate": 4.711563092490935e-05, "loss": 0.6227, "step": 2760 }, { "epoch": 0.23, "learning_rate": 4.7105377525691905e-05, "loss": 0.5879, "step": 2765 }, { "epoch": 0.23, "learning_rate": 4.709510705409106e-05, "loss": 0.5713, "step": 2770 }, { "epoch": 0.23, "learning_rate": 4.708481951803888e-05, "loss": 0.5659, "step": 2775 }, { "epoch": 0.23, "learning_rate": 4.7074514925480615e-05, "loss": 0.5543, "step": 2780 }, { "epoch": 0.23, "learning_rate": 4.706419328437471e-05, "loss": 0.6146, "step": 2785 }, { "epoch": 0.23, "learning_rate": 4.705385460269273e-05, "loss": 0.6348, "step": 2790 }, { "epoch": 0.23, "learning_rate": 4.704349888841944e-05, "loss": 0.5716, "step": 2795 }, { "epoch": 0.23, "learning_rate": 4.703312614955273e-05, "loss": 0.5688, "step": 2800 }, { "epoch": 0.24, "learning_rate": 4.7022736394103686e-05, "loss": 0.5762, "step": 2805 }, { "epoch": 0.24, "learning_rate": 4.701232963009647e-05, "loss": 0.6299, "step": 2810 }, { "epoch": 0.24, "learning_rate": 4.700190586556843e-05, "loss": 0.5825, "step": 2815 }, { "epoch": 0.24, "learning_rate": 4.699146510857003e-05, "loss": 0.5984, "step": 2820 }, { "epoch": 0.24, "learning_rate": 4.698100736716485e-05, "loss": 0.6062, "step": 2825 }, { "epoch": 0.24, "learning_rate": 4.69705326494296e-05, "loss": 0.518, "step": 2830 }, { "epoch": 0.24, "learning_rate": 4.6960040963454076e-05, "loss": 0.6126, "step": 2835 }, { "epoch": 0.24, "learning_rate": 4.694953231734123e-05, "loss": 0.5699, "step": 2840 }, { "epoch": 0.24, "learning_rate": 4.6939006719207045e-05, "loss": 0.6111, "step": 2845 }, { "epoch": 0.24, "learning_rate": 4.6928464177180656e-05, "loss": 0.6149, "step": 2850 }, { "epoch": 0.24, "learning_rate": 4.691790469940424e-05, "loss": 0.5743, "step": 2855 }, { "epoch": 0.24, "learning_rate": 4.690732829403309e-05, "loss": 0.5559, "step": 2860 }, { "epoch": 0.24, "learning_rate": 4.6896734969235537e-05, "loss": 0.5636, "step": 2865 }, { "epoch": 0.24, "learning_rate": 4.688612473319302e-05, "loss": 0.5349, "step": 2870 }, { "epoch": 0.24, "learning_rate": 4.6875497594100006e-05, "loss": 0.566, "step": 2875 }, { "epoch": 0.24, "learning_rate": 4.6864853560164014e-05, "loss": 0.5966, "step": 2880 }, { "epoch": 0.24, "learning_rate": 4.6854192639605645e-05, "loss": 0.5921, "step": 2885 }, { "epoch": 0.24, "learning_rate": 4.684351484065851e-05, "loss": 0.6006, "step": 2890 }, { "epoch": 0.24, "learning_rate": 4.683282017156927e-05, "loss": 0.6475, "step": 2895 }, { "epoch": 0.24, "learning_rate": 4.68221086405976e-05, "loss": 0.5898, "step": 2900 }, { "epoch": 0.24, "learning_rate": 4.681138025601623e-05, "loss": 0.6108, "step": 2905 }, { "epoch": 0.24, "learning_rate": 4.680063502611087e-05, "loss": 0.5857, "step": 2910 }, { "epoch": 0.24, "learning_rate": 4.678987295918027e-05, "loss": 0.6127, "step": 2915 }, { "epoch": 0.25, "learning_rate": 4.6779094063536156e-05, "loss": 0.5744, "step": 2920 }, { "epoch": 0.25, "learning_rate": 4.676829834750326e-05, "loss": 0.5819, "step": 2925 }, { "epoch": 0.25, "learning_rate": 4.675748581941932e-05, "loss": 0.574, "step": 2930 }, { "epoch": 0.25, "learning_rate": 4.6746656487635056e-05, "loss": 0.6167, "step": 2935 }, { "epoch": 0.25, "learning_rate": 4.673581036051413e-05, "loss": 0.5375, "step": 2940 }, { "epoch": 0.25, "learning_rate": 4.6724947446433234e-05, "loss": 0.5736, "step": 2945 }, { "epoch": 0.25, "learning_rate": 4.671406775378196e-05, "loss": 0.5837, "step": 2950 }, { "epoch": 0.25, "learning_rate": 4.670317129096292e-05, "loss": 0.6521, "step": 2955 }, { "epoch": 0.25, "learning_rate": 4.669225806639164e-05, "loss": 0.5752, "step": 2960 }, { "epoch": 0.25, "learning_rate": 4.66813280884966e-05, "loss": 0.5647, "step": 2965 }, { "epoch": 0.25, "learning_rate": 4.6670381365719215e-05, "loss": 0.6357, "step": 2970 }, { "epoch": 0.25, "learning_rate": 4.665941790651384e-05, "loss": 0.5375, "step": 2975 }, { "epoch": 0.25, "learning_rate": 4.664843771934776e-05, "loss": 0.5686, "step": 2980 }, { "epoch": 0.25, "learning_rate": 4.663744081270116e-05, "loss": 0.5813, "step": 2985 }, { "epoch": 0.25, "learning_rate": 4.6626427195067166e-05, "loss": 0.5542, "step": 2990 }, { "epoch": 0.25, "learning_rate": 4.661539687495178e-05, "loss": 0.6067, "step": 2995 }, { "epoch": 0.25, "learning_rate": 4.660434986087393e-05, "loss": 0.5982, "step": 3000 }, { "epoch": 0.25, "learning_rate": 4.6593286161365416e-05, "loss": 0.5327, "step": 3005 }, { "epoch": 0.25, "learning_rate": 4.6582205784970934e-05, "loss": 0.6232, "step": 3010 }, { "epoch": 0.25, "learning_rate": 4.657110874024806e-05, "loss": 0.5309, "step": 3015 }, { "epoch": 0.25, "learning_rate": 4.655999503576725e-05, "loss": 0.6058, "step": 3020 }, { "epoch": 0.25, "learning_rate": 4.65488646801118e-05, "loss": 0.5496, "step": 3025 }, { "epoch": 0.25, "learning_rate": 4.6537717681877914e-05, "loss": 0.6181, "step": 3030 }, { "epoch": 0.25, "learning_rate": 4.6526554049674596e-05, "loss": 0.6168, "step": 3035 }, { "epoch": 0.26, "learning_rate": 4.651537379212374e-05, "loss": 0.6236, "step": 3040 }, { "epoch": 0.26, "learning_rate": 4.650417691786004e-05, "loss": 0.5742, "step": 3045 }, { "epoch": 0.26, "learning_rate": 4.649296343553107e-05, "loss": 0.5961, "step": 3050 }, { "epoch": 0.26, "learning_rate": 4.648173335379719e-05, "loss": 0.5531, "step": 3055 }, { "epoch": 0.26, "learning_rate": 4.647048668133158e-05, "loss": 0.5711, "step": 3060 }, { "epoch": 0.26, "learning_rate": 4.645922342682029e-05, "loss": 0.5688, "step": 3065 }, { "epoch": 0.26, "learning_rate": 4.6447943598962093e-05, "loss": 0.6326, "step": 3070 }, { "epoch": 0.26, "learning_rate": 4.643664720646864e-05, "loss": 0.5777, "step": 3075 }, { "epoch": 0.26, "learning_rate": 4.642533425806431e-05, "loss": 0.5567, "step": 3080 }, { "epoch": 0.26, "learning_rate": 4.641400476248632e-05, "loss": 0.5718, "step": 3085 }, { "epoch": 0.26, "learning_rate": 4.6402658728484636e-05, "loss": 0.6025, "step": 3090 }, { "epoch": 0.26, "learning_rate": 4.639129616482199e-05, "loss": 0.6225, "step": 3095 }, { "epoch": 0.26, "learning_rate": 4.637991708027393e-05, "loss": 0.5738, "step": 3100 }, { "epoch": 0.26, "learning_rate": 4.63685214836287e-05, "loss": 0.6, "step": 3105 }, { "epoch": 0.26, "learning_rate": 4.635710938368733e-05, "loss": 0.5721, "step": 3110 }, { "epoch": 0.26, "learning_rate": 4.634568078926361e-05, "loss": 0.5575, "step": 3115 }, { "epoch": 0.26, "learning_rate": 4.633423570918404e-05, "loss": 0.5812, "step": 3120 }, { "epoch": 0.26, "learning_rate": 4.632277415228785e-05, "loss": 0.5702, "step": 3125 }, { "epoch": 0.26, "learning_rate": 4.631129612742702e-05, "loss": 0.5588, "step": 3130 }, { "epoch": 0.26, "learning_rate": 4.629980164346623e-05, "loss": 0.5689, "step": 3135 }, { "epoch": 0.26, "learning_rate": 4.628829070928288e-05, "loss": 0.567, "step": 3140 }, { "epoch": 0.26, "learning_rate": 4.627676333376708e-05, "loss": 0.6092, "step": 3145 }, { "epoch": 0.26, "learning_rate": 4.626521952582163e-05, "loss": 0.5834, "step": 3150 }, { "epoch": 0.26, "learning_rate": 4.6253659294362017e-05, "loss": 0.5937, "step": 3155 }, { "epoch": 0.27, "learning_rate": 4.624208264831642e-05, "loss": 0.5639, "step": 3160 }, { "epoch": 0.27, "learning_rate": 4.6230489596625695e-05, "loss": 0.5733, "step": 3165 }, { "epoch": 0.27, "learning_rate": 4.621888014824335e-05, "loss": 0.5908, "step": 3170 }, { "epoch": 0.27, "learning_rate": 4.6207254312135586e-05, "loss": 0.5641, "step": 3175 }, { "epoch": 0.27, "learning_rate": 4.619561209728125e-05, "loss": 0.5802, "step": 3180 }, { "epoch": 0.27, "learning_rate": 4.618395351267183e-05, "loss": 0.632, "step": 3185 }, { "epoch": 0.27, "learning_rate": 4.6172278567311455e-05, "loss": 0.5832, "step": 3190 }, { "epoch": 0.27, "learning_rate": 4.616058727021692e-05, "loss": 0.6057, "step": 3195 }, { "epoch": 0.27, "learning_rate": 4.6148879630417583e-05, "loss": 0.6062, "step": 3200 }, { "epoch": 0.27, "learning_rate": 4.613715565695551e-05, "loss": 0.5831, "step": 3205 }, { "epoch": 0.27, "learning_rate": 4.6125415358885324e-05, "loss": 0.5763, "step": 3210 }, { "epoch": 0.27, "learning_rate": 4.6113658745274255e-05, "loss": 0.6103, "step": 3215 }, { "epoch": 0.27, "learning_rate": 4.610188582520217e-05, "loss": 0.6861, "step": 3220 }, { "epoch": 0.27, "learning_rate": 4.609009660776149e-05, "loss": 0.5629, "step": 3225 }, { "epoch": 0.27, "learning_rate": 4.6078291102057256e-05, "loss": 0.5906, "step": 3230 }, { "epoch": 0.27, "learning_rate": 4.606646931720706e-05, "loss": 0.5581, "step": 3235 }, { "epoch": 0.27, "learning_rate": 4.6054631262341096e-05, "loss": 0.5932, "step": 3240 }, { "epoch": 0.27, "learning_rate": 4.60427769466021e-05, "loss": 0.5599, "step": 3245 }, { "epoch": 0.27, "learning_rate": 4.603090637914537e-05, "loss": 0.5812, "step": 3250 }, { "epoch": 0.27, "learning_rate": 4.6019019569138764e-05, "loss": 0.5625, "step": 3255 }, { "epoch": 0.27, "learning_rate": 4.600711652576268e-05, "loss": 0.6301, "step": 3260 }, { "epoch": 0.27, "learning_rate": 4.599519725821006e-05, "loss": 0.5365, "step": 3265 }, { "epoch": 0.27, "learning_rate": 4.598326177568636e-05, "loss": 0.5714, "step": 3270 }, { "epoch": 0.27, "learning_rate": 4.5971310087409584e-05, "loss": 0.5449, "step": 3275 }, { "epoch": 0.28, "learning_rate": 4.595934220261022e-05, "loss": 0.6157, "step": 3280 }, { "epoch": 0.28, "learning_rate": 4.5947358130531306e-05, "loss": 0.5631, "step": 3285 }, { "epoch": 0.28, "learning_rate": 4.593535788042833e-05, "loss": 0.5879, "step": 3290 }, { "epoch": 0.28, "learning_rate": 4.5923341461569314e-05, "loss": 0.6334, "step": 3295 }, { "epoch": 0.28, "learning_rate": 4.591130888323476e-05, "loss": 0.629, "step": 3300 }, { "epoch": 0.28, "learning_rate": 4.5899260154717636e-05, "loss": 0.4705, "step": 3305 }, { "epoch": 0.28, "learning_rate": 4.588719528532342e-05, "loss": 0.5652, "step": 3310 }, { "epoch": 0.28, "learning_rate": 4.587511428436999e-05, "loss": 0.5937, "step": 3315 }, { "epoch": 0.28, "learning_rate": 4.586301716118775e-05, "loss": 0.5615, "step": 3320 }, { "epoch": 0.28, "learning_rate": 4.585090392511951e-05, "loss": 0.5959, "step": 3325 }, { "epoch": 0.28, "learning_rate": 4.583877458552056e-05, "loss": 0.5575, "step": 3330 }, { "epoch": 0.28, "learning_rate": 4.5826629151758596e-05, "loss": 0.574, "step": 3335 }, { "epoch": 0.28, "learning_rate": 4.5814467633213754e-05, "loss": 0.6069, "step": 3340 }, { "epoch": 0.28, "learning_rate": 4.5802290039278605e-05, "loss": 0.5121, "step": 3345 }, { "epoch": 0.28, "learning_rate": 4.579009637935812e-05, "loss": 0.5786, "step": 3350 }, { "epoch": 0.28, "learning_rate": 4.5777886662869684e-05, "loss": 0.5314, "step": 3355 }, { "epoch": 0.28, "learning_rate": 4.576566089924309e-05, "loss": 0.6365, "step": 3360 }, { "epoch": 0.28, "learning_rate": 4.57534190979205e-05, "loss": 0.5964, "step": 3365 }, { "epoch": 0.28, "learning_rate": 4.5741161268356485e-05, "loss": 0.5727, "step": 3370 }, { "epoch": 0.28, "learning_rate": 4.5728887420018e-05, "loss": 0.6014, "step": 3375 }, { "epoch": 0.28, "learning_rate": 4.571659756238435e-05, "loss": 0.5826, "step": 3380 }, { "epoch": 0.28, "learning_rate": 4.570429170494721e-05, "loss": 0.5908, "step": 3385 }, { "epoch": 0.28, "learning_rate": 4.569196985721063e-05, "loss": 0.6819, "step": 3390 }, { "epoch": 0.28, "learning_rate": 4.5679632028690974e-05, "loss": 0.6723, "step": 3395 }, { "epoch": 0.29, "learning_rate": 4.5667278228917006e-05, "loss": 0.6055, "step": 3400 }, { "epoch": 0.29, "learning_rate": 4.5654908467429756e-05, "loss": 0.6189, "step": 3405 }, { "epoch": 0.29, "learning_rate": 4.564252275378263e-05, "loss": 0.5965, "step": 3410 }, { "epoch": 0.29, "learning_rate": 4.5630121097541346e-05, "loss": 0.5105, "step": 3415 }, { "epoch": 0.29, "learning_rate": 4.56177035082839e-05, "loss": 0.5954, "step": 3420 }, { "epoch": 0.29, "learning_rate": 4.560526999560065e-05, "loss": 0.5436, "step": 3425 }, { "epoch": 0.29, "learning_rate": 4.559282056909422e-05, "loss": 0.6257, "step": 3430 }, { "epoch": 0.29, "learning_rate": 4.558035523837951e-05, "loss": 0.6453, "step": 3435 }, { "epoch": 0.29, "learning_rate": 4.556787401308374e-05, "loss": 0.5943, "step": 3440 }, { "epoch": 0.29, "learning_rate": 4.555537690284637e-05, "loss": 0.6003, "step": 3445 }, { "epoch": 0.29, "learning_rate": 4.554286391731915e-05, "loss": 0.5674, "step": 3450 }, { "epoch": 0.29, "learning_rate": 4.553033506616609e-05, "loss": 0.6082, "step": 3455 }, { "epoch": 0.29, "learning_rate": 4.5517790359063445e-05, "loss": 0.585, "step": 3460 }, { "epoch": 0.29, "learning_rate": 4.5505229805699723e-05, "loss": 0.5357, "step": 3465 }, { "epoch": 0.29, "learning_rate": 4.5492653415775653e-05, "loss": 0.6065, "step": 3470 }, { "epoch": 0.29, "learning_rate": 4.548006119900423e-05, "loss": 0.5903, "step": 3475 }, { "epoch": 0.29, "learning_rate": 4.546745316511064e-05, "loss": 0.5452, "step": 3480 }, { "epoch": 0.29, "learning_rate": 4.545482932383229e-05, "loss": 0.6042, "step": 3485 }, { "epoch": 0.29, "learning_rate": 4.544218968491881e-05, "loss": 0.6051, "step": 3490 }, { "epoch": 0.29, "learning_rate": 4.5429534258132026e-05, "loss": 0.5758, "step": 3495 }, { "epoch": 0.29, "learning_rate": 4.541686305324594e-05, "loss": 0.5526, "step": 3500 }, { "epoch": 0.29, "learning_rate": 4.540417608004677e-05, "loss": 0.5885, "step": 3505 }, { "epoch": 0.29, "learning_rate": 4.539147334833288e-05, "loss": 0.5901, "step": 3510 }, { "epoch": 0.29, "learning_rate": 4.537875486791482e-05, "loss": 0.5269, "step": 3515 }, { "epoch": 0.3, "learning_rate": 4.536602064861532e-05, "loss": 0.5671, "step": 3520 }, { "epoch": 0.3, "learning_rate": 4.5353270700269225e-05, "loss": 0.5738, "step": 3525 }, { "epoch": 0.3, "learning_rate": 4.534050503272356e-05, "loss": 0.5359, "step": 3530 }, { "epoch": 0.3, "learning_rate": 4.532772365583749e-05, "loss": 0.552, "step": 3535 }, { "epoch": 0.3, "learning_rate": 4.53149265794823e-05, "loss": 0.5618, "step": 3540 }, { "epoch": 0.3, "learning_rate": 4.530211381354139e-05, "loss": 0.61, "step": 3545 }, { "epoch": 0.3, "learning_rate": 4.5289285367910305e-05, "loss": 0.5793, "step": 3550 }, { "epoch": 0.3, "learning_rate": 4.527644125249669e-05, "loss": 0.5283, "step": 3555 }, { "epoch": 0.3, "learning_rate": 4.526358147722027e-05, "loss": 0.5317, "step": 3560 }, { "epoch": 0.3, "learning_rate": 4.525070605201291e-05, "loss": 0.5865, "step": 3565 }, { "epoch": 0.3, "learning_rate": 4.5237814986818504e-05, "loss": 0.6206, "step": 3570 }, { "epoch": 0.3, "learning_rate": 4.5224908291593074e-05, "loss": 0.568, "step": 3575 }, { "epoch": 0.3, "learning_rate": 4.521198597630469e-05, "loss": 0.6064, "step": 3580 }, { "epoch": 0.3, "learning_rate": 4.5199048050933484e-05, "loss": 0.5587, "step": 3585 }, { "epoch": 0.3, "learning_rate": 4.5186094525471655e-05, "loss": 0.6328, "step": 3590 }, { "epoch": 0.3, "learning_rate": 4.5173125409923456e-05, "loss": 0.549, "step": 3595 }, { "epoch": 0.3, "learning_rate": 4.516014071430515e-05, "loss": 0.6468, "step": 3600 }, { "epoch": 0.3, "learning_rate": 4.5147140448645064e-05, "loss": 0.551, "step": 3605 }, { "epoch": 0.3, "learning_rate": 4.513412462298353e-05, "loss": 0.6009, "step": 3610 }, { "epoch": 0.3, "learning_rate": 4.512109324737293e-05, "loss": 0.5974, "step": 3615 }, { "epoch": 0.3, "learning_rate": 4.5108046331877593e-05, "loss": 0.6443, "step": 3620 }, { "epoch": 0.3, "learning_rate": 4.5094983886573924e-05, "loss": 0.5717, "step": 3625 }, { "epoch": 0.3, "learning_rate": 4.508190592155026e-05, "loss": 0.633, "step": 3630 }, { "epoch": 0.31, "learning_rate": 4.5068812446906974e-05, "loss": 0.5939, "step": 3635 }, { "epoch": 0.31, "learning_rate": 4.505570347275637e-05, "loss": 0.5937, "step": 3640 }, { "epoch": 0.31, "learning_rate": 4.5042579009222756e-05, "loss": 0.5269, "step": 3645 }, { "epoch": 0.31, "learning_rate": 4.50294390664424e-05, "loss": 0.6221, "step": 3650 }, { "epoch": 0.31, "learning_rate": 4.5016283654563505e-05, "loss": 0.601, "step": 3655 }, { "epoch": 0.31, "learning_rate": 4.5003112783746246e-05, "loss": 0.5799, "step": 3660 }, { "epoch": 0.31, "learning_rate": 4.4989926464162724e-05, "loss": 0.5282, "step": 3665 }, { "epoch": 0.31, "learning_rate": 4.497672470599695e-05, "loss": 0.5581, "step": 3670 }, { "epoch": 0.31, "learning_rate": 4.4963507519444916e-05, "loss": 0.5694, "step": 3675 }, { "epoch": 0.31, "learning_rate": 4.4950274914714476e-05, "loss": 0.5499, "step": 3680 }, { "epoch": 0.31, "learning_rate": 4.49370269020254e-05, "loss": 0.5777, "step": 3685 }, { "epoch": 0.31, "learning_rate": 4.4923763491609395e-05, "loss": 0.527, "step": 3690 }, { "epoch": 0.31, "learning_rate": 4.491048469371001e-05, "loss": 0.5872, "step": 3695 }, { "epoch": 0.31, "learning_rate": 4.489719051858271e-05, "loss": 0.5311, "step": 3700 }, { "epoch": 0.31, "learning_rate": 4.488388097649483e-05, "loss": 0.5716, "step": 3705 }, { "epoch": 0.31, "learning_rate": 4.4870556077725556e-05, "loss": 0.6061, "step": 3710 }, { "epoch": 0.31, "learning_rate": 4.4857215832565966e-05, "loss": 0.5905, "step": 3715 }, { "epoch": 0.31, "learning_rate": 4.484386025131897e-05, "loss": 0.5829, "step": 3720 }, { "epoch": 0.31, "learning_rate": 4.483048934429932e-05, "loss": 0.5892, "step": 3725 }, { "epoch": 0.31, "learning_rate": 4.4817103121833604e-05, "loss": 0.6429, "step": 3730 }, { "epoch": 0.31, "learning_rate": 4.480370159426025e-05, "loss": 0.5741, "step": 3735 }, { "epoch": 0.31, "learning_rate": 4.479028477192951e-05, "loss": 0.5299, "step": 3740 }, { "epoch": 0.31, "learning_rate": 4.477685266520343e-05, "loss": 0.6464, "step": 3745 }, { "epoch": 0.31, "learning_rate": 4.476340528445587e-05, "loss": 0.5857, "step": 3750 }, { "epoch": 0.32, "learning_rate": 4.474994264007248e-05, "loss": 0.5055, "step": 3755 }, { "epoch": 0.32, "learning_rate": 4.4736464742450713e-05, "loss": 0.5647, "step": 3760 }, { "epoch": 0.32, "learning_rate": 4.4722971601999786e-05, "loss": 0.5831, "step": 3765 }, { "epoch": 0.32, "learning_rate": 4.47094632291407e-05, "loss": 0.5647, "step": 3770 }, { "epoch": 0.32, "learning_rate": 4.469593963430622e-05, "loss": 0.5717, "step": 3775 }, { "epoch": 0.32, "learning_rate": 4.468240082794086e-05, "loss": 0.5926, "step": 3780 }, { "epoch": 0.32, "learning_rate": 4.4668846820500874e-05, "loss": 0.5836, "step": 3785 }, { "epoch": 0.32, "learning_rate": 4.465527762245429e-05, "loss": 0.5505, "step": 3790 }, { "epoch": 0.32, "learning_rate": 4.464169324428082e-05, "loss": 0.5795, "step": 3795 }, { "epoch": 0.32, "learning_rate": 4.462809369647195e-05, "loss": 0.5336, "step": 3800 }, { "epoch": 0.32, "learning_rate": 4.461447898953084e-05, "loss": 0.5852, "step": 3805 }, { "epoch": 0.32, "learning_rate": 4.460084913397239e-05, "loss": 0.5555, "step": 3810 }, { "epoch": 0.32, "learning_rate": 4.458720414032316e-05, "loss": 0.549, "step": 3815 }, { "epoch": 0.32, "learning_rate": 4.457354401912145e-05, "loss": 0.5216, "step": 3820 }, { "epoch": 0.32, "learning_rate": 4.45598687809172e-05, "loss": 0.5817, "step": 3825 }, { "epoch": 0.32, "learning_rate": 4.4546178436272055e-05, "loss": 0.5841, "step": 3830 }, { "epoch": 0.32, "learning_rate": 4.453247299575932e-05, "loss": 0.6002, "step": 3835 }, { "epoch": 0.32, "learning_rate": 4.4518752469963946e-05, "loss": 0.6817, "step": 3840 }, { "epoch": 0.32, "learning_rate": 4.450501686948255e-05, "loss": 0.5508, "step": 3845 }, { "epoch": 0.32, "learning_rate": 4.4491266204923385e-05, "loss": 0.5886, "step": 3850 }, { "epoch": 0.32, "learning_rate": 4.447750048690632e-05, "loss": 0.5456, "step": 3855 }, { "epoch": 0.32, "learning_rate": 4.44637197260629e-05, "loss": 0.5792, "step": 3860 }, { "epoch": 0.32, "learning_rate": 4.444992393303623e-05, "loss": 0.577, "step": 3865 }, { "epoch": 0.32, "learning_rate": 4.4436113118481066e-05, "loss": 0.615, "step": 3870 }, { "epoch": 0.33, "learning_rate": 4.442228729306374e-05, "loss": 0.6839, "step": 3875 }, { "epoch": 0.33, "learning_rate": 4.440844646746219e-05, "loss": 0.5819, "step": 3880 }, { "epoch": 0.33, "learning_rate": 4.439459065236594e-05, "loss": 0.6399, "step": 3885 }, { "epoch": 0.33, "learning_rate": 4.438071985847609e-05, "loss": 0.557, "step": 3890 }, { "epoch": 0.33, "learning_rate": 4.436683409650529e-05, "loss": 0.5392, "step": 3895 }, { "epoch": 0.33, "learning_rate": 4.4352933377177785e-05, "loss": 0.5791, "step": 3900 }, { "epoch": 0.33, "learning_rate": 4.4339017711229344e-05, "loss": 0.5385, "step": 3905 }, { "epoch": 0.33, "learning_rate": 4.432508710940728e-05, "loss": 0.5869, "step": 3910 }, { "epoch": 0.33, "learning_rate": 4.431114158247047e-05, "loss": 0.572, "step": 3915 }, { "epoch": 0.33, "learning_rate": 4.429718114118927e-05, "loss": 0.5264, "step": 3920 }, { "epoch": 0.33, "learning_rate": 4.42832057963456e-05, "loss": 0.617, "step": 3925 }, { "epoch": 0.33, "learning_rate": 4.4269215558732863e-05, "loss": 0.6226, "step": 3930 }, { "epoch": 0.33, "learning_rate": 4.425521043915598e-05, "loss": 0.6044, "step": 3935 }, { "epoch": 0.33, "learning_rate": 4.424119044843135e-05, "loss": 0.55, "step": 3940 }, { "epoch": 0.33, "learning_rate": 4.422715559738687e-05, "loss": 0.5599, "step": 3945 }, { "epoch": 0.33, "learning_rate": 4.4213105896861916e-05, "loss": 0.514, "step": 3950 }, { "epoch": 0.33, "learning_rate": 4.4199041357707327e-05, "loss": 0.6108, "step": 3955 }, { "epoch": 0.33, "learning_rate": 4.418496199078539e-05, "loss": 0.5727, "step": 3960 }, { "epoch": 0.33, "learning_rate": 4.417086780696985e-05, "loss": 0.5697, "step": 3965 }, { "epoch": 0.33, "learning_rate": 4.4156758817145914e-05, "loss": 0.5898, "step": 3970 }, { "epoch": 0.33, "learning_rate": 4.414263503221021e-05, "loss": 0.5111, "step": 3975 }, { "epoch": 0.33, "learning_rate": 4.4128496463070797e-05, "loss": 0.5708, "step": 3980 }, { "epoch": 0.33, "learning_rate": 4.411434312064713e-05, "loss": 0.6259, "step": 3985 }, { "epoch": 0.33, "learning_rate": 4.4100175015870104e-05, "loss": 0.5938, "step": 3990 }, { "epoch": 0.34, "learning_rate": 4.4085992159682e-05, "loss": 0.5741, "step": 3995 }, { "epoch": 0.34, "learning_rate": 4.4071794563036506e-05, "loss": 0.578, "step": 4000 }, { "epoch": 0.34, "learning_rate": 4.405758223689866e-05, "loss": 0.562, "step": 4005 }, { "epoch": 0.34, "learning_rate": 4.404335519224492e-05, "loss": 0.5982, "step": 4010 }, { "epoch": 0.34, "learning_rate": 4.402911344006308e-05, "loss": 0.5816, "step": 4015 }, { "epoch": 0.34, "learning_rate": 4.40148569913523e-05, "loss": 0.5856, "step": 4020 }, { "epoch": 0.34, "learning_rate": 4.400058585712311e-05, "loss": 0.5952, "step": 4025 }, { "epoch": 0.34, "learning_rate": 4.3986300048397344e-05, "loss": 0.5637, "step": 4030 }, { "epoch": 0.34, "learning_rate": 4.3971999576208205e-05, "loss": 0.585, "step": 4035 }, { "epoch": 0.34, "learning_rate": 4.39576844516002e-05, "loss": 0.5488, "step": 4040 }, { "epoch": 0.34, "learning_rate": 4.394335468562917e-05, "loss": 0.633, "step": 4045 }, { "epoch": 0.34, "learning_rate": 4.392901028936223e-05, "loss": 0.5631, "step": 4050 }, { "epoch": 0.34, "learning_rate": 4.391465127387784e-05, "loss": 0.6309, "step": 4055 }, { "epoch": 0.34, "learning_rate": 4.390027765026572e-05, "loss": 0.5142, "step": 4060 }, { "epoch": 0.34, "learning_rate": 4.3885889429626884e-05, "loss": 0.5983, "step": 4065 }, { "epoch": 0.34, "learning_rate": 4.38714866230736e-05, "loss": 0.6078, "step": 4070 }, { "epoch": 0.34, "learning_rate": 4.385706924172942e-05, "loss": 0.5856, "step": 4075 }, { "epoch": 0.34, "learning_rate": 4.3842637296729174e-05, "loss": 0.5885, "step": 4080 }, { "epoch": 0.34, "learning_rate": 4.382819079921888e-05, "loss": 0.5029, "step": 4085 }, { "epoch": 0.34, "learning_rate": 4.381372976035586e-05, "loss": 0.4966, "step": 4090 }, { "epoch": 0.34, "learning_rate": 4.379925419130861e-05, "loss": 0.5864, "step": 4095 }, { "epoch": 0.34, "learning_rate": 4.378476410325689e-05, "loss": 0.6115, "step": 4100 }, { "epoch": 0.34, "learning_rate": 4.377025950739165e-05, "loss": 0.6069, "step": 4105 }, { "epoch": 0.34, "learning_rate": 4.375574041491506e-05, "loss": 0.5074, "step": 4110 }, { "epoch": 0.35, "learning_rate": 4.374120683704047e-05, "loss": 0.5813, "step": 4115 }, { "epoch": 0.35, "learning_rate": 4.3726658784992433e-05, "loss": 0.5524, "step": 4120 }, { "epoch": 0.35, "learning_rate": 4.371209627000667e-05, "loss": 0.5393, "step": 4125 }, { "epoch": 0.35, "learning_rate": 4.369751930333007e-05, "loss": 0.5915, "step": 4130 }, { "epoch": 0.35, "learning_rate": 4.3682927896220695e-05, "loss": 0.6193, "step": 4135 }, { "epoch": 0.35, "learning_rate": 4.3668322059947744e-05, "loss": 0.5677, "step": 4140 }, { "epoch": 0.35, "learning_rate": 4.365370180579158e-05, "loss": 0.5799, "step": 4145 }, { "epoch": 0.35, "learning_rate": 4.363906714504368e-05, "loss": 0.5658, "step": 4150 }, { "epoch": 0.35, "learning_rate": 4.362441808900667e-05, "loss": 0.5683, "step": 4155 }, { "epoch": 0.35, "learning_rate": 4.360975464899427e-05, "loss": 0.5466, "step": 4160 }, { "epoch": 0.35, "learning_rate": 4.359507683633133e-05, "loss": 0.5778, "step": 4165 }, { "epoch": 0.35, "learning_rate": 4.3580384662353774e-05, "loss": 0.598, "step": 4170 }, { "epoch": 0.35, "learning_rate": 4.356567813840865e-05, "loss": 0.5666, "step": 4175 }, { "epoch": 0.35, "learning_rate": 4.3550957275854066e-05, "loss": 0.5757, "step": 4180 }, { "epoch": 0.35, "learning_rate": 4.353622208605922e-05, "loss": 0.6115, "step": 4185 }, { "epoch": 0.35, "learning_rate": 4.352147258040435e-05, "loss": 0.5383, "step": 4190 }, { "epoch": 0.35, "learning_rate": 4.350670877028078e-05, "loss": 0.6165, "step": 4195 }, { "epoch": 0.35, "learning_rate": 4.349193066709086e-05, "loss": 0.6069, "step": 4200 }, { "epoch": 0.35, "learning_rate": 4.347713828224799e-05, "loss": 0.6269, "step": 4205 }, { "epoch": 0.35, "learning_rate": 4.346233162717659e-05, "loss": 0.5224, "step": 4210 }, { "epoch": 0.35, "learning_rate": 4.344751071331212e-05, "loss": 0.6038, "step": 4215 }, { "epoch": 0.35, "learning_rate": 4.3432675552101035e-05, "loss": 0.5588, "step": 4220 }, { "epoch": 0.35, "learning_rate": 4.3417826155000796e-05, "loss": 0.6218, "step": 4225 }, { "epoch": 0.35, "learning_rate": 4.340296253347986e-05, "loss": 0.586, "step": 4230 }, { "epoch": 0.36, "learning_rate": 4.338808469901768e-05, "loss": 0.5654, "step": 4235 }, { "epoch": 0.36, "learning_rate": 4.337319266310467e-05, "loss": 0.6442, "step": 4240 }, { "epoch": 0.36, "learning_rate": 4.335828643724222e-05, "loss": 0.5839, "step": 4245 }, { "epoch": 0.36, "learning_rate": 4.334336603294268e-05, "loss": 0.6191, "step": 4250 }, { "epoch": 0.36, "learning_rate": 4.3328431461729337e-05, "loss": 0.6021, "step": 4255 }, { "epoch": 0.36, "learning_rate": 4.3313482735136455e-05, "loss": 0.6133, "step": 4260 }, { "epoch": 0.36, "learning_rate": 4.3298519864709174e-05, "loss": 0.5707, "step": 4265 }, { "epoch": 0.36, "learning_rate": 4.3283542862003626e-05, "loss": 0.6201, "step": 4270 }, { "epoch": 0.36, "learning_rate": 4.326855173858679e-05, "loss": 0.569, "step": 4275 }, { "epoch": 0.36, "learning_rate": 4.32535465060366e-05, "loss": 0.5888, "step": 4280 }, { "epoch": 0.36, "learning_rate": 4.3238527175941865e-05, "loss": 0.5342, "step": 4285 }, { "epoch": 0.36, "learning_rate": 4.322349375990229e-05, "loss": 0.5379, "step": 4290 }, { "epoch": 0.36, "learning_rate": 4.320844626952844e-05, "loss": 0.5766, "step": 4295 }, { "epoch": 0.36, "learning_rate": 4.319338471644177e-05, "loss": 0.5515, "step": 4300 }, { "epoch": 0.36, "learning_rate": 4.3178309112274614e-05, "loss": 0.646, "step": 4305 }, { "epoch": 0.36, "learning_rate": 4.3163219468670105e-05, "loss": 0.6511, "step": 4310 }, { "epoch": 0.36, "learning_rate": 4.3148115797282246e-05, "loss": 0.5411, "step": 4315 }, { "epoch": 0.36, "learning_rate": 4.313299810977589e-05, "loss": 0.6117, "step": 4320 }, { "epoch": 0.36, "learning_rate": 4.3117866417826706e-05, "loss": 0.5814, "step": 4325 }, { "epoch": 0.36, "learning_rate": 4.310272073312116e-05, "loss": 0.5949, "step": 4330 }, { "epoch": 0.36, "learning_rate": 4.3087561067356545e-05, "loss": 0.6245, "step": 4335 }, { "epoch": 0.36, "learning_rate": 4.307238743224095e-05, "loss": 0.5359, "step": 4340 }, { "epoch": 0.36, "learning_rate": 4.305719983949323e-05, "loss": 0.5064, "step": 4345 }, { "epoch": 0.37, "learning_rate": 4.3041998300843064e-05, "loss": 0.5803, "step": 4350 }, { "epoch": 0.37, "learning_rate": 4.302678282803085e-05, "loss": 0.5768, "step": 4355 }, { "epoch": 0.37, "learning_rate": 4.301155343280779e-05, "loss": 0.5859, "step": 4360 }, { "epoch": 0.37, "learning_rate": 4.299631012693582e-05, "loss": 0.5827, "step": 4365 }, { "epoch": 0.37, "learning_rate": 4.298105292218763e-05, "loss": 0.557, "step": 4370 }, { "epoch": 0.37, "learning_rate": 4.296578183034661e-05, "loss": 0.5976, "step": 4375 }, { "epoch": 0.37, "learning_rate": 4.295049686320691e-05, "loss": 0.6202, "step": 4380 }, { "epoch": 0.37, "learning_rate": 4.29351980325734e-05, "loss": 0.5395, "step": 4385 }, { "epoch": 0.37, "learning_rate": 4.291988535026164e-05, "loss": 0.568, "step": 4390 }, { "epoch": 0.37, "learning_rate": 4.290455882809787e-05, "loss": 0.6002, "step": 4395 }, { "epoch": 0.37, "learning_rate": 4.288921847791907e-05, "loss": 0.4993, "step": 4400 }, { "epoch": 0.37, "learning_rate": 4.2873864311572855e-05, "loss": 0.5405, "step": 4405 }, { "epoch": 0.37, "learning_rate": 4.285849634091753e-05, "loss": 0.6151, "step": 4410 }, { "epoch": 0.37, "learning_rate": 4.2843114577822066e-05, "loss": 0.6002, "step": 4415 }, { "epoch": 0.37, "learning_rate": 4.282771903416605e-05, "loss": 0.6122, "step": 4420 }, { "epoch": 0.37, "learning_rate": 4.281230972183977e-05, "loss": 0.548, "step": 4425 }, { "epoch": 0.37, "learning_rate": 4.27968866527441e-05, "loss": 0.6874, "step": 4430 }, { "epoch": 0.37, "learning_rate": 4.2781449838790555e-05, "loss": 0.6519, "step": 4435 }, { "epoch": 0.37, "learning_rate": 4.2765999291901276e-05, "loss": 0.539, "step": 4440 }, { "epoch": 0.37, "learning_rate": 4.275053502400899e-05, "loss": 0.6859, "step": 4445 }, { "epoch": 0.37, "learning_rate": 4.2735057047057037e-05, "loss": 0.5317, "step": 4450 }, { "epoch": 0.37, "learning_rate": 4.2719565372999335e-05, "loss": 0.6452, "step": 4455 }, { "epoch": 0.37, "learning_rate": 4.27040600138004e-05, "loss": 0.5393, "step": 4460 }, { "epoch": 0.37, "learning_rate": 4.268854098143527e-05, "loss": 0.6168, "step": 4465 }, { "epoch": 0.38, "learning_rate": 4.2673008287889604e-05, "loss": 0.5173, "step": 4470 }, { "epoch": 0.38, "learning_rate": 4.265746194515957e-05, "loss": 0.5305, "step": 4475 }, { "epoch": 0.38, "learning_rate": 4.2641901965251884e-05, "loss": 0.6051, "step": 4480 }, { "epoch": 0.38, "learning_rate": 4.262632836018381e-05, "loss": 0.5747, "step": 4485 }, { "epoch": 0.38, "learning_rate": 4.261074114198313e-05, "loss": 0.6299, "step": 4490 }, { "epoch": 0.38, "learning_rate": 4.259514032268813e-05, "loss": 0.516, "step": 4495 }, { "epoch": 0.38, "learning_rate": 4.25795259143476e-05, "loss": 0.5354, "step": 4500 }, { "epoch": 0.38, "learning_rate": 4.2563897929020837e-05, "loss": 0.5117, "step": 4505 }, { "epoch": 0.38, "learning_rate": 4.254825637877761e-05, "loss": 0.6071, "step": 4510 }, { "epoch": 0.38, "learning_rate": 4.2532601275698175e-05, "loss": 0.5673, "step": 4515 }, { "epoch": 0.38, "learning_rate": 4.251693263187325e-05, "loss": 0.5818, "step": 4520 }, { "epoch": 0.38, "learning_rate": 4.250125045940402e-05, "loss": 0.601, "step": 4525 }, { "epoch": 0.38, "learning_rate": 4.24855547704021e-05, "loss": 0.5689, "step": 4530 }, { "epoch": 0.38, "learning_rate": 4.246984557698957e-05, "loss": 0.5692, "step": 4535 }, { "epoch": 0.38, "learning_rate": 4.245412289129891e-05, "loss": 0.5992, "step": 4540 }, { "epoch": 0.38, "learning_rate": 4.243838672547303e-05, "loss": 0.6054, "step": 4545 }, { "epoch": 0.38, "learning_rate": 4.242263709166527e-05, "loss": 0.608, "step": 4550 }, { "epoch": 0.38, "learning_rate": 4.240687400203937e-05, "loss": 0.5719, "step": 4555 }, { "epoch": 0.38, "learning_rate": 4.239109746876941e-05, "loss": 0.5988, "step": 4560 }, { "epoch": 0.38, "learning_rate": 4.237530750403993e-05, "loss": 0.5575, "step": 4565 }, { "epoch": 0.38, "learning_rate": 4.2359504120045776e-05, "loss": 0.5311, "step": 4570 }, { "epoch": 0.38, "learning_rate": 4.234368732899222e-05, "loss": 0.5986, "step": 4575 }, { "epoch": 0.38, "learning_rate": 4.2327857143094826e-05, "loss": 0.5394, "step": 4580 }, { "epoch": 0.38, "learning_rate": 4.231201357457955e-05, "loss": 0.5335, "step": 4585 }, { "epoch": 0.39, "learning_rate": 4.229615663568266e-05, "loss": 0.5188, "step": 4590 }, { "epoch": 0.39, "learning_rate": 4.228028633865076e-05, "loss": 0.598, "step": 4595 }, { "epoch": 0.39, "learning_rate": 4.2264402695740774e-05, "loss": 0.5705, "step": 4600 }, { "epoch": 0.39, "learning_rate": 4.224850571921992e-05, "loss": 0.5557, "step": 4605 }, { "epoch": 0.39, "learning_rate": 4.2232595421365714e-05, "loss": 0.5186, "step": 4610 }, { "epoch": 0.39, "learning_rate": 4.221667181446597e-05, "loss": 0.6212, "step": 4615 }, { "epoch": 0.39, "learning_rate": 4.2200734910818794e-05, "loss": 0.5816, "step": 4620 }, { "epoch": 0.39, "learning_rate": 4.218478472273253e-05, "loss": 0.5588, "step": 4625 }, { "epoch": 0.39, "learning_rate": 4.2168821262525806e-05, "loss": 0.5206, "step": 4630 }, { "epoch": 0.39, "learning_rate": 4.2152844542527484e-05, "loss": 0.5395, "step": 4635 }, { "epoch": 0.39, "learning_rate": 4.213685457507667e-05, "loss": 0.5802, "step": 4640 }, { "epoch": 0.39, "learning_rate": 4.2120851372522716e-05, "loss": 0.5861, "step": 4645 }, { "epoch": 0.39, "learning_rate": 4.210483494722518e-05, "loss": 0.5552, "step": 4650 }, { "epoch": 0.39, "learning_rate": 4.208880531155384e-05, "loss": 0.5666, "step": 4655 }, { "epoch": 0.39, "learning_rate": 4.2072762477888675e-05, "loss": 0.5117, "step": 4660 }, { "epoch": 0.39, "learning_rate": 4.205670645861985e-05, "loss": 0.6011, "step": 4665 }, { "epoch": 0.39, "learning_rate": 4.204063726614773e-05, "loss": 0.5703, "step": 4670 }, { "epoch": 0.39, "learning_rate": 4.202455491288283e-05, "loss": 0.5899, "step": 4675 }, { "epoch": 0.39, "learning_rate": 4.200845941124585e-05, "loss": 0.5699, "step": 4680 }, { "epoch": 0.39, "learning_rate": 4.199235077366764e-05, "loss": 0.5596, "step": 4685 }, { "epoch": 0.39, "learning_rate": 4.1976229012589185e-05, "loss": 0.6167, "step": 4690 }, { "epoch": 0.39, "learning_rate": 4.196009414046161e-05, "loss": 0.5655, "step": 4695 }, { "epoch": 0.39, "learning_rate": 4.19439461697462e-05, "loss": 0.5791, "step": 4700 }, { "epoch": 0.39, "learning_rate": 4.1927785112914284e-05, "loss": 0.5823, "step": 4705 }, { "epoch": 0.4, "learning_rate": 4.1911610982447353e-05, "loss": 0.5884, "step": 4710 }, { "epoch": 0.4, "learning_rate": 4.189542379083698e-05, "loss": 0.553, "step": 4715 }, { "epoch": 0.4, "learning_rate": 4.1879223550584846e-05, "loss": 0.5256, "step": 4720 }, { "epoch": 0.4, "learning_rate": 4.186301027420266e-05, "loss": 0.4843, "step": 4725 }, { "epoch": 0.4, "learning_rate": 4.1846783974212255e-05, "loss": 0.605, "step": 4730 }, { "epoch": 0.4, "learning_rate": 4.183054466314549e-05, "loss": 0.5537, "step": 4735 }, { "epoch": 0.4, "learning_rate": 4.181429235354426e-05, "loss": 0.5624, "step": 4740 }, { "epoch": 0.4, "learning_rate": 4.179802705796053e-05, "loss": 0.671, "step": 4745 }, { "epoch": 0.4, "learning_rate": 4.178174878895629e-05, "loss": 0.5325, "step": 4750 }, { "epoch": 0.4, "learning_rate": 4.176545755910354e-05, "loss": 0.5586, "step": 4755 }, { "epoch": 0.4, "learning_rate": 4.1749153380984274e-05, "loss": 0.5829, "step": 4760 }, { "epoch": 0.4, "learning_rate": 4.1732836267190526e-05, "loss": 0.5774, "step": 4765 }, { "epoch": 0.4, "learning_rate": 4.1716506230324294e-05, "loss": 0.6312, "step": 4770 }, { "epoch": 0.4, "learning_rate": 4.170016328299755e-05, "loss": 0.587, "step": 4775 }, { "epoch": 0.4, "learning_rate": 4.168380743783226e-05, "loss": 0.5649, "step": 4780 }, { "epoch": 0.4, "learning_rate": 4.166743870746034e-05, "loss": 0.5921, "step": 4785 }, { "epoch": 0.4, "learning_rate": 4.165105710452364e-05, "loss": 0.5206, "step": 4790 }, { "epoch": 0.4, "learning_rate": 4.1634662641673996e-05, "loss": 0.553, "step": 4795 }, { "epoch": 0.4, "learning_rate": 4.161825533157312e-05, "loss": 0.5985, "step": 4800 }, { "epoch": 0.4, "learning_rate": 4.16018351868927e-05, "loss": 0.6135, "step": 4805 }, { "epoch": 0.4, "learning_rate": 4.158540222031428e-05, "loss": 0.5964, "step": 4810 }, { "epoch": 0.4, "learning_rate": 4.156895644452937e-05, "loss": 0.553, "step": 4815 }, { "epoch": 0.4, "learning_rate": 4.1552497872239324e-05, "loss": 0.5536, "step": 4820 }, { "epoch": 0.4, "learning_rate": 4.1536026516155404e-05, "loss": 0.6226, "step": 4825 }, { "epoch": 0.41, "learning_rate": 4.1519542388998725e-05, "loss": 0.6147, "step": 4830 }, { "epoch": 0.41, "learning_rate": 4.150304550350029e-05, "loss": 0.5997, "step": 4835 }, { "epoch": 0.41, "learning_rate": 4.148653587240093e-05, "loss": 0.531, "step": 4840 }, { "epoch": 0.41, "learning_rate": 4.1470013508451344e-05, "loss": 0.5874, "step": 4845 }, { "epoch": 0.41, "learning_rate": 4.145347842441204e-05, "loss": 0.6003, "step": 4850 }, { "epoch": 0.41, "learning_rate": 4.1436930633053375e-05, "loss": 0.6857, "step": 4855 }, { "epoch": 0.41, "learning_rate": 4.142037014715549e-05, "loss": 0.6083, "step": 4860 }, { "epoch": 0.41, "learning_rate": 4.140379697950838e-05, "loss": 0.5397, "step": 4865 }, { "epoch": 0.41, "learning_rate": 4.138721114291178e-05, "loss": 0.553, "step": 4870 }, { "epoch": 0.41, "learning_rate": 4.1370612650175226e-05, "loss": 0.5283, "step": 4875 }, { "epoch": 0.41, "learning_rate": 4.135400151411804e-05, "loss": 0.5819, "step": 4880 }, { "epoch": 0.41, "learning_rate": 4.1337377747569316e-05, "loss": 0.5475, "step": 4885 }, { "epoch": 0.41, "learning_rate": 4.1320741363367876e-05, "loss": 0.5949, "step": 4890 }, { "epoch": 0.41, "learning_rate": 4.13040923743623e-05, "loss": 0.5856, "step": 4895 }, { "epoch": 0.41, "learning_rate": 4.128743079341092e-05, "loss": 0.5784, "step": 4900 }, { "epoch": 0.41, "learning_rate": 4.1270756633381766e-05, "loss": 0.5755, "step": 4905 }, { "epoch": 0.41, "learning_rate": 4.125406990715259e-05, "loss": 0.552, "step": 4910 }, { "epoch": 0.41, "learning_rate": 4.123737062761086e-05, "loss": 0.5767, "step": 4915 }, { "epoch": 0.41, "learning_rate": 4.122065880765372e-05, "loss": 0.54, "step": 4920 }, { "epoch": 0.41, "learning_rate": 4.120393446018803e-05, "loss": 0.562, "step": 4925 }, { "epoch": 0.41, "learning_rate": 4.118719759813029e-05, "loss": 0.5777, "step": 4930 }, { "epoch": 0.41, "learning_rate": 4.11704482344067e-05, "loss": 0.5532, "step": 4935 }, { "epoch": 0.41, "learning_rate": 4.1153686381953076e-05, "loss": 0.596, "step": 4940 }, { "epoch": 0.41, "learning_rate": 4.113691205371491e-05, "loss": 0.5905, "step": 4945 }, { "epoch": 0.42, "learning_rate": 4.1120125262647314e-05, "loss": 0.6121, "step": 4950 }, { "epoch": 0.42, "learning_rate": 4.1103326021715046e-05, "loss": 0.5718, "step": 4955 }, { "epoch": 0.42, "learning_rate": 4.108651434389245e-05, "loss": 0.5613, "step": 4960 }, { "epoch": 0.42, "learning_rate": 4.1069690242163484e-05, "loss": 0.579, "step": 4965 }, { "epoch": 0.42, "learning_rate": 4.105285372952172e-05, "loss": 0.5519, "step": 4970 }, { "epoch": 0.42, "learning_rate": 4.10360048189703e-05, "loss": 0.5769, "step": 4975 }, { "epoch": 0.42, "learning_rate": 4.101914352352194e-05, "loss": 0.6272, "step": 4980 }, { "epoch": 0.42, "learning_rate": 4.100226985619893e-05, "loss": 0.6023, "step": 4985 }, { "epoch": 0.42, "learning_rate": 4.0985383830033087e-05, "loss": 0.5538, "step": 4990 }, { "epoch": 0.42, "learning_rate": 4.096848545806582e-05, "loss": 0.5511, "step": 4995 }, { "epoch": 0.42, "learning_rate": 4.095157475334803e-05, "loss": 0.6128, "step": 5000 }, { "epoch": 0.42, "learning_rate": 4.0934651728940175e-05, "loss": 0.5784, "step": 5005 }, { "epoch": 0.42, "learning_rate": 4.0917716397912206e-05, "loss": 0.5921, "step": 5010 }, { "epoch": 0.42, "learning_rate": 4.090076877334359e-05, "loss": 0.612, "step": 5015 }, { "epoch": 0.42, "learning_rate": 4.088380886832327e-05, "loss": 0.5716, "step": 5020 }, { "epoch": 0.42, "learning_rate": 4.0866836695949705e-05, "loss": 0.6129, "step": 5025 }, { "epoch": 0.42, "learning_rate": 4.084985226933081e-05, "loss": 0.5891, "step": 5030 }, { "epoch": 0.42, "learning_rate": 4.083285560158395e-05, "loss": 0.547, "step": 5035 }, { "epoch": 0.42, "learning_rate": 4.081584670583597e-05, "loss": 0.6149, "step": 5040 }, { "epoch": 0.42, "learning_rate": 4.079882559522316e-05, "loss": 0.5774, "step": 5045 }, { "epoch": 0.42, "learning_rate": 4.078179228289121e-05, "loss": 0.5312, "step": 5050 }, { "epoch": 0.42, "learning_rate": 4.076474678199527e-05, "loss": 0.5608, "step": 5055 }, { "epoch": 0.42, "learning_rate": 4.074768910569988e-05, "loss": 0.5774, "step": 5060 }, { "epoch": 0.43, "learning_rate": 4.073061926717899e-05, "loss": 0.5984, "step": 5065 }, { "epoch": 0.43, "learning_rate": 4.071353727961597e-05, "loss": 0.6012, "step": 5070 }, { "epoch": 0.43, "learning_rate": 4.069644315620351e-05, "loss": 0.5865, "step": 5075 }, { "epoch": 0.43, "learning_rate": 4.067933691014375e-05, "loss": 0.4743, "step": 5080 }, { "epoch": 0.43, "learning_rate": 4.0662218554648125e-05, "loss": 0.5772, "step": 5085 }, { "epoch": 0.43, "learning_rate": 4.064508810293746e-05, "loss": 0.6317, "step": 5090 }, { "epoch": 0.43, "learning_rate": 4.062794556824191e-05, "loss": 0.5782, "step": 5095 }, { "epoch": 0.43, "learning_rate": 4.061079096380098e-05, "loss": 0.5938, "step": 5100 }, { "epoch": 0.43, "learning_rate": 4.0593624302863464e-05, "loss": 0.5887, "step": 5105 }, { "epoch": 0.43, "learning_rate": 4.05764455986875e-05, "loss": 0.7015, "step": 5110 }, { "epoch": 0.43, "learning_rate": 4.05592548645405e-05, "loss": 0.5441, "step": 5115 }, { "epoch": 0.43, "learning_rate": 4.054205211369918e-05, "loss": 0.5804, "step": 5120 }, { "epoch": 0.43, "learning_rate": 4.052483735944954e-05, "loss": 0.5246, "step": 5125 }, { "epoch": 0.43, "learning_rate": 4.050761061508685e-05, "loss": 0.6039, "step": 5130 }, { "epoch": 0.43, "learning_rate": 4.0490371893915626e-05, "loss": 0.5399, "step": 5135 }, { "epoch": 0.43, "learning_rate": 4.047312120924965e-05, "loss": 0.5908, "step": 5140 }, { "epoch": 0.43, "learning_rate": 4.0455858574411956e-05, "loss": 0.6072, "step": 5145 }, { "epoch": 0.43, "learning_rate": 4.043858400273476e-05, "loss": 0.5894, "step": 5150 }, { "epoch": 0.43, "learning_rate": 4.042129750755954e-05, "loss": 0.5535, "step": 5155 }, { "epoch": 0.43, "learning_rate": 4.040399910223697e-05, "loss": 0.6188, "step": 5160 }, { "epoch": 0.43, "learning_rate": 4.0386688800126924e-05, "loss": 0.625, "step": 5165 }, { "epoch": 0.43, "learning_rate": 4.036936661459846e-05, "loss": 0.5635, "step": 5170 }, { "epoch": 0.43, "learning_rate": 4.035203255902981e-05, "loss": 0.5961, "step": 5175 }, { "epoch": 0.43, "learning_rate": 4.033468664680837e-05, "loss": 0.5722, "step": 5180 }, { "epoch": 0.44, "learning_rate": 4.0317328891330735e-05, "loss": 0.5737, "step": 5185 }, { "epoch": 0.44, "learning_rate": 4.029995930600259e-05, "loss": 0.5188, "step": 5190 }, { "epoch": 0.44, "learning_rate": 4.028257790423878e-05, "loss": 0.5519, "step": 5195 }, { "epoch": 0.44, "learning_rate": 4.026518469946328e-05, "loss": 0.5745, "step": 5200 }, { "epoch": 0.44, "learning_rate": 4.0247779705109176e-05, "loss": 0.5999, "step": 5205 }, { "epoch": 0.44, "learning_rate": 4.023036293461867e-05, "loss": 0.6049, "step": 5210 }, { "epoch": 0.44, "learning_rate": 4.0212934401443034e-05, "loss": 0.5785, "step": 5215 }, { "epoch": 0.44, "learning_rate": 4.0195494119042656e-05, "loss": 0.5601, "step": 5220 }, { "epoch": 0.44, "learning_rate": 4.017804210088697e-05, "loss": 0.5691, "step": 5225 }, { "epoch": 0.44, "learning_rate": 4.016057836045448e-05, "loss": 0.6184, "step": 5230 }, { "epoch": 0.44, "learning_rate": 4.014310291123276e-05, "loss": 0.6377, "step": 5235 }, { "epoch": 0.44, "learning_rate": 4.012561576671843e-05, "loss": 0.5597, "step": 5240 }, { "epoch": 0.44, "learning_rate": 4.0108116940417097e-05, "loss": 0.5727, "step": 5245 }, { "epoch": 0.44, "learning_rate": 4.009060644584344e-05, "loss": 0.55, "step": 5250 }, { "epoch": 0.44, "learning_rate": 4.007308429652113e-05, "loss": 0.5531, "step": 5255 }, { "epoch": 0.44, "learning_rate": 4.0055550505982835e-05, "loss": 0.5639, "step": 5260 }, { "epoch": 0.44, "learning_rate": 4.0038005087770224e-05, "loss": 0.523, "step": 5265 }, { "epoch": 0.44, "learning_rate": 4.0020448055433934e-05, "loss": 0.5908, "step": 5270 }, { "epoch": 0.44, "learning_rate": 4.000287942253359e-05, "loss": 0.4783, "step": 5275 }, { "epoch": 0.44, "learning_rate": 3.998529920263776e-05, "loss": 0.6633, "step": 5280 }, { "epoch": 0.44, "learning_rate": 3.996770740932396e-05, "loss": 0.6695, "step": 5285 }, { "epoch": 0.44, "learning_rate": 3.995010405617867e-05, "loss": 0.5187, "step": 5290 }, { "epoch": 0.44, "learning_rate": 3.993248915679725e-05, "loss": 0.596, "step": 5295 }, { "epoch": 0.44, "learning_rate": 3.991486272478403e-05, "loss": 0.668, "step": 5300 }, { "epoch": 0.45, "learning_rate": 3.9897224773752204e-05, "loss": 0.5522, "step": 5305 }, { "epoch": 0.45, "learning_rate": 3.98795753173239e-05, "loss": 0.5445, "step": 5310 }, { "epoch": 0.45, "learning_rate": 3.986191436913009e-05, "loss": 0.5818, "step": 5315 }, { "epoch": 0.45, "learning_rate": 3.9844241942810675e-05, "loss": 0.6032, "step": 5320 }, { "epoch": 0.45, "learning_rate": 3.982655805201436e-05, "loss": 0.6435, "step": 5325 }, { "epoch": 0.45, "learning_rate": 3.9808862710398754e-05, "loss": 0.4963, "step": 5330 }, { "epoch": 0.45, "learning_rate": 3.979115593163028e-05, "loss": 0.5985, "step": 5335 }, { "epoch": 0.45, "learning_rate": 3.97734377293842e-05, "loss": 0.6152, "step": 5340 }, { "epoch": 0.45, "learning_rate": 3.9755708117344615e-05, "loss": 0.5306, "step": 5345 }, { "epoch": 0.45, "learning_rate": 3.9737967109204415e-05, "loss": 0.5717, "step": 5350 }, { "epoch": 0.45, "learning_rate": 3.9720214718665305e-05, "loss": 0.558, "step": 5355 }, { "epoch": 0.45, "learning_rate": 3.970245095943779e-05, "loss": 0.5985, "step": 5360 }, { "epoch": 0.45, "learning_rate": 3.9684675845241115e-05, "loss": 0.495, "step": 5365 }, { "epoch": 0.45, "learning_rate": 3.966688938980335e-05, "loss": 0.5671, "step": 5370 }, { "epoch": 0.45, "learning_rate": 3.964909160686128e-05, "loss": 0.5698, "step": 5375 }, { "epoch": 0.45, "learning_rate": 3.963128251016047e-05, "loss": 0.5215, "step": 5380 }, { "epoch": 0.45, "learning_rate": 3.961346211345519e-05, "loss": 0.5492, "step": 5385 }, { "epoch": 0.45, "learning_rate": 3.9595630430508466e-05, "loss": 0.5723, "step": 5390 }, { "epoch": 0.45, "learning_rate": 3.957778747509203e-05, "loss": 0.5814, "step": 5395 }, { "epoch": 0.45, "learning_rate": 3.955993326098631e-05, "loss": 0.5794, "step": 5400 }, { "epoch": 0.45, "learning_rate": 3.9542067801980456e-05, "loss": 0.5618, "step": 5405 }, { "epoch": 0.45, "learning_rate": 3.9524191111872275e-05, "loss": 0.5506, "step": 5410 }, { "epoch": 0.45, "learning_rate": 3.950630320446825e-05, "loss": 0.5861, "step": 5415 }, { "epoch": 0.45, "learning_rate": 3.948840409358355e-05, "loss": 0.5845, "step": 5420 }, { "epoch": 0.46, "learning_rate": 3.947049379304198e-05, "loss": 0.6014, "step": 5425 }, { "epoch": 0.46, "learning_rate": 3.9452572316675985e-05, "loss": 0.5646, "step": 5430 }, { "epoch": 0.46, "learning_rate": 3.943463967832665e-05, "loss": 0.5814, "step": 5435 }, { "epoch": 0.46, "learning_rate": 3.941669589184367e-05, "loss": 0.5856, "step": 5440 }, { "epoch": 0.46, "learning_rate": 3.939874097108536e-05, "loss": 0.6157, "step": 5445 }, { "epoch": 0.46, "learning_rate": 3.938077492991864e-05, "loss": 0.6358, "step": 5450 }, { "epoch": 0.46, "learning_rate": 3.936279778221898e-05, "loss": 0.562, "step": 5455 }, { "epoch": 0.46, "learning_rate": 3.93448095418705e-05, "loss": 0.6085, "step": 5460 }, { "epoch": 0.46, "learning_rate": 3.9326810222765814e-05, "loss": 0.6072, "step": 5465 }, { "epoch": 0.46, "learning_rate": 3.930879983880612e-05, "loss": 0.5363, "step": 5470 }, { "epoch": 0.46, "learning_rate": 3.9290778403901176e-05, "loss": 0.6785, "step": 5475 }, { "epoch": 0.46, "learning_rate": 3.927274593196926e-05, "loss": 0.5594, "step": 5480 }, { "epoch": 0.46, "learning_rate": 3.9254702436937176e-05, "loss": 0.5338, "step": 5485 }, { "epoch": 0.46, "learning_rate": 3.9236647932740245e-05, "loss": 0.5596, "step": 5490 }, { "epoch": 0.46, "learning_rate": 3.921858243332228e-05, "loss": 0.6303, "step": 5495 }, { "epoch": 0.46, "learning_rate": 3.92005059526356e-05, "loss": 0.5812, "step": 5500 }, { "epoch": 0.46, "learning_rate": 3.918241850464099e-05, "loss": 0.5541, "step": 5505 }, { "epoch": 0.46, "learning_rate": 3.916432010330772e-05, "loss": 0.5851, "step": 5510 }, { "epoch": 0.46, "learning_rate": 3.914621076261351e-05, "loss": 0.6124, "step": 5515 }, { "epoch": 0.46, "learning_rate": 3.9128090496544546e-05, "loss": 0.6, "step": 5520 }, { "epoch": 0.46, "learning_rate": 3.9109959319095415e-05, "loss": 0.5725, "step": 5525 }, { "epoch": 0.46, "learning_rate": 3.909181724426917e-05, "loss": 0.5542, "step": 5530 }, { "epoch": 0.46, "learning_rate": 3.907366428607726e-05, "loss": 0.6247, "step": 5535 }, { "epoch": 0.46, "learning_rate": 3.905550045853954e-05, "loss": 0.5672, "step": 5540 }, { "epoch": 0.47, "learning_rate": 3.903732577568427e-05, "loss": 0.5448, "step": 5545 }, { "epoch": 0.47, "learning_rate": 3.901914025154807e-05, "loss": 0.5625, "step": 5550 }, { "epoch": 0.47, "learning_rate": 3.900094390017596e-05, "loss": 0.5116, "step": 5555 }, { "epoch": 0.47, "learning_rate": 3.898273673562132e-05, "loss": 0.5858, "step": 5560 }, { "epoch": 0.47, "learning_rate": 3.896451877194587e-05, "loss": 0.6126, "step": 5565 }, { "epoch": 0.47, "learning_rate": 3.894629002321967e-05, "loss": 0.5837, "step": 5570 }, { "epoch": 0.47, "learning_rate": 3.892805050352111e-05, "loss": 0.55, "step": 5575 }, { "epoch": 0.47, "learning_rate": 3.8909800226936896e-05, "loss": 0.4885, "step": 5580 }, { "epoch": 0.47, "learning_rate": 3.8891539207562054e-05, "loss": 0.6041, "step": 5585 }, { "epoch": 0.47, "learning_rate": 3.8873267459499906e-05, "loss": 0.5772, "step": 5590 }, { "epoch": 0.47, "learning_rate": 3.8854984996862053e-05, "loss": 0.569, "step": 5595 }, { "epoch": 0.47, "learning_rate": 3.883669183376836e-05, "loss": 0.5673, "step": 5600 }, { "epoch": 0.47, "learning_rate": 3.881838798434698e-05, "loss": 0.5499, "step": 5605 }, { "epoch": 0.47, "learning_rate": 3.8800073462734296e-05, "loss": 0.5766, "step": 5610 }, { "epoch": 0.47, "learning_rate": 3.878174828307496e-05, "loss": 0.5817, "step": 5615 }, { "epoch": 0.47, "learning_rate": 3.8763412459521833e-05, "loss": 0.5739, "step": 5620 }, { "epoch": 0.47, "learning_rate": 3.8745066006235995e-05, "loss": 0.5724, "step": 5625 }, { "epoch": 0.47, "learning_rate": 3.8726708937386756e-05, "loss": 0.6099, "step": 5630 }, { "epoch": 0.47, "learning_rate": 3.8708341267151605e-05, "loss": 0.6428, "step": 5635 }, { "epoch": 0.47, "learning_rate": 3.8689963009716236e-05, "loss": 0.7571, "step": 5640 }, { "epoch": 0.47, "learning_rate": 3.86715741792745e-05, "loss": 0.5454, "step": 5645 }, { "epoch": 0.47, "learning_rate": 3.865317479002842e-05, "loss": 0.5139, "step": 5650 }, { "epoch": 0.47, "learning_rate": 3.86347648561882e-05, "loss": 0.5507, "step": 5655 }, { "epoch": 0.47, "learning_rate": 3.861634439197214e-05, "loss": 0.5928, "step": 5660 }, { "epoch": 0.48, "learning_rate": 3.859791341160671e-05, "loss": 0.5628, "step": 5665 }, { "epoch": 0.48, "learning_rate": 3.8579471929326494e-05, "loss": 0.5522, "step": 5670 }, { "epoch": 0.48, "learning_rate": 3.856101995937417e-05, "loss": 0.5889, "step": 5675 }, { "epoch": 0.48, "learning_rate": 3.854255751600054e-05, "loss": 0.646, "step": 5680 }, { "epoch": 0.48, "learning_rate": 3.8524084613464475e-05, "loss": 0.6128, "step": 5685 }, { "epoch": 0.48, "learning_rate": 3.8505601266032946e-05, "loss": 0.6145, "step": 5690 }, { "epoch": 0.48, "learning_rate": 3.848710748798096e-05, "loss": 0.5233, "step": 5695 }, { "epoch": 0.48, "learning_rate": 3.8468603293591595e-05, "loss": 0.5976, "step": 5700 }, { "epoch": 0.48, "learning_rate": 3.8450088697156e-05, "loss": 0.6426, "step": 5705 }, { "epoch": 0.48, "learning_rate": 3.8431563712973306e-05, "loss": 0.5667, "step": 5710 }, { "epoch": 0.48, "learning_rate": 3.8413028355350714e-05, "loss": 0.5695, "step": 5715 }, { "epoch": 0.48, "learning_rate": 3.8394482638603405e-05, "loss": 0.5088, "step": 5720 }, { "epoch": 0.48, "learning_rate": 3.837592657705457e-05, "loss": 0.5807, "step": 5725 }, { "epoch": 0.48, "learning_rate": 3.8357360185035395e-05, "loss": 0.6027, "step": 5730 }, { "epoch": 0.48, "learning_rate": 3.8338783476885045e-05, "loss": 0.5867, "step": 5735 }, { "epoch": 0.48, "learning_rate": 3.8320196466950645e-05, "loss": 0.5682, "step": 5740 }, { "epoch": 0.48, "learning_rate": 3.830159916958729e-05, "loss": 0.5941, "step": 5745 }, { "epoch": 0.48, "learning_rate": 3.8282991599157986e-05, "loss": 0.5957, "step": 5750 }, { "epoch": 0.48, "learning_rate": 3.8264373770033714e-05, "loss": 0.5588, "step": 5755 }, { "epoch": 0.48, "learning_rate": 3.824574569659335e-05, "loss": 0.5525, "step": 5760 }, { "epoch": 0.48, "learning_rate": 3.822710739322371e-05, "loss": 0.4834, "step": 5765 }, { "epoch": 0.48, "learning_rate": 3.820845887431947e-05, "loss": 0.5956, "step": 5770 }, { "epoch": 0.48, "learning_rate": 3.818980015428324e-05, "loss": 0.5953, "step": 5775 }, { "epoch": 0.49, "learning_rate": 3.817113124752547e-05, "loss": 0.641, "step": 5780 }, { "epoch": 0.49, "learning_rate": 3.8152452168464506e-05, "loss": 0.5592, "step": 5785 }, { "epoch": 0.49, "learning_rate": 3.813376293152654e-05, "loss": 0.5698, "step": 5790 }, { "epoch": 0.49, "learning_rate": 3.8115063551145605e-05, "loss": 0.5334, "step": 5795 }, { "epoch": 0.49, "learning_rate": 3.809635404176356e-05, "loss": 0.5903, "step": 5800 }, { "epoch": 0.49, "learning_rate": 3.8077634417830114e-05, "loss": 0.5657, "step": 5805 }, { "epoch": 0.49, "learning_rate": 3.805890469380276e-05, "loss": 0.5737, "step": 5810 }, { "epoch": 0.49, "learning_rate": 3.8040164884146824e-05, "loss": 0.6006, "step": 5815 }, { "epoch": 0.49, "learning_rate": 3.8021415003335374e-05, "loss": 0.5936, "step": 5820 }, { "epoch": 0.49, "learning_rate": 3.8002655065849295e-05, "loss": 0.6053, "step": 5825 }, { "epoch": 0.49, "learning_rate": 3.7983885086177227e-05, "loss": 0.5549, "step": 5830 }, { "epoch": 0.49, "learning_rate": 3.796510507881557e-05, "loss": 0.5851, "step": 5835 }, { "epoch": 0.49, "learning_rate": 3.794631505826845e-05, "loss": 0.5389, "step": 5840 }, { "epoch": 0.49, "learning_rate": 3.792751503904774e-05, "loss": 0.5957, "step": 5845 }, { "epoch": 0.49, "learning_rate": 3.7908705035673056e-05, "loss": 0.5434, "step": 5850 }, { "epoch": 0.49, "learning_rate": 3.788988506267168e-05, "loss": 0.5504, "step": 5855 }, { "epoch": 0.49, "learning_rate": 3.7871055134578635e-05, "loss": 0.6311, "step": 5860 }, { "epoch": 0.49, "learning_rate": 3.785221526593661e-05, "loss": 0.6283, "step": 5865 }, { "epoch": 0.49, "learning_rate": 3.7833365471295976e-05, "loss": 0.589, "step": 5870 }, { "epoch": 0.49, "learning_rate": 3.781450576521477e-05, "loss": 0.5448, "step": 5875 }, { "epoch": 0.49, "learning_rate": 3.779563616225868e-05, "loss": 0.4969, "step": 5880 }, { "epoch": 0.49, "learning_rate": 3.777675667700106e-05, "loss": 0.638, "step": 5885 }, { "epoch": 0.49, "learning_rate": 3.7757867324022855e-05, "loss": 0.5433, "step": 5890 }, { "epoch": 0.49, "learning_rate": 3.773896811791267e-05, "loss": 0.5232, "step": 5895 }, { "epoch": 0.5, "learning_rate": 3.77200590732667e-05, "loss": 0.565, "step": 5900 }, { "epoch": 0.5, "learning_rate": 3.770114020468874e-05, "loss": 0.5503, "step": 5905 }, { "epoch": 0.5, "learning_rate": 3.768221152679018e-05, "loss": 0.58, "step": 5910 }, { "epoch": 0.5, "learning_rate": 3.766327305418997e-05, "loss": 0.5044, "step": 5915 }, { "epoch": 0.5, "learning_rate": 3.764432480151465e-05, "loss": 0.4979, "step": 5920 }, { "epoch": 0.5, "learning_rate": 3.76253667833983e-05, "loss": 0.5577, "step": 5925 }, { "epoch": 0.5, "learning_rate": 3.760639901448252e-05, "loss": 0.5488, "step": 5930 }, { "epoch": 0.5, "learning_rate": 3.758742150941647e-05, "loss": 0.6001, "step": 5935 }, { "epoch": 0.5, "learning_rate": 3.756843428285684e-05, "loss": 0.5348, "step": 5940 }, { "epoch": 0.5, "learning_rate": 3.754943734946779e-05, "loss": 0.5951, "step": 5945 }, { "epoch": 0.5, "learning_rate": 3.753043072392098e-05, "loss": 0.532, "step": 5950 }, { "epoch": 0.5, "learning_rate": 3.751141442089562e-05, "loss": 0.5692, "step": 5955 }, { "epoch": 0.5, "learning_rate": 3.74923884550783e-05, "loss": 0.5453, "step": 5960 }, { "epoch": 0.5, "learning_rate": 3.747335284116313e-05, "loss": 0.5478, "step": 5965 }, { "epoch": 0.5, "learning_rate": 3.745430759385167e-05, "loss": 0.5929, "step": 5970 }, { "epoch": 0.5, "learning_rate": 3.7435252727852904e-05, "loss": 0.5748, "step": 5975 }, { "epoch": 0.5, "learning_rate": 3.741618825788324e-05, "loss": 0.6144, "step": 5980 }, { "epoch": 0.5, "learning_rate": 3.7397114198666536e-05, "loss": 0.5311, "step": 5985 }, { "epoch": 0.5, "learning_rate": 3.7378030564934014e-05, "loss": 0.5736, "step": 5990 }, { "epoch": 0.5, "learning_rate": 3.7358937371424314e-05, "loss": 0.59, "step": 5995 }, { "epoch": 0.5, "learning_rate": 3.733983463288346e-05, "loss": 0.6324, "step": 6000 }, { "epoch": 0.5, "learning_rate": 3.7320722364064834e-05, "loss": 0.5535, "step": 6005 }, { "epoch": 0.5, "learning_rate": 3.730160057972919e-05, "loss": 0.5062, "step": 6010 }, { "epoch": 0.5, "learning_rate": 3.7282469294644635e-05, "loss": 0.5809, "step": 6015 }, { "epoch": 0.51, "learning_rate": 3.72633285235866e-05, "loss": 0.5879, "step": 6020 }, { "epoch": 0.51, "learning_rate": 3.7244178281337865e-05, "loss": 0.5886, "step": 6025 }, { "epoch": 0.51, "learning_rate": 3.7225018582688485e-05, "loss": 0.5573, "step": 6030 }, { "epoch": 0.51, "learning_rate": 3.720584944243586e-05, "loss": 0.5937, "step": 6035 }, { "epoch": 0.51, "learning_rate": 3.718667087538465e-05, "loss": 0.5861, "step": 6040 }, { "epoch": 0.51, "learning_rate": 3.7167482896346816e-05, "loss": 0.5646, "step": 6045 }, { "epoch": 0.51, "learning_rate": 3.71482855201416e-05, "loss": 0.5704, "step": 6050 }, { "epoch": 0.51, "learning_rate": 3.712907876159545e-05, "loss": 0.5497, "step": 6055 }, { "epoch": 0.51, "learning_rate": 3.710986263554211e-05, "loss": 0.5637, "step": 6060 }, { "epoch": 0.51, "learning_rate": 3.709063715682255e-05, "loss": 0.541, "step": 6065 }, { "epoch": 0.51, "learning_rate": 3.707140234028495e-05, "loss": 0.5603, "step": 6070 }, { "epoch": 0.51, "learning_rate": 3.7052158200784684e-05, "loss": 0.5552, "step": 6075 }, { "epoch": 0.51, "learning_rate": 3.7032904753184394e-05, "loss": 0.5938, "step": 6080 }, { "epoch": 0.51, "learning_rate": 3.701364201235383e-05, "loss": 0.6069, "step": 6085 }, { "epoch": 0.51, "learning_rate": 3.699436999316997e-05, "loss": 0.6067, "step": 6090 }, { "epoch": 0.51, "learning_rate": 3.6975088710516944e-05, "loss": 0.5265, "step": 6095 }, { "epoch": 0.51, "learning_rate": 3.695579817928603e-05, "loss": 0.5991, "step": 6100 }, { "epoch": 0.51, "learning_rate": 3.6936498414375656e-05, "loss": 0.6091, "step": 6105 }, { "epoch": 0.51, "learning_rate": 3.691718943069139e-05, "loss": 0.5951, "step": 6110 }, { "epoch": 0.51, "learning_rate": 3.689787124314589e-05, "loss": 0.5483, "step": 6115 }, { "epoch": 0.51, "learning_rate": 3.687854386665895e-05, "loss": 0.5768, "step": 6120 }, { "epoch": 0.51, "learning_rate": 3.685920731615747e-05, "loss": 0.5276, "step": 6125 }, { "epoch": 0.51, "learning_rate": 3.6839861606575404e-05, "loss": 0.5721, "step": 6130 }, { "epoch": 0.51, "learning_rate": 3.6820506752853794e-05, "loss": 0.5933, "step": 6135 }, { "epoch": 0.52, "learning_rate": 3.6801142769940735e-05, "loss": 0.5872, "step": 6140 }, { "epoch": 0.52, "learning_rate": 3.67817696727914e-05, "loss": 0.563, "step": 6145 }, { "epoch": 0.52, "learning_rate": 3.6762387476367965e-05, "loss": 0.5965, "step": 6150 }, { "epoch": 0.52, "learning_rate": 3.674299619563967e-05, "loss": 0.5697, "step": 6155 }, { "epoch": 0.52, "learning_rate": 3.672359584558274e-05, "loss": 0.5173, "step": 6160 }, { "epoch": 0.52, "learning_rate": 3.670418644118042e-05, "loss": 0.6217, "step": 6165 }, { "epoch": 0.52, "learning_rate": 3.6684767997422944e-05, "loss": 0.5658, "step": 6170 }, { "epoch": 0.52, "learning_rate": 3.6665340529307527e-05, "loss": 0.5751, "step": 6175 }, { "epoch": 0.52, "learning_rate": 3.664590405183835e-05, "loss": 0.5855, "step": 6180 }, { "epoch": 0.52, "learning_rate": 3.6626458580026564e-05, "loss": 0.5176, "step": 6185 }, { "epoch": 0.52, "learning_rate": 3.660700412889026e-05, "loss": 0.5497, "step": 6190 }, { "epoch": 0.52, "learning_rate": 3.658754071345446e-05, "loss": 0.6466, "step": 6195 }, { "epoch": 0.52, "learning_rate": 3.656806834875111e-05, "loss": 0.5633, "step": 6200 }, { "epoch": 0.52, "learning_rate": 3.654858704981907e-05, "loss": 0.5783, "step": 6205 }, { "epoch": 0.52, "learning_rate": 3.65290968317041e-05, "loss": 0.5821, "step": 6210 }, { "epoch": 0.52, "learning_rate": 3.650959770945885e-05, "loss": 0.5326, "step": 6215 }, { "epoch": 0.52, "learning_rate": 3.649008969814285e-05, "loss": 0.5342, "step": 6220 }, { "epoch": 0.52, "learning_rate": 3.647057281282249e-05, "loss": 0.5709, "step": 6225 }, { "epoch": 0.52, "learning_rate": 3.6451047068571006e-05, "loss": 0.5639, "step": 6230 }, { "epoch": 0.52, "learning_rate": 3.643151248046849e-05, "loss": 0.6148, "step": 6235 }, { "epoch": 0.52, "learning_rate": 3.6411969063601846e-05, "loss": 0.5301, "step": 6240 }, { "epoch": 0.52, "learning_rate": 3.639241683306483e-05, "loss": 0.6402, "step": 6245 }, { "epoch": 0.52, "learning_rate": 3.637285580395797e-05, "loss": 0.525, "step": 6250 }, { "epoch": 0.52, "learning_rate": 3.63532859913886e-05, "loss": 0.5728, "step": 6255 }, { "epoch": 0.53, "learning_rate": 3.633370741047084e-05, "loss": 0.553, "step": 6260 }, { "epoch": 0.53, "learning_rate": 3.63141200763256e-05, "loss": 0.5933, "step": 6265 }, { "epoch": 0.53, "learning_rate": 3.629452400408052e-05, "loss": 0.5476, "step": 6270 }, { "epoch": 0.53, "learning_rate": 3.627491920887001e-05, "loss": 0.5406, "step": 6275 }, { "epoch": 0.53, "learning_rate": 3.625530570583519e-05, "loss": 0.6011, "step": 6280 }, { "epoch": 0.53, "learning_rate": 3.623568351012394e-05, "loss": 0.6046, "step": 6285 }, { "epoch": 0.53, "learning_rate": 3.621605263689083e-05, "loss": 0.5666, "step": 6290 }, { "epoch": 0.53, "learning_rate": 3.6196413101297145e-05, "loss": 0.5635, "step": 6295 }, { "epoch": 0.53, "learning_rate": 3.617676491851085e-05, "loss": 0.5302, "step": 6300 }, { "epoch": 0.53, "learning_rate": 3.6157108103706596e-05, "loss": 0.6235, "step": 6305 }, { "epoch": 0.53, "learning_rate": 3.613744267206568e-05, "loss": 0.5799, "step": 6310 }, { "epoch": 0.53, "learning_rate": 3.6117768638776095e-05, "loss": 0.5554, "step": 6315 }, { "epoch": 0.53, "learning_rate": 3.609808601903244e-05, "loss": 0.539, "step": 6320 }, { "epoch": 0.53, "learning_rate": 3.6078394828035964e-05, "loss": 0.5326, "step": 6325 }, { "epoch": 0.53, "learning_rate": 3.6058695080994524e-05, "loss": 0.5471, "step": 6330 }, { "epoch": 0.53, "learning_rate": 3.60389867931226e-05, "loss": 0.5817, "step": 6335 }, { "epoch": 0.53, "learning_rate": 3.601926997964126e-05, "loss": 0.5465, "step": 6340 }, { "epoch": 0.53, "learning_rate": 3.599954465577814e-05, "loss": 0.5802, "step": 6345 }, { "epoch": 0.53, "learning_rate": 3.5979810836767475e-05, "loss": 0.6502, "step": 6350 }, { "epoch": 0.53, "learning_rate": 3.596006853785006e-05, "loss": 0.5577, "step": 6355 }, { "epoch": 0.53, "learning_rate": 3.594031777427322e-05, "loss": 0.5767, "step": 6360 }, { "epoch": 0.53, "learning_rate": 3.592055856129082e-05, "loss": 0.5664, "step": 6365 }, { "epoch": 0.53, "learning_rate": 3.5900790914163276e-05, "loss": 0.5385, "step": 6370 }, { "epoch": 0.53, "learning_rate": 3.58810148481575e-05, "loss": 0.5692, "step": 6375 }, { "epoch": 0.54, "learning_rate": 3.586123037854688e-05, "loss": 0.5852, "step": 6380 }, { "epoch": 0.54, "learning_rate": 3.584143752061135e-05, "loss": 0.5959, "step": 6385 }, { "epoch": 0.54, "learning_rate": 3.582163628963728e-05, "loss": 0.5904, "step": 6390 }, { "epoch": 0.54, "learning_rate": 3.5801826700917505e-05, "loss": 0.5608, "step": 6395 }, { "epoch": 0.54, "learning_rate": 3.5782008769751354e-05, "loss": 0.5606, "step": 6400 }, { "epoch": 0.54, "learning_rate": 3.5762182511444564e-05, "loss": 0.58, "step": 6405 }, { "epoch": 0.54, "learning_rate": 3.5742347941309314e-05, "loss": 0.5994, "step": 6410 }, { "epoch": 0.54, "learning_rate": 3.5722505074664195e-05, "loss": 0.5861, "step": 6415 }, { "epoch": 0.54, "learning_rate": 3.570265392683422e-05, "loss": 0.5792, "step": 6420 }, { "epoch": 0.54, "learning_rate": 3.5682794513150795e-05, "loss": 0.5535, "step": 6425 }, { "epoch": 0.54, "learning_rate": 3.5662926848951685e-05, "loss": 0.5735, "step": 6430 }, { "epoch": 0.54, "learning_rate": 3.5643050949581065e-05, "loss": 0.5418, "step": 6435 }, { "epoch": 0.54, "learning_rate": 3.562316683038945e-05, "loss": 0.5823, "step": 6440 }, { "epoch": 0.54, "learning_rate": 3.56032745067337e-05, "loss": 0.5841, "step": 6445 }, { "epoch": 0.54, "learning_rate": 3.558337399397702e-05, "loss": 0.6169, "step": 6450 }, { "epoch": 0.54, "learning_rate": 3.556346530748893e-05, "loss": 0.572, "step": 6455 }, { "epoch": 0.54, "learning_rate": 3.554354846264527e-05, "loss": 0.6297, "step": 6460 }, { "epoch": 0.54, "learning_rate": 3.55236234748282e-05, "loss": 0.5389, "step": 6465 }, { "epoch": 0.54, "learning_rate": 3.550369035942612e-05, "loss": 0.607, "step": 6470 }, { "epoch": 0.54, "learning_rate": 3.5483749131833754e-05, "loss": 0.5698, "step": 6475 }, { "epoch": 0.54, "learning_rate": 3.5463799807452075e-05, "loss": 0.5893, "step": 6480 }, { "epoch": 0.54, "learning_rate": 3.544384240168829e-05, "loss": 0.5547, "step": 6485 }, { "epoch": 0.54, "learning_rate": 3.542387692995588e-05, "loss": 0.6316, "step": 6490 }, { "epoch": 0.55, "learning_rate": 3.5403903407674535e-05, "loss": 0.5816, "step": 6495 }, { "epoch": 0.55, "learning_rate": 3.5383921850270174e-05, "loss": 0.5564, "step": 6500 }, { "epoch": 0.55, "learning_rate": 3.5363932273174905e-05, "loss": 0.5304, "step": 6505 }, { "epoch": 0.55, "learning_rate": 3.5343934691827053e-05, "loss": 0.5668, "step": 6510 }, { "epoch": 0.55, "learning_rate": 3.53239291216711e-05, "loss": 0.6055, "step": 6515 }, { "epoch": 0.55, "learning_rate": 3.530391557815772e-05, "loss": 0.5774, "step": 6520 }, { "epoch": 0.55, "learning_rate": 3.5283894076743726e-05, "loss": 0.5634, "step": 6525 }, { "epoch": 0.55, "learning_rate": 3.5263864632892096e-05, "loss": 0.5818, "step": 6530 }, { "epoch": 0.55, "learning_rate": 3.524382726207192e-05, "loss": 0.5888, "step": 6535 }, { "epoch": 0.55, "learning_rate": 3.522378197975844e-05, "loss": 0.6028, "step": 6540 }, { "epoch": 0.55, "learning_rate": 3.5203728801432976e-05, "loss": 0.555, "step": 6545 }, { "epoch": 0.55, "learning_rate": 3.518366774258296e-05, "loss": 0.5283, "step": 6550 }, { "epoch": 0.55, "learning_rate": 3.516359881870192e-05, "loss": 0.5894, "step": 6555 }, { "epoch": 0.55, "learning_rate": 3.514352204528944e-05, "loss": 0.5579, "step": 6560 }, { "epoch": 0.55, "learning_rate": 3.512343743785119e-05, "loss": 0.5404, "step": 6565 }, { "epoch": 0.55, "learning_rate": 3.510334501189886e-05, "loss": 0.5758, "step": 6570 }, { "epoch": 0.55, "learning_rate": 3.5083244782950206e-05, "loss": 0.5848, "step": 6575 }, { "epoch": 0.55, "learning_rate": 3.506313676652899e-05, "loss": 0.6231, "step": 6580 }, { "epoch": 0.55, "learning_rate": 3.504302097816501e-05, "loss": 0.5404, "step": 6585 }, { "epoch": 0.55, "learning_rate": 3.5022897433394035e-05, "loss": 0.5561, "step": 6590 }, { "epoch": 0.55, "learning_rate": 3.500276614775786e-05, "loss": 0.494, "step": 6595 }, { "epoch": 0.55, "learning_rate": 3.498262713680423e-05, "loss": 0.5156, "step": 6600 }, { "epoch": 0.55, "learning_rate": 3.496248041608688e-05, "loss": 0.5076, "step": 6605 }, { "epoch": 0.55, "learning_rate": 3.4942326001165474e-05, "loss": 0.6165, "step": 6610 }, { "epoch": 0.56, "learning_rate": 3.492216390760564e-05, "loss": 0.5837, "step": 6615 }, { "epoch": 0.56, "learning_rate": 3.490199415097892e-05, "loss": 0.5542, "step": 6620 }, { "epoch": 0.56, "learning_rate": 3.4881816746862796e-05, "loss": 0.6144, "step": 6625 }, { "epoch": 0.56, "learning_rate": 3.486163171084063e-05, "loss": 0.5793, "step": 6630 }, { "epoch": 0.56, "learning_rate": 3.48414390585017e-05, "loss": 0.5341, "step": 6635 }, { "epoch": 0.56, "learning_rate": 3.4821238805441145e-05, "loss": 0.5763, "step": 6640 }, { "epoch": 0.56, "learning_rate": 3.480103096726e-05, "loss": 0.5484, "step": 6645 }, { "epoch": 0.56, "learning_rate": 3.4780815559565135e-05, "loss": 0.5693, "step": 6650 }, { "epoch": 0.56, "learning_rate": 3.476059259796929e-05, "loss": 0.5568, "step": 6655 }, { "epoch": 0.56, "learning_rate": 3.4740362098091e-05, "loss": 0.5498, "step": 6660 }, { "epoch": 0.56, "learning_rate": 3.472012407555466e-05, "loss": 0.5509, "step": 6665 }, { "epoch": 0.56, "learning_rate": 3.469987854599047e-05, "loss": 0.5253, "step": 6670 }, { "epoch": 0.56, "learning_rate": 3.4679625525034396e-05, "loss": 0.5476, "step": 6675 }, { "epoch": 0.56, "learning_rate": 3.465936502832824e-05, "loss": 0.6058, "step": 6680 }, { "epoch": 0.56, "learning_rate": 3.4639097071519535e-05, "loss": 0.6085, "step": 6685 }, { "epoch": 0.56, "learning_rate": 3.4618821670261595e-05, "loss": 0.6338, "step": 6690 }, { "epoch": 0.56, "learning_rate": 3.459853884021347e-05, "loss": 0.4965, "step": 6695 }, { "epoch": 0.56, "learning_rate": 3.4578248597039974e-05, "loss": 0.5458, "step": 6700 }, { "epoch": 0.56, "learning_rate": 3.455795095641161e-05, "loss": 0.5893, "step": 6705 }, { "epoch": 0.56, "learning_rate": 3.453764593400463e-05, "loss": 0.5793, "step": 6710 }, { "epoch": 0.56, "learning_rate": 3.4517333545500974e-05, "loss": 0.5686, "step": 6715 }, { "epoch": 0.56, "learning_rate": 3.449701380658825e-05, "loss": 0.5283, "step": 6720 }, { "epoch": 0.56, "learning_rate": 3.447668673295977e-05, "loss": 0.6657, "step": 6725 }, { "epoch": 0.56, "learning_rate": 3.4456352340314494e-05, "loss": 0.5577, "step": 6730 }, { "epoch": 0.57, "learning_rate": 3.443601064435704e-05, "loss": 0.5245, "step": 6735 }, { "epoch": 0.57, "learning_rate": 3.4415661660797687e-05, "loss": 0.5624, "step": 6740 }, { "epoch": 0.57, "learning_rate": 3.439530540535232e-05, "loss": 0.5212, "step": 6745 }, { "epoch": 0.57, "learning_rate": 3.437494189374242e-05, "loss": 0.4812, "step": 6750 }, { "epoch": 0.57, "learning_rate": 3.435457114169511e-05, "loss": 0.6335, "step": 6755 }, { "epoch": 0.57, "learning_rate": 3.433419316494311e-05, "loss": 0.5372, "step": 6760 }, { "epoch": 0.57, "learning_rate": 3.4313807979224666e-05, "loss": 0.5525, "step": 6765 }, { "epoch": 0.57, "learning_rate": 3.429341560028365e-05, "loss": 0.601, "step": 6770 }, { "epoch": 0.57, "learning_rate": 3.427301604386946e-05, "loss": 0.5226, "step": 6775 }, { "epoch": 0.57, "learning_rate": 3.425260932573704e-05, "loss": 0.6149, "step": 6780 }, { "epoch": 0.57, "learning_rate": 3.423219546164687e-05, "loss": 0.5527, "step": 6785 }, { "epoch": 0.57, "learning_rate": 3.4211774467364935e-05, "loss": 0.5674, "step": 6790 }, { "epoch": 0.57, "learning_rate": 3.4191346358662764e-05, "loss": 0.5564, "step": 6795 }, { "epoch": 0.57, "learning_rate": 3.417091115131734e-05, "loss": 0.5574, "step": 6800 }, { "epoch": 0.57, "learning_rate": 3.415046886111113e-05, "loss": 0.6256, "step": 6805 }, { "epoch": 0.57, "learning_rate": 3.413001950383211e-05, "loss": 0.5456, "step": 6810 }, { "epoch": 0.57, "learning_rate": 3.410956309527368e-05, "loss": 0.5688, "step": 6815 }, { "epoch": 0.57, "learning_rate": 3.408909965123469e-05, "loss": 0.53, "step": 6820 }, { "epoch": 0.57, "learning_rate": 3.406862918751944e-05, "loss": 0.6051, "step": 6825 }, { "epoch": 0.57, "learning_rate": 3.404815171993763e-05, "loss": 0.5923, "step": 6830 }, { "epoch": 0.57, "learning_rate": 3.4027667264304376e-05, "loss": 0.5638, "step": 6835 }, { "epoch": 0.57, "learning_rate": 3.40071758364402e-05, "loss": 0.581, "step": 6840 }, { "epoch": 0.57, "learning_rate": 3.3986677452171e-05, "loss": 0.5676, "step": 6845 }, { "epoch": 0.57, "learning_rate": 3.396617212732805e-05, "loss": 0.5684, "step": 6850 }, { "epoch": 0.58, "learning_rate": 3.394565987774799e-05, "loss": 0.5246, "step": 6855 }, { "epoch": 0.58, "learning_rate": 3.3925140719272794e-05, "loss": 0.5736, "step": 6860 }, { "epoch": 0.58, "learning_rate": 3.390461466774979e-05, "loss": 0.6364, "step": 6865 }, { "epoch": 0.58, "learning_rate": 3.38840817390316e-05, "loss": 0.6408, "step": 6870 }, { "epoch": 0.58, "learning_rate": 3.38635419489762e-05, "loss": 0.5833, "step": 6875 }, { "epoch": 0.58, "learning_rate": 3.3842995313446826e-05, "loss": 0.5692, "step": 6880 }, { "epoch": 0.58, "learning_rate": 3.3822441848312017e-05, "loss": 0.5877, "step": 6885 }, { "epoch": 0.58, "learning_rate": 3.380188156944559e-05, "loss": 0.4928, "step": 6890 }, { "epoch": 0.58, "learning_rate": 3.378131449272662e-05, "loss": 0.6309, "step": 6895 }, { "epoch": 0.58, "learning_rate": 3.3760740634039445e-05, "loss": 0.5549, "step": 6900 }, { "epoch": 0.58, "learning_rate": 3.374016000927361e-05, "loss": 0.5523, "step": 6905 }, { "epoch": 0.58, "learning_rate": 3.371957263432392e-05, "loss": 0.6317, "step": 6910 }, { "epoch": 0.58, "learning_rate": 3.369897852509035e-05, "loss": 0.5776, "step": 6915 }, { "epoch": 0.58, "learning_rate": 3.3678377697478137e-05, "loss": 0.547, "step": 6920 }, { "epoch": 0.58, "learning_rate": 3.365777016739766e-05, "loss": 0.5555, "step": 6925 }, { "epoch": 0.58, "learning_rate": 3.3637155950764485e-05, "loss": 0.5988, "step": 6930 }, { "epoch": 0.58, "learning_rate": 3.3616535063499344e-05, "loss": 0.5875, "step": 6935 }, { "epoch": 0.58, "learning_rate": 3.3595907521528134e-05, "loss": 0.554, "step": 6940 }, { "epoch": 0.58, "learning_rate": 3.357527334078186e-05, "loss": 0.5432, "step": 6945 }, { "epoch": 0.58, "learning_rate": 3.3554632537196684e-05, "loss": 0.606, "step": 6950 }, { "epoch": 0.58, "learning_rate": 3.3533985126713866e-05, "loss": 0.5519, "step": 6955 }, { "epoch": 0.58, "learning_rate": 3.351333112527979e-05, "loss": 0.564, "step": 6960 }, { "epoch": 0.58, "learning_rate": 3.3492670548845905e-05, "loss": 0.5817, "step": 6965 }, { "epoch": 0.58, "learning_rate": 3.347200341336876e-05, "loss": 0.5964, "step": 6970 }, { "epoch": 0.59, "learning_rate": 3.345132973480993e-05, "loss": 0.5217, "step": 6975 }, { "epoch": 0.59, "learning_rate": 3.3430649529136094e-05, "loss": 0.5789, "step": 6980 }, { "epoch": 0.59, "learning_rate": 3.340996281231894e-05, "loss": 0.5742, "step": 6985 }, { "epoch": 0.59, "learning_rate": 3.33892696003352e-05, "loss": 0.6044, "step": 6990 }, { "epoch": 0.59, "learning_rate": 3.336856990916662e-05, "loss": 0.5521, "step": 6995 }, { "epoch": 0.59, "learning_rate": 3.334786375479995e-05, "loss": 0.5804, "step": 7000 }, { "epoch": 0.59, "learning_rate": 3.33271511532269e-05, "loss": 0.5375, "step": 7005 }, { "epoch": 0.59, "learning_rate": 3.330643212044421e-05, "loss": 0.543, "step": 7010 }, { "epoch": 0.59, "learning_rate": 3.3285706672453563e-05, "loss": 0.6384, "step": 7015 }, { "epoch": 0.59, "learning_rate": 3.3264974825261595e-05, "loss": 0.5151, "step": 7020 }, { "epoch": 0.59, "learning_rate": 3.324423659487989e-05, "loss": 0.5333, "step": 7025 }, { "epoch": 0.59, "learning_rate": 3.322349199732495e-05, "loss": 0.5308, "step": 7030 }, { "epoch": 0.59, "learning_rate": 3.3202741048618226e-05, "loss": 0.6059, "step": 7035 }, { "epoch": 0.59, "learning_rate": 3.3181983764786016e-05, "loss": 0.5788, "step": 7040 }, { "epoch": 0.59, "learning_rate": 3.316122016185957e-05, "loss": 0.5531, "step": 7045 }, { "epoch": 0.59, "learning_rate": 3.3140450255875e-05, "loss": 0.593, "step": 7050 }, { "epoch": 0.59, "learning_rate": 3.311967406287326e-05, "loss": 0.5867, "step": 7055 }, { "epoch": 0.59, "learning_rate": 3.309889159890019e-05, "loss": 0.5083, "step": 7060 }, { "epoch": 0.59, "learning_rate": 3.3078102880006466e-05, "loss": 0.5601, "step": 7065 }, { "epoch": 0.59, "learning_rate": 3.30573079222476e-05, "loss": 0.5544, "step": 7070 }, { "epoch": 0.59, "learning_rate": 3.303650674168389e-05, "loss": 0.5465, "step": 7075 }, { "epoch": 0.59, "learning_rate": 3.301569935438049e-05, "loss": 0.6744, "step": 7080 }, { "epoch": 0.59, "learning_rate": 3.29948857764073e-05, "loss": 0.5188, "step": 7085 }, { "epoch": 0.59, "learning_rate": 3.2974066023839045e-05, "loss": 0.5629, "step": 7090 }, { "epoch": 0.6, "learning_rate": 3.295324011275518e-05, "loss": 0.5501, "step": 7095 }, { "epoch": 0.6, "learning_rate": 3.2932408059239945e-05, "loss": 0.5421, "step": 7100 }, { "epoch": 0.6, "learning_rate": 3.29115698793823e-05, "loss": 0.5912, "step": 7105 }, { "epoch": 0.6, "learning_rate": 3.2890725589275965e-05, "loss": 0.561, "step": 7110 }, { "epoch": 0.6, "learning_rate": 3.286987520501934e-05, "loss": 0.644, "step": 7115 }, { "epoch": 0.6, "learning_rate": 3.284901874271557e-05, "loss": 0.5388, "step": 7120 }, { "epoch": 0.6, "learning_rate": 3.282815621847248e-05, "loss": 0.5608, "step": 7125 }, { "epoch": 0.6, "learning_rate": 3.2807287648402574e-05, "loss": 0.5511, "step": 7130 }, { "epoch": 0.6, "learning_rate": 3.278641304862302e-05, "loss": 0.5414, "step": 7135 }, { "epoch": 0.6, "learning_rate": 3.2765532435255657e-05, "loss": 0.6379, "step": 7140 }, { "epoch": 0.6, "learning_rate": 3.2744645824426954e-05, "loss": 0.5578, "step": 7145 }, { "epoch": 0.6, "learning_rate": 3.272375323226802e-05, "loss": 0.5569, "step": 7150 }, { "epoch": 0.6, "learning_rate": 3.270285467491459e-05, "loss": 0.5697, "step": 7155 }, { "epoch": 0.6, "learning_rate": 3.268195016850699e-05, "loss": 0.5787, "step": 7160 }, { "epoch": 0.6, "learning_rate": 3.2661039729190144e-05, "loss": 0.5388, "step": 7165 }, { "epoch": 0.6, "learning_rate": 3.264012337311358e-05, "loss": 0.5535, "step": 7170 }, { "epoch": 0.6, "learning_rate": 3.261920111643137e-05, "loss": 0.5984, "step": 7175 }, { "epoch": 0.6, "learning_rate": 3.259827297530215e-05, "loss": 0.5648, "step": 7180 }, { "epoch": 0.6, "learning_rate": 3.25773389658891e-05, "loss": 0.6302, "step": 7185 }, { "epoch": 0.6, "learning_rate": 3.255639910435994e-05, "loss": 0.6288, "step": 7190 }, { "epoch": 0.6, "learning_rate": 3.25354534068869e-05, "loss": 0.5765, "step": 7195 }, { "epoch": 0.6, "learning_rate": 3.251450188964672e-05, "loss": 0.5868, "step": 7200 }, { "epoch": 0.6, "learning_rate": 3.249354456882064e-05, "loss": 0.5462, "step": 7205 }, { "epoch": 0.61, "learning_rate": 3.247258146059439e-05, "loss": 0.5696, "step": 7210 }, { "epoch": 0.61, "learning_rate": 3.245161258115815e-05, "loss": 0.5778, "step": 7215 }, { "epoch": 0.61, "learning_rate": 3.243063794670654e-05, "loss": 0.5697, "step": 7220 }, { "epoch": 0.61, "learning_rate": 3.240965757343869e-05, "loss": 0.5749, "step": 7225 }, { "epoch": 0.61, "learning_rate": 3.238867147755809e-05, "loss": 0.5488, "step": 7230 }, { "epoch": 0.61, "learning_rate": 3.2367679675272705e-05, "loss": 0.6171, "step": 7235 }, { "epoch": 0.61, "learning_rate": 3.234668218279487e-05, "loss": 0.5913, "step": 7240 }, { "epoch": 0.61, "learning_rate": 3.232567901634135e-05, "loss": 0.5939, "step": 7245 }, { "epoch": 0.61, "learning_rate": 3.2304670192133236e-05, "loss": 0.5775, "step": 7250 }, { "epoch": 0.61, "learning_rate": 3.228365572639604e-05, "loss": 0.568, "step": 7255 }, { "epoch": 0.61, "learning_rate": 3.226263563535963e-05, "loss": 0.5592, "step": 7260 }, { "epoch": 0.61, "learning_rate": 3.224160993525818e-05, "loss": 0.6442, "step": 7265 }, { "epoch": 0.61, "learning_rate": 3.222057864233023e-05, "loss": 0.5519, "step": 7270 }, { "epoch": 0.61, "learning_rate": 3.219954177281864e-05, "loss": 0.6409, "step": 7275 }, { "epoch": 0.61, "learning_rate": 3.217849934297054e-05, "loss": 0.5525, "step": 7280 }, { "epoch": 0.61, "learning_rate": 3.215745136903739e-05, "loss": 0.6155, "step": 7285 }, { "epoch": 0.61, "learning_rate": 3.2136397867274925e-05, "loss": 0.5557, "step": 7290 }, { "epoch": 0.61, "learning_rate": 3.211533885394314e-05, "loss": 0.5518, "step": 7295 }, { "epoch": 0.61, "learning_rate": 3.209427434530631e-05, "loss": 0.5739, "step": 7300 }, { "epoch": 0.61, "learning_rate": 3.207320435763291e-05, "loss": 0.5895, "step": 7305 }, { "epoch": 0.61, "learning_rate": 3.205212890719569e-05, "loss": 0.5368, "step": 7310 }, { "epoch": 0.61, "learning_rate": 3.20310480102716e-05, "loss": 0.5689, "step": 7315 }, { "epoch": 0.61, "learning_rate": 3.2009961683141796e-05, "loss": 0.5154, "step": 7320 }, { "epoch": 0.61, "learning_rate": 3.1988869942091636e-05, "loss": 0.5694, "step": 7325 }, { "epoch": 0.62, "learning_rate": 3.196777280341064e-05, "loss": 0.5, "step": 7330 }, { "epoch": 0.62, "learning_rate": 3.194667028339252e-05, "loss": 0.586, "step": 7335 }, { "epoch": 0.62, "learning_rate": 3.192556239833513e-05, "loss": 0.5872, "step": 7340 }, { "epoch": 0.62, "learning_rate": 3.190444916454048e-05, "loss": 0.637, "step": 7345 }, { "epoch": 0.62, "learning_rate": 3.18833305983147e-05, "loss": 0.5132, "step": 7350 }, { "epoch": 0.62, "learning_rate": 3.186220671596804e-05, "loss": 0.5958, "step": 7355 }, { "epoch": 0.62, "learning_rate": 3.184107753381485e-05, "loss": 0.5163, "step": 7360 }, { "epoch": 0.62, "learning_rate": 3.181994306817358e-05, "loss": 0.6186, "step": 7365 }, { "epoch": 0.62, "learning_rate": 3.179880333536678e-05, "loss": 0.5572, "step": 7370 }, { "epoch": 0.62, "learning_rate": 3.177765835172103e-05, "loss": 0.5814, "step": 7375 }, { "epoch": 0.62, "learning_rate": 3.175650813356699e-05, "loss": 0.5967, "step": 7380 }, { "epoch": 0.62, "learning_rate": 3.173535269723935e-05, "loss": 0.5401, "step": 7385 }, { "epoch": 0.62, "learning_rate": 3.171419205907685e-05, "loss": 0.5515, "step": 7390 }, { "epoch": 0.62, "learning_rate": 3.169302623542222e-05, "loss": 0.5863, "step": 7395 }, { "epoch": 0.62, "learning_rate": 3.1671855242622214e-05, "loss": 0.5936, "step": 7400 }, { "epoch": 0.62, "learning_rate": 3.165067909702757e-05, "loss": 0.6049, "step": 7405 }, { "epoch": 0.62, "learning_rate": 3.162949781499302e-05, "loss": 0.5501, "step": 7410 }, { "epoch": 0.62, "learning_rate": 3.160831141287724e-05, "loss": 0.5174, "step": 7415 }, { "epoch": 0.62, "learning_rate": 3.158711990704289e-05, "loss": 0.5543, "step": 7420 }, { "epoch": 0.62, "learning_rate": 3.1565923313856524e-05, "loss": 0.5525, "step": 7425 }, { "epoch": 0.62, "learning_rate": 3.154472164968868e-05, "loss": 0.5793, "step": 7430 }, { "epoch": 0.62, "learning_rate": 3.1523514930913775e-05, "loss": 0.5939, "step": 7435 }, { "epoch": 0.62, "learning_rate": 3.150230317391015e-05, "loss": 0.5632, "step": 7440 }, { "epoch": 0.62, "learning_rate": 3.1481086395060016e-05, "loss": 0.594, "step": 7445 }, { "epoch": 0.63, "learning_rate": 3.145986461074949e-05, "loss": 0.5696, "step": 7450 }, { "epoch": 0.63, "learning_rate": 3.1438637837368534e-05, "loss": 0.6042, "step": 7455 }, { "epoch": 0.63, "learning_rate": 3.141740609131097e-05, "loss": 0.535, "step": 7460 }, { "epoch": 0.63, "learning_rate": 3.139616938897446e-05, "loss": 0.5363, "step": 7465 }, { "epoch": 0.63, "learning_rate": 3.137492774676049e-05, "loss": 0.5847, "step": 7470 }, { "epoch": 0.63, "learning_rate": 3.1353681181074366e-05, "loss": 0.5542, "step": 7475 }, { "epoch": 0.63, "learning_rate": 3.133242970832521e-05, "loss": 0.5702, "step": 7480 }, { "epoch": 0.63, "learning_rate": 3.1311173344925896e-05, "loss": 0.6044, "step": 7485 }, { "epoch": 0.63, "learning_rate": 3.1289912107293114e-05, "loss": 0.5627, "step": 7490 }, { "epoch": 0.63, "learning_rate": 3.12686460118473e-05, "loss": 0.5831, "step": 7495 }, { "epoch": 0.63, "learning_rate": 3.124737507501264e-05, "loss": 0.6411, "step": 7500 }, { "epoch": 0.63, "learning_rate": 3.122609931321707e-05, "loss": 0.589, "step": 7505 }, { "epoch": 0.63, "learning_rate": 3.120481874289224e-05, "loss": 0.5835, "step": 7510 }, { "epoch": 0.63, "learning_rate": 3.1183533380473526e-05, "loss": 0.5228, "step": 7515 }, { "epoch": 0.63, "learning_rate": 3.1162243242399995e-05, "loss": 0.5281, "step": 7520 }, { "epoch": 0.63, "learning_rate": 3.114094834511441e-05, "loss": 0.554, "step": 7525 }, { "epoch": 0.63, "learning_rate": 3.1119648705063196e-05, "loss": 0.5633, "step": 7530 }, { "epoch": 0.63, "learning_rate": 3.109834433869646e-05, "loss": 0.5868, "step": 7535 }, { "epoch": 0.63, "learning_rate": 3.1077035262467955e-05, "loss": 0.5649, "step": 7540 }, { "epoch": 0.63, "learning_rate": 3.1055721492835054e-05, "loss": 0.5814, "step": 7545 }, { "epoch": 0.63, "learning_rate": 3.103440304625877e-05, "loss": 0.5493, "step": 7550 }, { "epoch": 0.63, "learning_rate": 3.1013079939203724e-05, "loss": 0.5571, "step": 7555 }, { "epoch": 0.63, "learning_rate": 3.099175218813816e-05, "loss": 0.6048, "step": 7560 }, { "epoch": 0.63, "learning_rate": 3.0970419809533846e-05, "loss": 0.6175, "step": 7565 }, { "epoch": 0.64, "learning_rate": 3.09490828198662e-05, "loss": 0.5312, "step": 7570 }, { "epoch": 0.64, "learning_rate": 3.092774123561415e-05, "loss": 0.5509, "step": 7575 }, { "epoch": 0.64, "learning_rate": 3.0906395073260184e-05, "loss": 0.5105, "step": 7580 }, { "epoch": 0.64, "learning_rate": 3.088504434929034e-05, "loss": 0.5512, "step": 7585 }, { "epoch": 0.64, "learning_rate": 3.0863689080194156e-05, "loss": 0.5757, "step": 7590 }, { "epoch": 0.64, "learning_rate": 3.084232928246472e-05, "loss": 0.5715, "step": 7595 }, { "epoch": 0.64, "learning_rate": 3.082096497259855e-05, "loss": 0.5124, "step": 7600 }, { "epoch": 0.64, "learning_rate": 3.079959616709573e-05, "loss": 0.5556, "step": 7605 }, { "epoch": 0.64, "learning_rate": 3.077822288245975e-05, "loss": 0.556, "step": 7610 }, { "epoch": 0.64, "learning_rate": 3.075684513519759e-05, "loss": 0.5309, "step": 7615 }, { "epoch": 0.64, "learning_rate": 3.0735462941819675e-05, "loss": 0.5612, "step": 7620 }, { "epoch": 0.64, "learning_rate": 3.071407631883986e-05, "loss": 0.5754, "step": 7625 }, { "epoch": 0.64, "learning_rate": 3.069268528277542e-05, "loss": 0.6116, "step": 7630 }, { "epoch": 0.64, "learning_rate": 3.067128985014704e-05, "loss": 0.6222, "step": 7635 }, { "epoch": 0.64, "learning_rate": 3.0649890037478794e-05, "loss": 0.5345, "step": 7640 }, { "epoch": 0.64, "learning_rate": 3.062848586129815e-05, "loss": 0.5407, "step": 7645 }, { "epoch": 0.64, "learning_rate": 3.060707733813595e-05, "loss": 0.5311, "step": 7650 }, { "epoch": 0.64, "learning_rate": 3.058566448452637e-05, "loss": 0.5995, "step": 7655 }, { "epoch": 0.64, "learning_rate": 3.056424731700694e-05, "loss": 0.5396, "step": 7660 }, { "epoch": 0.64, "learning_rate": 3.054282585211856e-05, "loss": 0.5293, "step": 7665 }, { "epoch": 0.64, "learning_rate": 3.052140010640537e-05, "loss": 0.5637, "step": 7670 }, { "epoch": 0.64, "learning_rate": 3.0499970096414888e-05, "loss": 0.5657, "step": 7675 }, { "epoch": 0.64, "learning_rate": 3.0478535838697898e-05, "loss": 0.6137, "step": 7680 }, { "epoch": 0.64, "learning_rate": 3.045709734980846e-05, "loss": 0.5369, "step": 7685 }, { "epoch": 0.65, "learning_rate": 3.0435654646303908e-05, "loss": 0.5476, "step": 7690 }, { "epoch": 0.65, "learning_rate": 3.041420774474483e-05, "loss": 0.5495, "step": 7695 }, { "epoch": 0.65, "learning_rate": 3.0392756661695058e-05, "loss": 0.5686, "step": 7700 }, { "epoch": 0.65, "learning_rate": 3.0371301413721653e-05, "loss": 0.6265, "step": 7705 }, { "epoch": 0.65, "learning_rate": 3.034984201739488e-05, "loss": 0.578, "step": 7710 }, { "epoch": 0.65, "learning_rate": 3.0328378489288246e-05, "loss": 0.5274, "step": 7715 }, { "epoch": 0.65, "learning_rate": 3.030691084597839e-05, "loss": 0.5394, "step": 7720 }, { "epoch": 0.65, "learning_rate": 3.0285439104045183e-05, "loss": 0.5553, "step": 7725 }, { "epoch": 0.65, "learning_rate": 3.0263963280071633e-05, "loss": 0.5842, "step": 7730 }, { "epoch": 0.65, "learning_rate": 3.0242483390643915e-05, "loss": 0.594, "step": 7735 }, { "epoch": 0.65, "learning_rate": 3.0220999452351333e-05, "loss": 0.5427, "step": 7740 }, { "epoch": 0.65, "learning_rate": 3.0199511481786314e-05, "loss": 0.5074, "step": 7745 }, { "epoch": 0.65, "learning_rate": 3.0178019495544408e-05, "loss": 0.5489, "step": 7750 }, { "epoch": 0.65, "learning_rate": 3.0156523510224273e-05, "loss": 0.5734, "step": 7755 }, { "epoch": 0.65, "learning_rate": 3.0135023542427643e-05, "loss": 0.5713, "step": 7760 }, { "epoch": 0.65, "learning_rate": 3.0113519608759338e-05, "loss": 0.5593, "step": 7765 }, { "epoch": 0.65, "learning_rate": 3.0092011725827228e-05, "loss": 0.606, "step": 7770 }, { "epoch": 0.65, "learning_rate": 3.0070499910242246e-05, "loss": 0.6185, "step": 7775 }, { "epoch": 0.65, "learning_rate": 3.004898417861835e-05, "loss": 0.558, "step": 7780 }, { "epoch": 0.65, "learning_rate": 3.0027464547572538e-05, "loss": 0.6179, "step": 7785 }, { "epoch": 0.65, "learning_rate": 3.00059410337248e-05, "loss": 0.545, "step": 7790 }, { "epoch": 0.65, "learning_rate": 2.998441365369815e-05, "loss": 0.5756, "step": 7795 }, { "epoch": 0.65, "learning_rate": 2.996288242411856e-05, "loss": 0.6018, "step": 7800 }, { "epoch": 0.65, "learning_rate": 2.9941347361615004e-05, "loss": 0.6004, "step": 7805 }, { "epoch": 0.66, "learning_rate": 2.9919808482819378e-05, "loss": 0.5993, "step": 7810 }, { "epoch": 0.66, "learning_rate": 2.9898265804366565e-05, "loss": 0.5937, "step": 7815 }, { "epoch": 0.66, "learning_rate": 2.9876719342894367e-05, "loss": 0.5731, "step": 7820 }, { "epoch": 0.66, "learning_rate": 2.9855169115043497e-05, "loss": 0.6368, "step": 7825 }, { "epoch": 0.66, "learning_rate": 2.9833615137457587e-05, "loss": 0.6078, "step": 7830 }, { "epoch": 0.66, "learning_rate": 2.981205742678317e-05, "loss": 0.5412, "step": 7835 }, { "epoch": 0.66, "learning_rate": 2.979049599966966e-05, "loss": 0.519, "step": 7840 }, { "epoch": 0.66, "learning_rate": 2.9768930872769323e-05, "loss": 0.6168, "step": 7845 }, { "epoch": 0.66, "learning_rate": 2.9747362062737317e-05, "loss": 0.6293, "step": 7850 }, { "epoch": 0.66, "learning_rate": 2.97257895862316e-05, "loss": 0.5505, "step": 7855 }, { "epoch": 0.66, "learning_rate": 2.9704213459913007e-05, "loss": 0.5686, "step": 7860 }, { "epoch": 0.66, "learning_rate": 2.9682633700445157e-05, "loss": 0.503, "step": 7865 }, { "epoch": 0.66, "learning_rate": 2.96610503244945e-05, "loss": 0.5829, "step": 7870 }, { "epoch": 0.66, "learning_rate": 2.9639463348730266e-05, "loss": 0.5427, "step": 7875 }, { "epoch": 0.66, "learning_rate": 2.961787278982447e-05, "loss": 0.5489, "step": 7880 }, { "epoch": 0.66, "learning_rate": 2.9596278664451875e-05, "loss": 0.5864, "step": 7885 }, { "epoch": 0.66, "learning_rate": 2.9574680989290032e-05, "loss": 0.5268, "step": 7890 }, { "epoch": 0.66, "learning_rate": 2.955307978101921e-05, "loss": 0.5521, "step": 7895 }, { "epoch": 0.66, "learning_rate": 2.953147505632241e-05, "loss": 0.5742, "step": 7900 }, { "epoch": 0.66, "learning_rate": 2.950986683188537e-05, "loss": 0.5572, "step": 7905 }, { "epoch": 0.66, "learning_rate": 2.9488255124396496e-05, "loss": 0.6023, "step": 7910 }, { "epoch": 0.66, "learning_rate": 2.9466639950546902e-05, "loss": 0.5509, "step": 7915 }, { "epoch": 0.66, "learning_rate": 2.9445021327030386e-05, "loss": 0.5387, "step": 7920 }, { "epoch": 0.67, "learning_rate": 2.9423399270543388e-05, "loss": 0.5724, "step": 7925 }, { "epoch": 0.67, "learning_rate": 2.9401773797785032e-05, "loss": 0.5638, "step": 7930 }, { "epoch": 0.67, "learning_rate": 2.9380144925457055e-05, "loss": 0.6086, "step": 7935 }, { "epoch": 0.67, "learning_rate": 2.9358512670263822e-05, "loss": 0.6185, "step": 7940 }, { "epoch": 0.67, "learning_rate": 2.9336877048912325e-05, "loss": 0.5843, "step": 7945 }, { "epoch": 0.67, "learning_rate": 2.9315238078112132e-05, "loss": 0.5448, "step": 7950 }, { "epoch": 0.67, "learning_rate": 2.9293595774575428e-05, "loss": 0.5875, "step": 7955 }, { "epoch": 0.67, "learning_rate": 2.9271950155016947e-05, "loss": 0.6437, "step": 7960 }, { "epoch": 0.67, "learning_rate": 2.9250301236153988e-05, "loss": 0.5135, "step": 7965 }, { "epoch": 0.67, "learning_rate": 2.9228649034706413e-05, "loss": 0.5228, "step": 7970 }, { "epoch": 0.67, "learning_rate": 2.9206993567396608e-05, "loss": 0.5827, "step": 7975 }, { "epoch": 0.67, "learning_rate": 2.918533485094948e-05, "loss": 0.5356, "step": 7980 }, { "epoch": 0.67, "learning_rate": 2.9163672902092447e-05, "loss": 0.5882, "step": 7985 }, { "epoch": 0.67, "learning_rate": 2.9142007737555432e-05, "loss": 0.4995, "step": 7990 }, { "epoch": 0.67, "learning_rate": 2.912033937407082e-05, "loss": 0.6018, "step": 7995 }, { "epoch": 0.67, "learning_rate": 2.9098667828373495e-05, "loss": 0.5946, "step": 8000 }, { "epoch": 0.67, "learning_rate": 2.9076993117200768e-05, "loss": 0.5747, "step": 8005 }, { "epoch": 0.67, "learning_rate": 2.9055315257292425e-05, "loss": 0.5639, "step": 8010 }, { "epoch": 0.67, "learning_rate": 2.9033634265390668e-05, "loss": 0.5833, "step": 8015 }, { "epoch": 0.67, "learning_rate": 2.901195015824012e-05, "loss": 0.5435, "step": 8020 }, { "epoch": 0.67, "learning_rate": 2.8990262952587803e-05, "loss": 0.5168, "step": 8025 }, { "epoch": 0.67, "learning_rate": 2.896857266518314e-05, "loss": 0.6029, "step": 8030 }, { "epoch": 0.67, "learning_rate": 2.8946879312777936e-05, "loss": 0.5473, "step": 8035 }, { "epoch": 0.67, "learning_rate": 2.8925182912126358e-05, "loss": 0.6284, "step": 8040 }, { "epoch": 0.68, "learning_rate": 2.8903483479984937e-05, "loss": 0.5114, "step": 8045 }, { "epoch": 0.68, "learning_rate": 2.888178103311253e-05, "loss": 0.5815, "step": 8050 }, { "epoch": 0.68, "learning_rate": 2.8860075588270325e-05, "loss": 0.624, "step": 8055 }, { "epoch": 0.68, "learning_rate": 2.8838367162221837e-05, "loss": 0.57, "step": 8060 }, { "epoch": 0.68, "learning_rate": 2.881665577173287e-05, "loss": 0.5816, "step": 8065 }, { "epoch": 0.68, "learning_rate": 2.8794941433571544e-05, "loss": 0.5808, "step": 8070 }, { "epoch": 0.68, "learning_rate": 2.8773224164508205e-05, "loss": 0.6112, "step": 8075 }, { "epoch": 0.68, "learning_rate": 2.875150398131552e-05, "loss": 0.5596, "step": 8080 }, { "epoch": 0.68, "learning_rate": 2.8729780900768368e-05, "loss": 0.5422, "step": 8085 }, { "epoch": 0.68, "learning_rate": 2.870805493964387e-05, "loss": 0.6, "step": 8090 }, { "epoch": 0.68, "learning_rate": 2.868632611472139e-05, "loss": 0.5026, "step": 8095 }, { "epoch": 0.68, "learning_rate": 2.86645944427825e-05, "loss": 0.5747, "step": 8100 }, { "epoch": 0.68, "learning_rate": 2.8642859940610946e-05, "loss": 0.5662, "step": 8105 }, { "epoch": 0.68, "learning_rate": 2.8621122624992686e-05, "loss": 0.5633, "step": 8110 }, { "epoch": 0.68, "learning_rate": 2.8599382512715843e-05, "loss": 0.5233, "step": 8115 }, { "epoch": 0.68, "learning_rate": 2.8577639620570705e-05, "loss": 0.5976, "step": 8120 }, { "epoch": 0.68, "learning_rate": 2.855589396534969e-05, "loss": 0.5813, "step": 8125 }, { "epoch": 0.68, "learning_rate": 2.8534145563847375e-05, "loss": 0.5511, "step": 8130 }, { "epoch": 0.68, "learning_rate": 2.8512394432860428e-05, "loss": 0.5708, "step": 8135 }, { "epoch": 0.68, "learning_rate": 2.8490640589187652e-05, "loss": 0.5578, "step": 8140 }, { "epoch": 0.68, "learning_rate": 2.8468884049629924e-05, "loss": 0.6012, "step": 8145 }, { "epoch": 0.68, "learning_rate": 2.844712483099023e-05, "loss": 0.4779, "step": 8150 }, { "epoch": 0.68, "learning_rate": 2.8425362950073604e-05, "loss": 0.5872, "step": 8155 }, { "epoch": 0.68, "learning_rate": 2.8403598423687132e-05, "loss": 0.6343, "step": 8160 }, { "epoch": 0.69, "learning_rate": 2.8381831268639956e-05, "loss": 0.593, "step": 8165 }, { "epoch": 0.69, "learning_rate": 2.8360061501743246e-05, "loss": 0.6163, "step": 8170 }, { "epoch": 0.69, "learning_rate": 2.833828913981018e-05, "loss": 0.5754, "step": 8175 }, { "epoch": 0.69, "learning_rate": 2.8316514199655963e-05, "loss": 0.5691, "step": 8180 }, { "epoch": 0.69, "learning_rate": 2.8294736698097762e-05, "loss": 0.539, "step": 8185 }, { "epoch": 0.69, "learning_rate": 2.8272956651954746e-05, "loss": 0.5533, "step": 8190 }, { "epoch": 0.69, "learning_rate": 2.8251174078048025e-05, "loss": 0.6116, "step": 8195 }, { "epoch": 0.69, "learning_rate": 2.8229388993200685e-05, "loss": 0.5856, "step": 8200 }, { "epoch": 0.69, "learning_rate": 2.820760141423774e-05, "loss": 0.5712, "step": 8205 }, { "epoch": 0.69, "learning_rate": 2.8185811357986136e-05, "loss": 0.5828, "step": 8210 }, { "epoch": 0.69, "learning_rate": 2.8164018841274718e-05, "loss": 0.556, "step": 8215 }, { "epoch": 0.69, "learning_rate": 2.8142223880934248e-05, "loss": 0.5556, "step": 8220 }, { "epoch": 0.69, "learning_rate": 2.8120426493797376e-05, "loss": 0.6017, "step": 8225 }, { "epoch": 0.69, "learning_rate": 2.8098626696698606e-05, "loss": 0.5651, "step": 8230 }, { "epoch": 0.69, "learning_rate": 2.8076824506474316e-05, "loss": 0.5754, "step": 8235 }, { "epoch": 0.69, "learning_rate": 2.805501993996274e-05, "loss": 0.5968, "step": 8240 }, { "epoch": 0.69, "learning_rate": 2.8033213014003935e-05, "loss": 0.6041, "step": 8245 }, { "epoch": 0.69, "learning_rate": 2.801140374543979e-05, "loss": 0.5494, "step": 8250 }, { "epoch": 0.69, "learning_rate": 2.798959215111399e-05, "loss": 0.5751, "step": 8255 }, { "epoch": 0.69, "learning_rate": 2.7967778247872035e-05, "loss": 0.5544, "step": 8260 }, { "epoch": 0.69, "learning_rate": 2.7945962052561185e-05, "loss": 0.5765, "step": 8265 }, { "epoch": 0.69, "learning_rate": 2.7924143582030497e-05, "loss": 0.5272, "step": 8270 }, { "epoch": 0.69, "learning_rate": 2.7902322853130757e-05, "loss": 0.5409, "step": 8275 }, { "epoch": 0.69, "learning_rate": 2.7880499882714518e-05, "loss": 0.5584, "step": 8280 }, { "epoch": 0.7, "learning_rate": 2.7858674687636048e-05, "loss": 0.5749, "step": 8285 }, { "epoch": 0.7, "learning_rate": 2.7836847284751355e-05, "loss": 0.5842, "step": 8290 }, { "epoch": 0.7, "learning_rate": 2.781501769091812e-05, "loss": 0.5906, "step": 8295 }, { "epoch": 0.7, "learning_rate": 2.7793185922995745e-05, "loss": 0.5503, "step": 8300 }, { "epoch": 0.7, "learning_rate": 2.7771351997845292e-05, "loss": 0.7508, "step": 8305 }, { "epoch": 0.7, "learning_rate": 2.77495159323295e-05, "loss": 0.6294, "step": 8310 }, { "epoch": 0.7, "learning_rate": 2.772767774331276e-05, "loss": 0.537, "step": 8315 }, { "epoch": 0.7, "learning_rate": 2.7705837447661103e-05, "loss": 0.5844, "step": 8320 }, { "epoch": 0.7, "learning_rate": 2.7683995062242174e-05, "loss": 0.6043, "step": 8325 }, { "epoch": 0.7, "learning_rate": 2.7662150603925256e-05, "loss": 0.5676, "step": 8330 }, { "epoch": 0.7, "learning_rate": 2.7640304089581216e-05, "loss": 0.6465, "step": 8335 }, { "epoch": 0.7, "learning_rate": 2.7618455536082504e-05, "loss": 0.5627, "step": 8340 }, { "epoch": 0.7, "learning_rate": 2.7596604960303164e-05, "loss": 0.5816, "step": 8345 }, { "epoch": 0.7, "learning_rate": 2.757475237911879e-05, "loss": 0.5701, "step": 8350 }, { "epoch": 0.7, "learning_rate": 2.755289780940652e-05, "loss": 0.5689, "step": 8355 }, { "epoch": 0.7, "learning_rate": 2.7531041268045033e-05, "loss": 0.5732, "step": 8360 }, { "epoch": 0.7, "learning_rate": 2.7509182771914543e-05, "loss": 0.5419, "step": 8365 }, { "epoch": 0.7, "learning_rate": 2.7487322337896754e-05, "loss": 0.632, "step": 8370 }, { "epoch": 0.7, "learning_rate": 2.746545998287488e-05, "loss": 0.5858, "step": 8375 }, { "epoch": 0.7, "learning_rate": 2.7443595723733613e-05, "loss": 0.5305, "step": 8380 }, { "epoch": 0.7, "learning_rate": 2.7421729577359105e-05, "loss": 0.5735, "step": 8385 }, { "epoch": 0.7, "learning_rate": 2.739986156063899e-05, "loss": 0.5853, "step": 8390 }, { "epoch": 0.7, "learning_rate": 2.737799169046233e-05, "loss": 0.5815, "step": 8395 }, { "epoch": 0.7, "learning_rate": 2.735611998371962e-05, "loss": 0.5318, "step": 8400 }, { "epoch": 0.71, "learning_rate": 2.733424645730278e-05, "loss": 0.528, "step": 8405 }, { "epoch": 0.71, "learning_rate": 2.7312371128105124e-05, "loss": 0.5863, "step": 8410 }, { "epoch": 0.71, "learning_rate": 2.7290494013021366e-05, "loss": 0.5277, "step": 8415 }, { "epoch": 0.71, "learning_rate": 2.72686151289476e-05, "loss": 0.5534, "step": 8420 }, { "epoch": 0.71, "learning_rate": 2.7246734492781283e-05, "loss": 0.587, "step": 8425 }, { "epoch": 0.71, "learning_rate": 2.7224852121421234e-05, "loss": 0.628, "step": 8430 }, { "epoch": 0.71, "learning_rate": 2.72029680317676e-05, "loss": 0.5535, "step": 8435 }, { "epoch": 0.71, "learning_rate": 2.7181082240721855e-05, "loss": 0.5062, "step": 8440 }, { "epoch": 0.71, "learning_rate": 2.7159194765186792e-05, "loss": 0.7016, "step": 8445 }, { "epoch": 0.71, "learning_rate": 2.7137305622066504e-05, "loss": 0.5564, "step": 8450 }, { "epoch": 0.71, "learning_rate": 2.7115414828266383e-05, "loss": 0.5727, "step": 8455 }, { "epoch": 0.71, "learning_rate": 2.7093522400693084e-05, "loss": 0.5875, "step": 8460 }, { "epoch": 0.71, "learning_rate": 2.7071628356254512e-05, "loss": 0.5351, "step": 8465 }, { "epoch": 0.71, "learning_rate": 2.704973271185985e-05, "loss": 0.5347, "step": 8470 }, { "epoch": 0.71, "learning_rate": 2.702783548441949e-05, "loss": 0.5271, "step": 8475 }, { "epoch": 0.71, "learning_rate": 2.7005936690845056e-05, "loss": 0.5705, "step": 8480 }, { "epoch": 0.71, "learning_rate": 2.6984036348049385e-05, "loss": 0.5765, "step": 8485 }, { "epoch": 0.71, "learning_rate": 2.696213447294652e-05, "loss": 0.6702, "step": 8490 }, { "epoch": 0.71, "learning_rate": 2.6940231082451662e-05, "loss": 0.6043, "step": 8495 }, { "epoch": 0.71, "learning_rate": 2.69183261934812e-05, "loss": 0.553, "step": 8500 }, { "epoch": 0.71, "learning_rate": 2.6896419822952686e-05, "loss": 0.6892, "step": 8505 }, { "epoch": 0.71, "learning_rate": 2.6874511987784783e-05, "loss": 0.5674, "step": 8510 }, { "epoch": 0.71, "learning_rate": 2.685260270489734e-05, "loss": 0.5483, "step": 8515 }, { "epoch": 0.71, "learning_rate": 2.6830691991211266e-05, "loss": 0.6051, "step": 8520 }, { "epoch": 0.72, "learning_rate": 2.680877986364861e-05, "loss": 0.4733, "step": 8525 }, { "epoch": 0.72, "learning_rate": 2.678686633913251e-05, "loss": 0.5878, "step": 8530 }, { "epoch": 0.72, "learning_rate": 2.6764951434587172e-05, "loss": 0.6478, "step": 8535 }, { "epoch": 0.72, "learning_rate": 2.6743035166937885e-05, "loss": 0.5721, "step": 8540 }, { "epoch": 0.72, "learning_rate": 2.6721117553110973e-05, "loss": 0.615, "step": 8545 }, { "epoch": 0.72, "learning_rate": 2.6699198610033797e-05, "loss": 0.4919, "step": 8550 }, { "epoch": 0.72, "learning_rate": 2.6677278354634764e-05, "loss": 0.5671, "step": 8555 }, { "epoch": 0.72, "learning_rate": 2.6655356803843285e-05, "loss": 0.6125, "step": 8560 }, { "epoch": 0.72, "learning_rate": 2.663343397458976e-05, "loss": 0.6405, "step": 8565 }, { "epoch": 0.72, "learning_rate": 2.66115098838056e-05, "loss": 0.5284, "step": 8570 }, { "epoch": 0.72, "learning_rate": 2.6589584548423174e-05, "loss": 0.5449, "step": 8575 }, { "epoch": 0.72, "learning_rate": 2.6567657985375812e-05, "loss": 0.6408, "step": 8580 }, { "epoch": 0.72, "learning_rate": 2.6545730211597793e-05, "loss": 0.5764, "step": 8585 }, { "epoch": 0.72, "learning_rate": 2.6523801244024332e-05, "loss": 0.5166, "step": 8590 }, { "epoch": 0.72, "learning_rate": 2.650187109959158e-05, "loss": 0.5233, "step": 8595 }, { "epoch": 0.72, "learning_rate": 2.647993979523658e-05, "loss": 0.4735, "step": 8600 }, { "epoch": 0.72, "learning_rate": 2.6458007347897274e-05, "loss": 0.6418, "step": 8605 }, { "epoch": 0.72, "learning_rate": 2.643607377451249e-05, "loss": 0.6357, "step": 8610 }, { "epoch": 0.72, "learning_rate": 2.6414139092021915e-05, "loss": 0.5987, "step": 8615 }, { "epoch": 0.72, "learning_rate": 2.6392203317366114e-05, "loss": 0.5751, "step": 8620 }, { "epoch": 0.72, "learning_rate": 2.6370266467486477e-05, "loss": 0.5523, "step": 8625 }, { "epoch": 0.72, "learning_rate": 2.634832855932523e-05, "loss": 0.5265, "step": 8630 }, { "epoch": 0.72, "learning_rate": 2.6326389609825415e-05, "loss": 0.6095, "step": 8635 }, { "epoch": 0.73, "learning_rate": 2.6304449635930882e-05, "loss": 0.5795, "step": 8640 }, { "epoch": 0.73, "learning_rate": 2.628250865458628e-05, "loss": 0.6781, "step": 8645 }, { "epoch": 0.73, "learning_rate": 2.6260566682737013e-05, "loss": 0.5385, "step": 8650 }, { "epoch": 0.73, "learning_rate": 2.623862373732927e-05, "loss": 0.5289, "step": 8655 }, { "epoch": 0.73, "learning_rate": 2.6216679835309976e-05, "loss": 0.5648, "step": 8660 }, { "epoch": 0.73, "learning_rate": 2.6194734993626813e-05, "loss": 0.6226, "step": 8665 }, { "epoch": 0.73, "learning_rate": 2.6172789229228177e-05, "loss": 0.6338, "step": 8670 }, { "epoch": 0.73, "learning_rate": 2.6150842559063177e-05, "loss": 0.5265, "step": 8675 }, { "epoch": 0.73, "learning_rate": 2.6128895000081637e-05, "loss": 0.525, "step": 8680 }, { "epoch": 0.73, "learning_rate": 2.6106946569234043e-05, "loss": 0.5045, "step": 8685 }, { "epoch": 0.73, "learning_rate": 2.6084997283471556e-05, "loss": 0.5995, "step": 8690 }, { "epoch": 0.73, "learning_rate": 2.606304715974602e-05, "loss": 0.6382, "step": 8695 }, { "epoch": 0.73, "learning_rate": 2.6041096215009908e-05, "loss": 0.5422, "step": 8700 }, { "epoch": 0.73, "learning_rate": 2.601914446621634e-05, "loss": 0.5713, "step": 8705 }, { "epoch": 0.73, "learning_rate": 2.5997191930319042e-05, "loss": 0.6137, "step": 8710 }, { "epoch": 0.73, "learning_rate": 2.5975238624272365e-05, "loss": 0.5627, "step": 8715 }, { "epoch": 0.73, "learning_rate": 2.595328456503122e-05, "loss": 0.6384, "step": 8720 }, { "epoch": 0.73, "learning_rate": 2.593132976955115e-05, "loss": 0.5514, "step": 8725 }, { "epoch": 0.73, "learning_rate": 2.5909374254788227e-05, "loss": 0.5686, "step": 8730 }, { "epoch": 0.73, "learning_rate": 2.5887418037699107e-05, "loss": 0.5669, "step": 8735 }, { "epoch": 0.73, "learning_rate": 2.5865461135240958e-05, "loss": 0.6014, "step": 8740 }, { "epoch": 0.73, "learning_rate": 2.5843503564371506e-05, "loss": 0.5819, "step": 8745 }, { "epoch": 0.73, "learning_rate": 2.582154534204897e-05, "loss": 0.6202, "step": 8750 }, { "epoch": 0.73, "learning_rate": 2.579958648523209e-05, "loss": 0.548, "step": 8755 }, { "epoch": 0.74, "learning_rate": 2.5777627010880083e-05, "loss": 0.5777, "step": 8760 }, { "epoch": 0.74, "learning_rate": 2.5755666935952662e-05, "loss": 0.6238, "step": 8765 }, { "epoch": 0.74, "learning_rate": 2.573370627740997e-05, "loss": 0.5581, "step": 8770 }, { "epoch": 0.74, "learning_rate": 2.5711745052212632e-05, "loss": 0.5888, "step": 8775 }, { "epoch": 0.74, "learning_rate": 2.568978327732171e-05, "loss": 0.5529, "step": 8780 }, { "epoch": 0.74, "learning_rate": 2.5667820969698663e-05, "loss": 0.5439, "step": 8785 }, { "epoch": 0.74, "learning_rate": 2.5645858146305392e-05, "loss": 0.5078, "step": 8790 }, { "epoch": 0.74, "learning_rate": 2.5623894824104176e-05, "loss": 0.5884, "step": 8795 }, { "epoch": 0.74, "learning_rate": 2.5601931020057684e-05, "loss": 0.6262, "step": 8800 }, { "epoch": 0.74, "learning_rate": 2.5579966751128966e-05, "loss": 0.579, "step": 8805 }, { "epoch": 0.74, "learning_rate": 2.555800203428142e-05, "loss": 0.5989, "step": 8810 }, { "epoch": 0.74, "learning_rate": 2.553603688647881e-05, "loss": 0.5469, "step": 8815 }, { "epoch": 0.74, "learning_rate": 2.5514071324685192e-05, "loss": 0.5149, "step": 8820 }, { "epoch": 0.74, "learning_rate": 2.549210536586499e-05, "loss": 0.6305, "step": 8825 }, { "epoch": 0.74, "learning_rate": 2.547013902698289e-05, "loss": 0.5985, "step": 8830 }, { "epoch": 0.74, "learning_rate": 2.544817232500391e-05, "loss": 0.6099, "step": 8835 }, { "epoch": 0.74, "learning_rate": 2.5426205276893323e-05, "loss": 0.5353, "step": 8840 }, { "epoch": 0.74, "learning_rate": 2.540423789961669e-05, "loss": 0.5458, "step": 8845 }, { "epoch": 0.74, "learning_rate": 2.5382270210139808e-05, "loss": 0.5793, "step": 8850 }, { "epoch": 0.74, "learning_rate": 2.5360302225428712e-05, "loss": 0.6149, "step": 8855 }, { "epoch": 0.74, "learning_rate": 2.5338333962449685e-05, "loss": 0.5074, "step": 8860 }, { "epoch": 0.74, "learning_rate": 2.5316365438169203e-05, "loss": 0.6202, "step": 8865 }, { "epoch": 0.74, "learning_rate": 2.529439666955397e-05, "loss": 0.5535, "step": 8870 }, { "epoch": 0.74, "learning_rate": 2.5272427673570865e-05, "loss": 0.5452, "step": 8875 }, { "epoch": 0.75, "learning_rate": 2.5250458467186928e-05, "loss": 0.5607, "step": 8880 }, { "epoch": 0.75, "learning_rate": 2.5228489067369383e-05, "loss": 0.5864, "step": 8885 }, { "epoch": 0.75, "learning_rate": 2.5206519491085588e-05, "loss": 0.6091, "step": 8890 }, { "epoch": 0.75, "learning_rate": 2.518454975530305e-05, "loss": 0.5421, "step": 8895 }, { "epoch": 0.75, "learning_rate": 2.5162579876989393e-05, "loss": 0.5652, "step": 8900 }, { "epoch": 0.75, "learning_rate": 2.514060987311236e-05, "loss": 0.6063, "step": 8905 }, { "epoch": 0.75, "learning_rate": 2.511863976063977e-05, "loss": 0.5479, "step": 8910 }, { "epoch": 0.75, "learning_rate": 2.5096669556539544e-05, "loss": 0.5892, "step": 8915 }, { "epoch": 0.75, "learning_rate": 2.5074699277779674e-05, "loss": 0.5817, "step": 8920 }, { "epoch": 0.75, "learning_rate": 2.5052728941328197e-05, "loss": 0.6609, "step": 8925 }, { "epoch": 0.75, "learning_rate": 2.5030758564153202e-05, "loss": 0.6005, "step": 8930 }, { "epoch": 0.75, "learning_rate": 2.5008788163222806e-05, "loss": 0.6105, "step": 8935 }, { "epoch": 0.75, "learning_rate": 2.4986817755505158e-05, "loss": 0.5742, "step": 8940 }, { "epoch": 0.75, "learning_rate": 2.4964847357968392e-05, "loss": 0.5471, "step": 8945 }, { "epoch": 0.75, "learning_rate": 2.4942876987580652e-05, "loss": 0.6002, "step": 8950 }, { "epoch": 0.75, "learning_rate": 2.4920906661310054e-05, "loss": 0.5549, "step": 8955 }, { "epoch": 0.75, "learning_rate": 2.4898936396124663e-05, "loss": 0.5467, "step": 8960 }, { "epoch": 0.75, "learning_rate": 2.4876966208992528e-05, "loss": 0.5796, "step": 8965 }, { "epoch": 0.75, "learning_rate": 2.485499611688162e-05, "loss": 0.6106, "step": 8970 }, { "epoch": 0.75, "learning_rate": 2.4833026136759844e-05, "loss": 0.6402, "step": 8975 }, { "epoch": 0.75, "learning_rate": 2.4811056285594997e-05, "loss": 0.6169, "step": 8980 }, { "epoch": 0.75, "learning_rate": 2.47890865803548e-05, "loss": 0.5351, "step": 8985 }, { "epoch": 0.75, "learning_rate": 2.4767117038006865e-05, "loss": 0.5476, "step": 8990 }, { "epoch": 0.75, "learning_rate": 2.4745147675518655e-05, "loss": 0.6095, "step": 8995 }, { "epoch": 0.76, "learning_rate": 2.4723178509857507e-05, "loss": 0.6074, "step": 9000 }, { "epoch": 0.76, "learning_rate": 2.470120955799061e-05, "loss": 0.483, "step": 9005 }, { "epoch": 0.76, "learning_rate": 2.4679240836884983e-05, "loss": 0.6387, "step": 9010 }, { "epoch": 0.76, "learning_rate": 2.4657272363507474e-05, "loss": 0.5963, "step": 9015 }, { "epoch": 0.76, "learning_rate": 2.4635304154824716e-05, "loss": 0.5508, "step": 9020 }, { "epoch": 0.76, "learning_rate": 2.4613336227803167e-05, "loss": 0.5445, "step": 9025 }, { "epoch": 0.76, "learning_rate": 2.4591368599409054e-05, "loss": 0.5794, "step": 9030 }, { "epoch": 0.76, "learning_rate": 2.4569401286608375e-05, "loss": 0.5841, "step": 9035 }, { "epoch": 0.76, "learning_rate": 2.4547434306366875e-05, "loss": 0.6202, "step": 9040 }, { "epoch": 0.76, "learning_rate": 2.4525467675650068e-05, "loss": 0.5308, "step": 9045 }, { "epoch": 0.76, "learning_rate": 2.450350141142316e-05, "loss": 0.6492, "step": 9050 }, { "epoch": 0.76, "learning_rate": 2.44815355306511e-05, "loss": 0.5672, "step": 9055 }, { "epoch": 0.76, "learning_rate": 2.445957005029854e-05, "loss": 0.5872, "step": 9060 }, { "epoch": 0.76, "learning_rate": 2.4437604987329822e-05, "loss": 0.5095, "step": 9065 }, { "epoch": 0.76, "learning_rate": 2.4415640358708956e-05, "loss": 0.523, "step": 9070 }, { "epoch": 0.76, "learning_rate": 2.4393676181399625e-05, "loss": 0.5364, "step": 9075 }, { "epoch": 0.76, "learning_rate": 2.4371712472365154e-05, "loss": 0.5498, "step": 9080 }, { "epoch": 0.76, "learning_rate": 2.4349749248568522e-05, "loss": 0.5024, "step": 9085 }, { "epoch": 0.76, "learning_rate": 2.4327786526972325e-05, "loss": 0.5339, "step": 9090 }, { "epoch": 0.76, "learning_rate": 2.430582432453877e-05, "loss": 0.5698, "step": 9095 }, { "epoch": 0.76, "learning_rate": 2.428386265822965e-05, "loss": 0.5797, "step": 9100 }, { "epoch": 0.76, "learning_rate": 2.4261901545006377e-05, "loss": 0.598, "step": 9105 }, { "epoch": 0.76, "learning_rate": 2.4239941001829907e-05, "loss": 0.5899, "step": 9110 }, { "epoch": 0.76, "learning_rate": 2.4217981045660774e-05, "loss": 0.6133, "step": 9115 }, { "epoch": 0.77, "learning_rate": 2.419602169345903e-05, "loss": 0.6287, "step": 9120 }, { "epoch": 0.77, "learning_rate": 2.4174062962184297e-05, "loss": 0.6422, "step": 9125 }, { "epoch": 0.77, "learning_rate": 2.4152104868795697e-05, "loss": 0.5219, "step": 9130 }, { "epoch": 0.77, "learning_rate": 2.413014743025186e-05, "loss": 0.6196, "step": 9135 }, { "epoch": 0.77, "learning_rate": 2.4108190663510916e-05, "loss": 0.5151, "step": 9140 }, { "epoch": 0.77, "learning_rate": 2.4086234585530474e-05, "loss": 0.5147, "step": 9145 }, { "epoch": 0.77, "learning_rate": 2.4064279213267614e-05, "loss": 0.6384, "step": 9150 }, { "epoch": 0.77, "learning_rate": 2.404232456367886e-05, "loss": 0.5817, "step": 9155 }, { "epoch": 0.77, "learning_rate": 2.4020370653720187e-05, "loss": 0.5582, "step": 9160 }, { "epoch": 0.77, "learning_rate": 2.3998417500347e-05, "loss": 0.5643, "step": 9165 }, { "epoch": 0.77, "learning_rate": 2.3976465120514117e-05, "loss": 0.4848, "step": 9170 }, { "epoch": 0.77, "learning_rate": 2.3954513531175765e-05, "loss": 0.5501, "step": 9175 }, { "epoch": 0.77, "learning_rate": 2.3932562749285543e-05, "loss": 0.5466, "step": 9180 }, { "epoch": 0.77, "learning_rate": 2.391061279179645e-05, "loss": 0.5493, "step": 9185 }, { "epoch": 0.77, "learning_rate": 2.3888663675660817e-05, "loss": 0.5819, "step": 9190 }, { "epoch": 0.77, "learning_rate": 2.3866715417830352e-05, "loss": 0.6148, "step": 9195 }, { "epoch": 0.77, "learning_rate": 2.3844768035256095e-05, "loss": 0.599, "step": 9200 }, { "epoch": 0.77, "learning_rate": 2.3822821544888406e-05, "loss": 0.589, "step": 9205 }, { "epoch": 0.77, "learning_rate": 2.3800875963676963e-05, "loss": 0.5764, "step": 9210 }, { "epoch": 0.77, "learning_rate": 2.3778931308570727e-05, "loss": 0.5882, "step": 9215 }, { "epoch": 0.77, "learning_rate": 2.3756987596517953e-05, "loss": 0.5378, "step": 9220 }, { "epoch": 0.77, "learning_rate": 2.373504484446617e-05, "loss": 0.5796, "step": 9225 }, { "epoch": 0.77, "learning_rate": 2.3713103069362163e-05, "loss": 0.5359, "step": 9230 }, { "epoch": 0.77, "learning_rate": 2.3691162288151963e-05, "loss": 0.5697, "step": 9235 }, { "epoch": 0.78, "learning_rate": 2.3669222517780833e-05, "loss": 0.6193, "step": 9240 }, { "epoch": 0.78, "learning_rate": 2.364728377519325e-05, "loss": 0.5849, "step": 9245 }, { "epoch": 0.78, "learning_rate": 2.3625346077332903e-05, "loss": 0.5966, "step": 9250 }, { "epoch": 0.78, "learning_rate": 2.360340944114269e-05, "loss": 0.5283, "step": 9255 }, { "epoch": 0.78, "learning_rate": 2.358147388356465e-05, "loss": 0.5672, "step": 9260 }, { "epoch": 0.78, "learning_rate": 2.355953942154002e-05, "loss": 0.5757, "step": 9265 }, { "epoch": 0.78, "learning_rate": 2.3537606072009177e-05, "loss": 0.5263, "step": 9270 }, { "epoch": 0.78, "learning_rate": 2.3515673851911652e-05, "loss": 0.5358, "step": 9275 }, { "epoch": 0.78, "learning_rate": 2.3493742778186092e-05, "loss": 0.5776, "step": 9280 }, { "epoch": 0.78, "learning_rate": 2.3471812867770266e-05, "loss": 0.5843, "step": 9285 }, { "epoch": 0.78, "learning_rate": 2.3449884137601038e-05, "loss": 0.5689, "step": 9290 }, { "epoch": 0.78, "learning_rate": 2.3427956604614364e-05, "loss": 0.5917, "step": 9295 }, { "epoch": 0.78, "learning_rate": 2.3406030285745272e-05, "loss": 0.534, "step": 9300 }, { "epoch": 0.78, "learning_rate": 2.3384105197927858e-05, "loss": 0.5875, "step": 9305 }, { "epoch": 0.78, "learning_rate": 2.336218135809526e-05, "loss": 0.561, "step": 9310 }, { "epoch": 0.78, "learning_rate": 2.3340258783179673e-05, "loss": 0.5339, "step": 9315 }, { "epoch": 0.78, "learning_rate": 2.3318337490112275e-05, "loss": 0.5283, "step": 9320 }, { "epoch": 0.78, "learning_rate": 2.3296417495823305e-05, "loss": 0.6238, "step": 9325 }, { "epoch": 0.78, "learning_rate": 2.3274498817241945e-05, "loss": 0.5455, "step": 9330 }, { "epoch": 0.78, "learning_rate": 2.3252581471296396e-05, "loss": 0.5771, "step": 9335 }, { "epoch": 0.78, "learning_rate": 2.323066547491382e-05, "loss": 0.5395, "step": 9340 }, { "epoch": 0.78, "learning_rate": 2.3208750845020343e-05, "loss": 0.4973, "step": 9345 }, { "epoch": 0.78, "learning_rate": 2.3186837598541037e-05, "loss": 0.548, "step": 9350 }, { "epoch": 0.79, "learning_rate": 2.3164925752399883e-05, "loss": 0.5566, "step": 9355 }, { "epoch": 0.79, "learning_rate": 2.3143015323519805e-05, "loss": 0.593, "step": 9360 }, { "epoch": 0.79, "learning_rate": 2.3121106328822623e-05, "loss": 0.5857, "step": 9365 }, { "epoch": 0.79, "learning_rate": 2.3099198785229058e-05, "loss": 0.5293, "step": 9370 }, { "epoch": 0.79, "learning_rate": 2.3077292709658686e-05, "loss": 0.5971, "step": 9375 }, { "epoch": 0.79, "learning_rate": 2.305538811902998e-05, "loss": 0.5544, "step": 9380 }, { "epoch": 0.79, "learning_rate": 2.3033485030260243e-05, "loss": 0.6552, "step": 9385 }, { "epoch": 0.79, "learning_rate": 2.3011583460265634e-05, "loss": 0.5588, "step": 9390 }, { "epoch": 0.79, "learning_rate": 2.298968342596114e-05, "loss": 0.5946, "step": 9395 }, { "epoch": 0.79, "learning_rate": 2.2967784944260532e-05, "loss": 0.6247, "step": 9400 }, { "epoch": 0.79, "learning_rate": 2.2945888032076423e-05, "loss": 0.5747, "step": 9405 }, { "epoch": 0.79, "learning_rate": 2.2923992706320183e-05, "loss": 0.5862, "step": 9410 }, { "epoch": 0.79, "learning_rate": 2.2902098983901972e-05, "loss": 0.6103, "step": 9415 }, { "epoch": 0.79, "learning_rate": 2.2880206881730708e-05, "loss": 0.5866, "step": 9420 }, { "epoch": 0.79, "learning_rate": 2.285831641671406e-05, "loss": 0.5642, "step": 9425 }, { "epoch": 0.79, "learning_rate": 2.2836427605758436e-05, "loss": 0.5452, "step": 9430 }, { "epoch": 0.79, "learning_rate": 2.2814540465768953e-05, "loss": 0.5581, "step": 9435 }, { "epoch": 0.79, "learning_rate": 2.2792655013649444e-05, "loss": 0.538, "step": 9440 }, { "epoch": 0.79, "learning_rate": 2.2770771266302442e-05, "loss": 0.5751, "step": 9445 }, { "epoch": 0.79, "learning_rate": 2.2748889240629167e-05, "loss": 0.5992, "step": 9450 }, { "epoch": 0.79, "learning_rate": 2.2727008953529505e-05, "loss": 0.5067, "step": 9455 }, { "epoch": 0.79, "learning_rate": 2.2705130421901987e-05, "loss": 0.5811, "step": 9460 }, { "epoch": 0.79, "learning_rate": 2.268325366264382e-05, "loss": 0.6189, "step": 9465 }, { "epoch": 0.79, "learning_rate": 2.266137869265079e-05, "loss": 0.5786, "step": 9470 }, { "epoch": 0.8, "learning_rate": 2.263950552881735e-05, "loss": 0.6097, "step": 9475 }, { "epoch": 0.8, "learning_rate": 2.2617634188036542e-05, "loss": 0.5727, "step": 9480 }, { "epoch": 0.8, "learning_rate": 2.2595764687199998e-05, "loss": 0.5583, "step": 9485 }, { "epoch": 0.8, "learning_rate": 2.2573897043197927e-05, "loss": 0.5724, "step": 9490 }, { "epoch": 0.8, "learning_rate": 2.2552031272919106e-05, "loss": 0.5476, "step": 9495 }, { "epoch": 0.8, "learning_rate": 2.2530167393250865e-05, "loss": 0.4983, "step": 9500 }, { "epoch": 0.8, "learning_rate": 2.2508305421079075e-05, "loss": 0.642, "step": 9505 }, { "epoch": 0.8, "learning_rate": 2.2486445373288143e-05, "loss": 0.5709, "step": 9510 }, { "epoch": 0.8, "learning_rate": 2.2464587266760964e-05, "loss": 0.6497, "step": 9515 }, { "epoch": 0.8, "learning_rate": 2.2442731118378956e-05, "loss": 0.5658, "step": 9520 }, { "epoch": 0.8, "learning_rate": 2.2420876945022017e-05, "loss": 0.5435, "step": 9525 }, { "epoch": 0.8, "learning_rate": 2.2399024763568522e-05, "loss": 0.5933, "step": 9530 }, { "epoch": 0.8, "learning_rate": 2.237717459089532e-05, "loss": 0.5379, "step": 9535 }, { "epoch": 0.8, "learning_rate": 2.235532644387767e-05, "loss": 0.5423, "step": 9540 }, { "epoch": 0.8, "learning_rate": 2.2333480339389308e-05, "loss": 0.6245, "step": 9545 }, { "epoch": 0.8, "learning_rate": 2.2311636294302367e-05, "loss": 0.6058, "step": 9550 }, { "epoch": 0.8, "learning_rate": 2.2289794325487404e-05, "loss": 0.518, "step": 9555 }, { "epoch": 0.8, "learning_rate": 2.2267954449813365e-05, "loss": 0.556, "step": 9560 }, { "epoch": 0.8, "learning_rate": 2.2246116684147587e-05, "loss": 0.5569, "step": 9565 }, { "epoch": 0.8, "learning_rate": 2.222428104535577e-05, "loss": 0.5927, "step": 9570 }, { "epoch": 0.8, "learning_rate": 2.2202447550301962e-05, "loss": 0.6412, "step": 9575 }, { "epoch": 0.8, "learning_rate": 2.2180616215848582e-05, "loss": 0.5437, "step": 9580 }, { "epoch": 0.8, "learning_rate": 2.2158787058856355e-05, "loss": 0.5921, "step": 9585 }, { "epoch": 0.8, "learning_rate": 2.2136960096184344e-05, "loss": 0.5695, "step": 9590 }, { "epoch": 0.81, "learning_rate": 2.2115135344689897e-05, "loss": 0.539, "step": 9595 }, { "epoch": 0.81, "learning_rate": 2.2093312821228668e-05, "loss": 0.5134, "step": 9600 }, { "epoch": 0.81, "learning_rate": 2.20714925426546e-05, "loss": 0.5148, "step": 9605 }, { "epoch": 0.81, "learning_rate": 2.204967452581987e-05, "loss": 0.5208, "step": 9610 }, { "epoch": 0.81, "learning_rate": 2.2027858787574932e-05, "loss": 0.6106, "step": 9615 }, { "epoch": 0.81, "learning_rate": 2.2006045344768477e-05, "loss": 0.5342, "step": 9620 }, { "epoch": 0.81, "learning_rate": 2.1984234214247435e-05, "loss": 0.5741, "step": 9625 }, { "epoch": 0.81, "learning_rate": 2.1962425412856916e-05, "loss": 0.5317, "step": 9630 }, { "epoch": 0.81, "learning_rate": 2.1940618957440264e-05, "loss": 0.627, "step": 9635 }, { "epoch": 0.81, "learning_rate": 2.1918814864839e-05, "loss": 0.5845, "step": 9640 }, { "epoch": 0.81, "learning_rate": 2.1897013151892814e-05, "loss": 0.619, "step": 9645 }, { "epoch": 0.81, "learning_rate": 2.1875213835439573e-05, "loss": 0.5415, "step": 9650 }, { "epoch": 0.81, "learning_rate": 2.1853416932315274e-05, "loss": 0.6026, "step": 9655 }, { "epoch": 0.81, "learning_rate": 2.183162245935406e-05, "loss": 0.528, "step": 9660 }, { "epoch": 0.81, "learning_rate": 2.1809830433388207e-05, "loss": 0.5867, "step": 9665 }, { "epoch": 0.81, "learning_rate": 2.1788040871248083e-05, "loss": 0.5799, "step": 9670 }, { "epoch": 0.81, "learning_rate": 2.1766253789762177e-05, "loss": 0.5577, "step": 9675 }, { "epoch": 0.81, "learning_rate": 2.1744469205757022e-05, "loss": 0.5598, "step": 9680 }, { "epoch": 0.81, "learning_rate": 2.172268713605726e-05, "loss": 0.6117, "step": 9685 }, { "epoch": 0.81, "learning_rate": 2.1700907597485576e-05, "loss": 0.5409, "step": 9690 }, { "epoch": 0.81, "learning_rate": 2.16791306068627e-05, "loss": 0.5297, "step": 9695 }, { "epoch": 0.81, "learning_rate": 2.16573561810074e-05, "loss": 0.5461, "step": 9700 }, { "epoch": 0.81, "learning_rate": 2.163558433673646e-05, "loss": 0.5525, "step": 9705 }, { "epoch": 0.81, "learning_rate": 2.1613815090864664e-05, "loss": 0.5891, "step": 9710 }, { "epoch": 0.82, "learning_rate": 2.1592048460204802e-05, "loss": 0.5818, "step": 9715 }, { "epoch": 0.82, "learning_rate": 2.1570284461567632e-05, "loss": 0.5731, "step": 9720 }, { "epoch": 0.82, "learning_rate": 2.1548523111761883e-05, "loss": 0.5228, "step": 9725 }, { "epoch": 0.82, "learning_rate": 2.152676442759425e-05, "loss": 0.5231, "step": 9730 }, { "epoch": 0.82, "learning_rate": 2.1505008425869345e-05, "loss": 0.5816, "step": 9735 }, { "epoch": 0.82, "learning_rate": 2.148325512338974e-05, "loss": 0.609, "step": 9740 }, { "epoch": 0.82, "learning_rate": 2.146150453695588e-05, "loss": 0.5692, "step": 9745 }, { "epoch": 0.82, "learning_rate": 2.1439756683366152e-05, "loss": 0.548, "step": 9750 }, { "epoch": 0.82, "learning_rate": 2.1418011579416815e-05, "loss": 0.5708, "step": 9755 }, { "epoch": 0.82, "learning_rate": 2.1396269241902007e-05, "loss": 0.5951, "step": 9760 }, { "epoch": 0.82, "learning_rate": 2.1374529687613733e-05, "loss": 0.5729, "step": 9765 }, { "epoch": 0.82, "learning_rate": 2.135279293334183e-05, "loss": 0.575, "step": 9770 }, { "epoch": 0.82, "learning_rate": 2.1331058995874005e-05, "loss": 0.6638, "step": 9775 }, { "epoch": 0.82, "learning_rate": 2.130932789199576e-05, "loss": 0.5656, "step": 9780 }, { "epoch": 0.82, "learning_rate": 2.128759963849042e-05, "loss": 0.4986, "step": 9785 }, { "epoch": 0.82, "learning_rate": 2.1265874252139124e-05, "loss": 0.5742, "step": 9790 }, { "epoch": 0.82, "learning_rate": 2.1244151749720762e-05, "loss": 0.5371, "step": 9795 }, { "epoch": 0.82, "learning_rate": 2.1222432148012024e-05, "loss": 0.584, "step": 9800 }, { "epoch": 0.82, "learning_rate": 2.120071546378735e-05, "loss": 0.5719, "step": 9805 }, { "epoch": 0.82, "learning_rate": 2.1179001713818942e-05, "loss": 0.5863, "step": 9810 }, { "epoch": 0.82, "learning_rate": 2.11572909148767e-05, "loss": 0.5921, "step": 9815 }, { "epoch": 0.82, "learning_rate": 2.1135583083728277e-05, "loss": 0.5642, "step": 9820 }, { "epoch": 0.82, "learning_rate": 2.1113878237139024e-05, "loss": 0.5341, "step": 9825 }, { "epoch": 0.82, "learning_rate": 2.109217639187199e-05, "loss": 0.5554, "step": 9830 }, { "epoch": 0.83, "learning_rate": 2.1070477564687895e-05, "loss": 0.5143, "step": 9835 }, { "epoch": 0.83, "learning_rate": 2.104878177234515e-05, "loss": 0.5629, "step": 9840 }, { "epoch": 0.83, "learning_rate": 2.10270890315998e-05, "loss": 0.5853, "step": 9845 }, { "epoch": 0.83, "learning_rate": 2.1005399359205535e-05, "loss": 0.5741, "step": 9850 }, { "epoch": 0.83, "learning_rate": 2.0983712771913695e-05, "loss": 0.5584, "step": 9855 }, { "epoch": 0.83, "learning_rate": 2.096202928647321e-05, "loss": 0.522, "step": 9860 }, { "epoch": 0.83, "learning_rate": 2.094034891963064e-05, "loss": 0.6641, "step": 9865 }, { "epoch": 0.83, "learning_rate": 2.0918671688130133e-05, "loss": 0.5568, "step": 9870 }, { "epoch": 0.83, "learning_rate": 2.0896997608713385e-05, "loss": 0.5754, "step": 9875 }, { "epoch": 0.83, "learning_rate": 2.0875326698119698e-05, "loss": 0.5314, "step": 9880 }, { "epoch": 0.83, "learning_rate": 2.0853658973085895e-05, "loss": 0.5967, "step": 9885 }, { "epoch": 0.83, "learning_rate": 2.083199445034636e-05, "loss": 0.5798, "step": 9890 }, { "epoch": 0.83, "learning_rate": 2.0810333146632994e-05, "loss": 0.584, "step": 9895 }, { "epoch": 0.83, "learning_rate": 2.0788675078675213e-05, "loss": 0.5584, "step": 9900 }, { "epoch": 0.83, "learning_rate": 2.0767020263199945e-05, "loss": 0.5563, "step": 9905 }, { "epoch": 0.83, "learning_rate": 2.0745368716931586e-05, "loss": 0.5687, "step": 9910 }, { "epoch": 0.83, "learning_rate": 2.0723720456592017e-05, "loss": 0.51, "step": 9915 }, { "epoch": 0.83, "learning_rate": 2.0702075498900588e-05, "loss": 0.5715, "step": 9920 }, { "epoch": 0.83, "learning_rate": 2.068043386057409e-05, "loss": 0.5939, "step": 9925 }, { "epoch": 0.83, "learning_rate": 2.0658795558326743e-05, "loss": 0.5337, "step": 9930 }, { "epoch": 0.83, "learning_rate": 2.063716060887021e-05, "loss": 0.5432, "step": 9935 }, { "epoch": 0.83, "learning_rate": 2.0615529028913553e-05, "loss": 0.5369, "step": 9940 }, { "epoch": 0.83, "learning_rate": 2.059390083516323e-05, "loss": 0.5594, "step": 9945 }, { "epoch": 0.83, "learning_rate": 2.05722760443231e-05, "loss": 0.5223, "step": 9950 }, { "epoch": 0.84, "learning_rate": 2.055065467309436e-05, "loss": 0.6063, "step": 9955 }, { "epoch": 0.84, "learning_rate": 2.0529036738175595e-05, "loss": 0.5844, "step": 9960 }, { "epoch": 0.84, "learning_rate": 2.0507422256262727e-05, "loss": 0.599, "step": 9965 }, { "epoch": 0.84, "learning_rate": 2.0485811244049013e-05, "loss": 0.5637, "step": 9970 }, { "epoch": 0.84, "learning_rate": 2.0464203718225028e-05, "loss": 0.5815, "step": 9975 }, { "epoch": 0.84, "learning_rate": 2.0442599695478655e-05, "loss": 0.5272, "step": 9980 }, { "epoch": 0.84, "learning_rate": 2.0420999192495082e-05, "loss": 0.5715, "step": 9985 }, { "epoch": 0.84, "learning_rate": 2.0399402225956754e-05, "loss": 0.5886, "step": 9990 }, { "epoch": 0.84, "learning_rate": 2.0377808812543404e-05, "loss": 0.5464, "step": 9995 }, { "epoch": 0.84, "learning_rate": 2.035621896893202e-05, "loss": 0.5437, "step": 10000 }, { "epoch": 0.84, "learning_rate": 2.033463271179683e-05, "loss": 0.6395, "step": 10005 }, { "epoch": 0.84, "learning_rate": 2.0313050057809284e-05, "loss": 0.5005, "step": 10010 }, { "epoch": 0.84, "learning_rate": 2.029147102363806e-05, "loss": 0.555, "step": 10015 }, { "epoch": 0.84, "learning_rate": 2.0269895625949054e-05, "loss": 0.5392, "step": 10020 }, { "epoch": 0.84, "learning_rate": 2.0248323881405304e-05, "loss": 0.5338, "step": 10025 }, { "epoch": 0.84, "learning_rate": 2.0226755806667078e-05, "loss": 0.5892, "step": 10030 }, { "epoch": 0.84, "learning_rate": 2.0205191418391785e-05, "loss": 0.5387, "step": 10035 }, { "epoch": 0.84, "learning_rate": 2.0183630733234006e-05, "loss": 0.5782, "step": 10040 }, { "epoch": 0.84, "learning_rate": 2.0162073767845427e-05, "loss": 0.5797, "step": 10045 }, { "epoch": 0.84, "learning_rate": 2.0140520538874896e-05, "loss": 0.5801, "step": 10050 }, { "epoch": 0.84, "learning_rate": 2.011897106296836e-05, "loss": 0.5698, "step": 10055 }, { "epoch": 0.84, "learning_rate": 2.009742535676887e-05, "loss": 0.6325, "step": 10060 }, { "epoch": 0.84, "learning_rate": 2.007588343691657e-05, "loss": 0.5645, "step": 10065 }, { "epoch": 0.85, "learning_rate": 2.0054345320048663e-05, "loss": 0.6322, "step": 10070 }, { "epoch": 0.85, "learning_rate": 2.0032811022799436e-05, "loss": 0.5883, "step": 10075 }, { "epoch": 0.85, "learning_rate": 2.001128056180021e-05, "loss": 0.5253, "step": 10080 }, { "epoch": 0.85, "learning_rate": 1.9989753953679357e-05, "loss": 0.6222, "step": 10085 }, { "epoch": 0.85, "learning_rate": 1.9968231215062273e-05, "loss": 0.5374, "step": 10090 }, { "epoch": 0.85, "learning_rate": 1.9946712362571338e-05, "loss": 0.5659, "step": 10095 }, { "epoch": 0.85, "learning_rate": 1.9925197412825964e-05, "loss": 0.5839, "step": 10100 }, { "epoch": 0.85, "learning_rate": 1.9903686382442537e-05, "loss": 0.4978, "step": 10105 }, { "epoch": 0.85, "learning_rate": 1.9882179288034408e-05, "loss": 0.5546, "step": 10110 }, { "epoch": 0.85, "learning_rate": 1.9860676146211906e-05, "loss": 0.5496, "step": 10115 }, { "epoch": 0.85, "learning_rate": 1.983917697358229e-05, "loss": 0.6104, "step": 10120 }, { "epoch": 0.85, "learning_rate": 1.9817681786749758e-05, "loss": 0.58, "step": 10125 }, { "epoch": 0.85, "learning_rate": 1.979619060231544e-05, "loss": 0.559, "step": 10130 }, { "epoch": 0.85, "learning_rate": 1.9774703436877362e-05, "loss": 0.5028, "step": 10135 }, { "epoch": 0.85, "learning_rate": 1.975322030703045e-05, "loss": 0.5116, "step": 10140 }, { "epoch": 0.85, "learning_rate": 1.9731741229366522e-05, "loss": 0.6294, "step": 10145 }, { "epoch": 0.85, "learning_rate": 1.9710266220474247e-05, "loss": 0.5267, "step": 10150 }, { "epoch": 0.85, "learning_rate": 1.9688795296939173e-05, "loss": 0.5985, "step": 10155 }, { "epoch": 0.85, "learning_rate": 1.9667328475343686e-05, "loss": 0.518, "step": 10160 }, { "epoch": 0.85, "learning_rate": 1.9645865772266984e-05, "loss": 0.5454, "step": 10165 }, { "epoch": 0.85, "learning_rate": 1.962440720428511e-05, "loss": 0.6012, "step": 10170 }, { "epoch": 0.85, "learning_rate": 1.960295278797091e-05, "loss": 0.571, "step": 10175 }, { "epoch": 0.85, "learning_rate": 1.958150253989402e-05, "loss": 0.5862, "step": 10180 }, { "epoch": 0.85, "learning_rate": 1.956005647662084e-05, "loss": 0.6131, "step": 10185 }, { "epoch": 0.86, "learning_rate": 1.953861461471457e-05, "loss": 0.524, "step": 10190 }, { "epoch": 0.86, "learning_rate": 1.9517176970735134e-05, "loss": 0.5536, "step": 10195 }, { "epoch": 0.86, "learning_rate": 1.949574356123922e-05, "loss": 0.6543, "step": 10200 }, { "epoch": 0.86, "learning_rate": 1.9474314402780246e-05, "loss": 0.5712, "step": 10205 }, { "epoch": 0.86, "learning_rate": 1.9452889511908327e-05, "loss": 0.5919, "step": 10210 }, { "epoch": 0.86, "learning_rate": 1.9431468905170298e-05, "loss": 0.6314, "step": 10215 }, { "epoch": 0.86, "learning_rate": 1.941005259910968e-05, "loss": 0.5192, "step": 10220 }, { "epoch": 0.86, "learning_rate": 1.9388640610266685e-05, "loss": 0.5272, "step": 10225 }, { "epoch": 0.86, "learning_rate": 1.9367232955178168e-05, "loss": 0.5873, "step": 10230 }, { "epoch": 0.86, "learning_rate": 1.934582965037765e-05, "loss": 0.6024, "step": 10235 }, { "epoch": 0.86, "learning_rate": 1.9324430712395284e-05, "loss": 0.6048, "step": 10240 }, { "epoch": 0.86, "learning_rate": 1.9303036157757874e-05, "loss": 0.547, "step": 10245 }, { "epoch": 0.86, "learning_rate": 1.928164600298881e-05, "loss": 0.5689, "step": 10250 }, { "epoch": 0.86, "learning_rate": 1.92602602646081e-05, "loss": 0.5651, "step": 10255 }, { "epoch": 0.86, "learning_rate": 1.923887895913234e-05, "loss": 0.4979, "step": 10260 }, { "epoch": 0.86, "learning_rate": 1.9217502103074693e-05, "loss": 0.5773, "step": 10265 }, { "epoch": 0.86, "learning_rate": 1.9196129712944894e-05, "loss": 0.6001, "step": 10270 }, { "epoch": 0.86, "learning_rate": 1.917476180524923e-05, "loss": 0.5175, "step": 10275 }, { "epoch": 0.86, "learning_rate": 1.915339839649052e-05, "loss": 0.5695, "step": 10280 }, { "epoch": 0.86, "learning_rate": 1.9132039503168117e-05, "loss": 0.6719, "step": 10285 }, { "epoch": 0.86, "learning_rate": 1.9110685141777874e-05, "loss": 0.6524, "step": 10290 }, { "epoch": 0.86, "learning_rate": 1.9089335328812152e-05, "loss": 0.5859, "step": 10295 }, { "epoch": 0.86, "learning_rate": 1.906799008075981e-05, "loss": 0.6028, "step": 10300 }, { "epoch": 0.86, "learning_rate": 1.9046649414106153e-05, "loss": 0.5777, "step": 10305 }, { "epoch": 0.87, "learning_rate": 1.902531334533297e-05, "loss": 0.5488, "step": 10310 }, { "epoch": 0.87, "learning_rate": 1.900398189091849e-05, "loss": 0.603, "step": 10315 }, { "epoch": 0.87, "learning_rate": 1.8982655067337392e-05, "loss": 0.549, "step": 10320 }, { "epoch": 0.87, "learning_rate": 1.8961332891060763e-05, "loss": 0.5223, "step": 10325 }, { "epoch": 0.87, "learning_rate": 1.8940015378556096e-05, "loss": 0.5488, "step": 10330 }, { "epoch": 0.87, "learning_rate": 1.891870254628731e-05, "loss": 0.5629, "step": 10335 }, { "epoch": 0.87, "learning_rate": 1.8897394410714682e-05, "loss": 0.5264, "step": 10340 }, { "epoch": 0.87, "learning_rate": 1.887609098829487e-05, "loss": 0.5922, "step": 10345 }, { "epoch": 0.87, "learning_rate": 1.885479229548089e-05, "loss": 0.5748, "step": 10350 }, { "epoch": 0.87, "learning_rate": 1.8833498348722116e-05, "loss": 0.5768, "step": 10355 }, { "epoch": 0.87, "learning_rate": 1.881220916446424e-05, "loss": 0.5284, "step": 10360 }, { "epoch": 0.87, "learning_rate": 1.87909247591493e-05, "loss": 0.5602, "step": 10365 }, { "epoch": 0.87, "learning_rate": 1.8769645149215618e-05, "loss": 0.5785, "step": 10370 }, { "epoch": 0.87, "learning_rate": 1.874837035109782e-05, "loss": 0.5879, "step": 10375 }, { "epoch": 0.87, "learning_rate": 1.8727100381226814e-05, "loss": 0.5749, "step": 10380 }, { "epoch": 0.87, "learning_rate": 1.8705835256029785e-05, "loss": 0.5968, "step": 10385 }, { "epoch": 0.87, "learning_rate": 1.868457499193018e-05, "loss": 0.5305, "step": 10390 }, { "epoch": 0.87, "learning_rate": 1.8663319605347685e-05, "loss": 0.6126, "step": 10395 }, { "epoch": 0.87, "learning_rate": 1.8642069112698222e-05, "loss": 0.543, "step": 10400 }, { "epoch": 0.87, "learning_rate": 1.8620823530393928e-05, "loss": 0.5399, "step": 10405 }, { "epoch": 0.87, "learning_rate": 1.8599582874843148e-05, "loss": 0.5279, "step": 10410 }, { "epoch": 0.87, "learning_rate": 1.8578347162450426e-05, "loss": 0.5849, "step": 10415 }, { "epoch": 0.87, "learning_rate": 1.8557116409616486e-05, "loss": 0.623, "step": 10420 }, { "epoch": 0.87, "learning_rate": 1.853589063273824e-05, "loss": 0.5456, "step": 10425 }, { "epoch": 0.88, "learning_rate": 1.8514669848208716e-05, "loss": 0.5916, "step": 10430 }, { "epoch": 0.88, "learning_rate": 1.849345407241712e-05, "loss": 0.5475, "step": 10435 }, { "epoch": 0.88, "learning_rate": 1.8472243321748792e-05, "loss": 0.5325, "step": 10440 }, { "epoch": 0.88, "learning_rate": 1.8451037612585152e-05, "loss": 0.5983, "step": 10445 }, { "epoch": 0.88, "learning_rate": 1.8429836961303776e-05, "loss": 0.5329, "step": 10450 }, { "epoch": 0.88, "learning_rate": 1.8408641384278303e-05, "loss": 0.5518, "step": 10455 }, { "epoch": 0.88, "learning_rate": 1.8387450897878458e-05, "loss": 0.6049, "step": 10460 }, { "epoch": 0.88, "learning_rate": 1.8366265518470038e-05, "loss": 0.5441, "step": 10465 }, { "epoch": 0.88, "learning_rate": 1.83450852624149e-05, "loss": 0.5406, "step": 10470 }, { "epoch": 0.88, "learning_rate": 1.832391014607094e-05, "loss": 0.5155, "step": 10475 }, { "epoch": 0.88, "learning_rate": 1.830274018579208e-05, "loss": 0.5914, "step": 10480 }, { "epoch": 0.88, "learning_rate": 1.8281575397928262e-05, "loss": 0.6334, "step": 10485 }, { "epoch": 0.88, "learning_rate": 1.826041579882544e-05, "loss": 0.5835, "step": 10490 }, { "epoch": 0.88, "learning_rate": 1.823926140482555e-05, "loss": 0.6061, "step": 10495 }, { "epoch": 0.88, "learning_rate": 1.8218112232266518e-05, "loss": 0.5624, "step": 10500 }, { "epoch": 0.88, "learning_rate": 1.8196968297482238e-05, "loss": 0.555, "step": 10505 }, { "epoch": 0.88, "learning_rate": 1.8175829616802547e-05, "loss": 0.5424, "step": 10510 }, { "epoch": 0.88, "learning_rate": 1.815469620655323e-05, "loss": 0.6261, "step": 10515 }, { "epoch": 0.88, "learning_rate": 1.8133568083056e-05, "loss": 0.556, "step": 10520 }, { "epoch": 0.88, "learning_rate": 1.811244526262849e-05, "loss": 0.5471, "step": 10525 }, { "epoch": 0.88, "learning_rate": 1.8091327761584244e-05, "loss": 0.5455, "step": 10530 }, { "epoch": 0.88, "learning_rate": 1.8070215596232688e-05, "loss": 0.5476, "step": 10535 }, { "epoch": 0.88, "learning_rate": 1.804910878287913e-05, "loss": 0.5485, "step": 10540 }, { "epoch": 0.88, "learning_rate": 1.802800733782474e-05, "loss": 0.5316, "step": 10545 }, { "epoch": 0.89, "learning_rate": 1.8006911277366545e-05, "loss": 0.6126, "step": 10550 }, { "epoch": 0.89, "learning_rate": 1.7985820617797423e-05, "loss": 0.5959, "step": 10555 }, { "epoch": 0.89, "learning_rate": 1.796473537540607e-05, "loss": 0.503, "step": 10560 }, { "epoch": 0.89, "learning_rate": 1.7943655566476993e-05, "loss": 0.5309, "step": 10565 }, { "epoch": 0.89, "learning_rate": 1.7922581207290516e-05, "loss": 0.5242, "step": 10570 }, { "epoch": 0.89, "learning_rate": 1.790151231412275e-05, "loss": 0.558, "step": 10575 }, { "epoch": 0.89, "learning_rate": 1.7880448903245584e-05, "loss": 0.5217, "step": 10580 }, { "epoch": 0.89, "learning_rate": 1.785939099092666e-05, "loss": 0.6245, "step": 10585 }, { "epoch": 0.89, "learning_rate": 1.7838338593429395e-05, "loss": 0.5604, "step": 10590 }, { "epoch": 0.89, "learning_rate": 1.781729172701294e-05, "loss": 0.6648, "step": 10595 }, { "epoch": 0.89, "learning_rate": 1.779625040793216e-05, "loss": 0.4995, "step": 10600 }, { "epoch": 0.89, "learning_rate": 1.7775214652437655e-05, "loss": 0.5372, "step": 10605 }, { "epoch": 0.89, "learning_rate": 1.7754184476775716e-05, "loss": 0.5355, "step": 10610 }, { "epoch": 0.89, "learning_rate": 1.773315989718833e-05, "loss": 0.5395, "step": 10615 }, { "epoch": 0.89, "learning_rate": 1.771214092991317e-05, "loss": 0.6, "step": 10620 }, { "epoch": 0.89, "learning_rate": 1.7691127591183543e-05, "loss": 0.5769, "step": 10625 }, { "epoch": 0.89, "learning_rate": 1.767011989722845e-05, "loss": 0.5352, "step": 10630 }, { "epoch": 0.89, "learning_rate": 1.7649117864272504e-05, "loss": 0.6378, "step": 10635 }, { "epoch": 0.89, "learning_rate": 1.762812150853596e-05, "loss": 0.5736, "step": 10640 }, { "epoch": 0.89, "learning_rate": 1.760713084623469e-05, "loss": 0.5818, "step": 10645 }, { "epoch": 0.89, "learning_rate": 1.7586145893580153e-05, "loss": 0.5594, "step": 10650 }, { "epoch": 0.89, "learning_rate": 1.7565166666779413e-05, "loss": 0.5702, "step": 10655 }, { "epoch": 0.89, "learning_rate": 1.7544193182035097e-05, "loss": 0.5434, "step": 10660 }, { "epoch": 0.89, "learning_rate": 1.7523225455545415e-05, "loss": 0.5744, "step": 10665 }, { "epoch": 0.9, "learning_rate": 1.7502263503504118e-05, "loss": 0.6051, "step": 10670 }, { "epoch": 0.9, "learning_rate": 1.7481307342100514e-05, "loss": 0.5966, "step": 10675 }, { "epoch": 0.9, "learning_rate": 1.7460356987519412e-05, "loss": 0.5403, "step": 10680 }, { "epoch": 0.9, "learning_rate": 1.743941245594115e-05, "loss": 0.569, "step": 10685 }, { "epoch": 0.9, "learning_rate": 1.7418473763541576e-05, "loss": 0.5785, "step": 10690 }, { "epoch": 0.9, "learning_rate": 1.7397540926492017e-05, "loss": 0.5732, "step": 10695 }, { "epoch": 0.9, "learning_rate": 1.7376613960959293e-05, "loss": 0.5746, "step": 10700 }, { "epoch": 0.9, "learning_rate": 1.7355692883105663e-05, "loss": 0.5106, "step": 10705 }, { "epoch": 0.9, "learning_rate": 1.7334777709088857e-05, "loss": 0.5738, "step": 10710 }, { "epoch": 0.9, "learning_rate": 1.731386845506205e-05, "loss": 0.575, "step": 10715 }, { "epoch": 0.9, "learning_rate": 1.7292965137173845e-05, "loss": 0.5725, "step": 10720 }, { "epoch": 0.9, "learning_rate": 1.7272067771568227e-05, "loss": 0.6024, "step": 10725 }, { "epoch": 0.9, "learning_rate": 1.7251176374384632e-05, "loss": 0.6048, "step": 10730 }, { "epoch": 0.9, "learning_rate": 1.723029096175786e-05, "loss": 0.5481, "step": 10735 }, { "epoch": 0.9, "learning_rate": 1.7209411549818087e-05, "loss": 0.5815, "step": 10740 }, { "epoch": 0.9, "learning_rate": 1.7188538154690863e-05, "loss": 0.6171, "step": 10745 }, { "epoch": 0.9, "learning_rate": 1.7167670792497093e-05, "loss": 0.484, "step": 10750 }, { "epoch": 0.9, "learning_rate": 1.7146809479353012e-05, "loss": 0.5876, "step": 10755 }, { "epoch": 0.9, "learning_rate": 1.7125954231370203e-05, "loss": 0.6236, "step": 10760 }, { "epoch": 0.9, "learning_rate": 1.710510506465554e-05, "loss": 0.5367, "step": 10765 }, { "epoch": 0.9, "learning_rate": 1.7084261995311213e-05, "loss": 0.5511, "step": 10770 }, { "epoch": 0.9, "learning_rate": 1.70634250394347e-05, "loss": 0.5656, "step": 10775 }, { "epoch": 0.9, "learning_rate": 1.7042594213118777e-05, "loss": 0.5629, "step": 10780 }, { "epoch": 0.91, "learning_rate": 1.7021769532451445e-05, "loss": 0.5871, "step": 10785 }, { "epoch": 0.91, "learning_rate": 1.7000951013516004e-05, "loss": 0.5847, "step": 10790 }, { "epoch": 0.91, "learning_rate": 1.698013867239095e-05, "loss": 0.5303, "step": 10795 }, { "epoch": 0.91, "learning_rate": 1.6959332525150046e-05, "loss": 0.5736, "step": 10800 }, { "epoch": 0.91, "learning_rate": 1.693853258786225e-05, "loss": 0.5433, "step": 10805 }, { "epoch": 0.91, "learning_rate": 1.6917738876591734e-05, "loss": 0.5279, "step": 10810 }, { "epoch": 0.91, "learning_rate": 1.6896951407397857e-05, "loss": 0.6703, "step": 10815 }, { "epoch": 0.91, "learning_rate": 1.6876170196335157e-05, "loss": 0.5677, "step": 10820 }, { "epoch": 0.91, "learning_rate": 1.6855395259453338e-05, "loss": 0.557, "step": 10825 }, { "epoch": 0.91, "learning_rate": 1.6834626612797262e-05, "loss": 0.6043, "step": 10830 }, { "epoch": 0.91, "learning_rate": 1.6813864272406934e-05, "loss": 0.614, "step": 10835 }, { "epoch": 0.91, "learning_rate": 1.6793108254317484e-05, "loss": 0.627, "step": 10840 }, { "epoch": 0.91, "learning_rate": 1.677235857455916e-05, "loss": 0.5771, "step": 10845 }, { "epoch": 0.91, "learning_rate": 1.675161524915731e-05, "loss": 0.6362, "step": 10850 }, { "epoch": 0.91, "learning_rate": 1.6730878294132387e-05, "loss": 0.5563, "step": 10855 }, { "epoch": 0.91, "learning_rate": 1.6710147725499924e-05, "loss": 0.5704, "step": 10860 }, { "epoch": 0.91, "learning_rate": 1.6689423559270494e-05, "loss": 0.5807, "step": 10865 }, { "epoch": 0.91, "learning_rate": 1.6668705811449763e-05, "loss": 0.5726, "step": 10870 }, { "epoch": 0.91, "learning_rate": 1.6647994498038416e-05, "loss": 0.6217, "step": 10875 }, { "epoch": 0.91, "learning_rate": 1.6627289635032174e-05, "loss": 0.5692, "step": 10880 }, { "epoch": 0.91, "learning_rate": 1.6606591238421777e-05, "loss": 0.6283, "step": 10885 }, { "epoch": 0.91, "learning_rate": 1.6585899324192977e-05, "loss": 0.5637, "step": 10890 }, { "epoch": 0.91, "learning_rate": 1.6565213908326515e-05, "loss": 0.5506, "step": 10895 }, { "epoch": 0.91, "learning_rate": 1.6544535006798107e-05, "loss": 0.5867, "step": 10900 }, { "epoch": 0.92, "learning_rate": 1.6523862635578445e-05, "loss": 0.5438, "step": 10905 }, { "epoch": 0.92, "learning_rate": 1.6503196810633174e-05, "loss": 0.5748, "step": 10910 }, { "epoch": 0.92, "learning_rate": 1.6482537547922887e-05, "loss": 0.5373, "step": 10915 }, { "epoch": 0.92, "learning_rate": 1.646188486340311e-05, "loss": 0.5775, "step": 10920 }, { "epoch": 0.92, "learning_rate": 1.6441238773024282e-05, "loss": 0.5755, "step": 10925 }, { "epoch": 0.92, "learning_rate": 1.6420599292731758e-05, "loss": 0.6507, "step": 10930 }, { "epoch": 0.92, "learning_rate": 1.6399966438465767e-05, "loss": 0.6179, "step": 10935 }, { "epoch": 0.92, "learning_rate": 1.6379340226161446e-05, "loss": 0.5675, "step": 10940 }, { "epoch": 0.92, "learning_rate": 1.6358720671748794e-05, "loss": 0.5126, "step": 10945 }, { "epoch": 0.92, "learning_rate": 1.6338107791152664e-05, "loss": 0.5611, "step": 10950 }, { "epoch": 0.92, "learning_rate": 1.6317501600292755e-05, "loss": 0.6081, "step": 10955 }, { "epoch": 0.92, "learning_rate": 1.6296902115083607e-05, "loss": 0.5627, "step": 10960 }, { "epoch": 0.92, "learning_rate": 1.627630935143457e-05, "loss": 0.5319, "step": 10965 }, { "epoch": 0.92, "learning_rate": 1.625572332524981e-05, "loss": 0.6006, "step": 10970 }, { "epoch": 0.92, "learning_rate": 1.623514405242829e-05, "loss": 0.5725, "step": 10975 }, { "epoch": 0.92, "learning_rate": 1.6214571548863756e-05, "loss": 0.502, "step": 10980 }, { "epoch": 0.92, "learning_rate": 1.619400583044472e-05, "loss": 0.5711, "step": 10985 }, { "epoch": 0.92, "learning_rate": 1.6173446913054462e-05, "loss": 0.5813, "step": 10990 }, { "epoch": 0.92, "learning_rate": 1.6152894812571e-05, "loss": 0.5717, "step": 10995 }, { "epoch": 0.92, "learning_rate": 1.6132349544867115e-05, "loss": 0.5398, "step": 11000 }, { "epoch": 0.92, "learning_rate": 1.611181112581026e-05, "loss": 0.5561, "step": 11005 }, { "epoch": 0.92, "learning_rate": 1.6091279571262647e-05, "loss": 0.5545, "step": 11010 }, { "epoch": 0.92, "learning_rate": 1.607075489708116e-05, "loss": 0.5986, "step": 11015 }, { "epoch": 0.92, "learning_rate": 1.605023711911737e-05, "loss": 0.57, "step": 11020 }, { "epoch": 0.93, "learning_rate": 1.6029726253217546e-05, "loss": 0.5363, "step": 11025 }, { "epoch": 0.93, "learning_rate": 1.600922231522259e-05, "loss": 0.5789, "step": 11030 }, { "epoch": 0.93, "learning_rate": 1.5988725320968068e-05, "loss": 0.5517, "step": 11035 }, { "epoch": 0.93, "learning_rate": 1.5968235286284176e-05, "loss": 0.6131, "step": 11040 }, { "epoch": 0.93, "learning_rate": 1.5947752226995738e-05, "loss": 0.5914, "step": 11045 }, { "epoch": 0.93, "learning_rate": 1.59272761589222e-05, "loss": 0.5098, "step": 11050 }, { "epoch": 0.93, "learning_rate": 1.5906807097877592e-05, "loss": 0.5658, "step": 11055 }, { "epoch": 0.93, "learning_rate": 1.5886345059670548e-05, "loss": 0.6126, "step": 11060 }, { "epoch": 0.93, "learning_rate": 1.5865890060104265e-05, "loss": 0.5636, "step": 11065 }, { "epoch": 0.93, "learning_rate": 1.584544211497652e-05, "loss": 0.5751, "step": 11070 }, { "epoch": 0.93, "learning_rate": 1.582500124007961e-05, "loss": 0.6119, "step": 11075 }, { "epoch": 0.93, "learning_rate": 1.5804567451200408e-05, "loss": 0.5453, "step": 11080 }, { "epoch": 0.93, "learning_rate": 1.5784140764120293e-05, "loss": 0.5342, "step": 11085 }, { "epoch": 0.93, "learning_rate": 1.576372119461517e-05, "loss": 0.5471, "step": 11090 }, { "epoch": 0.93, "learning_rate": 1.574330875845545e-05, "loss": 0.5753, "step": 11095 }, { "epoch": 0.93, "learning_rate": 1.5722903471406012e-05, "loss": 0.5073, "step": 11100 }, { "epoch": 0.93, "learning_rate": 1.570250534922623e-05, "loss": 0.5649, "step": 11105 }, { "epoch": 0.93, "learning_rate": 1.5682114407669956e-05, "loss": 0.6477, "step": 11110 }, { "epoch": 0.93, "learning_rate": 1.5661730662485476e-05, "loss": 0.6322, "step": 11115 }, { "epoch": 0.93, "learning_rate": 1.564135412941552e-05, "loss": 0.5766, "step": 11120 }, { "epoch": 0.93, "learning_rate": 1.5620984824197262e-05, "loss": 0.6691, "step": 11125 }, { "epoch": 0.93, "learning_rate": 1.560062276256228e-05, "loss": 0.6431, "step": 11130 }, { "epoch": 0.93, "learning_rate": 1.5580267960236567e-05, "loss": 0.5144, "step": 11135 }, { "epoch": 0.93, "learning_rate": 1.5559920432940513e-05, "loss": 0.6355, "step": 11140 }, { "epoch": 0.94, "learning_rate": 1.553958019638886e-05, "loss": 0.5475, "step": 11145 }, { "epoch": 0.94, "learning_rate": 1.551924726629076e-05, "loss": 0.5764, "step": 11150 }, { "epoch": 0.94, "learning_rate": 1.549892165834969e-05, "loss": 0.5891, "step": 11155 }, { "epoch": 0.94, "learning_rate": 1.547860338826349e-05, "loss": 0.5234, "step": 11160 }, { "epoch": 0.94, "learning_rate": 1.5458292471724333e-05, "loss": 0.5543, "step": 11165 }, { "epoch": 0.94, "learning_rate": 1.5437988924418705e-05, "loss": 0.5136, "step": 11170 }, { "epoch": 0.94, "learning_rate": 1.541769276202741e-05, "loss": 0.5738, "step": 11175 }, { "epoch": 0.94, "learning_rate": 1.5397404000225524e-05, "loss": 0.5021, "step": 11180 }, { "epoch": 0.94, "learning_rate": 1.5377122654682437e-05, "loss": 0.5874, "step": 11185 }, { "epoch": 0.94, "learning_rate": 1.5356848741061797e-05, "loss": 0.6244, "step": 11190 }, { "epoch": 0.94, "learning_rate": 1.533658227502152e-05, "loss": 0.594, "step": 11195 }, { "epoch": 0.94, "learning_rate": 1.531632327221376e-05, "loss": 0.5188, "step": 11200 }, { "epoch": 0.94, "learning_rate": 1.5296071748284913e-05, "loss": 0.5296, "step": 11205 }, { "epoch": 0.94, "learning_rate": 1.5275827718875603e-05, "loss": 0.5882, "step": 11210 }, { "epoch": 0.94, "learning_rate": 1.5255591199620645e-05, "loss": 0.5247, "step": 11215 }, { "epoch": 0.94, "learning_rate": 1.5235362206149079e-05, "loss": 0.555, "step": 11220 }, { "epoch": 0.94, "learning_rate": 1.5215140754084123e-05, "loss": 0.5631, "step": 11225 }, { "epoch": 0.94, "learning_rate": 1.5194926859043173e-05, "loss": 0.5613, "step": 11230 }, { "epoch": 0.94, "learning_rate": 1.5174720536637782e-05, "loss": 0.5455, "step": 11235 }, { "epoch": 0.94, "learning_rate": 1.5154521802473659e-05, "loss": 0.589, "step": 11240 }, { "epoch": 0.94, "learning_rate": 1.5134330672150656e-05, "loss": 0.5444, "step": 11245 }, { "epoch": 0.94, "learning_rate": 1.5114147161262742e-05, "loss": 0.5399, "step": 11250 }, { "epoch": 0.94, "learning_rate": 1.5093971285398023e-05, "loss": 0.5685, "step": 11255 }, { "epoch": 0.94, "learning_rate": 1.5073803060138675e-05, "loss": 0.5466, "step": 11260 }, { "epoch": 0.95, "learning_rate": 1.5053642501060989e-05, "loss": 0.5064, "step": 11265 }, { "epoch": 0.95, "learning_rate": 1.5033489623735334e-05, "loss": 0.6116, "step": 11270 }, { "epoch": 0.95, "learning_rate": 1.501334444372614e-05, "loss": 0.5393, "step": 11275 }, { "epoch": 0.95, "learning_rate": 1.4993206976591904e-05, "loss": 0.5471, "step": 11280 }, { "epoch": 0.95, "learning_rate": 1.4973077237885135e-05, "loss": 0.5901, "step": 11285 }, { "epoch": 0.95, "learning_rate": 1.4952955243152416e-05, "loss": 0.537, "step": 11290 }, { "epoch": 0.95, "learning_rate": 1.4932841007934311e-05, "loss": 0.5872, "step": 11295 }, { "epoch": 0.95, "learning_rate": 1.4912734547765418e-05, "loss": 0.5622, "step": 11300 }, { "epoch": 0.95, "learning_rate": 1.4892635878174316e-05, "loss": 0.5704, "step": 11305 }, { "epoch": 0.95, "learning_rate": 1.487254501468358e-05, "loss": 0.5825, "step": 11310 }, { "epoch": 0.95, "learning_rate": 1.4852461972809739e-05, "loss": 0.5343, "step": 11315 }, { "epoch": 0.95, "learning_rate": 1.4832386768063289e-05, "loss": 0.5712, "step": 11320 }, { "epoch": 0.95, "learning_rate": 1.4812319415948677e-05, "loss": 0.5921, "step": 11325 }, { "epoch": 0.95, "learning_rate": 1.4792259931964285e-05, "loss": 0.554, "step": 11330 }, { "epoch": 0.95, "learning_rate": 1.4772208331602422e-05, "loss": 0.602, "step": 11335 }, { "epoch": 0.95, "learning_rate": 1.4752164630349286e-05, "loss": 0.5726, "step": 11340 }, { "epoch": 0.95, "learning_rate": 1.4732128843684998e-05, "loss": 0.5491, "step": 11345 }, { "epoch": 0.95, "learning_rate": 1.4712100987083571e-05, "loss": 0.5275, "step": 11350 }, { "epoch": 0.95, "learning_rate": 1.469208107601286e-05, "loss": 0.5683, "step": 11355 }, { "epoch": 0.95, "learning_rate": 1.4672069125934612e-05, "loss": 0.5574, "step": 11360 }, { "epoch": 0.95, "learning_rate": 1.4652065152304422e-05, "loss": 0.5496, "step": 11365 }, { "epoch": 0.95, "learning_rate": 1.4632069170571724e-05, "loss": 0.558, "step": 11370 }, { "epoch": 0.95, "learning_rate": 1.4612081196179773e-05, "loss": 0.5803, "step": 11375 }, { "epoch": 0.95, "learning_rate": 1.4592101244565642e-05, "loss": 0.5026, "step": 11380 }, { "epoch": 0.96, "learning_rate": 1.457212933116021e-05, "loss": 0.5721, "step": 11385 }, { "epoch": 0.96, "learning_rate": 1.4552165471388149e-05, "loss": 0.5605, "step": 11390 }, { "epoch": 0.96, "learning_rate": 1.4532209680667908e-05, "loss": 0.5563, "step": 11395 }, { "epoch": 0.96, "learning_rate": 1.4512261974411707e-05, "loss": 0.5399, "step": 11400 }, { "epoch": 0.96, "learning_rate": 1.4492322368025518e-05, "loss": 0.5712, "step": 11405 }, { "epoch": 0.96, "learning_rate": 1.4472390876909066e-05, "loss": 0.5474, "step": 11410 }, { "epoch": 0.96, "learning_rate": 1.4452467516455797e-05, "loss": 0.5373, "step": 11415 }, { "epoch": 0.96, "learning_rate": 1.4432552302052898e-05, "loss": 0.5563, "step": 11420 }, { "epoch": 0.96, "learning_rate": 1.4412645249081225e-05, "loss": 0.574, "step": 11425 }, { "epoch": 0.96, "learning_rate": 1.4392746372915367e-05, "loss": 0.5236, "step": 11430 }, { "epoch": 0.96, "learning_rate": 1.4372855688923586e-05, "loss": 0.5515, "step": 11435 }, { "epoch": 0.96, "learning_rate": 1.4352973212467833e-05, "loss": 0.486, "step": 11440 }, { "epoch": 0.96, "learning_rate": 1.4333098958903676e-05, "loss": 0.5693, "step": 11445 }, { "epoch": 0.96, "learning_rate": 1.4313232943580376e-05, "loss": 0.541, "step": 11450 }, { "epoch": 0.96, "learning_rate": 1.4293375181840816e-05, "loss": 0.535, "step": 11455 }, { "epoch": 0.96, "learning_rate": 1.4273525689021505e-05, "loss": 0.5453, "step": 11460 }, { "epoch": 0.96, "learning_rate": 1.4253684480452561e-05, "loss": 0.557, "step": 11465 }, { "epoch": 0.96, "learning_rate": 1.4233851571457719e-05, "loss": 0.5558, "step": 11470 }, { "epoch": 0.96, "learning_rate": 1.4214026977354283e-05, "loss": 0.5657, "step": 11475 }, { "epoch": 0.96, "learning_rate": 1.4194210713453154e-05, "loss": 0.5493, "step": 11480 }, { "epoch": 0.96, "learning_rate": 1.4174402795058794e-05, "loss": 0.576, "step": 11485 }, { "epoch": 0.96, "learning_rate": 1.4154603237469205e-05, "loss": 0.5759, "step": 11490 }, { "epoch": 0.96, "learning_rate": 1.413481205597596e-05, "loss": 0.5927, "step": 11495 }, { "epoch": 0.97, "learning_rate": 1.4115029265864127e-05, "loss": 0.5936, "step": 11500 }, { "epoch": 0.97, "learning_rate": 1.4095254882412324e-05, "loss": 0.5589, "step": 11505 }, { "epoch": 0.97, "learning_rate": 1.4075488920892665e-05, "loss": 0.5985, "step": 11510 }, { "epoch": 0.97, "learning_rate": 1.405573139657076e-05, "loss": 0.5273, "step": 11515 }, { "epoch": 0.97, "learning_rate": 1.40359823247057e-05, "loss": 0.5665, "step": 11520 }, { "epoch": 0.97, "learning_rate": 1.4016241720550056e-05, "loss": 0.5956, "step": 11525 }, { "epoch": 0.97, "learning_rate": 1.399650959934985e-05, "loss": 0.6018, "step": 11530 }, { "epoch": 0.97, "learning_rate": 1.3976785976344558e-05, "loss": 0.5144, "step": 11535 }, { "epoch": 0.97, "learning_rate": 1.395707086676709e-05, "loss": 0.5618, "step": 11540 }, { "epoch": 0.97, "learning_rate": 1.3937364285843795e-05, "loss": 0.5731, "step": 11545 }, { "epoch": 0.97, "learning_rate": 1.3917666248794403e-05, "loss": 0.6306, "step": 11550 }, { "epoch": 0.97, "learning_rate": 1.3897976770832072e-05, "loss": 0.5656, "step": 11555 }, { "epoch": 0.97, "learning_rate": 1.3878295867163351e-05, "loss": 0.5516, "step": 11560 }, { "epoch": 0.97, "learning_rate": 1.3858623552988142e-05, "loss": 0.5888, "step": 11565 }, { "epoch": 0.97, "learning_rate": 1.3838959843499738e-05, "loss": 0.5559, "step": 11570 }, { "epoch": 0.97, "learning_rate": 1.3819304753884776e-05, "loss": 0.533, "step": 11575 }, { "epoch": 0.97, "learning_rate": 1.3799658299323242e-05, "loss": 0.5336, "step": 11580 }, { "epoch": 0.97, "learning_rate": 1.3780020494988446e-05, "loss": 0.5973, "step": 11585 }, { "epoch": 0.97, "learning_rate": 1.376039135604702e-05, "loss": 0.6091, "step": 11590 }, { "epoch": 0.97, "learning_rate": 1.3740770897658906e-05, "loss": 0.5932, "step": 11595 }, { "epoch": 0.97, "learning_rate": 1.372115913497734e-05, "loss": 0.5758, "step": 11600 }, { "epoch": 0.97, "learning_rate": 1.3701556083148836e-05, "loss": 0.5612, "step": 11605 }, { "epoch": 0.97, "learning_rate": 1.3681961757313183e-05, "loss": 0.5305, "step": 11610 }, { "epoch": 0.97, "learning_rate": 1.3662376172603442e-05, "loss": 0.5114, "step": 11615 }, { "epoch": 0.98, "learning_rate": 1.3642799344145912e-05, "loss": 0.6398, "step": 11620 }, { "epoch": 0.98, "learning_rate": 1.3623231287060132e-05, "loss": 0.5868, "step": 11625 }, { "epoch": 0.98, "learning_rate": 1.3603672016458877e-05, "loss": 0.6033, "step": 11630 }, { "epoch": 0.98, "learning_rate": 1.3584121547448104e-05, "loss": 0.5572, "step": 11635 }, { "epoch": 0.98, "learning_rate": 1.3564579895127008e-05, "loss": 0.5519, "step": 11640 }, { "epoch": 0.98, "learning_rate": 1.3545047074587958e-05, "loss": 0.6084, "step": 11645 }, { "epoch": 0.98, "learning_rate": 1.3525523100916509e-05, "loss": 0.604, "step": 11650 }, { "epoch": 0.98, "learning_rate": 1.3506007989191371e-05, "loss": 0.5919, "step": 11655 }, { "epoch": 0.98, "learning_rate": 1.348650175448444e-05, "loss": 0.5618, "step": 11660 }, { "epoch": 0.98, "learning_rate": 1.3467004411860707e-05, "loss": 0.5411, "step": 11665 }, { "epoch": 0.98, "learning_rate": 1.344751597637833e-05, "loss": 0.5482, "step": 11670 }, { "epoch": 0.98, "learning_rate": 1.3428036463088588e-05, "loss": 0.5443, "step": 11675 }, { "epoch": 0.98, "learning_rate": 1.3408565887035857e-05, "loss": 0.5831, "step": 11680 }, { "epoch": 0.98, "learning_rate": 1.3389104263257618e-05, "loss": 0.557, "step": 11685 }, { "epoch": 0.98, "learning_rate": 1.3369651606784436e-05, "loss": 0.5962, "step": 11690 }, { "epoch": 0.98, "learning_rate": 1.3350207932639957e-05, "loss": 0.5849, "step": 11695 }, { "epoch": 0.98, "learning_rate": 1.3330773255840864e-05, "loss": 0.5864, "step": 11700 }, { "epoch": 0.98, "learning_rate": 1.3311347591396922e-05, "loss": 0.5857, "step": 11705 }, { "epoch": 0.98, "learning_rate": 1.329193095431092e-05, "loss": 0.589, "step": 11710 }, { "epoch": 0.98, "learning_rate": 1.3272523359578686e-05, "loss": 0.606, "step": 11715 }, { "epoch": 0.98, "learning_rate": 1.3253124822189048e-05, "loss": 0.5794, "step": 11720 }, { "epoch": 0.98, "learning_rate": 1.3233735357123847e-05, "loss": 0.561, "step": 11725 }, { "epoch": 0.98, "learning_rate": 1.3214354979357921e-05, "loss": 0.5456, "step": 11730 }, { "epoch": 0.98, "learning_rate": 1.3194983703859087e-05, "loss": 0.5476, "step": 11735 }, { "epoch": 0.99, "learning_rate": 1.3175621545588135e-05, "loss": 0.6137, "step": 11740 }, { "epoch": 0.99, "learning_rate": 1.3156268519498804e-05, "loss": 0.5602, "step": 11745 }, { "epoch": 0.99, "learning_rate": 1.3136924640537796e-05, "loss": 0.5601, "step": 11750 }, { "epoch": 0.99, "learning_rate": 1.311758992364473e-05, "loss": 0.6328, "step": 11755 }, { "epoch": 0.99, "learning_rate": 1.3098264383752157e-05, "loss": 0.5789, "step": 11760 }, { "epoch": 0.99, "learning_rate": 1.307894803578556e-05, "loss": 0.5078, "step": 11765 }, { "epoch": 0.99, "learning_rate": 1.3059640894663282e-05, "loss": 0.551, "step": 11770 }, { "epoch": 0.99, "learning_rate": 1.3040342975296593e-05, "loss": 0.5566, "step": 11775 }, { "epoch": 0.99, "learning_rate": 1.3021054292589607e-05, "loss": 0.5788, "step": 11780 }, { "epoch": 0.99, "learning_rate": 1.300177486143934e-05, "loss": 0.559, "step": 11785 }, { "epoch": 0.99, "learning_rate": 1.2982504696735637e-05, "loss": 0.5428, "step": 11790 }, { "epoch": 0.99, "learning_rate": 1.2963243813361198e-05, "loss": 0.5508, "step": 11795 }, { "epoch": 0.99, "learning_rate": 1.2943992226191554e-05, "loss": 0.5047, "step": 11800 }, { "epoch": 0.99, "learning_rate": 1.292474995009505e-05, "loss": 0.5882, "step": 11805 }, { "epoch": 0.99, "learning_rate": 1.2905516999932849e-05, "loss": 0.6052, "step": 11810 }, { "epoch": 0.99, "learning_rate": 1.2886293390558906e-05, "loss": 0.5877, "step": 11815 }, { "epoch": 0.99, "learning_rate": 1.286707913681997e-05, "loss": 0.6078, "step": 11820 }, { "epoch": 0.99, "learning_rate": 1.2847874253555542e-05, "loss": 0.5232, "step": 11825 }, { "epoch": 0.99, "learning_rate": 1.2828678755597907e-05, "loss": 0.55, "step": 11830 }, { "epoch": 0.99, "learning_rate": 1.2809492657772103e-05, "loss": 0.5198, "step": 11835 }, { "epoch": 0.99, "learning_rate": 1.2790315974895889e-05, "loss": 0.5618, "step": 11840 }, { "epoch": 0.99, "learning_rate": 1.2771148721779766e-05, "loss": 0.599, "step": 11845 }, { "epoch": 0.99, "learning_rate": 1.2751990913226958e-05, "loss": 0.5736, "step": 11850 }, { "epoch": 0.99, "learning_rate": 1.273284256403338e-05, "loss": 0.6104, "step": 11855 }, { "epoch": 1.0, "learning_rate": 1.2713703688987658e-05, "loss": 0.5407, "step": 11860 }, { "epoch": 1.0, "learning_rate": 1.2694574302871087e-05, "loss": 0.5421, "step": 11865 }, { "epoch": 1.0, "learning_rate": 1.2675454420457633e-05, "loss": 0.5645, "step": 11870 }, { "epoch": 1.0, "learning_rate": 1.2656344056513936e-05, "loss": 0.5515, "step": 11875 }, { "epoch": 1.0, "learning_rate": 1.2637243225799283e-05, "loss": 0.5331, "step": 11880 }, { "epoch": 1.0, "learning_rate": 1.261815194306557e-05, "loss": 0.5888, "step": 11885 }, { "epoch": 1.0, "learning_rate": 1.2599070223057355e-05, "loss": 0.5675, "step": 11890 }, { "epoch": 1.0, "learning_rate": 1.257999808051179e-05, "loss": 0.5694, "step": 11895 }, { "epoch": 1.0, "learning_rate": 1.2560935530158641e-05, "loss": 0.5675, "step": 11900 }, { "epoch": 1.0, "learning_rate": 1.2541882586720266e-05, "loss": 0.5519, "step": 11905 }, { "epoch": 1.0, "learning_rate": 1.2522839264911582e-05, "loss": 0.6126, "step": 11910 }, { "epoch": 1.0, "learning_rate": 1.2503805579440098e-05, "loss": 0.5358, "step": 11915 }, { "epoch": 1.0, "learning_rate": 1.248478154500588e-05, "loss": 0.5702, "step": 11920 }, { "epoch": 1.0, "learning_rate": 1.2465767176301529e-05, "loss": 0.5363, "step": 11925 }, { "epoch": 1.0, "learning_rate": 1.2446762488012187e-05, "loss": 0.5696, "step": 11930 }, { "epoch": 1.0, "learning_rate": 1.242776749481553e-05, "loss": 0.4985, "step": 11935 }, { "epoch": 1.0, "learning_rate": 1.2408782211381717e-05, "loss": 0.651, "step": 11940 }, { "epoch": 1.0, "learning_rate": 1.2389806652373432e-05, "loss": 0.5478, "step": 11945 }, { "epoch": 1.0, "learning_rate": 1.2370840832445845e-05, "loss": 0.6309, "step": 11950 }, { "epoch": 1.0, "learning_rate": 1.2351884766246605e-05, "loss": 0.5694, "step": 11955 }, { "epoch": 1.0, "learning_rate": 1.2332938468415822e-05, "loss": 0.6299, "step": 11960 }, { "epoch": 1.0, "learning_rate": 1.2314001953586064e-05, "loss": 0.5598, "step": 11965 }, { "epoch": 1.0, "learning_rate": 1.2295075236382347e-05, "loss": 0.5648, "step": 11970 }, { "epoch": 1.0, "learning_rate": 1.2276158331422125e-05, "loss": 0.5398, "step": 11975 }, { "epoch": 1.01, "learning_rate": 1.225725125331525e-05, "loss": 0.5054, "step": 11980 }, { "epoch": 1.01, "learning_rate": 1.2238354016664005e-05, "loss": 0.5442, "step": 11985 }, { "epoch": 1.01, "learning_rate": 1.2219466636063074e-05, "loss": 0.5487, "step": 11990 }, { "epoch": 1.01, "learning_rate": 1.220058912609953e-05, "loss": 0.5551, "step": 11995 }, { "epoch": 1.01, "learning_rate": 1.2181721501352797e-05, "loss": 0.5484, "step": 12000 }, { "epoch": 1.01, "learning_rate": 1.2162863776394693e-05, "loss": 0.5785, "step": 12005 }, { "epoch": 1.01, "learning_rate": 1.2144015965789379e-05, "loss": 0.5117, "step": 12010 }, { "epoch": 1.01, "learning_rate": 1.2125178084093363e-05, "loss": 0.5135, "step": 12015 }, { "epoch": 1.01, "learning_rate": 1.2106350145855484e-05, "loss": 0.5041, "step": 12020 }, { "epoch": 1.01, "learning_rate": 1.2087532165616891e-05, "loss": 0.5298, "step": 12025 }, { "epoch": 1.01, "learning_rate": 1.206872415791106e-05, "loss": 0.5489, "step": 12030 }, { "epoch": 1.01, "learning_rate": 1.204992613726375e-05, "loss": 0.5369, "step": 12035 }, { "epoch": 1.01, "learning_rate": 1.2031138118193027e-05, "loss": 0.5557, "step": 12040 }, { "epoch": 1.01, "learning_rate": 1.2012360115209197e-05, "loss": 0.5326, "step": 12045 }, { "epoch": 1.01, "learning_rate": 1.1993592142814872e-05, "loss": 0.5585, "step": 12050 }, { "epoch": 1.01, "learning_rate": 1.1974834215504876e-05, "loss": 0.5762, "step": 12055 }, { "epoch": 1.01, "learning_rate": 1.1956086347766304e-05, "loss": 0.5454, "step": 12060 }, { "epoch": 1.01, "learning_rate": 1.193734855407848e-05, "loss": 0.5747, "step": 12065 }, { "epoch": 1.01, "learning_rate": 1.1918620848912936e-05, "loss": 0.6023, "step": 12070 }, { "epoch": 1.01, "learning_rate": 1.189990324673342e-05, "loss": 0.5115, "step": 12075 }, { "epoch": 1.01, "learning_rate": 1.1881195761995874e-05, "loss": 0.5285, "step": 12080 }, { "epoch": 1.01, "learning_rate": 1.1862498409148428e-05, "loss": 0.5913, "step": 12085 }, { "epoch": 1.01, "learning_rate": 1.1843811202631389e-05, "loss": 0.5746, "step": 12090 }, { "epoch": 1.01, "learning_rate": 1.1825134156877218e-05, "loss": 0.5634, "step": 12095 }, { "epoch": 1.02, "learning_rate": 1.1806467286310552e-05, "loss": 0.5457, "step": 12100 }, { "epoch": 1.02, "learning_rate": 1.1787810605348132e-05, "loss": 0.5083, "step": 12105 }, { "epoch": 1.02, "learning_rate": 1.1769164128398857e-05, "loss": 0.5686, "step": 12110 }, { "epoch": 1.02, "learning_rate": 1.1750527869863754e-05, "loss": 0.5407, "step": 12115 }, { "epoch": 1.02, "learning_rate": 1.173190184413592e-05, "loss": 0.567, "step": 12120 }, { "epoch": 1.02, "learning_rate": 1.1713286065600581e-05, "loss": 0.4763, "step": 12125 }, { "epoch": 1.02, "learning_rate": 1.1694680548635043e-05, "loss": 0.582, "step": 12130 }, { "epoch": 1.02, "learning_rate": 1.1676085307608684e-05, "loss": 0.5679, "step": 12135 }, { "epoch": 1.02, "learning_rate": 1.165750035688294e-05, "loss": 0.5414, "step": 12140 }, { "epoch": 1.02, "learning_rate": 1.1638925710811311e-05, "loss": 0.5727, "step": 12145 }, { "epoch": 1.02, "learning_rate": 1.1620361383739328e-05, "loss": 0.5124, "step": 12150 }, { "epoch": 1.02, "learning_rate": 1.1601807390004568e-05, "loss": 0.5351, "step": 12155 }, { "epoch": 1.02, "learning_rate": 1.1583263743936602e-05, "loss": 0.5607, "step": 12160 }, { "epoch": 1.02, "learning_rate": 1.1564730459857026e-05, "loss": 0.5307, "step": 12165 }, { "epoch": 1.02, "learning_rate": 1.1546207552079433e-05, "loss": 0.619, "step": 12170 }, { "epoch": 1.02, "learning_rate": 1.1527695034909402e-05, "loss": 0.5174, "step": 12175 }, { "epoch": 1.02, "learning_rate": 1.1509192922644488e-05, "loss": 0.4608, "step": 12180 }, { "epoch": 1.02, "learning_rate": 1.149070122957421e-05, "loss": 0.5703, "step": 12185 }, { "epoch": 1.02, "learning_rate": 1.1472219969980022e-05, "loss": 0.539, "step": 12190 }, { "epoch": 1.02, "learning_rate": 1.1453749158135346e-05, "loss": 0.553, "step": 12195 }, { "epoch": 1.02, "learning_rate": 1.1435288808305524e-05, "loss": 0.5674, "step": 12200 }, { "epoch": 1.02, "learning_rate": 1.1416838934747818e-05, "loss": 0.5666, "step": 12205 }, { "epoch": 1.02, "learning_rate": 1.1398399551711396e-05, "loss": 0.5712, "step": 12210 }, { "epoch": 1.03, "learning_rate": 1.1379970673437344e-05, "loss": 0.6039, "step": 12215 }, { "epoch": 1.03, "learning_rate": 1.1361552314158594e-05, "loss": 0.5956, "step": 12220 }, { "epoch": 1.03, "learning_rate": 1.1343144488099992e-05, "loss": 0.5614, "step": 12225 }, { "epoch": 1.03, "learning_rate": 1.1324747209478228e-05, "loss": 0.6717, "step": 12230 }, { "epoch": 1.03, "learning_rate": 1.1306360492501864e-05, "loss": 0.5222, "step": 12235 }, { "epoch": 1.03, "learning_rate": 1.1287984351371287e-05, "loss": 0.5905, "step": 12240 }, { "epoch": 1.03, "learning_rate": 1.1269618800278725e-05, "loss": 0.519, "step": 12245 }, { "epoch": 1.03, "learning_rate": 1.1251263853408226e-05, "loss": 0.5748, "step": 12250 }, { "epoch": 1.03, "learning_rate": 1.123291952493566e-05, "loss": 0.5509, "step": 12255 }, { "epoch": 1.03, "learning_rate": 1.1214585829028659e-05, "loss": 0.5865, "step": 12260 }, { "epoch": 1.03, "learning_rate": 1.1196262779846686e-05, "loss": 0.5734, "step": 12265 }, { "epoch": 1.03, "learning_rate": 1.1177950391540967e-05, "loss": 0.5341, "step": 12270 }, { "epoch": 1.03, "learning_rate": 1.1159648678254475e-05, "loss": 0.4894, "step": 12275 }, { "epoch": 1.03, "learning_rate": 1.1141357654121961e-05, "loss": 0.5596, "step": 12280 }, { "epoch": 1.03, "learning_rate": 1.1123077333269922e-05, "loss": 0.4774, "step": 12285 }, { "epoch": 1.03, "learning_rate": 1.1104807729816569e-05, "loss": 0.6417, "step": 12290 }, { "epoch": 1.03, "learning_rate": 1.1086548857871856e-05, "loss": 0.5083, "step": 12295 }, { "epoch": 1.03, "learning_rate": 1.106830073153744e-05, "loss": 0.5692, "step": 12300 }, { "epoch": 1.03, "learning_rate": 1.1050063364906676e-05, "loss": 0.5827, "step": 12305 }, { "epoch": 1.03, "learning_rate": 1.1031836772064616e-05, "loss": 0.6147, "step": 12310 }, { "epoch": 1.03, "learning_rate": 1.1013620967087984e-05, "loss": 0.5563, "step": 12315 }, { "epoch": 1.03, "learning_rate": 1.0995415964045188e-05, "loss": 0.574, "step": 12320 }, { "epoch": 1.03, "learning_rate": 1.0977221776996267e-05, "loss": 0.5888, "step": 12325 }, { "epoch": 1.03, "learning_rate": 1.0959038419992929e-05, "loss": 0.512, "step": 12330 }, { "epoch": 1.04, "learning_rate": 1.0940865907078501e-05, "loss": 0.5516, "step": 12335 }, { "epoch": 1.04, "learning_rate": 1.0922704252287949e-05, "loss": 0.5854, "step": 12340 }, { "epoch": 1.04, "learning_rate": 1.0904553469647847e-05, "loss": 0.5827, "step": 12345 }, { "epoch": 1.04, "learning_rate": 1.0886413573176377e-05, "loss": 0.521, "step": 12350 }, { "epoch": 1.04, "learning_rate": 1.0868284576883305e-05, "loss": 0.5441, "step": 12355 }, { "epoch": 1.04, "learning_rate": 1.0850166494769987e-05, "loss": 0.5766, "step": 12360 }, { "epoch": 1.04, "learning_rate": 1.0832059340829345e-05, "loss": 0.6025, "step": 12365 }, { "epoch": 1.04, "learning_rate": 1.081396312904586e-05, "loss": 0.5356, "step": 12370 }, { "epoch": 1.04, "learning_rate": 1.0795877873395577e-05, "loss": 0.5426, "step": 12375 }, { "epoch": 1.04, "learning_rate": 1.0777803587846041e-05, "loss": 0.4874, "step": 12380 }, { "epoch": 1.04, "learning_rate": 1.0759740286356368e-05, "loss": 0.5886, "step": 12385 }, { "epoch": 1.04, "learning_rate": 1.074168798287717e-05, "loss": 0.547, "step": 12390 }, { "epoch": 1.04, "learning_rate": 1.0723646691350573e-05, "loss": 0.5464, "step": 12395 }, { "epoch": 1.04, "learning_rate": 1.0705616425710177e-05, "loss": 0.5746, "step": 12400 }, { "epoch": 1.04, "learning_rate": 1.0687597199881094e-05, "loss": 0.516, "step": 12405 }, { "epoch": 1.04, "learning_rate": 1.0669589027779898e-05, "loss": 0.5344, "step": 12410 }, { "epoch": 1.04, "learning_rate": 1.0651591923314622e-05, "loss": 0.5919, "step": 12415 }, { "epoch": 1.04, "learning_rate": 1.0633605900384764e-05, "loss": 0.5889, "step": 12420 }, { "epoch": 1.04, "learning_rate": 1.0615630972881246e-05, "loss": 0.5684, "step": 12425 }, { "epoch": 1.04, "learning_rate": 1.0597667154686436e-05, "loss": 0.6377, "step": 12430 }, { "epoch": 1.04, "learning_rate": 1.0579714459674126e-05, "loss": 0.5692, "step": 12435 }, { "epoch": 1.04, "learning_rate": 1.0561772901709486e-05, "loss": 0.5971, "step": 12440 }, { "epoch": 1.04, "learning_rate": 1.0543842494649115e-05, "loss": 0.6144, "step": 12445 }, { "epoch": 1.04, "learning_rate": 1.0525923252340991e-05, "loss": 0.551, "step": 12450 }, { "epoch": 1.05, "learning_rate": 1.0508015188624474e-05, "loss": 0.5299, "step": 12455 }, { "epoch": 1.05, "learning_rate": 1.0490118317330281e-05, "loss": 0.5352, "step": 12460 }, { "epoch": 1.05, "learning_rate": 1.04722326522805e-05, "loss": 0.4998, "step": 12465 }, { "epoch": 1.05, "learning_rate": 1.0454358207288537e-05, "loss": 0.5192, "step": 12470 }, { "epoch": 1.05, "learning_rate": 1.043649499615916e-05, "loss": 0.4971, "step": 12475 }, { "epoch": 1.05, "learning_rate": 1.0418643032688446e-05, "loss": 0.5089, "step": 12480 }, { "epoch": 1.05, "learning_rate": 1.0400802330663791e-05, "loss": 0.5426, "step": 12485 }, { "epoch": 1.05, "learning_rate": 1.0382972903863908e-05, "loss": 0.5138, "step": 12490 }, { "epoch": 1.05, "learning_rate": 1.0365154766058757e-05, "loss": 0.5604, "step": 12495 }, { "epoch": 1.05, "learning_rate": 1.0347347931009627e-05, "loss": 0.5011, "step": 12500 }, { "epoch": 1.05, "learning_rate": 1.0329552412469054e-05, "loss": 0.5385, "step": 12505 }, { "epoch": 1.05, "learning_rate": 1.031176822418084e-05, "loss": 0.5486, "step": 12510 }, { "epoch": 1.05, "learning_rate": 1.029399537988004e-05, "loss": 0.5091, "step": 12515 }, { "epoch": 1.05, "learning_rate": 1.0276233893292935e-05, "loss": 0.5404, "step": 12520 }, { "epoch": 1.05, "learning_rate": 1.0258483778137048e-05, "loss": 0.6191, "step": 12525 }, { "epoch": 1.05, "learning_rate": 1.0240745048121117e-05, "loss": 0.5341, "step": 12530 }, { "epoch": 1.05, "learning_rate": 1.022301771694509e-05, "loss": 0.5434, "step": 12535 }, { "epoch": 1.05, "learning_rate": 1.0205301798300087e-05, "loss": 0.6465, "step": 12540 }, { "epoch": 1.05, "learning_rate": 1.0187597305868446e-05, "loss": 0.5777, "step": 12545 }, { "epoch": 1.05, "learning_rate": 1.016990425332367e-05, "loss": 0.516, "step": 12550 }, { "epoch": 1.05, "learning_rate": 1.0152222654330412e-05, "loss": 0.6171, "step": 12555 }, { "epoch": 1.05, "learning_rate": 1.0134552522544496e-05, "loss": 0.5774, "step": 12560 }, { "epoch": 1.05, "learning_rate": 1.0116893871612887e-05, "loss": 0.5896, "step": 12565 }, { "epoch": 1.05, "learning_rate": 1.0099246715173679e-05, "loss": 0.6159, "step": 12570 }, { "epoch": 1.06, "learning_rate": 1.0081611066856092e-05, "loss": 0.5998, "step": 12575 }, { "epoch": 1.06, "learning_rate": 1.0063986940280457e-05, "loss": 0.5541, "step": 12580 }, { "epoch": 1.06, "learning_rate": 1.0046374349058205e-05, "loss": 0.5577, "step": 12585 }, { "epoch": 1.06, "learning_rate": 1.0028773306791858e-05, "loss": 0.5567, "step": 12590 }, { "epoch": 1.06, "learning_rate": 1.0011183827075032e-05, "loss": 0.6003, "step": 12595 }, { "epoch": 1.06, "learning_rate": 9.99360592349238e-06, "loss": 0.5427, "step": 12600 }, { "epoch": 1.06, "learning_rate": 9.976039609619655e-06, "loss": 0.5245, "step": 12605 }, { "epoch": 1.06, "learning_rate": 9.95848489902362e-06, "loss": 0.5329, "step": 12610 }, { "epoch": 1.06, "learning_rate": 9.940941805262107e-06, "loss": 0.5744, "step": 12615 }, { "epoch": 1.06, "learning_rate": 9.923410341883963e-06, "loss": 0.6009, "step": 12620 }, { "epoch": 1.06, "learning_rate": 9.905890522429053e-06, "loss": 0.5419, "step": 12625 }, { "epoch": 1.06, "learning_rate": 9.888382360428258e-06, "loss": 0.5754, "step": 12630 }, { "epoch": 1.06, "learning_rate": 9.87088586940344e-06, "loss": 0.543, "step": 12635 }, { "epoch": 1.06, "learning_rate": 9.85340106286746e-06, "loss": 0.6561, "step": 12640 }, { "epoch": 1.06, "learning_rate": 9.83592795432415e-06, "loss": 0.5657, "step": 12645 }, { "epoch": 1.06, "learning_rate": 9.81846655726831e-06, "loss": 0.6056, "step": 12650 }, { "epoch": 1.06, "learning_rate": 9.801016885185698e-06, "loss": 0.5428, "step": 12655 }, { "epoch": 1.06, "learning_rate": 9.783578951552998e-06, "loss": 0.545, "step": 12660 }, { "epoch": 1.06, "learning_rate": 9.76615276983785e-06, "loss": 0.5215, "step": 12665 }, { "epoch": 1.06, "learning_rate": 9.74873835349881e-06, "loss": 0.6217, "step": 12670 }, { "epoch": 1.06, "learning_rate": 9.731335715985351e-06, "loss": 0.5407, "step": 12675 }, { "epoch": 1.06, "learning_rate": 9.713944870737834e-06, "loss": 0.571, "step": 12680 }, { "epoch": 1.06, "learning_rate": 9.696565831187531e-06, "loss": 0.5372, "step": 12685 }, { "epoch": 1.06, "learning_rate": 9.679198610756588e-06, "loss": 0.4813, "step": 12690 }, { "epoch": 1.07, "learning_rate": 9.661843222858021e-06, "loss": 0.5799, "step": 12695 }, { "epoch": 1.07, "learning_rate": 9.644499680895716e-06, "loss": 0.5233, "step": 12700 }, { "epoch": 1.07, "learning_rate": 9.627167998264403e-06, "loss": 0.5943, "step": 12705 }, { "epoch": 1.07, "learning_rate": 9.60984818834966e-06, "loss": 0.5759, "step": 12710 }, { "epoch": 1.07, "learning_rate": 9.592540264527874e-06, "loss": 0.528, "step": 12715 }, { "epoch": 1.07, "learning_rate": 9.575244240166278e-06, "loss": 0.5312, "step": 12720 }, { "epoch": 1.07, "learning_rate": 9.557960128622906e-06, "loss": 0.5256, "step": 12725 }, { "epoch": 1.07, "learning_rate": 9.540687943246584e-06, "loss": 0.5816, "step": 12730 }, { "epoch": 1.07, "learning_rate": 9.523427697376942e-06, "loss": 0.4891, "step": 12735 }, { "epoch": 1.07, "learning_rate": 9.506179404344372e-06, "loss": 0.5242, "step": 12740 }, { "epoch": 1.07, "learning_rate": 9.488943077470061e-06, "loss": 0.5526, "step": 12745 }, { "epoch": 1.07, "learning_rate": 9.471718730065912e-06, "loss": 0.5419, "step": 12750 }, { "epoch": 1.07, "learning_rate": 9.454506375434613e-06, "loss": 0.5985, "step": 12755 }, { "epoch": 1.07, "learning_rate": 9.437306026869572e-06, "loss": 0.572, "step": 12760 }, { "epoch": 1.07, "learning_rate": 9.420117697654938e-06, "loss": 0.5706, "step": 12765 }, { "epoch": 1.07, "learning_rate": 9.402941401065569e-06, "loss": 0.6406, "step": 12770 }, { "epoch": 1.07, "learning_rate": 9.385777150367018e-06, "loss": 0.4826, "step": 12775 }, { "epoch": 1.07, "learning_rate": 9.368624958815553e-06, "loss": 0.5004, "step": 12780 }, { "epoch": 1.07, "learning_rate": 9.351484839658122e-06, "loss": 0.5844, "step": 12785 }, { "epoch": 1.07, "learning_rate": 9.334356806132352e-06, "loss": 0.5565, "step": 12790 }, { "epoch": 1.07, "learning_rate": 9.317240871466531e-06, "loss": 0.5639, "step": 12795 }, { "epoch": 1.07, "learning_rate": 9.300137048879607e-06, "loss": 0.6016, "step": 12800 }, { "epoch": 1.07, "learning_rate": 9.283045351581171e-06, "loss": 0.5799, "step": 12805 }, { "epoch": 1.07, "learning_rate": 9.265965792771449e-06, "loss": 0.524, "step": 12810 }, { "epoch": 1.08, "learning_rate": 9.248898385641307e-06, "loss": 0.5632, "step": 12815 }, { "epoch": 1.08, "learning_rate": 9.23184314337219e-06, "loss": 0.5491, "step": 12820 }, { "epoch": 1.08, "learning_rate": 9.214800079136191e-06, "loss": 0.5567, "step": 12825 }, { "epoch": 1.08, "learning_rate": 9.197769206095958e-06, "loss": 0.5319, "step": 12830 }, { "epoch": 1.08, "learning_rate": 9.18075053740475e-06, "loss": 0.5801, "step": 12835 }, { "epoch": 1.08, "learning_rate": 9.163744086206399e-06, "loss": 0.5611, "step": 12840 }, { "epoch": 1.08, "learning_rate": 9.146749865635287e-06, "loss": 0.5397, "step": 12845 }, { "epoch": 1.08, "learning_rate": 9.129767888816368e-06, "loss": 0.5553, "step": 12850 }, { "epoch": 1.08, "learning_rate": 9.11279816886512e-06, "loss": 0.6536, "step": 12855 }, { "epoch": 1.08, "learning_rate": 9.095840718887572e-06, "loss": 0.5819, "step": 12860 }, { "epoch": 1.08, "learning_rate": 9.078895551980269e-06, "loss": 0.5717, "step": 12865 }, { "epoch": 1.08, "learning_rate": 9.061962681230266e-06, "loss": 0.5462, "step": 12870 }, { "epoch": 1.08, "learning_rate": 9.045042119715138e-06, "loss": 0.5826, "step": 12875 }, { "epoch": 1.08, "learning_rate": 9.028133880502925e-06, "loss": 0.5553, "step": 12880 }, { "epoch": 1.08, "learning_rate": 9.011237976652182e-06, "loss": 0.5711, "step": 12885 }, { "epoch": 1.08, "learning_rate": 8.994354421211901e-06, "loss": 0.5211, "step": 12890 }, { "epoch": 1.08, "learning_rate": 8.977483227221572e-06, "loss": 0.5823, "step": 12895 }, { "epoch": 1.08, "learning_rate": 8.960624407711121e-06, "loss": 0.57, "step": 12900 }, { "epoch": 1.08, "learning_rate": 8.943777975700917e-06, "loss": 0.5411, "step": 12905 }, { "epoch": 1.08, "learning_rate": 8.92694394420177e-06, "loss": 0.596, "step": 12910 }, { "epoch": 1.08, "learning_rate": 8.910122326214904e-06, "loss": 0.5031, "step": 12915 }, { "epoch": 1.08, "learning_rate": 8.89331313473196e-06, "loss": 0.585, "step": 12920 }, { "epoch": 1.08, "learning_rate": 8.876516382734979e-06, "loss": 0.5129, "step": 12925 }, { "epoch": 1.09, "learning_rate": 8.85973208319641e-06, "loss": 0.467, "step": 12930 }, { "epoch": 1.09, "learning_rate": 8.84296024907905e-06, "loss": 0.5337, "step": 12935 }, { "epoch": 1.09, "learning_rate": 8.826200893336098e-06, "loss": 0.5834, "step": 12940 }, { "epoch": 1.09, "learning_rate": 8.809454028911113e-06, "loss": 0.5485, "step": 12945 }, { "epoch": 1.09, "learning_rate": 8.792719668737997e-06, "loss": 0.5869, "step": 12950 }, { "epoch": 1.09, "learning_rate": 8.775997825741011e-06, "loss": 0.5672, "step": 12955 }, { "epoch": 1.09, "learning_rate": 8.75928851283472e-06, "loss": 0.5628, "step": 12960 }, { "epoch": 1.09, "learning_rate": 8.742591742924037e-06, "loss": 0.5207, "step": 12965 }, { "epoch": 1.09, "learning_rate": 8.725907528904179e-06, "loss": 0.5343, "step": 12970 }, { "epoch": 1.09, "learning_rate": 8.709235883660666e-06, "loss": 0.5633, "step": 12975 }, { "epoch": 1.09, "learning_rate": 8.692576820069312e-06, "loss": 0.5291, "step": 12980 }, { "epoch": 1.09, "learning_rate": 8.675930350996223e-06, "loss": 0.522, "step": 12985 }, { "epoch": 1.09, "learning_rate": 8.65929648929775e-06, "loss": 0.5456, "step": 12990 }, { "epoch": 1.09, "learning_rate": 8.642675247820537e-06, "loss": 0.5712, "step": 12995 }, { "epoch": 1.09, "learning_rate": 8.626066639401467e-06, "loss": 0.53, "step": 13000 }, { "epoch": 1.09, "learning_rate": 8.60947067686767e-06, "loss": 0.6185, "step": 13005 }, { "epoch": 1.09, "learning_rate": 8.59288737303651e-06, "loss": 0.5863, "step": 13010 }, { "epoch": 1.09, "learning_rate": 8.576316740715567e-06, "loss": 0.5543, "step": 13015 }, { "epoch": 1.09, "learning_rate": 8.559758792702649e-06, "loss": 0.5255, "step": 13020 }, { "epoch": 1.09, "learning_rate": 8.543213541785763e-06, "loss": 0.5822, "step": 13025 }, { "epoch": 1.09, "learning_rate": 8.526681000743089e-06, "loss": 0.5855, "step": 13030 }, { "epoch": 1.09, "learning_rate": 8.510161182343016e-06, "loss": 0.5417, "step": 13035 }, { "epoch": 1.09, "learning_rate": 8.4936540993441e-06, "loss": 0.5898, "step": 13040 }, { "epoch": 1.09, "learning_rate": 8.477159764495069e-06, "loss": 0.573, "step": 13045 }, { "epoch": 1.1, "learning_rate": 8.46067819053478e-06, "loss": 0.6133, "step": 13050 }, { "epoch": 1.1, "learning_rate": 8.444209390192257e-06, "loss": 0.5107, "step": 13055 }, { "epoch": 1.1, "learning_rate": 8.427753376186653e-06, "loss": 0.5636, "step": 13060 }, { "epoch": 1.1, "learning_rate": 8.411310161227246e-06, "loss": 0.5607, "step": 13065 }, { "epoch": 1.1, "learning_rate": 8.394879758013433e-06, "loss": 0.4598, "step": 13070 }, { "epoch": 1.1, "learning_rate": 8.378462179234705e-06, "loss": 0.53, "step": 13075 }, { "epoch": 1.1, "learning_rate": 8.362057437570661e-06, "loss": 0.5489, "step": 13080 }, { "epoch": 1.1, "learning_rate": 8.345665545690978e-06, "loss": 0.5869, "step": 13085 }, { "epoch": 1.1, "learning_rate": 8.32928651625542e-06, "loss": 0.5951, "step": 13090 }, { "epoch": 1.1, "learning_rate": 8.312920361913786e-06, "loss": 0.5862, "step": 13095 }, { "epoch": 1.1, "learning_rate": 8.29656709530598e-06, "loss": 0.5711, "step": 13100 }, { "epoch": 1.1, "learning_rate": 8.280226729061902e-06, "loss": 0.6099, "step": 13105 }, { "epoch": 1.1, "learning_rate": 8.263899275801523e-06, "loss": 0.5446, "step": 13110 }, { "epoch": 1.1, "learning_rate": 8.247584748134832e-06, "loss": 0.5948, "step": 13115 }, { "epoch": 1.1, "learning_rate": 8.231283158661831e-06, "loss": 0.5534, "step": 13120 }, { "epoch": 1.1, "learning_rate": 8.214994519972539e-06, "loss": 0.5246, "step": 13125 }, { "epoch": 1.1, "learning_rate": 8.198718844646958e-06, "loss": 0.5595, "step": 13130 }, { "epoch": 1.1, "learning_rate": 8.182456145255093e-06, "loss": 0.5895, "step": 13135 }, { "epoch": 1.1, "learning_rate": 8.166206434356921e-06, "loss": 0.5374, "step": 13140 }, { "epoch": 1.1, "learning_rate": 8.149969724502384e-06, "loss": 0.5705, "step": 13145 }, { "epoch": 1.1, "learning_rate": 8.1337460282314e-06, "loss": 0.5996, "step": 13150 }, { "epoch": 1.1, "learning_rate": 8.117535358073806e-06, "loss": 0.6057, "step": 13155 }, { "epoch": 1.1, "learning_rate": 8.101337726549401e-06, "loss": 0.6053, "step": 13160 }, { "epoch": 1.1, "learning_rate": 8.085153146167924e-06, "loss": 0.5806, "step": 13165 }, { "epoch": 1.11, "learning_rate": 8.068981629428992e-06, "loss": 0.5508, "step": 13170 }, { "epoch": 1.11, "learning_rate": 8.052823188822182e-06, "loss": 0.5408, "step": 13175 }, { "epoch": 1.11, "learning_rate": 8.03667783682694e-06, "loss": 0.5202, "step": 13180 }, { "epoch": 1.11, "learning_rate": 8.020545585912617e-06, "loss": 0.5109, "step": 13185 }, { "epoch": 1.11, "learning_rate": 8.004426448538444e-06, "loss": 0.557, "step": 13190 }, { "epoch": 1.11, "learning_rate": 7.98832043715352e-06, "loss": 0.5086, "step": 13195 }, { "epoch": 1.11, "learning_rate": 7.972227564196813e-06, "loss": 0.5896, "step": 13200 }, { "epoch": 1.11, "learning_rate": 7.956147842097148e-06, "loss": 0.5642, "step": 13205 }, { "epoch": 1.11, "learning_rate": 7.940081283273166e-06, "loss": 0.5646, "step": 13210 }, { "epoch": 1.11, "learning_rate": 7.924027900133368e-06, "loss": 0.5673, "step": 13215 }, { "epoch": 1.11, "learning_rate": 7.907987705076079e-06, "loss": 0.5999, "step": 13220 }, { "epoch": 1.11, "learning_rate": 7.891960710489427e-06, "loss": 0.5264, "step": 13225 }, { "epoch": 1.11, "learning_rate": 7.875946928751355e-06, "loss": 0.5298, "step": 13230 }, { "epoch": 1.11, "learning_rate": 7.8599463722296e-06, "loss": 0.593, "step": 13235 }, { "epoch": 1.11, "learning_rate": 7.843959053281663e-06, "loss": 0.558, "step": 13240 }, { "epoch": 1.11, "learning_rate": 7.827984984254858e-06, "loss": 0.5638, "step": 13245 }, { "epoch": 1.11, "learning_rate": 7.812024177486238e-06, "loss": 0.5529, "step": 13250 }, { "epoch": 1.11, "learning_rate": 7.796076645302631e-06, "loss": 0.5871, "step": 13255 }, { "epoch": 1.11, "learning_rate": 7.780142400020596e-06, "loss": 0.5087, "step": 13260 }, { "epoch": 1.11, "learning_rate": 7.764221453946454e-06, "loss": 0.573, "step": 13265 }, { "epoch": 1.11, "learning_rate": 7.748313819376224e-06, "loss": 0.5748, "step": 13270 }, { "epoch": 1.11, "learning_rate": 7.732419508595667e-06, "loss": 0.6045, "step": 13275 }, { "epoch": 1.11, "learning_rate": 7.716538533880244e-06, "loss": 0.5874, "step": 13280 }, { "epoch": 1.11, "learning_rate": 7.700670907495122e-06, "loss": 0.6154, "step": 13285 }, { "epoch": 1.12, "learning_rate": 7.684816641695158e-06, "loss": 0.5876, "step": 13290 }, { "epoch": 1.12, "learning_rate": 7.668975748724886e-06, "loss": 0.5616, "step": 13295 }, { "epoch": 1.12, "learning_rate": 7.653148240818513e-06, "loss": 0.5844, "step": 13300 }, { "epoch": 1.12, "learning_rate": 7.637334130199925e-06, "loss": 0.5226, "step": 13305 }, { "epoch": 1.12, "learning_rate": 7.621533429082622e-06, "loss": 0.6274, "step": 13310 }, { "epoch": 1.12, "learning_rate": 7.6057461496697814e-06, "loss": 0.5462, "step": 13315 }, { "epoch": 1.12, "learning_rate": 7.589972304154219e-06, "loss": 0.5495, "step": 13320 }, { "epoch": 1.12, "learning_rate": 7.574211904718337e-06, "loss": 0.5396, "step": 13325 }, { "epoch": 1.12, "learning_rate": 7.55846496353419e-06, "loss": 0.5634, "step": 13330 }, { "epoch": 1.12, "learning_rate": 7.5427314927634275e-06, "loss": 0.6113, "step": 13335 }, { "epoch": 1.12, "learning_rate": 7.527011504557288e-06, "loss": 0.5644, "step": 13340 }, { "epoch": 1.12, "learning_rate": 7.51130501105661e-06, "loss": 0.5277, "step": 13345 }, { "epoch": 1.12, "learning_rate": 7.495612024391796e-06, "loss": 0.5346, "step": 13350 }, { "epoch": 1.12, "learning_rate": 7.479932556682831e-06, "loss": 0.5624, "step": 13355 }, { "epoch": 1.12, "learning_rate": 7.464266620039245e-06, "loss": 0.5723, "step": 13360 }, { "epoch": 1.12, "learning_rate": 7.4486142265601275e-06, "loss": 0.5872, "step": 13365 }, { "epoch": 1.12, "learning_rate": 7.4329753883341154e-06, "loss": 0.5811, "step": 13370 }, { "epoch": 1.12, "learning_rate": 7.417350117439348e-06, "loss": 0.4941, "step": 13375 }, { "epoch": 1.12, "learning_rate": 7.401738425943522e-06, "loss": 0.5457, "step": 13380 }, { "epoch": 1.12, "learning_rate": 7.386140325903815e-06, "loss": 0.5553, "step": 13385 }, { "epoch": 1.12, "learning_rate": 7.370555829366926e-06, "loss": 0.5194, "step": 13390 }, { "epoch": 1.12, "learning_rate": 7.354984948369048e-06, "loss": 0.6924, "step": 13395 }, { "epoch": 1.12, "learning_rate": 7.339427694935855e-06, "loss": 0.5122, "step": 13400 }, { "epoch": 1.12, "learning_rate": 7.323884081082491e-06, "loss": 0.4999, "step": 13405 }, { "epoch": 1.13, "learning_rate": 7.308354118813571e-06, "loss": 0.5256, "step": 13410 }, { "epoch": 1.13, "learning_rate": 7.2928378201231716e-06, "loss": 0.5143, "step": 13415 }, { "epoch": 1.13, "learning_rate": 7.2773351969948054e-06, "loss": 0.5461, "step": 13420 }, { "epoch": 1.13, "learning_rate": 7.261846261401439e-06, "loss": 0.561, "step": 13425 }, { "epoch": 1.13, "learning_rate": 7.246371025305446e-06, "loss": 0.4667, "step": 13430 }, { "epoch": 1.13, "learning_rate": 7.230909500658631e-06, "loss": 0.4954, "step": 13435 }, { "epoch": 1.13, "learning_rate": 7.215461699402218e-06, "loss": 0.5799, "step": 13440 }, { "epoch": 1.13, "learning_rate": 7.200027633466825e-06, "loss": 0.5416, "step": 13445 }, { "epoch": 1.13, "learning_rate": 7.184607314772448e-06, "loss": 0.5944, "step": 13450 }, { "epoch": 1.13, "learning_rate": 7.1692007552284865e-06, "loss": 0.5725, "step": 13455 }, { "epoch": 1.13, "learning_rate": 7.153807966733705e-06, "loss": 0.5452, "step": 13460 }, { "epoch": 1.13, "learning_rate": 7.1384289611762325e-06, "loss": 0.4928, "step": 13465 }, { "epoch": 1.13, "learning_rate": 7.1230637504335534e-06, "loss": 0.5741, "step": 13470 }, { "epoch": 1.13, "learning_rate": 7.1077123463725e-06, "loss": 0.5309, "step": 13475 }, { "epoch": 1.13, "learning_rate": 7.092374760849238e-06, "loss": 0.6216, "step": 13480 }, { "epoch": 1.13, "learning_rate": 7.077051005709268e-06, "loss": 0.5395, "step": 13485 }, { "epoch": 1.13, "learning_rate": 7.061741092787394e-06, "loss": 0.4846, "step": 13490 }, { "epoch": 1.13, "learning_rate": 7.046445033907739e-06, "loss": 0.6235, "step": 13495 }, { "epoch": 1.13, "learning_rate": 7.0311628408837345e-06, "loss": 0.5673, "step": 13500 }, { "epoch": 1.13, "learning_rate": 7.015894525518091e-06, "loss": 0.56, "step": 13505 }, { "epoch": 1.13, "learning_rate": 7.000640099602801e-06, "loss": 0.5565, "step": 13510 }, { "epoch": 1.13, "learning_rate": 6.985399574919149e-06, "loss": 0.6283, "step": 13515 }, { "epoch": 1.13, "learning_rate": 6.970172963237648e-06, "loss": 0.5349, "step": 13520 }, { "epoch": 1.13, "learning_rate": 6.954960276318095e-06, "loss": 0.517, "step": 13525 }, { "epoch": 1.14, "learning_rate": 6.939761525909527e-06, "loss": 0.5079, "step": 13530 }, { "epoch": 1.14, "learning_rate": 6.924576723750206e-06, "loss": 0.6075, "step": 13535 }, { "epoch": 1.14, "learning_rate": 6.909405881567646e-06, "loss": 0.5374, "step": 13540 }, { "epoch": 1.14, "learning_rate": 6.894249011078544e-06, "loss": 0.5408, "step": 13545 }, { "epoch": 1.14, "learning_rate": 6.879106123988832e-06, "loss": 0.5577, "step": 13550 }, { "epoch": 1.14, "learning_rate": 6.863977231993637e-06, "loss": 0.5639, "step": 13555 }, { "epoch": 1.14, "learning_rate": 6.848862346777277e-06, "loss": 0.5628, "step": 13560 }, { "epoch": 1.14, "learning_rate": 6.833761480013251e-06, "loss": 0.5311, "step": 13565 }, { "epoch": 1.14, "learning_rate": 6.818674643364231e-06, "loss": 0.5234, "step": 13570 }, { "epoch": 1.14, "learning_rate": 6.803601848482058e-06, "loss": 0.5192, "step": 13575 }, { "epoch": 1.14, "learning_rate": 6.788543107007719e-06, "loss": 0.545, "step": 13580 }, { "epoch": 1.14, "learning_rate": 6.773498430571365e-06, "loss": 0.5529, "step": 13585 }, { "epoch": 1.14, "learning_rate": 6.758467830792253e-06, "loss": 0.5449, "step": 13590 }, { "epoch": 1.14, "learning_rate": 6.743451319278798e-06, "loss": 0.5408, "step": 13595 }, { "epoch": 1.14, "learning_rate": 6.728448907628532e-06, "loss": 0.5545, "step": 13600 }, { "epoch": 1.14, "learning_rate": 6.7134606074280685e-06, "loss": 0.5826, "step": 13605 }, { "epoch": 1.14, "learning_rate": 6.698486430253159e-06, "loss": 0.5528, "step": 13610 }, { "epoch": 1.14, "learning_rate": 6.683526387668623e-06, "loss": 0.6051, "step": 13615 }, { "epoch": 1.14, "learning_rate": 6.668580491228379e-06, "loss": 0.5364, "step": 13620 }, { "epoch": 1.14, "learning_rate": 6.653648752475411e-06, "loss": 0.5286, "step": 13625 }, { "epoch": 1.14, "learning_rate": 6.638731182941774e-06, "loss": 0.6081, "step": 13630 }, { "epoch": 1.14, "learning_rate": 6.623827794148571e-06, "loss": 0.5688, "step": 13635 }, { "epoch": 1.14, "learning_rate": 6.608938597605965e-06, "loss": 0.5428, "step": 13640 }, { "epoch": 1.15, "learning_rate": 6.594063604813156e-06, "loss": 0.5885, "step": 13645 }, { "epoch": 1.15, "learning_rate": 6.579202827258354e-06, "loss": 0.61, "step": 13650 }, { "epoch": 1.15, "learning_rate": 6.5643562764188286e-06, "loss": 0.5621, "step": 13655 }, { "epoch": 1.15, "learning_rate": 6.549523963760821e-06, "loss": 0.5239, "step": 13660 }, { "epoch": 1.15, "learning_rate": 6.534705900739599e-06, "loss": 0.6069, "step": 13665 }, { "epoch": 1.15, "learning_rate": 6.5199020987994244e-06, "loss": 0.5758, "step": 13670 }, { "epoch": 1.15, "learning_rate": 6.505112569373539e-06, "loss": 0.5303, "step": 13675 }, { "epoch": 1.15, "learning_rate": 6.4903373238841714e-06, "loss": 0.506, "step": 13680 }, { "epoch": 1.15, "learning_rate": 6.475576373742503e-06, "loss": 0.5352, "step": 13685 }, { "epoch": 1.15, "learning_rate": 6.460829730348686e-06, "loss": 0.5163, "step": 13690 }, { "epoch": 1.15, "learning_rate": 6.446097405091822e-06, "loss": 0.5766, "step": 13695 }, { "epoch": 1.15, "learning_rate": 6.431379409349949e-06, "loss": 0.5396, "step": 13700 }, { "epoch": 1.15, "learning_rate": 6.416675754490051e-06, "loss": 0.572, "step": 13705 }, { "epoch": 1.15, "learning_rate": 6.401986451868014e-06, "loss": 0.5674, "step": 13710 }, { "epoch": 1.15, "learning_rate": 6.387311512828656e-06, "loss": 0.5514, "step": 13715 }, { "epoch": 1.15, "learning_rate": 6.3726509487057075e-06, "loss": 0.5325, "step": 13720 }, { "epoch": 1.15, "learning_rate": 6.358004770821774e-06, "loss": 0.561, "step": 13725 }, { "epoch": 1.15, "learning_rate": 6.343372990488369e-06, "loss": 0.6124, "step": 13730 }, { "epoch": 1.15, "learning_rate": 6.3287556190058855e-06, "loss": 0.5394, "step": 13735 }, { "epoch": 1.15, "learning_rate": 6.314152667663578e-06, "loss": 0.6091, "step": 13740 }, { "epoch": 1.15, "learning_rate": 6.299564147739575e-06, "loss": 0.5804, "step": 13745 }, { "epoch": 1.15, "learning_rate": 6.284990070500854e-06, "loss": 0.605, "step": 13750 }, { "epoch": 1.15, "learning_rate": 6.270430447203238e-06, "loss": 0.5939, "step": 13755 }, { "epoch": 1.15, "learning_rate": 6.255885289091392e-06, "loss": 0.4925, "step": 13760 }, { "epoch": 1.16, "learning_rate": 6.241354607398797e-06, "loss": 0.5844, "step": 13765 }, { "epoch": 1.16, "learning_rate": 6.226838413347766e-06, "loss": 0.5568, "step": 13770 }, { "epoch": 1.16, "learning_rate": 6.212336718149417e-06, "loss": 0.5136, "step": 13775 }, { "epoch": 1.16, "learning_rate": 6.197849533003672e-06, "loss": 0.5389, "step": 13780 }, { "epoch": 1.16, "learning_rate": 6.1833768690992475e-06, "loss": 0.528, "step": 13785 }, { "epoch": 1.16, "learning_rate": 6.168918737613652e-06, "loss": 0.5906, "step": 13790 }, { "epoch": 1.16, "learning_rate": 6.1544751497131455e-06, "loss": 0.5323, "step": 13795 }, { "epoch": 1.16, "learning_rate": 6.1400461165527835e-06, "loss": 0.5507, "step": 13800 }, { "epoch": 1.16, "learning_rate": 6.125631649276364e-06, "loss": 0.5597, "step": 13805 }, { "epoch": 1.16, "learning_rate": 6.111231759016447e-06, "loss": 0.5214, "step": 13810 }, { "epoch": 1.16, "learning_rate": 6.096846456894326e-06, "loss": 0.5143, "step": 13815 }, { "epoch": 1.16, "learning_rate": 6.082475754020036e-06, "loss": 0.5509, "step": 13820 }, { "epoch": 1.16, "learning_rate": 6.068119661492319e-06, "loss": 0.5697, "step": 13825 }, { "epoch": 1.16, "learning_rate": 6.053778190398651e-06, "loss": 0.5021, "step": 13830 }, { "epoch": 1.16, "learning_rate": 6.039451351815209e-06, "loss": 0.5866, "step": 13835 }, { "epoch": 1.16, "learning_rate": 6.0251391568068746e-06, "loss": 0.5225, "step": 13840 }, { "epoch": 1.16, "learning_rate": 6.01084161642721e-06, "loss": 0.5757, "step": 13845 }, { "epoch": 1.16, "learning_rate": 5.9965587417184665e-06, "loss": 0.5671, "step": 13850 }, { "epoch": 1.16, "learning_rate": 5.9822905437115665e-06, "loss": 0.5403, "step": 13855 }, { "epoch": 1.16, "learning_rate": 5.968037033426108e-06, "loss": 0.5775, "step": 13860 }, { "epoch": 1.16, "learning_rate": 5.953798221870316e-06, "loss": 0.5505, "step": 13865 }, { "epoch": 1.16, "learning_rate": 5.939574120041091e-06, "loss": 0.5201, "step": 13870 }, { "epoch": 1.16, "learning_rate": 5.925364738923972e-06, "loss": 0.5417, "step": 13875 }, { "epoch": 1.16, "learning_rate": 5.911170089493109e-06, "loss": 0.5726, "step": 13880 }, { "epoch": 1.17, "learning_rate": 5.896990182711293e-06, "loss": 0.6282, "step": 13885 }, { "epoch": 1.17, "learning_rate": 5.882825029529923e-06, "loss": 0.5903, "step": 13890 }, { "epoch": 1.17, "learning_rate": 5.868674640888999e-06, "loss": 0.5928, "step": 13895 }, { "epoch": 1.17, "learning_rate": 5.854539027717129e-06, "loss": 0.5213, "step": 13900 }, { "epoch": 1.17, "learning_rate": 5.8404182009315e-06, "loss": 0.5608, "step": 13905 }, { "epoch": 1.17, "learning_rate": 5.826312171437881e-06, "loss": 0.5773, "step": 13910 }, { "epoch": 1.17, "learning_rate": 5.8122209501306176e-06, "loss": 0.5201, "step": 13915 }, { "epoch": 1.17, "learning_rate": 5.798144547892611e-06, "loss": 0.5639, "step": 13920 }, { "epoch": 1.17, "learning_rate": 5.784082975595331e-06, "loss": 0.5854, "step": 13925 }, { "epoch": 1.17, "learning_rate": 5.7700362440987695e-06, "loss": 0.5224, "step": 13930 }, { "epoch": 1.17, "learning_rate": 5.756004364251488e-06, "loss": 0.5618, "step": 13935 }, { "epoch": 1.17, "learning_rate": 5.741987346890546e-06, "loss": 0.6049, "step": 13940 }, { "epoch": 1.17, "learning_rate": 5.727985202841543e-06, "loss": 0.5883, "step": 13945 }, { "epoch": 1.17, "learning_rate": 5.713997942918595e-06, "loss": 0.4829, "step": 13950 }, { "epoch": 1.17, "learning_rate": 5.7000255779243116e-06, "loss": 0.6127, "step": 13955 }, { "epoch": 1.17, "learning_rate": 5.686068118649807e-06, "loss": 0.5751, "step": 13960 }, { "epoch": 1.17, "learning_rate": 5.672125575874676e-06, "loss": 0.53, "step": 13965 }, { "epoch": 1.17, "learning_rate": 5.658197960366998e-06, "loss": 0.5122, "step": 13970 }, { "epoch": 1.17, "learning_rate": 5.644285282883324e-06, "loss": 0.5084, "step": 13975 }, { "epoch": 1.17, "learning_rate": 5.630387554168673e-06, "loss": 0.6113, "step": 13980 }, { "epoch": 1.17, "learning_rate": 5.6165047849564975e-06, "loss": 0.5564, "step": 13985 }, { "epoch": 1.17, "learning_rate": 5.60263698596872e-06, "loss": 0.5858, "step": 13990 }, { "epoch": 1.17, "learning_rate": 5.588784167915695e-06, "loss": 0.5323, "step": 13995 }, { "epoch": 1.17, "learning_rate": 5.574946341496206e-06, "loss": 0.531, "step": 14000 }, { "epoch": 1.18, "learning_rate": 5.561123517397449e-06, "loss": 0.548, "step": 14005 }, { "epoch": 1.18, "learning_rate": 5.547315706295045e-06, "loss": 0.535, "step": 14010 }, { "epoch": 1.18, "learning_rate": 5.5335229188530155e-06, "loss": 0.5698, "step": 14015 }, { "epoch": 1.18, "learning_rate": 5.519745165723783e-06, "loss": 0.5876, "step": 14020 }, { "epoch": 1.18, "learning_rate": 5.505982457548154e-06, "loss": 0.587, "step": 14025 }, { "epoch": 1.18, "learning_rate": 5.492234804955318e-06, "loss": 0.5831, "step": 14030 }, { "epoch": 1.18, "learning_rate": 5.478502218562837e-06, "loss": 0.5483, "step": 14035 }, { "epoch": 1.18, "learning_rate": 5.464784708976639e-06, "loss": 0.5559, "step": 14040 }, { "epoch": 1.18, "learning_rate": 5.4510822867909955e-06, "loss": 0.5283, "step": 14045 }, { "epoch": 1.18, "learning_rate": 5.4373949625885405e-06, "loss": 0.6742, "step": 14050 }, { "epoch": 1.18, "learning_rate": 5.423722746940243e-06, "loss": 0.6286, "step": 14055 }, { "epoch": 1.18, "learning_rate": 5.410065650405402e-06, "loss": 0.5311, "step": 14060 }, { "epoch": 1.18, "learning_rate": 5.396423683531643e-06, "loss": 0.495, "step": 14065 }, { "epoch": 1.18, "learning_rate": 5.382796856854908e-06, "loss": 0.5504, "step": 14070 }, { "epoch": 1.18, "learning_rate": 5.369185180899427e-06, "loss": 0.5644, "step": 14075 }, { "epoch": 1.18, "learning_rate": 5.355588666177758e-06, "loss": 0.5916, "step": 14080 }, { "epoch": 1.18, "learning_rate": 5.342007323190726e-06, "loss": 0.5201, "step": 14085 }, { "epoch": 1.18, "learning_rate": 5.328441162427452e-06, "loss": 0.5161, "step": 14090 }, { "epoch": 1.18, "learning_rate": 5.3148901943653375e-06, "loss": 0.578, "step": 14095 }, { "epoch": 1.18, "learning_rate": 5.3013544294700266e-06, "loss": 0.5267, "step": 14100 }, { "epoch": 1.18, "learning_rate": 5.287833878195436e-06, "loss": 0.5834, "step": 14105 }, { "epoch": 1.18, "learning_rate": 5.27432855098374e-06, "loss": 0.4787, "step": 14110 }, { "epoch": 1.18, "learning_rate": 5.260838458265341e-06, "loss": 0.6052, "step": 14115 }, { "epoch": 1.18, "learning_rate": 5.2473636104588884e-06, "loss": 0.561, "step": 14120 }, { "epoch": 1.19, "learning_rate": 5.233904017971245e-06, "loss": 0.5581, "step": 14125 }, { "epoch": 1.19, "learning_rate": 5.220459691197502e-06, "loss": 0.526, "step": 14130 }, { "epoch": 1.19, "learning_rate": 5.2070306405209545e-06, "loss": 0.5668, "step": 14135 }, { "epoch": 1.19, "learning_rate": 5.1936168763131095e-06, "loss": 0.5468, "step": 14140 }, { "epoch": 1.19, "learning_rate": 5.180218408933646e-06, "loss": 0.5063, "step": 14145 }, { "epoch": 1.19, "learning_rate": 5.166835248730451e-06, "loss": 0.5549, "step": 14150 }, { "epoch": 1.19, "learning_rate": 5.153467406039586e-06, "loss": 0.5961, "step": 14155 }, { "epoch": 1.19, "learning_rate": 5.140114891185263e-06, "loss": 0.5627, "step": 14160 }, { "epoch": 1.19, "learning_rate": 5.126777714479883e-06, "loss": 0.5928, "step": 14165 }, { "epoch": 1.19, "learning_rate": 5.1134558862239865e-06, "loss": 0.5317, "step": 14170 }, { "epoch": 1.19, "learning_rate": 5.10014941670626e-06, "loss": 0.5739, "step": 14175 }, { "epoch": 1.19, "learning_rate": 5.086858316203536e-06, "loss": 0.5922, "step": 14180 }, { "epoch": 1.19, "learning_rate": 5.0735825949807695e-06, "loss": 0.6042, "step": 14185 }, { "epoch": 1.19, "learning_rate": 5.060322263291039e-06, "loss": 0.5579, "step": 14190 }, { "epoch": 1.19, "learning_rate": 5.047077331375541e-06, "loss": 0.5594, "step": 14195 }, { "epoch": 1.19, "learning_rate": 5.033847809463582e-06, "loss": 0.5871, "step": 14200 }, { "epoch": 1.19, "learning_rate": 5.0206337077725485e-06, "loss": 0.5136, "step": 14205 }, { "epoch": 1.19, "learning_rate": 5.0074350365079455e-06, "loss": 0.5252, "step": 14210 }, { "epoch": 1.19, "learning_rate": 4.994251805863328e-06, "loss": 0.6206, "step": 14215 }, { "epoch": 1.19, "learning_rate": 4.981084026020355e-06, "loss": 0.5277, "step": 14220 }, { "epoch": 1.19, "learning_rate": 4.96793170714874e-06, "loss": 0.512, "step": 14225 }, { "epoch": 1.19, "learning_rate": 4.954794859406256e-06, "loss": 0.5067, "step": 14230 }, { "epoch": 1.19, "learning_rate": 4.941673492938728e-06, "loss": 0.6082, "step": 14235 }, { "epoch": 1.19, "learning_rate": 4.928567617880025e-06, "loss": 0.5462, "step": 14240 }, { "epoch": 1.2, "learning_rate": 4.915477244352054e-06, "loss": 0.5668, "step": 14245 }, { "epoch": 1.2, "learning_rate": 4.902402382464746e-06, "loss": 0.566, "step": 14250 }, { "epoch": 1.2, "learning_rate": 4.889343042316055e-06, "loss": 0.5422, "step": 14255 }, { "epoch": 1.2, "learning_rate": 4.876299233991951e-06, "loss": 0.5775, "step": 14260 }, { "epoch": 1.2, "learning_rate": 4.863270967566389e-06, "loss": 0.5251, "step": 14265 }, { "epoch": 1.2, "learning_rate": 4.850258253101345e-06, "loss": 0.6104, "step": 14270 }, { "epoch": 1.2, "learning_rate": 4.83726110064677e-06, "loss": 0.5389, "step": 14275 }, { "epoch": 1.2, "learning_rate": 4.824279520240612e-06, "loss": 0.5869, "step": 14280 }, { "epoch": 1.2, "learning_rate": 4.8113135219087616e-06, "loss": 0.5811, "step": 14285 }, { "epoch": 1.2, "learning_rate": 4.798363115665105e-06, "loss": 0.5263, "step": 14290 }, { "epoch": 1.2, "learning_rate": 4.785428311511475e-06, "loss": 0.6069, "step": 14295 }, { "epoch": 1.2, "learning_rate": 4.7725091194376545e-06, "loss": 0.5758, "step": 14300 }, { "epoch": 1.2, "learning_rate": 4.759605549421367e-06, "loss": 0.5431, "step": 14305 }, { "epoch": 1.2, "learning_rate": 4.746717611428278e-06, "loss": 0.5364, "step": 14310 }, { "epoch": 1.2, "learning_rate": 4.733845315411978e-06, "loss": 0.5258, "step": 14315 }, { "epoch": 1.2, "learning_rate": 4.720988671313964e-06, "loss": 0.5328, "step": 14320 }, { "epoch": 1.2, "learning_rate": 4.7081476890636574e-06, "loss": 0.5558, "step": 14325 }, { "epoch": 1.2, "learning_rate": 4.695322378578387e-06, "loss": 0.5118, "step": 14330 }, { "epoch": 1.2, "learning_rate": 4.682512749763368e-06, "loss": 0.5553, "step": 14335 }, { "epoch": 1.2, "learning_rate": 4.669718812511714e-06, "loss": 0.5166, "step": 14340 }, { "epoch": 1.2, "learning_rate": 4.656940576704408e-06, "loss": 0.558, "step": 14345 }, { "epoch": 1.2, "learning_rate": 4.644178052210322e-06, "loss": 0.6156, "step": 14350 }, { "epoch": 1.2, "learning_rate": 4.631431248886173e-06, "loss": 0.5541, "step": 14355 }, { "epoch": 1.21, "learning_rate": 4.618700176576554e-06, "loss": 0.5442, "step": 14360 }, { "epoch": 1.21, "learning_rate": 4.6059848451139e-06, "loss": 0.5417, "step": 14365 }, { "epoch": 1.21, "learning_rate": 4.5932852643184976e-06, "loss": 0.5706, "step": 14370 }, { "epoch": 1.21, "learning_rate": 4.580601443998464e-06, "loss": 0.625, "step": 14375 }, { "epoch": 1.21, "learning_rate": 4.567933393949733e-06, "loss": 0.5363, "step": 14380 }, { "epoch": 1.21, "learning_rate": 4.555281123956076e-06, "loss": 0.5485, "step": 14385 }, { "epoch": 1.21, "learning_rate": 4.542644643789068e-06, "loss": 0.555, "step": 14390 }, { "epoch": 1.21, "learning_rate": 4.530023963208094e-06, "loss": 0.5514, "step": 14395 }, { "epoch": 1.21, "learning_rate": 4.517419091960331e-06, "loss": 0.5992, "step": 14400 }, { "epoch": 1.21, "learning_rate": 4.5048300397807525e-06, "loss": 0.5273, "step": 14405 }, { "epoch": 1.21, "learning_rate": 4.4922568163921096e-06, "loss": 0.6097, "step": 14410 }, { "epoch": 1.21, "learning_rate": 4.47969943150493e-06, "loss": 0.5601, "step": 14415 }, { "epoch": 1.21, "learning_rate": 4.46715789481752e-06, "loss": 0.5076, "step": 14420 }, { "epoch": 1.21, "learning_rate": 4.454632216015916e-06, "loss": 0.6016, "step": 14425 }, { "epoch": 1.21, "learning_rate": 4.4421224047739465e-06, "loss": 0.5718, "step": 14430 }, { "epoch": 1.21, "learning_rate": 4.429628470753147e-06, "loss": 0.5163, "step": 14435 }, { "epoch": 1.21, "learning_rate": 4.41715042360282e-06, "loss": 0.5766, "step": 14440 }, { "epoch": 1.21, "learning_rate": 4.404688272959981e-06, "loss": 0.5817, "step": 14445 }, { "epoch": 1.21, "learning_rate": 4.392242028449381e-06, "loss": 0.5561, "step": 14450 }, { "epoch": 1.21, "learning_rate": 4.379811699683478e-06, "loss": 0.5165, "step": 14455 }, { "epoch": 1.21, "learning_rate": 4.367397296262437e-06, "loss": 0.5684, "step": 14460 }, { "epoch": 1.21, "learning_rate": 4.354998827774129e-06, "loss": 0.597, "step": 14465 }, { "epoch": 1.21, "learning_rate": 4.342616303794117e-06, "loss": 0.5327, "step": 14470 }, { "epoch": 1.21, "learning_rate": 4.3302497338856495e-06, "loss": 0.6113, "step": 14475 }, { "epoch": 1.22, "learning_rate": 4.317899127599653e-06, "loss": 0.5275, "step": 14480 }, { "epoch": 1.22, "learning_rate": 4.305564494474717e-06, "loss": 0.559, "step": 14485 }, { "epoch": 1.22, "learning_rate": 4.293245844037111e-06, "loss": 0.4638, "step": 14490 }, { "epoch": 1.22, "learning_rate": 4.280943185800743e-06, "loss": 0.5121, "step": 14495 }, { "epoch": 1.22, "learning_rate": 4.268656529267184e-06, "loss": 0.5068, "step": 14500 }, { "epoch": 1.22, "learning_rate": 4.2563858839256395e-06, "loss": 0.5325, "step": 14505 }, { "epoch": 1.22, "learning_rate": 4.244131259252953e-06, "loss": 0.5412, "step": 14510 }, { "epoch": 1.22, "learning_rate": 4.231892664713591e-06, "loss": 0.5983, "step": 14515 }, { "epoch": 1.22, "learning_rate": 4.219670109759643e-06, "loss": 0.5843, "step": 14520 }, { "epoch": 1.22, "learning_rate": 4.207463603830811e-06, "loss": 0.5528, "step": 14525 }, { "epoch": 1.22, "learning_rate": 4.1952731563543985e-06, "loss": 0.5714, "step": 14530 }, { "epoch": 1.22, "learning_rate": 4.1830987767453174e-06, "loss": 0.5983, "step": 14535 }, { "epoch": 1.22, "learning_rate": 4.170940474406043e-06, "loss": 0.6018, "step": 14540 }, { "epoch": 1.22, "learning_rate": 4.158798258726668e-06, "loss": 0.5872, "step": 14545 }, { "epoch": 1.22, "learning_rate": 4.146672139084837e-06, "loss": 0.4915, "step": 14550 }, { "epoch": 1.22, "learning_rate": 4.134562124845778e-06, "loss": 0.5654, "step": 14555 }, { "epoch": 1.22, "learning_rate": 4.122468225362278e-06, "loss": 0.5477, "step": 14560 }, { "epoch": 1.22, "learning_rate": 4.1103904499746605e-06, "loss": 0.6336, "step": 14565 }, { "epoch": 1.22, "learning_rate": 4.098328808010818e-06, "loss": 0.5588, "step": 14570 }, { "epoch": 1.22, "learning_rate": 4.086283308786174e-06, "loss": 0.6043, "step": 14575 }, { "epoch": 1.22, "learning_rate": 4.074253961603688e-06, "loss": 0.578, "step": 14580 }, { "epoch": 1.22, "learning_rate": 4.062240775753839e-06, "loss": 0.5795, "step": 14585 }, { "epoch": 1.22, "learning_rate": 4.050243760514635e-06, "loss": 0.4638, "step": 14590 }, { "epoch": 1.22, "learning_rate": 4.038262925151587e-06, "loss": 0.5283, "step": 14595 }, { "epoch": 1.23, "learning_rate": 4.026298278917701e-06, "loss": 0.6197, "step": 14600 }, { "epoch": 1.23, "learning_rate": 4.014349831053496e-06, "loss": 0.5452, "step": 14605 }, { "epoch": 1.23, "learning_rate": 4.002417590786978e-06, "loss": 0.5179, "step": 14610 }, { "epoch": 1.23, "learning_rate": 3.990501567333626e-06, "loss": 0.5325, "step": 14615 }, { "epoch": 1.23, "learning_rate": 3.978601769896406e-06, "loss": 0.5306, "step": 14620 }, { "epoch": 1.23, "learning_rate": 3.966718207665745e-06, "loss": 0.5429, "step": 14625 }, { "epoch": 1.23, "learning_rate": 3.954850889819542e-06, "loss": 0.5752, "step": 14630 }, { "epoch": 1.23, "learning_rate": 3.942999825523125e-06, "loss": 0.5334, "step": 14635 }, { "epoch": 1.23, "learning_rate": 3.9311650239292955e-06, "loss": 0.5272, "step": 14640 }, { "epoch": 1.23, "learning_rate": 3.91934649417828e-06, "loss": 0.5542, "step": 14645 }, { "epoch": 1.23, "learning_rate": 3.907544245397754e-06, "loss": 0.5483, "step": 14650 }, { "epoch": 1.23, "learning_rate": 3.895758286702794e-06, "loss": 0.5895, "step": 14655 }, { "epoch": 1.23, "learning_rate": 3.883988627195917e-06, "loss": 0.5689, "step": 14660 }, { "epoch": 1.23, "learning_rate": 3.872235275967042e-06, "loss": 0.5435, "step": 14665 }, { "epoch": 1.23, "learning_rate": 3.860498242093496e-06, "loss": 0.5453, "step": 14670 }, { "epoch": 1.23, "learning_rate": 3.848777534640002e-06, "loss": 0.5676, "step": 14675 }, { "epoch": 1.23, "learning_rate": 3.837073162658675e-06, "loss": 0.546, "step": 14680 }, { "epoch": 1.23, "learning_rate": 3.825385135189016e-06, "loss": 0.5657, "step": 14685 }, { "epoch": 1.23, "learning_rate": 3.813713461257898e-06, "loss": 0.5158, "step": 14690 }, { "epoch": 1.23, "learning_rate": 3.802058149879564e-06, "loss": 0.5616, "step": 14695 }, { "epoch": 1.23, "learning_rate": 3.790419210055632e-06, "loss": 0.5245, "step": 14700 }, { "epoch": 1.23, "learning_rate": 3.7787966507750517e-06, "loss": 0.5518, "step": 14705 }, { "epoch": 1.23, "learning_rate": 3.767190481014149e-06, "loss": 0.5359, "step": 14710 }, { "epoch": 1.23, "learning_rate": 3.7556007097365646e-06, "loss": 0.5549, "step": 14715 }, { "epoch": 1.24, "learning_rate": 3.7440273458933e-06, "loss": 0.5838, "step": 14720 }, { "epoch": 1.24, "learning_rate": 3.732470398422669e-06, "loss": 0.5406, "step": 14725 }, { "epoch": 1.24, "learning_rate": 3.720929876250315e-06, "loss": 0.5819, "step": 14730 }, { "epoch": 1.24, "learning_rate": 3.7094057882891887e-06, "loss": 0.5308, "step": 14735 }, { "epoch": 1.24, "learning_rate": 3.6978981434395516e-06, "loss": 0.5318, "step": 14740 }, { "epoch": 1.24, "learning_rate": 3.686406950588972e-06, "loss": 0.5616, "step": 14745 }, { "epoch": 1.24, "learning_rate": 3.674932218612301e-06, "loss": 0.5153, "step": 14750 }, { "epoch": 1.24, "learning_rate": 3.663473956371688e-06, "loss": 0.6164, "step": 14755 }, { "epoch": 1.24, "learning_rate": 3.6520321727165464e-06, "loss": 0.5678, "step": 14760 }, { "epoch": 1.24, "learning_rate": 3.6406068764835795e-06, "loss": 0.5332, "step": 14765 }, { "epoch": 1.24, "learning_rate": 3.6291980764967536e-06, "loss": 0.5591, "step": 14770 }, { "epoch": 1.24, "learning_rate": 3.617805781567282e-06, "loss": 0.5261, "step": 14775 }, { "epoch": 1.24, "learning_rate": 3.6064300004936464e-06, "loss": 0.5772, "step": 14780 }, { "epoch": 1.24, "learning_rate": 3.595070742061568e-06, "loss": 0.5298, "step": 14785 }, { "epoch": 1.24, "learning_rate": 3.583728015044005e-06, "loss": 0.4927, "step": 14790 }, { "epoch": 1.24, "learning_rate": 3.5724018282011552e-06, "loss": 0.5336, "step": 14795 }, { "epoch": 1.24, "learning_rate": 3.5610921902804377e-06, "loss": 0.5585, "step": 14800 }, { "epoch": 1.24, "learning_rate": 3.5497991100164886e-06, "loss": 0.6224, "step": 14805 }, { "epoch": 1.24, "learning_rate": 3.5385225961311553e-06, "loss": 0.5821, "step": 14810 }, { "epoch": 1.24, "learning_rate": 3.527262657333508e-06, "loss": 0.53, "step": 14815 }, { "epoch": 1.24, "learning_rate": 3.5160193023197814e-06, "loss": 0.5543, "step": 14820 }, { "epoch": 1.24, "learning_rate": 3.5047925397734326e-06, "loss": 0.571, "step": 14825 }, { "epoch": 1.24, "learning_rate": 3.49358237836509e-06, "loss": 0.5344, "step": 14830 }, { "epoch": 1.24, "learning_rate": 3.4823888267525695e-06, "loss": 0.5413, "step": 14835 }, { "epoch": 1.25, "learning_rate": 3.4712118935808555e-06, "loss": 0.5384, "step": 14840 }, { "epoch": 1.25, "learning_rate": 3.4600515874820883e-06, "loss": 0.56, "step": 14845 }, { "epoch": 1.25, "learning_rate": 3.448907917075575e-06, "loss": 0.5113, "step": 14850 }, { "epoch": 1.25, "learning_rate": 3.437780890967776e-06, "loss": 0.6189, "step": 14855 }, { "epoch": 1.25, "learning_rate": 3.426670517752295e-06, "loss": 0.6206, "step": 14860 }, { "epoch": 1.25, "learning_rate": 3.4155768060098763e-06, "loss": 0.496, "step": 14865 }, { "epoch": 1.25, "learning_rate": 3.4044997643084e-06, "loss": 0.5149, "step": 14870 }, { "epoch": 1.25, "learning_rate": 3.3934394012028526e-06, "loss": 0.5192, "step": 14875 }, { "epoch": 1.25, "learning_rate": 3.382395725235357e-06, "loss": 0.563, "step": 14880 }, { "epoch": 1.25, "learning_rate": 3.3713687449351495e-06, "loss": 0.5708, "step": 14885 }, { "epoch": 1.25, "learning_rate": 3.360358468818564e-06, "loss": 0.5407, "step": 14890 }, { "epoch": 1.25, "learning_rate": 3.3493649053890326e-06, "loss": 0.5227, "step": 14895 }, { "epoch": 1.25, "learning_rate": 3.3383880631370907e-06, "loss": 0.5541, "step": 14900 }, { "epoch": 1.25, "learning_rate": 3.327427950540346e-06, "loss": 0.5375, "step": 14905 }, { "epoch": 1.25, "learning_rate": 3.3164845760635e-06, "loss": 0.5287, "step": 14910 }, { "epoch": 1.25, "learning_rate": 3.305557948158308e-06, "loss": 0.5715, "step": 14915 }, { "epoch": 1.25, "learning_rate": 3.2946480752636066e-06, "loss": 0.6309, "step": 14920 }, { "epoch": 1.25, "learning_rate": 3.2837549658052914e-06, "loss": 0.5496, "step": 14925 }, { "epoch": 1.25, "learning_rate": 3.2728786281963105e-06, "loss": 0.4951, "step": 14930 }, { "epoch": 1.25, "learning_rate": 3.2620190708366473e-06, "loss": 0.6059, "step": 14935 }, { "epoch": 1.25, "learning_rate": 3.2511763021133403e-06, "loss": 0.6179, "step": 14940 }, { "epoch": 1.25, "learning_rate": 3.2403503304004547e-06, "loss": 0.652, "step": 14945 }, { "epoch": 1.25, "learning_rate": 3.2295411640590856e-06, "loss": 0.5401, "step": 14950 }, { "epoch": 1.25, "learning_rate": 3.218748811437347e-06, "loss": 0.6408, "step": 14955 }, { "epoch": 1.26, "learning_rate": 3.207973280870369e-06, "loss": 0.5911, "step": 14960 }, { "epoch": 1.26, "learning_rate": 3.197214580680288e-06, "loss": 0.5874, "step": 14965 }, { "epoch": 1.26, "learning_rate": 3.186472719176245e-06, "loss": 0.5519, "step": 14970 }, { "epoch": 1.26, "learning_rate": 3.1757477046543776e-06, "loss": 0.5615, "step": 14975 }, { "epoch": 1.26, "learning_rate": 3.1650395453977992e-06, "loss": 0.5343, "step": 14980 }, { "epoch": 1.26, "learning_rate": 3.154348249676625e-06, "loss": 0.557, "step": 14985 }, { "epoch": 1.26, "learning_rate": 3.1436738257479253e-06, "loss": 0.5345, "step": 14990 }, { "epoch": 1.26, "learning_rate": 3.133016281855758e-06, "loss": 0.5676, "step": 14995 }, { "epoch": 1.26, "learning_rate": 3.122375626231136e-06, "loss": 0.5, "step": 15000 }, { "epoch": 1.26, "learning_rate": 3.1117518670920306e-06, "loss": 0.5787, "step": 15005 }, { "epoch": 1.26, "learning_rate": 3.101145012643364e-06, "loss": 0.5634, "step": 15010 }, { "epoch": 1.26, "learning_rate": 3.0905550710770002e-06, "loss": 0.6239, "step": 15015 }, { "epoch": 1.26, "learning_rate": 3.0799820505717448e-06, "loss": 0.5418, "step": 15020 }, { "epoch": 1.26, "learning_rate": 3.0694259592933318e-06, "loss": 0.5966, "step": 15025 }, { "epoch": 1.26, "learning_rate": 3.058886805394423e-06, "loss": 0.5428, "step": 15030 }, { "epoch": 1.26, "learning_rate": 3.0483645970146025e-06, "loss": 0.5414, "step": 15035 }, { "epoch": 1.26, "learning_rate": 3.0378593422803496e-06, "loss": 0.5745, "step": 15040 }, { "epoch": 1.26, "learning_rate": 3.0273710493050724e-06, "loss": 0.5743, "step": 15045 }, { "epoch": 1.26, "learning_rate": 3.0168997261890738e-06, "loss": 0.59, "step": 15050 }, { "epoch": 1.26, "learning_rate": 3.006445381019532e-06, "loss": 0.5581, "step": 15055 }, { "epoch": 1.26, "learning_rate": 2.9960080218705334e-06, "loss": 0.5466, "step": 15060 }, { "epoch": 1.26, "learning_rate": 2.9855876568030435e-06, "loss": 0.5477, "step": 15065 }, { "epoch": 1.26, "learning_rate": 2.975184293864894e-06, "loss": 0.5859, "step": 15070 }, { "epoch": 1.27, "learning_rate": 2.964797941090794e-06, "loss": 0.6236, "step": 15075 }, { "epoch": 1.27, "learning_rate": 2.954428606502313e-06, "loss": 0.5679, "step": 15080 }, { "epoch": 1.27, "learning_rate": 2.94407629810787e-06, "loss": 0.5564, "step": 15085 }, { "epoch": 1.27, "learning_rate": 2.93374102390275e-06, "loss": 0.5416, "step": 15090 }, { "epoch": 1.27, "learning_rate": 2.9234227918690626e-06, "loss": 0.5265, "step": 15095 }, { "epoch": 1.27, "learning_rate": 2.9131216099757693e-06, "loss": 0.5054, "step": 15100 }, { "epoch": 1.27, "learning_rate": 2.9028374861786556e-06, "loss": 0.5393, "step": 15105 }, { "epoch": 1.27, "learning_rate": 2.892570428420344e-06, "loss": 0.5573, "step": 15110 }, { "epoch": 1.27, "learning_rate": 2.8823204446302628e-06, "loss": 0.5345, "step": 15115 }, { "epoch": 1.27, "learning_rate": 2.872087542724669e-06, "loss": 0.5452, "step": 15120 }, { "epoch": 1.27, "learning_rate": 2.861871730606605e-06, "loss": 0.5461, "step": 15125 }, { "epoch": 1.27, "learning_rate": 2.8516730161659343e-06, "loss": 0.5184, "step": 15130 }, { "epoch": 1.27, "learning_rate": 2.841491407279309e-06, "loss": 0.6066, "step": 15135 }, { "epoch": 1.27, "learning_rate": 2.83132691181017e-06, "loss": 0.5535, "step": 15140 }, { "epoch": 1.27, "learning_rate": 2.8211795376087397e-06, "loss": 0.532, "step": 15145 }, { "epoch": 1.27, "learning_rate": 2.8110492925120275e-06, "loss": 0.5536, "step": 15150 }, { "epoch": 1.27, "learning_rate": 2.8009361843437888e-06, "loss": 0.5664, "step": 15155 }, { "epoch": 1.27, "learning_rate": 2.790840220914573e-06, "loss": 0.5304, "step": 15160 }, { "epoch": 1.27, "learning_rate": 2.7807614100216706e-06, "loss": 0.596, "step": 15165 }, { "epoch": 1.27, "learning_rate": 2.77069975944913e-06, "loss": 0.598, "step": 15170 }, { "epoch": 1.27, "learning_rate": 2.7606552769677466e-06, "loss": 0.5716, "step": 15175 }, { "epoch": 1.27, "learning_rate": 2.750627970335057e-06, "loss": 0.558, "step": 15180 }, { "epoch": 1.27, "learning_rate": 2.740617847295332e-06, "loss": 0.5591, "step": 15185 }, { "epoch": 1.27, "learning_rate": 2.7306249155795743e-06, "loss": 0.5893, "step": 15190 }, { "epoch": 1.28, "learning_rate": 2.7206491829054966e-06, "loss": 0.5734, "step": 15195 }, { "epoch": 1.28, "learning_rate": 2.7106906569775435e-06, "loss": 0.5636, "step": 15200 }, { "epoch": 1.28, "learning_rate": 2.7007493454868694e-06, "loss": 0.5942, "step": 15205 }, { "epoch": 1.28, "learning_rate": 2.6908252561113207e-06, "loss": 0.5338, "step": 15210 }, { "epoch": 1.28, "learning_rate": 2.680918396515453e-06, "loss": 0.5568, "step": 15215 }, { "epoch": 1.28, "learning_rate": 2.6710287743505174e-06, "loss": 0.5324, "step": 15220 }, { "epoch": 1.28, "learning_rate": 2.661156397254444e-06, "loss": 0.5685, "step": 15225 }, { "epoch": 1.28, "learning_rate": 2.651301272851853e-06, "loss": 0.5468, "step": 15230 }, { "epoch": 1.28, "learning_rate": 2.641463408754033e-06, "loss": 0.5456, "step": 15235 }, { "epoch": 1.28, "learning_rate": 2.6316428125589433e-06, "loss": 0.5336, "step": 15240 }, { "epoch": 1.28, "learning_rate": 2.6218394918512108e-06, "loss": 0.6125, "step": 15245 }, { "epoch": 1.28, "learning_rate": 2.6120534542021206e-06, "loss": 0.6004, "step": 15250 }, { "epoch": 1.28, "learning_rate": 2.6022847071695973e-06, "loss": 0.5271, "step": 15255 }, { "epoch": 1.28, "learning_rate": 2.592533258298227e-06, "loss": 0.5787, "step": 15260 }, { "epoch": 1.28, "learning_rate": 2.582799115119236e-06, "loss": 0.5216, "step": 15265 }, { "epoch": 1.28, "learning_rate": 2.573082285150466e-06, "loss": 0.529, "step": 15270 }, { "epoch": 1.28, "learning_rate": 2.5633827758964084e-06, "loss": 0.5522, "step": 15275 }, { "epoch": 1.28, "learning_rate": 2.5537005948481714e-06, "loss": 0.518, "step": 15280 }, { "epoch": 1.28, "learning_rate": 2.5440357494834755e-06, "loss": 0.4673, "step": 15285 }, { "epoch": 1.28, "learning_rate": 2.5343882472666558e-06, "loss": 0.5963, "step": 15290 }, { "epoch": 1.28, "learning_rate": 2.524758095648655e-06, "loss": 0.5057, "step": 15295 }, { "epoch": 1.28, "learning_rate": 2.5151453020670122e-06, "loss": 0.5649, "step": 15300 }, { "epoch": 1.28, "learning_rate": 2.5055498739458643e-06, "loss": 0.5624, "step": 15305 }, { "epoch": 1.28, "learning_rate": 2.4959718186959358e-06, "loss": 0.5275, "step": 15310 }, { "epoch": 1.29, "learning_rate": 2.486411143714526e-06, "loss": 0.5597, "step": 15315 }, { "epoch": 1.29, "learning_rate": 2.47686785638552e-06, "loss": 0.5828, "step": 15320 }, { "epoch": 1.29, "learning_rate": 2.4673419640793698e-06, "loss": 0.5242, "step": 15325 }, { "epoch": 1.29, "learning_rate": 2.4578334741531013e-06, "loss": 0.5317, "step": 15330 }, { "epoch": 1.29, "learning_rate": 2.448342393950287e-06, "loss": 0.5467, "step": 15335 }, { "epoch": 1.29, "learning_rate": 2.4388687308010585e-06, "loss": 0.528, "step": 15340 }, { "epoch": 1.29, "learning_rate": 2.429412492022101e-06, "loss": 0.5157, "step": 15345 }, { "epoch": 1.29, "learning_rate": 2.4199736849166404e-06, "loss": 0.5378, "step": 15350 }, { "epoch": 1.29, "learning_rate": 2.410552316774434e-06, "loss": 0.5348, "step": 15355 }, { "epoch": 1.29, "learning_rate": 2.40114839487178e-06, "loss": 0.5243, "step": 15360 }, { "epoch": 1.29, "learning_rate": 2.3917619264714974e-06, "loss": 0.4923, "step": 15365 }, { "epoch": 1.29, "learning_rate": 2.3823929188229207e-06, "loss": 0.5366, "step": 15370 }, { "epoch": 1.29, "learning_rate": 2.373041379161908e-06, "loss": 0.5495, "step": 15375 }, { "epoch": 1.29, "learning_rate": 2.3637073147108194e-06, "loss": 0.6345, "step": 15380 }, { "epoch": 1.29, "learning_rate": 2.3543907326785243e-06, "loss": 0.5343, "step": 15385 }, { "epoch": 1.29, "learning_rate": 2.3450916402603885e-06, "loss": 0.525, "step": 15390 }, { "epoch": 1.29, "learning_rate": 2.3358100446382704e-06, "loss": 0.5427, "step": 15395 }, { "epoch": 1.29, "learning_rate": 2.3265459529805134e-06, "loss": 0.5907, "step": 15400 }, { "epoch": 1.29, "learning_rate": 2.3172993724419416e-06, "loss": 0.5891, "step": 15405 }, { "epoch": 1.29, "learning_rate": 2.308070310163854e-06, "loss": 0.5854, "step": 15410 }, { "epoch": 1.29, "learning_rate": 2.298858773274029e-06, "loss": 0.5405, "step": 15415 }, { "epoch": 1.29, "learning_rate": 2.2896647688866986e-06, "loss": 0.557, "step": 15420 }, { "epoch": 1.29, "learning_rate": 2.2804883041025655e-06, "loss": 0.5498, "step": 15425 }, { "epoch": 1.29, "learning_rate": 2.271329386008772e-06, "loss": 0.5456, "step": 15430 }, { "epoch": 1.3, "learning_rate": 2.2621880216789183e-06, "loss": 0.5269, "step": 15435 }, { "epoch": 1.3, "learning_rate": 2.2530642181730455e-06, "loss": 0.5693, "step": 15440 }, { "epoch": 1.3, "learning_rate": 2.2439579825376337e-06, "loss": 0.5629, "step": 15445 }, { "epoch": 1.3, "learning_rate": 2.234869321805594e-06, "loss": 0.5482, "step": 15450 }, { "epoch": 1.3, "learning_rate": 2.2257982429962633e-06, "loss": 0.4833, "step": 15455 }, { "epoch": 1.3, "learning_rate": 2.216744753115399e-06, "loss": 0.5979, "step": 15460 }, { "epoch": 1.3, "learning_rate": 2.2077088591551774e-06, "loss": 0.5808, "step": 15465 }, { "epoch": 1.3, "learning_rate": 2.198690568094186e-06, "loss": 0.5331, "step": 15470 }, { "epoch": 1.3, "learning_rate": 2.189689886897403e-06, "loss": 0.5229, "step": 15475 }, { "epoch": 1.3, "learning_rate": 2.180706822516232e-06, "loss": 0.4934, "step": 15480 }, { "epoch": 1.3, "learning_rate": 2.1717413818884422e-06, "loss": 0.5476, "step": 15485 }, { "epoch": 1.3, "learning_rate": 2.1627935719382126e-06, "loss": 0.5868, "step": 15490 }, { "epoch": 1.3, "learning_rate": 2.1538633995761003e-06, "loss": 0.5772, "step": 15495 }, { "epoch": 1.3, "learning_rate": 2.1449508716990364e-06, "loss": 0.5919, "step": 15500 }, { "epoch": 1.3, "learning_rate": 2.136055995190328e-06, "loss": 0.5283, "step": 15505 }, { "epoch": 1.3, "learning_rate": 2.1271787769196497e-06, "loss": 0.5333, "step": 15510 }, { "epoch": 1.3, "learning_rate": 2.1183192237430373e-06, "loss": 0.5434, "step": 15515 }, { "epoch": 1.3, "learning_rate": 2.1094773425028886e-06, "loss": 0.6365, "step": 15520 }, { "epoch": 1.3, "learning_rate": 2.100653140027942e-06, "loss": 0.5763, "step": 15525 }, { "epoch": 1.3, "learning_rate": 2.091846623133298e-06, "loss": 0.551, "step": 15530 }, { "epoch": 1.3, "learning_rate": 2.0830577986203775e-06, "loss": 0.5179, "step": 15535 }, { "epoch": 1.3, "learning_rate": 2.0742866732769595e-06, "loss": 0.5438, "step": 15540 }, { "epoch": 1.3, "learning_rate": 2.0655332538771333e-06, "loss": 0.5481, "step": 15545 }, { "epoch": 1.3, "learning_rate": 2.05679754718133e-06, "loss": 0.54, "step": 15550 }, { "epoch": 1.31, "learning_rate": 2.048079559936289e-06, "loss": 0.5404, "step": 15555 }, { "epoch": 1.31, "learning_rate": 2.0393792988750714e-06, "loss": 0.568, "step": 15560 }, { "epoch": 1.31, "learning_rate": 2.03069677071705e-06, "loss": 0.5067, "step": 15565 }, { "epoch": 1.31, "learning_rate": 2.0220319821678964e-06, "loss": 0.5356, "step": 15570 }, { "epoch": 1.31, "learning_rate": 2.0133849399195823e-06, "loss": 0.6154, "step": 15575 }, { "epoch": 1.31, "learning_rate": 2.004755650650378e-06, "loss": 0.5624, "step": 15580 }, { "epoch": 1.31, "learning_rate": 1.9961441210248398e-06, "loss": 0.5058, "step": 15585 }, { "epoch": 1.31, "learning_rate": 1.9875503576938043e-06, "loss": 0.6195, "step": 15590 }, { "epoch": 1.31, "learning_rate": 1.9789743672943896e-06, "loss": 0.5042, "step": 15595 }, { "epoch": 1.31, "learning_rate": 1.9704161564499947e-06, "loss": 0.5618, "step": 15600 }, { "epoch": 1.31, "learning_rate": 1.9618757317702794e-06, "loss": 0.5392, "step": 15605 }, { "epoch": 1.31, "learning_rate": 1.9533530998511627e-06, "loss": 0.5866, "step": 15610 }, { "epoch": 1.31, "learning_rate": 1.944848267274832e-06, "loss": 0.5357, "step": 15615 }, { "epoch": 1.31, "learning_rate": 1.9363612406097216e-06, "loss": 0.6092, "step": 15620 }, { "epoch": 1.31, "learning_rate": 1.9278920264105186e-06, "loss": 0.5579, "step": 15625 }, { "epoch": 1.31, "learning_rate": 1.9194406312181473e-06, "loss": 0.5412, "step": 15630 }, { "epoch": 1.31, "learning_rate": 1.9110070615597745e-06, "loss": 0.5496, "step": 15635 }, { "epoch": 1.31, "learning_rate": 1.9025913239488018e-06, "loss": 0.4891, "step": 15640 }, { "epoch": 1.31, "learning_rate": 1.8941934248848547e-06, "loss": 0.5425, "step": 15645 }, { "epoch": 1.31, "learning_rate": 1.8858133708537783e-06, "loss": 0.6616, "step": 15650 }, { "epoch": 1.31, "learning_rate": 1.877451168327643e-06, "loss": 0.513, "step": 15655 }, { "epoch": 1.31, "learning_rate": 1.8691068237647296e-06, "loss": 0.551, "step": 15660 }, { "epoch": 1.31, "learning_rate": 1.8607803436095279e-06, "loss": 0.5083, "step": 15665 }, { "epoch": 1.31, "learning_rate": 1.852471734292724e-06, "loss": 0.6074, "step": 15670 }, { "epoch": 1.32, "learning_rate": 1.8441810022312184e-06, "loss": 0.5745, "step": 15675 }, { "epoch": 1.32, "learning_rate": 1.8359081538280782e-06, "loss": 0.5684, "step": 15680 }, { "epoch": 1.32, "learning_rate": 1.8276531954725868e-06, "loss": 0.5564, "step": 15685 }, { "epoch": 1.32, "learning_rate": 1.8194161335401888e-06, "loss": 0.5091, "step": 15690 }, { "epoch": 1.32, "learning_rate": 1.8111969743925233e-06, "loss": 0.5857, "step": 15695 }, { "epoch": 1.32, "learning_rate": 1.8029957243773988e-06, "loss": 0.5463, "step": 15700 }, { "epoch": 1.32, "learning_rate": 1.794812389828776e-06, "loss": 0.5212, "step": 15705 }, { "epoch": 1.32, "learning_rate": 1.7866469770668025e-06, "loss": 0.5542, "step": 15710 }, { "epoch": 1.32, "learning_rate": 1.77849949239777e-06, "loss": 0.5721, "step": 15715 }, { "epoch": 1.32, "learning_rate": 1.7703699421141335e-06, "loss": 0.6029, "step": 15720 }, { "epoch": 1.32, "learning_rate": 1.7622583324944876e-06, "loss": 0.6053, "step": 15725 }, { "epoch": 1.32, "learning_rate": 1.7541646698035735e-06, "loss": 0.5997, "step": 15730 }, { "epoch": 1.32, "learning_rate": 1.7460889602922802e-06, "loss": 0.6196, "step": 15735 }, { "epoch": 1.32, "learning_rate": 1.7380312101976188e-06, "loss": 0.5246, "step": 15740 }, { "epoch": 1.32, "learning_rate": 1.729991425742744e-06, "loss": 0.6592, "step": 15745 }, { "epoch": 1.32, "learning_rate": 1.7219696131369146e-06, "loss": 0.6267, "step": 15750 }, { "epoch": 1.32, "learning_rate": 1.7139657785755298e-06, "loss": 0.5803, "step": 15755 }, { "epoch": 1.32, "learning_rate": 1.7059799282400952e-06, "loss": 0.6099, "step": 15760 }, { "epoch": 1.32, "learning_rate": 1.6980120682982242e-06, "loss": 0.5367, "step": 15765 }, { "epoch": 1.32, "learning_rate": 1.6900622049036391e-06, "loss": 0.5472, "step": 15770 }, { "epoch": 1.32, "learning_rate": 1.6821303441961672e-06, "loss": 0.6058, "step": 15775 }, { "epoch": 1.32, "learning_rate": 1.6742164923017279e-06, "loss": 0.5511, "step": 15780 }, { "epoch": 1.32, "learning_rate": 1.666320655332329e-06, "loss": 0.6218, "step": 15785 }, { "epoch": 1.33, "learning_rate": 1.6584428393860735e-06, "loss": 0.4976, "step": 15790 }, { "epoch": 1.33, "learning_rate": 1.650583050547136e-06, "loss": 0.5288, "step": 15795 }, { "epoch": 1.33, "learning_rate": 1.6427412948857773e-06, "loss": 0.5717, "step": 15800 }, { "epoch": 1.33, "learning_rate": 1.634917578458328e-06, "loss": 0.589, "step": 15805 }, { "epoch": 1.33, "learning_rate": 1.6271119073071828e-06, "loss": 0.5459, "step": 15810 }, { "epoch": 1.33, "learning_rate": 1.6193242874608077e-06, "loss": 0.5617, "step": 15815 }, { "epoch": 1.33, "learning_rate": 1.6115547249337143e-06, "loss": 0.5961, "step": 15820 }, { "epoch": 1.33, "learning_rate": 1.6038032257264802e-06, "loss": 0.5767, "step": 15825 }, { "epoch": 1.33, "learning_rate": 1.5960697958257304e-06, "loss": 0.5519, "step": 15830 }, { "epoch": 1.33, "learning_rate": 1.5883544412041346e-06, "loss": 0.5342, "step": 15835 }, { "epoch": 1.33, "learning_rate": 1.5806571678203958e-06, "loss": 0.5205, "step": 15840 }, { "epoch": 1.33, "learning_rate": 1.5729779816192614e-06, "loss": 0.6264, "step": 15845 }, { "epoch": 1.33, "learning_rate": 1.565316888531504e-06, "loss": 0.5639, "step": 15850 }, { "epoch": 1.33, "learning_rate": 1.557673894473924e-06, "loss": 0.598, "step": 15855 }, { "epoch": 1.33, "learning_rate": 1.5500490053493466e-06, "loss": 0.5793, "step": 15860 }, { "epoch": 1.33, "learning_rate": 1.5424422270466171e-06, "loss": 0.5191, "step": 15865 }, { "epoch": 1.33, "learning_rate": 1.534853565440575e-06, "loss": 0.5276, "step": 15870 }, { "epoch": 1.33, "learning_rate": 1.5272830263920873e-06, "loss": 0.5974, "step": 15875 }, { "epoch": 1.33, "learning_rate": 1.5197306157480218e-06, "loss": 0.5558, "step": 15880 }, { "epoch": 1.33, "learning_rate": 1.5121963393412402e-06, "loss": 0.5994, "step": 15885 }, { "epoch": 1.33, "learning_rate": 1.5046802029905965e-06, "loss": 0.626, "step": 15890 }, { "epoch": 1.33, "learning_rate": 1.4971822125009415e-06, "loss": 0.586, "step": 15895 }, { "epoch": 1.33, "learning_rate": 1.4897023736631128e-06, "loss": 0.5132, "step": 15900 }, { "epoch": 1.33, "learning_rate": 1.4822406922539195e-06, "loss": 0.6486, "step": 15905 }, { "epoch": 1.34, "learning_rate": 1.474797174036155e-06, "loss": 0.7015, "step": 15910 }, { "epoch": 1.34, "learning_rate": 1.4673718247585843e-06, "loss": 0.5219, "step": 15915 }, { "epoch": 1.34, "learning_rate": 1.4599646501559427e-06, "loss": 0.5338, "step": 15920 }, { "epoch": 1.34, "learning_rate": 1.4525756559489206e-06, "loss": 0.5604, "step": 15925 }, { "epoch": 1.34, "learning_rate": 1.4452048478441698e-06, "loss": 0.5484, "step": 15930 }, { "epoch": 1.34, "learning_rate": 1.4378522315343036e-06, "loss": 0.6283, "step": 15935 }, { "epoch": 1.34, "learning_rate": 1.430517812697879e-06, "loss": 0.5203, "step": 15940 }, { "epoch": 1.34, "learning_rate": 1.423201596999399e-06, "loss": 0.5616, "step": 15945 }, { "epoch": 1.34, "learning_rate": 1.4159035900893126e-06, "loss": 0.5318, "step": 15950 }, { "epoch": 1.34, "learning_rate": 1.4086237976040056e-06, "loss": 0.4877, "step": 15955 }, { "epoch": 1.34, "learning_rate": 1.4013622251657865e-06, "loss": 0.5112, "step": 15960 }, { "epoch": 1.34, "learning_rate": 1.3941188783828995e-06, "loss": 0.5352, "step": 15965 }, { "epoch": 1.34, "learning_rate": 1.3868937628495171e-06, "loss": 0.5806, "step": 15970 }, { "epoch": 1.34, "learning_rate": 1.3796868841457229e-06, "loss": 0.5969, "step": 15975 }, { "epoch": 1.34, "learning_rate": 1.3724982478375253e-06, "loss": 0.52, "step": 15980 }, { "epoch": 1.34, "learning_rate": 1.3653278594768309e-06, "loss": 0.5518, "step": 15985 }, { "epoch": 1.34, "learning_rate": 1.3581757246014654e-06, "loss": 0.5609, "step": 15990 }, { "epoch": 1.34, "learning_rate": 1.3510418487351518e-06, "loss": 0.6251, "step": 15995 }, { "epoch": 1.34, "learning_rate": 1.3439262373875105e-06, "loss": 0.5978, "step": 16000 }, { "epoch": 1.34, "learning_rate": 1.3368288960540543e-06, "loss": 0.5458, "step": 16005 }, { "epoch": 1.34, "learning_rate": 1.3297498302161953e-06, "loss": 0.4966, "step": 16010 }, { "epoch": 1.34, "learning_rate": 1.3226890453412188e-06, "loss": 0.54, "step": 16015 }, { "epoch": 1.34, "learning_rate": 1.3156465468822988e-06, "loss": 0.6587, "step": 16020 }, { "epoch": 1.34, "learning_rate": 1.308622340278487e-06, "loss": 0.5322, "step": 16025 }, { "epoch": 1.35, "learning_rate": 1.3016164309546996e-06, "loss": 0.5841, "step": 16030 }, { "epoch": 1.35, "learning_rate": 1.2946288243217309e-06, "loss": 0.5702, "step": 16035 }, { "epoch": 1.35, "learning_rate": 1.2876595257762358e-06, "loss": 0.6211, "step": 16040 }, { "epoch": 1.35, "learning_rate": 1.2807085407007257e-06, "loss": 0.6052, "step": 16045 }, { "epoch": 1.35, "learning_rate": 1.2737758744635785e-06, "loss": 0.5684, "step": 16050 }, { "epoch": 1.35, "learning_rate": 1.2668615324190142e-06, "loss": 0.5635, "step": 16055 }, { "epoch": 1.35, "learning_rate": 1.2599655199071053e-06, "loss": 0.5644, "step": 16060 }, { "epoch": 1.35, "learning_rate": 1.2530878422537667e-06, "loss": 0.4881, "step": 16065 }, { "epoch": 1.35, "learning_rate": 1.246228504770755e-06, "loss": 0.5345, "step": 16070 }, { "epoch": 1.35, "learning_rate": 1.2393875127556575e-06, "loss": 0.5446, "step": 16075 }, { "epoch": 1.35, "learning_rate": 1.2325648714919008e-06, "loss": 0.5153, "step": 16080 }, { "epoch": 1.35, "learning_rate": 1.2257605862487337e-06, "loss": 0.5634, "step": 16085 }, { "epoch": 1.35, "learning_rate": 1.2189746622812248e-06, "loss": 0.5645, "step": 16090 }, { "epoch": 1.35, "learning_rate": 1.2122071048302708e-06, "loss": 0.5562, "step": 16095 }, { "epoch": 1.35, "learning_rate": 1.205457919122574e-06, "loss": 0.5056, "step": 16100 }, { "epoch": 1.35, "learning_rate": 1.1987271103706537e-06, "loss": 0.5372, "step": 16105 }, { "epoch": 1.35, "learning_rate": 1.192014683772838e-06, "loss": 0.5047, "step": 16110 }, { "epoch": 1.35, "learning_rate": 1.1853206445132548e-06, "loss": 0.5386, "step": 16115 }, { "epoch": 1.35, "learning_rate": 1.1786449977618325e-06, "loss": 0.6012, "step": 16120 }, { "epoch": 1.35, "learning_rate": 1.1719877486742914e-06, "loss": 0.517, "step": 16125 }, { "epoch": 1.35, "learning_rate": 1.1653489023921466e-06, "loss": 0.5639, "step": 16130 }, { "epoch": 1.35, "learning_rate": 1.158728464042702e-06, "loss": 0.5553, "step": 16135 }, { "epoch": 1.35, "learning_rate": 1.1521264387390402e-06, "loss": 0.5047, "step": 16140 }, { "epoch": 1.35, "learning_rate": 1.1455428315800237e-06, "loss": 0.5601, "step": 16145 }, { "epoch": 1.36, "learning_rate": 1.138977647650291e-06, "loss": 0.5086, "step": 16150 }, { "epoch": 1.36, "learning_rate": 1.132430892020256e-06, "loss": 0.5722, "step": 16155 }, { "epoch": 1.36, "learning_rate": 1.1259025697460929e-06, "loss": 0.5598, "step": 16160 }, { "epoch": 1.36, "learning_rate": 1.1193926858697496e-06, "loss": 0.5309, "step": 16165 }, { "epoch": 1.36, "learning_rate": 1.112901245418918e-06, "loss": 0.5818, "step": 16170 }, { "epoch": 1.36, "learning_rate": 1.1064282534070603e-06, "loss": 0.5869, "step": 16175 }, { "epoch": 1.36, "learning_rate": 1.0999737148333855e-06, "loss": 0.5493, "step": 16180 }, { "epoch": 1.36, "learning_rate": 1.093537634682848e-06, "loss": 0.5338, "step": 16185 }, { "epoch": 1.36, "learning_rate": 1.087120017926152e-06, "loss": 0.5282, "step": 16190 }, { "epoch": 1.36, "learning_rate": 1.0807208695197384e-06, "loss": 0.5253, "step": 16195 }, { "epoch": 1.36, "learning_rate": 1.0743401944057901e-06, "loss": 0.5519, "step": 16200 }, { "epoch": 1.36, "learning_rate": 1.0679779975122096e-06, "loss": 0.5828, "step": 16205 }, { "epoch": 1.36, "learning_rate": 1.0616342837526382e-06, "loss": 0.5731, "step": 16210 }, { "epoch": 1.36, "learning_rate": 1.0553090580264434e-06, "loss": 0.5569, "step": 16215 }, { "epoch": 1.36, "learning_rate": 1.0490023252187148e-06, "loss": 0.5449, "step": 16220 }, { "epoch": 1.36, "learning_rate": 1.0427140902002503e-06, "loss": 0.5552, "step": 16225 }, { "epoch": 1.36, "learning_rate": 1.036444357827568e-06, "loss": 0.5205, "step": 16230 }, { "epoch": 1.36, "learning_rate": 1.0301931329429054e-06, "loss": 0.568, "step": 16235 }, { "epoch": 1.36, "learning_rate": 1.0239604203741843e-06, "loss": 0.5854, "step": 16240 }, { "epoch": 1.36, "learning_rate": 1.0177462249350456e-06, "loss": 0.5147, "step": 16245 }, { "epoch": 1.36, "learning_rate": 1.011550551424828e-06, "loss": 0.5288, "step": 16250 }, { "epoch": 1.36, "learning_rate": 1.0053734046285624e-06, "loss": 0.6063, "step": 16255 }, { "epoch": 1.36, "learning_rate": 9.992147893169657e-07, "loss": 0.6174, "step": 16260 }, { "epoch": 1.36, "learning_rate": 9.9307471024645e-07, "loss": 0.5847, "step": 16265 }, { "epoch": 1.37, "learning_rate": 9.869531721591134e-07, "loss": 0.5904, "step": 16270 }, { "epoch": 1.37, "learning_rate": 9.808501797827275e-07, "loss": 0.5106, "step": 16275 }, { "epoch": 1.37, "learning_rate": 9.747657378307435e-07, "loss": 0.5623, "step": 16280 }, { "epoch": 1.37, "learning_rate": 9.68699851002286e-07, "loss": 0.5445, "step": 16285 }, { "epoch": 1.37, "learning_rate": 9.62652523982152e-07, "loss": 0.6027, "step": 16290 }, { "epoch": 1.37, "learning_rate": 9.566237614408002e-07, "loss": 0.5544, "step": 16295 }, { "epoch": 1.37, "learning_rate": 9.50613568034353e-07, "loss": 0.6172, "step": 16300 }, { "epoch": 1.37, "learning_rate": 9.446219484045948e-07, "loss": 0.6055, "step": 16305 }, { "epoch": 1.37, "learning_rate": 9.386489071789545e-07, "loss": 0.5715, "step": 16310 }, { "epoch": 1.37, "learning_rate": 9.326944489705281e-07, "loss": 0.5896, "step": 16315 }, { "epoch": 1.37, "learning_rate": 9.267585783780425e-07, "loss": 0.5551, "step": 16320 }, { "epoch": 1.37, "learning_rate": 9.208412999858862e-07, "loss": 0.5269, "step": 16325 }, { "epoch": 1.37, "learning_rate": 9.149426183640758e-07, "loss": 0.5842, "step": 16330 }, { "epoch": 1.37, "learning_rate": 9.090625380682699e-07, "loss": 0.5248, "step": 16335 }, { "epoch": 1.37, "learning_rate": 9.032010636397665e-07, "loss": 0.5714, "step": 16340 }, { "epoch": 1.37, "learning_rate": 8.973581996054803e-07, "loss": 0.5535, "step": 16345 }, { "epoch": 1.37, "learning_rate": 8.915339504779685e-07, "loss": 0.56, "step": 16350 }, { "epoch": 1.37, "learning_rate": 8.857283207553996e-07, "loss": 0.5499, "step": 16355 }, { "epoch": 1.37, "learning_rate": 8.79941314921573e-07, "loss": 0.526, "step": 16360 }, { "epoch": 1.37, "learning_rate": 8.741729374458884e-07, "loss": 0.5224, "step": 16365 }, { "epoch": 1.37, "learning_rate": 8.684231927833714e-07, "loss": 0.517, "step": 16370 }, { "epoch": 1.37, "learning_rate": 8.626920853746612e-07, "loss": 0.5931, "step": 16375 }, { "epoch": 1.37, "learning_rate": 8.569796196459839e-07, "loss": 0.548, "step": 16380 }, { "epoch": 1.37, "learning_rate": 8.512858000091911e-07, "loss": 0.5038, "step": 16385 }, { "epoch": 1.38, "learning_rate": 8.456106308617151e-07, "loss": 0.5854, "step": 16390 }, { "epoch": 1.38, "learning_rate": 8.399541165865942e-07, "loss": 0.5347, "step": 16395 }, { "epoch": 1.38, "learning_rate": 8.343162615524591e-07, "loss": 0.4955, "step": 16400 }, { "epoch": 1.38, "learning_rate": 8.286970701135239e-07, "loss": 0.6111, "step": 16405 }, { "epoch": 1.38, "learning_rate": 8.230965466095947e-07, "loss": 0.5663, "step": 16410 }, { "epoch": 1.38, "learning_rate": 8.175146953660561e-07, "loss": 0.5677, "step": 16415 }, { "epoch": 1.38, "learning_rate": 8.119515206938766e-07, "loss": 0.5401, "step": 16420 }, { "epoch": 1.38, "learning_rate": 8.064070268895913e-07, "loss": 0.5521, "step": 16425 }, { "epoch": 1.38, "learning_rate": 8.008812182353137e-07, "loss": 0.5418, "step": 16430 }, { "epoch": 1.38, "learning_rate": 7.953740989987274e-07, "loss": 0.5558, "step": 16435 }, { "epoch": 1.38, "learning_rate": 7.898856734330801e-07, "loss": 0.56, "step": 16440 }, { "epoch": 1.38, "learning_rate": 7.844159457771838e-07, "loss": 0.5095, "step": 16445 }, { "epoch": 1.38, "learning_rate": 7.789649202554067e-07, "loss": 0.5636, "step": 16450 }, { "epoch": 1.38, "learning_rate": 7.735326010776728e-07, "loss": 0.5475, "step": 16455 }, { "epoch": 1.38, "learning_rate": 7.681189924394622e-07, "loss": 0.5616, "step": 16460 }, { "epoch": 1.38, "learning_rate": 7.627240985218054e-07, "loss": 0.5336, "step": 16465 }, { "epoch": 1.38, "learning_rate": 7.573479234912723e-07, "loss": 0.5153, "step": 16470 }, { "epoch": 1.38, "learning_rate": 7.519904714999859e-07, "loss": 0.5651, "step": 16475 }, { "epoch": 1.38, "learning_rate": 7.466517466855949e-07, "loss": 0.5497, "step": 16480 }, { "epoch": 1.38, "learning_rate": 7.41331753171301e-07, "loss": 0.5859, "step": 16485 }, { "epoch": 1.38, "learning_rate": 7.360304950658259e-07, "loss": 0.5389, "step": 16490 }, { "epoch": 1.38, "learning_rate": 7.307479764634334e-07, "loss": 0.5268, "step": 16495 }, { "epoch": 1.38, "learning_rate": 7.254842014439018e-07, "loss": 0.6128, "step": 16500 }, { "epoch": 1.39, "learning_rate": 7.202391740725456e-07, "loss": 0.5403, "step": 16505 }, { "epoch": 1.39, "learning_rate": 7.150128984001886e-07, "loss": 0.6025, "step": 16510 }, { "epoch": 1.39, "learning_rate": 7.098053784631852e-07, "loss": 0.5118, "step": 16515 }, { "epoch": 1.39, "learning_rate": 7.046166182833908e-07, "loss": 0.5136, "step": 16520 }, { "epoch": 1.39, "learning_rate": 6.994466218681828e-07, "loss": 0.5843, "step": 16525 }, { "epoch": 1.39, "learning_rate": 6.942953932104424e-07, "loss": 0.5342, "step": 16530 }, { "epoch": 1.39, "learning_rate": 6.891629362885565e-07, "loss": 0.5798, "step": 16535 }, { "epoch": 1.39, "learning_rate": 6.84049255066413e-07, "loss": 0.531, "step": 16540 }, { "epoch": 1.39, "learning_rate": 6.789543534933995e-07, "loss": 0.5525, "step": 16545 }, { "epoch": 1.39, "learning_rate": 6.738782355044049e-07, "loss": 0.5923, "step": 16550 }, { "epoch": 1.39, "learning_rate": 6.688209050198041e-07, "loss": 0.5242, "step": 16555 }, { "epoch": 1.39, "learning_rate": 6.637823659454673e-07, "loss": 0.511, "step": 16560 }, { "epoch": 1.39, "learning_rate": 6.587626221727483e-07, "loss": 0.5702, "step": 16565 }, { "epoch": 1.39, "learning_rate": 6.53761677578485e-07, "loss": 0.5541, "step": 16570 }, { "epoch": 1.39, "learning_rate": 6.487795360249987e-07, "loss": 0.58, "step": 16575 }, { "epoch": 1.39, "learning_rate": 6.438162013600896e-07, "loss": 0.525, "step": 16580 }, { "epoch": 1.39, "learning_rate": 6.388716774170272e-07, "loss": 0.598, "step": 16585 }, { "epoch": 1.39, "learning_rate": 6.339459680145598e-07, "loss": 0.549, "step": 16590 }, { "epoch": 1.39, "learning_rate": 6.290390769568999e-07, "loss": 0.5113, "step": 16595 }, { "epoch": 1.39, "learning_rate": 6.241510080337271e-07, "loss": 0.5025, "step": 16600 }, { "epoch": 1.39, "learning_rate": 6.192817650201882e-07, "loss": 0.5493, "step": 16605 }, { "epoch": 1.39, "learning_rate": 6.144313516768863e-07, "loss": 0.5779, "step": 16610 }, { "epoch": 1.39, "learning_rate": 6.095997717498858e-07, "loss": 0.6167, "step": 16615 }, { "epoch": 1.39, "learning_rate": 6.047870289706992e-07, "loss": 0.6023, "step": 16620 }, { "epoch": 1.4, "learning_rate": 5.999931270563003e-07, "loss": 0.5516, "step": 16625 }, { "epoch": 1.4, "learning_rate": 5.952180697091026e-07, "loss": 0.5347, "step": 16630 }, { "epoch": 1.4, "learning_rate": 5.904618606169698e-07, "loss": 0.5705, "step": 16635 }, { "epoch": 1.4, "learning_rate": 5.85724503453211e-07, "loss": 0.5439, "step": 16640 }, { "epoch": 1.4, "learning_rate": 5.81006001876569e-07, "loss": 0.5426, "step": 16645 }, { "epoch": 1.4, "learning_rate": 5.763063595312284e-07, "loss": 0.5466, "step": 16650 }, { "epoch": 1.4, "learning_rate": 5.71625580046814e-07, "loss": 0.5364, "step": 16655 }, { "epoch": 1.4, "learning_rate": 5.669636670383699e-07, "loss": 0.5641, "step": 16660 }, { "epoch": 1.4, "learning_rate": 5.623206241063772e-07, "loss": 0.5546, "step": 16665 }, { "epoch": 1.4, "learning_rate": 5.576964548367425e-07, "loss": 0.5436, "step": 16670 }, { "epoch": 1.4, "learning_rate": 5.530911628007951e-07, "loss": 0.4991, "step": 16675 }, { "epoch": 1.4, "learning_rate": 5.485047515552872e-07, "loss": 0.5614, "step": 16680 }, { "epoch": 1.4, "learning_rate": 5.439372246423879e-07, "loss": 0.6126, "step": 16685 }, { "epoch": 1.4, "learning_rate": 5.393885855896786e-07, "loss": 0.5291, "step": 16690 }, { "epoch": 1.4, "learning_rate": 5.348588379101572e-07, "loss": 0.5466, "step": 16695 }, { "epoch": 1.4, "learning_rate": 5.30347985102228e-07, "loss": 0.5487, "step": 16700 }, { "epoch": 1.4, "learning_rate": 5.258560306497045e-07, "loss": 0.5197, "step": 16705 }, { "epoch": 1.4, "learning_rate": 5.21382978021806e-07, "loss": 0.5452, "step": 16710 }, { "epoch": 1.4, "learning_rate": 5.169288306731524e-07, "loss": 0.576, "step": 16715 }, { "epoch": 1.4, "learning_rate": 5.124935920437612e-07, "loss": 0.5918, "step": 16720 }, { "epoch": 1.4, "learning_rate": 5.080772655590483e-07, "loss": 0.5621, "step": 16725 }, { "epoch": 1.4, "learning_rate": 5.03679854629821e-07, "loss": 0.5138, "step": 16730 }, { "epoch": 1.4, "learning_rate": 4.993013626522796e-07, "loss": 0.5919, "step": 16735 }, { "epoch": 1.4, "learning_rate": 4.949417930080136e-07, "loss": 0.5606, "step": 16740 }, { "epoch": 1.41, "learning_rate": 4.906011490639989e-07, "loss": 0.55, "step": 16745 }, { "epoch": 1.41, "learning_rate": 4.862794341725901e-07, "loss": 0.5631, "step": 16750 }, { "epoch": 1.41, "learning_rate": 4.819766516715313e-07, "loss": 0.5851, "step": 16755 }, { "epoch": 1.41, "learning_rate": 4.776928048839336e-07, "loss": 0.5203, "step": 16760 }, { "epoch": 1.41, "learning_rate": 4.7342789711829204e-07, "loss": 0.5434, "step": 16765 }, { "epoch": 1.41, "learning_rate": 4.6918193166847204e-07, "loss": 0.5699, "step": 16770 }, { "epoch": 1.41, "learning_rate": 4.649549118137086e-07, "loss": 0.5961, "step": 16775 }, { "epoch": 1.41, "learning_rate": 4.607468408186072e-07, "loss": 0.5855, "step": 16780 }, { "epoch": 1.41, "learning_rate": 4.565577219331374e-07, "loss": 0.5737, "step": 16785 }, { "epoch": 1.41, "learning_rate": 4.523875583926307e-07, "loss": 0.6003, "step": 16790 }, { "epoch": 1.41, "learning_rate": 4.4823635341778026e-07, "loss": 0.5483, "step": 16795 }, { "epoch": 1.41, "learning_rate": 4.441041102146354e-07, "loss": 0.5641, "step": 16800 }, { "epoch": 1.41, "learning_rate": 4.399908319746043e-07, "loss": 0.5381, "step": 16805 }, { "epoch": 1.41, "learning_rate": 4.3589652187444596e-07, "loss": 0.525, "step": 16810 }, { "epoch": 1.41, "learning_rate": 4.318211830762697e-07, "loss": 0.5552, "step": 16815 }, { "epoch": 1.41, "learning_rate": 4.277648187275329e-07, "loss": 0.5926, "step": 16820 }, { "epoch": 1.41, "learning_rate": 4.237274319610407e-07, "loss": 0.5533, "step": 16825 }, { "epoch": 1.41, "learning_rate": 4.1970902589493764e-07, "loss": 0.5362, "step": 16830 }, { "epoch": 1.41, "learning_rate": 4.157096036327135e-07, "loss": 0.5443, "step": 16835 }, { "epoch": 1.41, "learning_rate": 4.117291682631946e-07, "loss": 0.5345, "step": 16840 }, { "epoch": 1.41, "learning_rate": 4.0776772286054143e-07, "loss": 0.5428, "step": 16845 }, { "epoch": 1.41, "learning_rate": 4.038252704842538e-07, "loss": 0.6486, "step": 16850 }, { "epoch": 1.41, "learning_rate": 3.999018141791544e-07, "loss": 0.5574, "step": 16855 }, { "epoch": 1.41, "learning_rate": 3.9599735697540554e-07, "loss": 0.5808, "step": 16860 }, { "epoch": 1.42, "learning_rate": 3.921119018884839e-07, "loss": 0.5545, "step": 16865 }, { "epoch": 1.42, "learning_rate": 3.882454519192003e-07, "loss": 0.5678, "step": 16870 }, { "epoch": 1.42, "learning_rate": 3.843980100536826e-07, "loss": 0.5239, "step": 16875 }, { "epoch": 1.42, "learning_rate": 3.8056957926338175e-07, "loss": 0.5063, "step": 16880 }, { "epoch": 1.42, "learning_rate": 3.767601625050604e-07, "loss": 0.5036, "step": 16885 }, { "epoch": 1.42, "learning_rate": 3.72969762720804e-07, "loss": 0.5453, "step": 16890 }, { "epoch": 1.42, "learning_rate": 3.691983828380041e-07, "loss": 0.6124, "step": 16895 }, { "epoch": 1.42, "learning_rate": 3.6544602576936703e-07, "loss": 0.4871, "step": 16900 }, { "epoch": 1.42, "learning_rate": 3.6171269441290787e-07, "loss": 0.5699, "step": 16905 }, { "epoch": 1.42, "learning_rate": 3.579983916519425e-07, "loss": 0.5361, "step": 16910 }, { "epoch": 1.42, "learning_rate": 3.5430312035509837e-07, "loss": 0.5393, "step": 16915 }, { "epoch": 1.42, "learning_rate": 3.5062688337629823e-07, "loss": 0.5225, "step": 16920 }, { "epoch": 1.42, "learning_rate": 3.4696968355476534e-07, "loss": 0.5335, "step": 16925 }, { "epoch": 1.42, "learning_rate": 3.4333152371502086e-07, "loss": 0.5695, "step": 16930 }, { "epoch": 1.42, "learning_rate": 3.397124066668866e-07, "loss": 0.5561, "step": 16935 }, { "epoch": 1.42, "learning_rate": 3.361123352054657e-07, "loss": 0.5701, "step": 16940 }, { "epoch": 1.42, "learning_rate": 3.325313121111617e-07, "loss": 0.6007, "step": 16945 }, { "epoch": 1.42, "learning_rate": 3.2896934014966516e-07, "loss": 0.5531, "step": 16950 }, { "epoch": 1.42, "learning_rate": 3.2542642207194773e-07, "loss": 0.539, "step": 16955 }, { "epoch": 1.42, "learning_rate": 3.219025606142734e-07, "loss": 0.551, "step": 16960 }, { "epoch": 1.42, "learning_rate": 3.18397758498179e-07, "loss": 0.5922, "step": 16965 }, { "epoch": 1.42, "learning_rate": 3.14912018430491e-07, "loss": 0.5159, "step": 16970 }, { "epoch": 1.42, "learning_rate": 3.1144534310331154e-07, "loss": 0.5438, "step": 16975 }, { "epoch": 1.42, "learning_rate": 3.0799773519401e-07, "loss": 0.5294, "step": 16980 }, { "epoch": 1.43, "learning_rate": 3.045691973652398e-07, "loss": 0.5495, "step": 16985 }, { "epoch": 1.43, "learning_rate": 3.0115973226492455e-07, "loss": 0.597, "step": 16990 }, { "epoch": 1.43, "learning_rate": 2.977693425262523e-07, "loss": 0.5623, "step": 16995 }, { "epoch": 1.43, "learning_rate": 2.943980307676841e-07, "loss": 0.5179, "step": 17000 }, { "epoch": 1.43, "learning_rate": 2.910457995929483e-07, "loss": 0.6155, "step": 17005 }, { "epoch": 1.43, "learning_rate": 2.877126515910294e-07, "loss": 0.5197, "step": 17010 }, { "epoch": 1.43, "learning_rate": 2.843985893361795e-07, "loss": 0.5792, "step": 17015 }, { "epoch": 1.43, "learning_rate": 2.811036153879093e-07, "loss": 0.5526, "step": 17020 }, { "epoch": 1.43, "learning_rate": 2.778277322909861e-07, "loss": 0.5869, "step": 17025 }, { "epoch": 1.43, "learning_rate": 2.7457094257543335e-07, "loss": 0.5249, "step": 17030 }, { "epoch": 1.43, "learning_rate": 2.7133324875653066e-07, "loss": 0.5454, "step": 17035 }, { "epoch": 1.43, "learning_rate": 2.681146533348056e-07, "loss": 0.5457, "step": 17040 }, { "epoch": 1.43, "learning_rate": 2.6491515879603635e-07, "loss": 0.5287, "step": 17045 }, { "epoch": 1.43, "learning_rate": 2.617347676112547e-07, "loss": 0.5605, "step": 17050 }, { "epoch": 1.43, "learning_rate": 2.5857348223673194e-07, "loss": 0.5102, "step": 17055 }, { "epoch": 1.43, "learning_rate": 2.554313051139845e-07, "loss": 0.5422, "step": 17060 }, { "epoch": 1.43, "learning_rate": 2.523082386697767e-07, "loss": 0.55, "step": 17065 }, { "epoch": 1.43, "learning_rate": 2.492042853161042e-07, "loss": 0.5451, "step": 17070 }, { "epoch": 1.43, "learning_rate": 2.461194474502132e-07, "loss": 0.5034, "step": 17075 }, { "epoch": 1.43, "learning_rate": 2.4305372745457576e-07, "loss": 0.5911, "step": 17080 }, { "epoch": 1.43, "learning_rate": 2.400071276969007e-07, "loss": 0.5889, "step": 17085 }, { "epoch": 1.43, "learning_rate": 2.3697965053013637e-07, "loss": 0.5326, "step": 17090 }, { "epoch": 1.43, "learning_rate": 2.3397129829245413e-07, "loss": 0.5216, "step": 17095 }, { "epoch": 1.43, "learning_rate": 2.3098207330726485e-07, "loss": 0.5169, "step": 17100 }, { "epoch": 1.44, "learning_rate": 2.2801197788319407e-07, "loss": 0.5812, "step": 17105 }, { "epoch": 1.44, "learning_rate": 2.250610143141041e-07, "loss": 0.5411, "step": 17110 }, { "epoch": 1.44, "learning_rate": 2.2212918487907742e-07, "loss": 0.5682, "step": 17115 }, { "epoch": 1.44, "learning_rate": 2.1921649184241943e-07, "loss": 0.5149, "step": 17120 }, { "epoch": 1.44, "learning_rate": 2.1632293745365574e-07, "loss": 0.5627, "step": 17125 }, { "epoch": 1.44, "learning_rate": 2.1344852394752646e-07, "loss": 0.5857, "step": 17130 }, { "epoch": 1.44, "learning_rate": 2.1059325354400305e-07, "loss": 0.5996, "step": 17135 }, { "epoch": 1.44, "learning_rate": 2.0775712844825213e-07, "loss": 0.5789, "step": 17140 }, { "epoch": 1.44, "learning_rate": 2.0494015085066875e-07, "loss": 0.5738, "step": 17145 }, { "epoch": 1.44, "learning_rate": 2.0214232292685708e-07, "loss": 0.5861, "step": 17150 }, { "epoch": 1.44, "learning_rate": 1.9936364683762754e-07, "loss": 0.6145, "step": 17155 }, { "epoch": 1.44, "learning_rate": 1.9660412472900235e-07, "loss": 0.5473, "step": 17160 }, { "epoch": 1.44, "learning_rate": 1.9386375873221286e-07, "loss": 0.5513, "step": 17165 }, { "epoch": 1.44, "learning_rate": 1.9114255096369105e-07, "loss": 0.5138, "step": 17170 }, { "epoch": 1.44, "learning_rate": 1.8844050352507526e-07, "loss": 0.5152, "step": 17175 }, { "epoch": 1.44, "learning_rate": 1.8575761850320727e-07, "loss": 0.6125, "step": 17180 }, { "epoch": 1.44, "learning_rate": 1.8309389797012688e-07, "loss": 0.5365, "step": 17185 }, { "epoch": 1.44, "learning_rate": 1.8044934398307457e-07, "loss": 0.5264, "step": 17190 }, { "epoch": 1.44, "learning_rate": 1.7782395858448598e-07, "loss": 0.5607, "step": 17195 }, { "epoch": 1.44, "learning_rate": 1.7521774380199197e-07, "loss": 0.5552, "step": 17200 }, { "epoch": 1.44, "learning_rate": 1.7263070164842133e-07, "loss": 0.5522, "step": 17205 }, { "epoch": 1.44, "learning_rate": 1.7006283412179525e-07, "loss": 0.5582, "step": 17210 }, { "epoch": 1.44, "learning_rate": 1.6751414320532453e-07, "loss": 0.5665, "step": 17215 }, { "epoch": 1.45, "learning_rate": 1.6498463086740412e-07, "loss": 0.5512, "step": 17220 }, { "epoch": 1.45, "learning_rate": 1.624742990616268e-07, "loss": 0.5645, "step": 17225 }, { "epoch": 1.45, "learning_rate": 1.5998314972676676e-07, "loss": 0.5416, "step": 17230 }, { "epoch": 1.45, "learning_rate": 1.5751118478678216e-07, "loss": 0.5573, "step": 17235 }, { "epoch": 1.45, "learning_rate": 1.5505840615081536e-07, "loss": 0.5505, "step": 17240 }, { "epoch": 1.45, "learning_rate": 1.526248157131982e-07, "loss": 0.5963, "step": 17245 }, { "epoch": 1.45, "learning_rate": 1.5021041535343005e-07, "loss": 0.5119, "step": 17250 }, { "epoch": 1.45, "learning_rate": 1.478152069361971e-07, "loss": 0.5668, "step": 17255 }, { "epoch": 1.45, "learning_rate": 1.4543919231136404e-07, "loss": 0.6242, "step": 17260 }, { "epoch": 1.45, "learning_rate": 1.4308237331396858e-07, "loss": 0.5878, "step": 17265 }, { "epoch": 1.45, "learning_rate": 1.4074475176422407e-07, "loss": 0.5339, "step": 17270 }, { "epoch": 1.45, "learning_rate": 1.3842632946752254e-07, "loss": 0.4849, "step": 17275 }, { "epoch": 1.45, "learning_rate": 1.3612710821441777e-07, "loss": 0.5803, "step": 17280 }, { "epoch": 1.45, "learning_rate": 1.3384708978064208e-07, "loss": 0.5718, "step": 17285 }, { "epoch": 1.45, "learning_rate": 1.3158627592709804e-07, "loss": 0.6143, "step": 17290 }, { "epoch": 1.45, "learning_rate": 1.2934466839984726e-07, "loss": 0.586, "step": 17295 }, { "epoch": 1.45, "learning_rate": 1.2712226893012713e-07, "loss": 0.5006, "step": 17300 }, { "epoch": 1.45, "learning_rate": 1.2491907923433688e-07, "loss": 0.5623, "step": 17305 }, { "epoch": 1.45, "learning_rate": 1.227351010140432e-07, "loss": 0.5473, "step": 17310 }, { "epoch": 1.45, "learning_rate": 1.2057033595596345e-07, "loss": 0.6075, "step": 17315 }, { "epoch": 1.45, "learning_rate": 1.1842478573199367e-07, "loss": 0.5507, "step": 17320 }, { "epoch": 1.45, "learning_rate": 1.16298451999175e-07, "loss": 0.5811, "step": 17325 }, { "epoch": 1.45, "learning_rate": 1.1419133639971602e-07, "loss": 0.5167, "step": 17330 }, { "epoch": 1.45, "learning_rate": 1.1210344056098165e-07, "loss": 0.5388, "step": 17335 }, { "epoch": 1.46, "learning_rate": 1.1003476609549036e-07, "loss": 0.521, "step": 17340 }, { "epoch": 1.46, "learning_rate": 1.079853146009141e-07, "loss": 0.5275, "step": 17345 }, { "epoch": 1.46, "learning_rate": 1.0595508766008678e-07, "loss": 0.5567, "step": 17350 }, { "epoch": 1.46, "learning_rate": 1.0394408684098744e-07, "loss": 0.5466, "step": 17355 }, { "epoch": 1.46, "learning_rate": 1.0195231369674597e-07, "loss": 0.5341, "step": 17360 }, { "epoch": 1.46, "learning_rate": 9.997976976564572e-08, "loss": 0.5688, "step": 17365 }, { "epoch": 1.46, "learning_rate": 9.802645657112087e-08, "loss": 0.5877, "step": 17370 }, { "epoch": 1.46, "learning_rate": 9.6092375621748e-08, "loss": 0.5532, "step": 17375 }, { "epoch": 1.46, "learning_rate": 9.417752841125171e-08, "loss": 0.5739, "step": 17380 }, { "epoch": 1.46, "learning_rate": 9.228191641850736e-08, "loss": 0.5371, "step": 17385 }, { "epoch": 1.46, "learning_rate": 9.040554110752719e-08, "loss": 0.5397, "step": 17390 }, { "epoch": 1.46, "learning_rate": 8.854840392746866e-08, "loss": 0.5723, "step": 17395 }, { "epoch": 1.46, "learning_rate": 8.671050631263722e-08, "loss": 0.5556, "step": 17400 }, { "epoch": 1.46, "learning_rate": 8.489184968246966e-08, "loss": 0.5761, "step": 17405 }, { "epoch": 1.46, "learning_rate": 8.309243544155076e-08, "loss": 0.5252, "step": 17410 }, { "epoch": 1.46, "learning_rate": 8.131226497959943e-08, "loss": 0.5275, "step": 17415 }, { "epoch": 1.46, "learning_rate": 7.95513396714742e-08, "loss": 0.5085, "step": 17420 }, { "epoch": 1.46, "learning_rate": 7.780966087716779e-08, "loss": 0.4833, "step": 17425 }, { "epoch": 1.46, "learning_rate": 7.60872299418125e-08, "loss": 0.5299, "step": 17430 }, { "epoch": 1.46, "learning_rate": 7.438404819566924e-08, "loss": 0.4901, "step": 17435 }, { "epoch": 1.46, "learning_rate": 7.270011695414137e-08, "loss": 0.5323, "step": 17440 }, { "epoch": 1.46, "learning_rate": 7.103543751775521e-08, "loss": 0.5006, "step": 17445 }, { "epoch": 1.46, "learning_rate": 6.93900111721768e-08, "loss": 0.5445, "step": 17450 }, { "epoch": 1.46, "learning_rate": 6.77638391881924e-08, "loss": 0.5209, "step": 17455 }, { "epoch": 1.47, "learning_rate": 6.615692282173069e-08, "loss": 0.5275, "step": 17460 }, { "epoch": 1.47, "learning_rate": 6.456926331384061e-08, "loss": 0.5609, "step": 17465 }, { "epoch": 1.47, "learning_rate": 6.300086189069965e-08, "loss": 0.5969, "step": 17470 }, { "epoch": 1.47, "learning_rate": 6.145171976361108e-08, "loss": 0.5442, "step": 17475 }, { "epoch": 1.47, "learning_rate": 5.992183812900676e-08, "loss": 0.57, "step": 17480 }, { "epoch": 1.47, "learning_rate": 5.841121816844153e-08, "loss": 0.4688, "step": 17485 }, { "epoch": 1.47, "learning_rate": 5.6919861048596035e-08, "loss": 0.5471, "step": 17490 }, { "epoch": 1.47, "learning_rate": 5.544776792126838e-08, "loss": 0.5103, "step": 17495 }, { "epoch": 1.47, "learning_rate": 5.3994939923387996e-08, "loss": 0.551, "step": 17500 }, { "epoch": 1.47, "learning_rate": 5.256137817699902e-08, "loss": 0.5818, "step": 17505 }, { "epoch": 1.47, "learning_rate": 5.114708378926303e-08, "loss": 0.5303, "step": 17510 }, { "epoch": 1.47, "learning_rate": 4.975205785247017e-08, "loss": 0.6146, "step": 17515 }, { "epoch": 1.47, "learning_rate": 4.8376301444022495e-08, "loss": 0.536, "step": 17520 }, { "epoch": 1.47, "learning_rate": 4.701981562644231e-08, "loss": 0.5852, "step": 17525 }, { "epoch": 1.47, "learning_rate": 4.568260144736658e-08, "loss": 0.5414, "step": 17530 }, { "epoch": 1.47, "learning_rate": 4.4364659939549745e-08, "loss": 0.6276, "step": 17535 }, { "epoch": 1.47, "learning_rate": 4.306599212086371e-08, "loss": 0.5174, "step": 17540 }, { "epoch": 1.47, "learning_rate": 4.178659899429227e-08, "loss": 0.5378, "step": 17545 }, { "epoch": 1.47, "learning_rate": 4.052648154793393e-08, "loss": 0.5817, "step": 17550 }, { "epoch": 1.47, "learning_rate": 3.9285640754999075e-08, "loss": 0.547, "step": 17555 }, { "epoch": 1.47, "learning_rate": 3.806407757381558e-08, "loss": 0.5943, "step": 17560 }, { "epoch": 1.47, "learning_rate": 3.686179294781211e-08, "loss": 0.5325, "step": 17565 }, { "epoch": 1.47, "learning_rate": 3.567878780554035e-08, "loss": 0.5637, "step": 17570 }, { "epoch": 1.47, "learning_rate": 3.451506306065555e-08, "loss": 0.5449, "step": 17575 }, { "epoch": 1.48, "learning_rate": 3.3370619611922114e-08, "loss": 0.5583, "step": 17580 }, { "epoch": 1.48, "learning_rate": 3.224545834321635e-08, "loss": 0.529, "step": 17585 }, { "epoch": 1.48, "learning_rate": 3.113958012351814e-08, "loss": 0.6111, "step": 17590 }, { "epoch": 1.48, "learning_rate": 3.005298580691929e-08, "loss": 0.5482, "step": 17595 }, { "epoch": 1.48, "learning_rate": 2.8985676232615167e-08, "loss": 0.5429, "step": 17600 }, { "epoch": 1.48, "learning_rate": 2.7937652224907517e-08, "loss": 0.5208, "step": 17605 }, { "epoch": 1.48, "learning_rate": 2.690891459320721e-08, "loss": 0.5263, "step": 17610 }, { "epoch": 1.48, "learning_rate": 2.5899464132023156e-08, "loss": 0.6064, "step": 17615 }, { "epoch": 1.48, "learning_rate": 2.4909301620976154e-08, "loss": 0.561, "step": 17620 }, { "epoch": 1.48, "learning_rate": 2.393842782478506e-08, "loss": 0.5578, "step": 17625 }, { "epoch": 1.48, "learning_rate": 2.2986843493272293e-08, "loss": 0.5619, "step": 17630 }, { "epoch": 1.48, "learning_rate": 2.2054549361363863e-08, "loss": 0.5691, "step": 17635 }, { "epoch": 1.48, "learning_rate": 2.114154614908659e-08, "loss": 0.55, "step": 17640 }, { "epoch": 1.48, "learning_rate": 2.0247834561573643e-08, "loss": 0.5679, "step": 17645 }, { "epoch": 1.48, "learning_rate": 1.9373415289047903e-08, "loss": 0.5665, "step": 17650 }, { "epoch": 1.48, "learning_rate": 1.8518289006846935e-08, "loss": 0.5459, "step": 17655 }, { "epoch": 1.48, "learning_rate": 1.7682456375392454e-08, "loss": 0.5644, "step": 17660 }, { "epoch": 1.48, "learning_rate": 1.6865918040220864e-08, "loss": 0.5903, "step": 17665 }, { "epoch": 1.48, "learning_rate": 1.606867463195272e-08, "loss": 0.5418, "step": 17670 }, { "epoch": 1.48, "learning_rate": 1.529072676631771e-08, "loss": 0.5612, "step": 17675 }, { "epoch": 1.48, "learning_rate": 1.4532075044138005e-08, "loss": 0.5657, "step": 17680 }, { "epoch": 1.48, "learning_rate": 1.3792720051333808e-08, "loss": 0.6194, "step": 17685 }, { "epoch": 1.48, "learning_rate": 1.3072662358920574e-08, "loss": 0.5694, "step": 17690 }, { "epoch": 1.48, "learning_rate": 1.2371902523014567e-08, "loss": 0.5919, "step": 17695 }, { "epoch": 1.49, "learning_rate": 1.1690441084827308e-08, "loss": 0.5565, "step": 17700 }, { "epoch": 1.49, "learning_rate": 1.1028278570657247e-08, "loss": 0.5603, "step": 17705 }, { "epoch": 1.49, "learning_rate": 1.038541549190919e-08, "loss": 0.5781, "step": 17710 }, { "epoch": 1.49, "learning_rate": 9.761852345077648e-09, "loss": 0.5737, "step": 17715 }, { "epoch": 1.49, "learning_rate": 9.157589611752393e-09, "loss": 0.534, "step": 17720 }, { "epoch": 1.49, "learning_rate": 8.572627758612894e-09, "loss": 0.5662, "step": 17725 }, { "epoch": 1.49, "learning_rate": 8.006967237439433e-09, "loss": 0.6045, "step": 17730 }, { "epoch": 1.49, "learning_rate": 7.460608485099219e-09, "loss": 0.5545, "step": 17735 }, { "epoch": 1.49, "learning_rate": 6.933551923557491e-09, "loss": 0.5735, "step": 17740 }, { "epoch": 1.49, "learning_rate": 6.425797959869195e-09, "loss": 0.5077, "step": 17745 }, { "epoch": 1.49, "learning_rate": 5.937346986178982e-09, "loss": 0.5272, "step": 17750 }, { "epoch": 1.49, "learning_rate": 5.4681993797295325e-09, "loss": 0.5595, "step": 17755 }, { "epoch": 1.49, "learning_rate": 5.0183555028504584e-09, "loss": 0.5157, "step": 17760 }, { "epoch": 1.49, "learning_rate": 4.587815702963849e-09, "loss": 0.5416, "step": 17765 }, { "epoch": 1.49, "learning_rate": 4.176580312587053e-09, "loss": 0.5501, "step": 17770 }, { "epoch": 1.49, "learning_rate": 3.784649649318794e-09, "loss": 0.549, "step": 17775 }, { "epoch": 1.49, "learning_rate": 3.4120240158586058e-09, "loss": 0.541, "step": 17780 }, { "epoch": 1.49, "learning_rate": 3.0587036999929484e-09, "loss": 0.6243, "step": 17785 }, { "epoch": 1.49, "learning_rate": 2.7246889745924375e-09, "loss": 0.5739, "step": 17790 }, { "epoch": 1.49, "learning_rate": 2.4099800976284947e-09, "loss": 0.4989, "step": 17795 }, { "epoch": 1.49, "learning_rate": 2.1145773121511446e-09, "loss": 0.5615, "step": 17800 }, { "epoch": 1.49, "learning_rate": 1.8384808463084436e-09, "loss": 0.4995, "step": 17805 }, { "epoch": 1.49, "learning_rate": 1.5816909133326007e-09, "loss": 0.5014, "step": 17810 }, { "epoch": 1.49, "learning_rate": 1.3442077115510821e-09, "loss": 0.5085, "step": 17815 }, { "epoch": 1.5, "learning_rate": 1.126031424372731e-09, "loss": 0.5051, "step": 17820 }, { "epoch": 1.5, "learning_rate": 9.271622203016472e-10, "loss": 0.5611, "step": 17825 }, { "epoch": 1.5, "learning_rate": 7.476002529233084e-10, "loss": 0.5955, "step": 17830 }, { "epoch": 1.5, "learning_rate": 5.873456609239992e-10, "loss": 0.6174, "step": 17835 }, { "epoch": 1.5, "learning_rate": 4.463985680630556e-10, "loss": 0.5871, "step": 17840 }, { "epoch": 1.5, "learning_rate": 3.247590832033964e-10, "loss": 0.57, "step": 17845 }, { "epoch": 1.5, "learning_rate": 2.224273002865429e-10, "loss": 0.604, "step": 17850 }, { "epoch": 1.5, "learning_rate": 1.3940329834649656e-10, "loss": 0.5237, "step": 17855 }, { "epoch": 1.5, "learning_rate": 7.568714150141265e-11, "loss": 0.5507, "step": 17860 }, { "epoch": 1.5, "learning_rate": 3.127887896192672e-11, "loss": 0.5975, "step": 17865 }, { "epoch": 1.5, "learning_rate": 6.178545025603555e-12, "loss": 0.4715, "step": 17870 }, { "epoch": 1.5, "step": 17874, "total_flos": 5.173383007954797e+18, "train_loss": 0.5712567279439411, "train_runtime": 45718.841, "train_samples_per_second": 4.692, "train_steps_per_second": 0.391 } ], "logging_steps": 5, "max_steps": 17874, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 100, "total_flos": 5.173383007954797e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }