{ "best_metric": null, "best_model_checkpoint": null, "epoch": 31.446190102120973, "global_step": 15000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.002094789211835559, "learning_rate": 1.0206207261596573e-07, "loss": 42.596397399902344, "step": 1 }, { "epoch": 0.020947892118355592, "learning_rate": 1.0206207261596575e-06, "loss": 41.090047200520836, "step": 10 }, { "epoch": 0.041895784236711184, "learning_rate": 2.041241452319315e-06, "loss": 40.72335205078125, "step": 20 }, { "epoch": 0.06284367635506677, "learning_rate": 3.0618621784789722e-06, "loss": 40.80942077636719, "step": 30 }, { "epoch": 0.08379156847342237, "learning_rate": 4.08248290463863e-06, "loss": 40.0015869140625, "step": 40 }, { "epoch": 0.10473946059177795, "learning_rate": 5.103103630798286e-06, "loss": 39.082891845703124, "step": 50 }, { "epoch": 0.12568735271013354, "learning_rate": 6.1237243569579445e-06, "loss": 39.320306396484376, "step": 60 }, { "epoch": 0.14663524482848914, "learning_rate": 7.144345083117603e-06, "loss": 38.66647033691406, "step": 70 }, { "epoch": 0.16758313694684474, "learning_rate": 8.16496580927726e-06, "loss": 38.0055908203125, "step": 80 }, { "epoch": 0.1885310290652003, "learning_rate": 9.185586535436916e-06, "loss": 37.521505737304686, "step": 90 }, { "epoch": 0.2094789211835559, "learning_rate": 1.0206207261596573e-05, "loss": 35.75002746582031, "step": 100 }, { "epoch": 0.2304268133019115, "learning_rate": 1.1226827987756233e-05, "loss": 35.38407592773437, "step": 110 }, { "epoch": 0.2513747054202671, "learning_rate": 1.2247448713915889e-05, "loss": 34.455230712890625, "step": 120 }, { "epoch": 0.2723225975386227, "learning_rate": 1.3268069440075545e-05, "loss": 33.48695373535156, "step": 130 }, { "epoch": 0.2932704896569783, "learning_rate": 1.4288690166235205e-05, "loss": 33.0624755859375, "step": 140 }, { "epoch": 0.31421838177533384, "learning_rate": 1.530931089239486e-05, "loss": 31.633297729492188, "step": 150 }, { "epoch": 0.33516627389368947, "learning_rate": 1.632993161855452e-05, "loss": 30.392620849609376, "step": 160 }, { "epoch": 0.35611416601204504, "learning_rate": 1.7350552344714174e-05, "loss": 30.184588623046874, "step": 170 }, { "epoch": 0.3770620581304006, "learning_rate": 1.8371173070873833e-05, "loss": 29.326535034179688, "step": 180 }, { "epoch": 0.39800995024875624, "learning_rate": 1.939179379703349e-05, "loss": 28.23697509765625, "step": 190 }, { "epoch": 0.4189578423671118, "learning_rate": 2.0412414523193145e-05, "loss": 27.301419067382813, "step": 200 }, { "epoch": 0.4399057344854674, "learning_rate": 2.1433035249352804e-05, "loss": 26.5281494140625, "step": 210 }, { "epoch": 0.460853626603823, "learning_rate": 2.2453655975512465e-05, "loss": 25.992547607421876, "step": 220 }, { "epoch": 0.4818015187221786, "learning_rate": 2.347427670167212e-05, "loss": 24.90663299560547, "step": 230 }, { "epoch": 0.5027494108405341, "learning_rate": 2.4494897427831778e-05, "loss": 23.896534729003907, "step": 240 }, { "epoch": 0.5236973029588897, "learning_rate": 2.5515518153991436e-05, "loss": 22.488262939453126, "step": 250 }, { "epoch": 0.5446451950772454, "learning_rate": 2.653613888015109e-05, "loss": 21.760206604003905, "step": 260 }, { "epoch": 0.565593087195601, "learning_rate": 2.755675960631075e-05, "loss": 20.808561706542967, "step": 270 }, { "epoch": 0.5865409793139565, "learning_rate": 2.857738033247041e-05, "loss": 20.107774353027345, "step": 280 }, { "epoch": 0.6074888714323121, "learning_rate": 2.9598001058630065e-05, "loss": 19.469386291503906, "step": 290 }, { "epoch": 0.6284367635506677, "learning_rate": 3.061862178478972e-05, "loss": 18.442234802246094, "step": 300 }, { "epoch": 0.6493846556690233, "learning_rate": 3.163924251094938e-05, "loss": 18.132992553710938, "step": 310 }, { "epoch": 0.6703325477873789, "learning_rate": 3.265986323710904e-05, "loss": 17.425698852539064, "step": 320 }, { "epoch": 0.6912804399057345, "learning_rate": 3.3680483963268694e-05, "loss": 16.683474731445312, "step": 330 }, { "epoch": 0.7122283320240901, "learning_rate": 3.470110468942835e-05, "loss": 16.511445617675783, "step": 340 }, { "epoch": 0.7331762241424457, "learning_rate": 3.5721725415588004e-05, "loss": 15.8478759765625, "step": 350 }, { "epoch": 0.7541241162608012, "learning_rate": 3.6742346141747665e-05, "loss": 15.450515747070312, "step": 360 }, { "epoch": 0.7750720083791568, "learning_rate": 3.7762966867907327e-05, "loss": 14.916090393066407, "step": 370 }, { "epoch": 0.7960199004975125, "learning_rate": 3.878358759406698e-05, "loss": 14.495413208007813, "step": 380 }, { "epoch": 0.816967792615868, "learning_rate": 3.980420832022664e-05, "loss": 13.862504577636718, "step": 390 }, { "epoch": 0.8379156847342236, "learning_rate": 4.082482904638629e-05, "loss": 13.526719665527343, "step": 400 }, { "epoch": 0.8588635768525792, "learning_rate": 4.184544977254595e-05, "loss": 13.241981506347656, "step": 410 }, { "epoch": 0.8798114689709348, "learning_rate": 4.286607049870561e-05, "loss": 12.791949462890624, "step": 420 }, { "epoch": 0.9007593610892904, "learning_rate": 4.388669122486527e-05, "loss": 12.353260040283203, "step": 430 }, { "epoch": 0.921707253207646, "learning_rate": 4.490731195102493e-05, "loss": 11.939605712890625, "step": 440 }, { "epoch": 0.9426551453260016, "learning_rate": 4.5927932677184585e-05, "loss": 11.615445709228515, "step": 450 }, { "epoch": 0.9636030374443572, "learning_rate": 4.694855340334424e-05, "loss": 11.29063720703125, "step": 460 }, { "epoch": 0.9845509295627127, "learning_rate": 4.7969174129503894e-05, "loss": 11.051673889160156, "step": 470 }, { "epoch": 1.0062843676355067, "learning_rate": 4.8989794855663556e-05, "loss": 11.174005889892578, "step": 480 }, { "epoch": 1.0272322597538623, "learning_rate": 5.001041558182322e-05, "loss": 10.351375579833984, "step": 490 }, { "epoch": 1.0481801518722178, "learning_rate": 5.103103630798287e-05, "loss": 10.082479858398438, "step": 500 }, { "epoch": 1.0691280439905735, "learning_rate": 5.205165703414253e-05, "loss": 9.848101806640624, "step": 510 }, { "epoch": 1.090075936108929, "learning_rate": 5.307227776030218e-05, "loss": 9.558543395996093, "step": 520 }, { "epoch": 1.1110238282272846, "learning_rate": 5.409289848646184e-05, "loss": 9.263871765136718, "step": 530 }, { "epoch": 1.1319717203456403, "learning_rate": 5.51135192126215e-05, "loss": 9.101696014404297, "step": 540 }, { "epoch": 1.1529196124639958, "learning_rate": 5.613413993878116e-05, "loss": 8.784052276611328, "step": 550 }, { "epoch": 1.1738675045823515, "learning_rate": 5.715476066494082e-05, "loss": 8.466715240478516, "step": 560 }, { "epoch": 1.194815396700707, "learning_rate": 5.817538139110047e-05, "loss": 8.419536590576172, "step": 570 }, { "epoch": 1.2157632888190626, "learning_rate": 5.919600211726013e-05, "loss": 8.445430755615234, "step": 580 }, { "epoch": 1.236711180937418, "learning_rate": 6.0216622843419785e-05, "loss": 7.745582580566406, "step": 590 }, { "epoch": 1.2576590730557737, "learning_rate": 6.123724356957945e-05, "loss": 7.821333312988282, "step": 600 }, { "epoch": 1.2786069651741294, "learning_rate": 6.22578642957391e-05, "loss": 7.500454711914062, "step": 610 }, { "epoch": 1.2995548572924849, "learning_rate": 6.327848502189876e-05, "loss": 7.44578857421875, "step": 620 }, { "epoch": 1.3205027494108406, "learning_rate": 6.429910574805841e-05, "loss": 7.184627532958984, "step": 630 }, { "epoch": 1.341450641529196, "learning_rate": 6.531972647421808e-05, "loss": 6.880846405029297, "step": 640 }, { "epoch": 1.3623985336475517, "learning_rate": 6.634034720037773e-05, "loss": 6.639464569091797, "step": 650 }, { "epoch": 1.3833464257659074, "learning_rate": 6.736096792653739e-05, "loss": 6.543840789794922, "step": 660 }, { "epoch": 1.4042943178842628, "learning_rate": 6.838158865269704e-05, "loss": 6.245196914672851, "step": 670 }, { "epoch": 1.4252422100026185, "learning_rate": 6.94022093788567e-05, "loss": 6.316292572021484, "step": 680 }, { "epoch": 1.446190102120974, "learning_rate": 7.042283010501637e-05, "loss": 6.2703697204589846, "step": 690 }, { "epoch": 1.4671379942393297, "learning_rate": 7.144345083117601e-05, "loss": 6.09345703125, "step": 700 }, { "epoch": 1.4880858863576854, "learning_rate": 7.246407155733568e-05, "loss": 5.9814506530761715, "step": 710 }, { "epoch": 1.5090337784760408, "learning_rate": 7.348469228349533e-05, "loss": 5.9337646484375, "step": 720 }, { "epoch": 1.5299816705943965, "learning_rate": 7.450531300965498e-05, "loss": 5.937409591674805, "step": 730 }, { "epoch": 1.550929562712752, "learning_rate": 7.552593373581465e-05, "loss": 5.64327392578125, "step": 740 }, { "epoch": 1.5718774548311076, "learning_rate": 7.654655446197431e-05, "loss": 5.371760559082031, "step": 750 }, { "epoch": 1.5928253469494633, "learning_rate": 7.756717518813396e-05, "loss": 5.309605407714844, "step": 760 }, { "epoch": 1.6137732390678188, "learning_rate": 7.858779591429362e-05, "loss": 5.3783222198486325, "step": 770 }, { "epoch": 1.6347211311861742, "learning_rate": 7.960841664045329e-05, "loss": 5.400894546508789, "step": 780 }, { "epoch": 1.65566902330453, "learning_rate": 8.062903736661294e-05, "loss": 5.137008285522461, "step": 790 }, { "epoch": 1.6766169154228856, "learning_rate": 8.164965809277258e-05, "loss": 5.365228271484375, "step": 800 }, { "epoch": 1.6975648075412413, "learning_rate": 8.267027881893225e-05, "loss": 5.210577392578125, "step": 810 }, { "epoch": 1.7185126996595967, "learning_rate": 8.36908995450919e-05, "loss": 5.149754333496094, "step": 820 }, { "epoch": 1.7394605917779522, "learning_rate": 8.471152027125156e-05, "loss": 5.063209915161133, "step": 830 }, { "epoch": 1.7604084838963079, "learning_rate": 8.573214099741121e-05, "loss": 4.89969482421875, "step": 840 }, { "epoch": 1.7813563760146636, "learning_rate": 8.675276172357088e-05, "loss": 4.8380378723144535, "step": 850 }, { "epoch": 1.8023042681330192, "learning_rate": 8.777338244973054e-05, "loss": 4.826490783691407, "step": 860 }, { "epoch": 1.8232521602513747, "learning_rate": 8.879400317589019e-05, "loss": 4.733642578125, "step": 870 }, { "epoch": 1.8442000523697302, "learning_rate": 8.981462390204986e-05, "loss": 4.840193557739258, "step": 880 }, { "epoch": 1.8651479444880859, "learning_rate": 9.083524462820951e-05, "loss": 4.841461944580078, "step": 890 }, { "epoch": 1.8860958366064415, "learning_rate": 9.185586535436917e-05, "loss": 4.591343688964844, "step": 900 }, { "epoch": 1.9070437287247972, "learning_rate": 9.287648608052881e-05, "loss": 4.796835708618164, "step": 910 }, { "epoch": 1.9279916208431527, "learning_rate": 9.389710680668848e-05, "loss": 4.928312301635742, "step": 920 }, { "epoch": 1.9489395129615081, "learning_rate": 9.491772753284813e-05, "loss": 4.6936603546142575, "step": 930 }, { "epoch": 1.9698874050798638, "learning_rate": 9.593834825900779e-05, "loss": 4.678832626342773, "step": 940 }, { "epoch": 1.9908352971982195, "learning_rate": 9.695896898516746e-05, "loss": 4.801474380493164, "step": 950 }, { "epoch": 2.0125687352710133, "learning_rate": 9.797958971132711e-05, "loss": 4.615579986572266, "step": 960 }, { "epoch": 2.033516627389369, "learning_rate": 9.900021043748677e-05, "loss": 4.685293197631836, "step": 970 }, { "epoch": 2.0544645195077247, "learning_rate": 0.00010002083116364643, "loss": 4.529672622680664, "step": 980 }, { "epoch": 2.07541241162608, "learning_rate": 0.00010104145188980609, "loss": 4.6519828796386715, "step": 990 }, { "epoch": 2.0963603037444356, "learning_rate": 0.00010206207261596574, "loss": 4.200122451782226, "step": 1000 }, { "epoch": 2.1173081958627913, "learning_rate": 0.0001030826933421254, "loss": 4.467970275878907, "step": 1010 }, { "epoch": 2.138256087981147, "learning_rate": 0.00010410331406828505, "loss": 4.364266586303711, "step": 1020 }, { "epoch": 2.1592039800995027, "learning_rate": 0.00010512393479444471, "loss": 4.321992874145508, "step": 1030 }, { "epoch": 2.180151872217858, "learning_rate": 0.00010614455552060436, "loss": 4.208817672729492, "step": 1040 }, { "epoch": 2.2010997643362136, "learning_rate": 0.00010716517624676403, "loss": 4.414374923706054, "step": 1050 }, { "epoch": 2.2220476564545693, "learning_rate": 0.00010818579697292369, "loss": 4.222100067138672, "step": 1060 }, { "epoch": 2.242995548572925, "learning_rate": 0.00010920641769908334, "loss": 4.392937850952149, "step": 1070 }, { "epoch": 2.2639434406912806, "learning_rate": 0.000110227038425243, "loss": 4.361217498779297, "step": 1080 }, { "epoch": 2.284891332809636, "learning_rate": 0.00011124765915140266, "loss": 4.305131912231445, "step": 1090 }, { "epoch": 2.3058392249279915, "learning_rate": 0.00011226827987756232, "loss": 4.322722244262695, "step": 1100 }, { "epoch": 2.326787117046347, "learning_rate": 0.00011328890060372197, "loss": 4.250308990478516, "step": 1110 }, { "epoch": 2.347735009164703, "learning_rate": 0.00011430952132988164, "loss": 4.366016006469726, "step": 1120 }, { "epoch": 2.3686829012830586, "learning_rate": 0.00011533014205604128, "loss": 4.271330642700195, "step": 1130 }, { "epoch": 2.389630793401414, "learning_rate": 0.00011635076278220094, "loss": 4.35943489074707, "step": 1140 }, { "epoch": 2.4105786855197695, "learning_rate": 0.00011737138350836059, "loss": 4.14013442993164, "step": 1150 }, { "epoch": 2.431526577638125, "learning_rate": 0.00011839200423452026, "loss": 4.263423156738281, "step": 1160 }, { "epoch": 2.452474469756481, "learning_rate": 0.00011941262496067991, "loss": 4.046255874633789, "step": 1170 }, { "epoch": 2.473422361874836, "learning_rate": 0.00012043324568683957, "loss": 4.267144775390625, "step": 1180 }, { "epoch": 2.494370253993192, "learning_rate": 0.00012145386641299924, "loss": 4.446724319458008, "step": 1190 }, { "epoch": 2.5153181461115475, "learning_rate": 0.0001224744871391589, "loss": 4.310148239135742, "step": 1200 }, { "epoch": 2.536266038229903, "learning_rate": 0.00012349510786531856, "loss": 4.145759963989258, "step": 1210 }, { "epoch": 2.557213930348259, "learning_rate": 0.0001245157285914782, "loss": 3.9344154357910157, "step": 1220 }, { "epoch": 2.5781618224666145, "learning_rate": 0.00012553634931763784, "loss": 4.1616455078125, "step": 1230 }, { "epoch": 2.5991097145849698, "learning_rate": 0.0001265569700437975, "loss": 4.117146682739258, "step": 1240 }, { "epoch": 2.6200576067033254, "learning_rate": 0.00012757759076995718, "loss": 4.102180480957031, "step": 1250 }, { "epoch": 2.641005498821681, "learning_rate": 0.00012859821149611682, "loss": 4.15636100769043, "step": 1260 }, { "epoch": 2.661953390940037, "learning_rate": 0.0001296188322222765, "loss": 4.069457626342773, "step": 1270 }, { "epoch": 2.682901283058392, "learning_rate": 0.00013063945294843616, "loss": 4.0670215606689455, "step": 1280 }, { "epoch": 2.7038491751767477, "learning_rate": 0.0001316600736745958, "loss": 3.985906219482422, "step": 1290 }, { "epoch": 2.7247970672951034, "learning_rate": 0.00013268069440075547, "loss": 4.272599792480468, "step": 1300 }, { "epoch": 2.745744959413459, "learning_rate": 0.00013370131512691514, "loss": 3.9709007263183596, "step": 1310 }, { "epoch": 2.7666928515318148, "learning_rate": 0.00013472193585307478, "loss": 4.1308135986328125, "step": 1320 }, { "epoch": 2.7876407436501704, "learning_rate": 0.00013574255657923444, "loss": 4.175233840942383, "step": 1330 }, { "epoch": 2.8085886357685257, "learning_rate": 0.00013676317730539409, "loss": 3.9187103271484376, "step": 1340 }, { "epoch": 2.8295365278868814, "learning_rate": 0.00013778379803155375, "loss": 3.8264163970947265, "step": 1350 }, { "epoch": 2.850484420005237, "learning_rate": 0.0001388044187577134, "loss": 4.124664306640625, "step": 1360 }, { "epoch": 2.8714323121235923, "learning_rate": 0.00013982503948387306, "loss": 3.952465057373047, "step": 1370 }, { "epoch": 2.892380204241948, "learning_rate": 0.00014084566021003273, "loss": 3.987406921386719, "step": 1380 }, { "epoch": 2.9133280963603037, "learning_rate": 0.00014186628093619237, "loss": 3.92406005859375, "step": 1390 }, { "epoch": 2.9342759884786593, "learning_rate": 0.00014288690166235201, "loss": 3.932135009765625, "step": 1400 }, { "epoch": 2.955223880597015, "learning_rate": 0.00014390752238851168, "loss": 4.134164428710937, "step": 1410 }, { "epoch": 2.9761717727153707, "learning_rate": 0.00014492814311467135, "loss": 4.045958709716797, "step": 1420 }, { "epoch": 2.9971196648337264, "learning_rate": 0.000145948763840831, "loss": 3.9462562561035157, "step": 1430 }, { "epoch": 3.01885310290652, "learning_rate": 0.00014696938456699066, "loss": 4.254601669311524, "step": 1440 }, { "epoch": 3.0398009950248754, "learning_rate": 0.00014799000529315033, "loss": 3.9297733306884766, "step": 1450 }, { "epoch": 3.060748887143231, "learning_rate": 0.00014901062601930997, "loss": 3.8294136047363283, "step": 1460 }, { "epoch": 3.081696779261587, "learning_rate": 0.00015003124674546964, "loss": 4.05113525390625, "step": 1470 }, { "epoch": 3.1026446713799425, "learning_rate": 0.0001510518674716293, "loss": 3.975564956665039, "step": 1480 }, { "epoch": 3.123592563498298, "learning_rate": 0.00015207248819778895, "loss": 3.9852630615234377, "step": 1490 }, { "epoch": 3.1445404556166534, "learning_rate": 0.00015309310892394862, "loss": 4.0287940979003904, "step": 1500 }, { "epoch": 3.165488347735009, "learning_rate": 0.00015411372965010828, "loss": 3.9631397247314455, "step": 1510 }, { "epoch": 3.1864362398533648, "learning_rate": 0.00015513435037626793, "loss": 3.9254386901855467, "step": 1520 }, { "epoch": 3.2073841319717205, "learning_rate": 0.0001561549711024276, "loss": 3.82324104309082, "step": 1530 }, { "epoch": 3.228332024090076, "learning_rate": 0.00015717559182858723, "loss": 3.9215030670166016, "step": 1540 }, { "epoch": 3.2492799162084314, "learning_rate": 0.0001581962125547469, "loss": 3.8164131164550783, "step": 1550 }, { "epoch": 3.270227808326787, "learning_rate": 0.00015921683328090657, "loss": 3.7764801025390624, "step": 1560 }, { "epoch": 3.2911757004451427, "learning_rate": 0.0001602374540070662, "loss": 3.9773059844970704, "step": 1570 }, { "epoch": 3.3121235925634984, "learning_rate": 0.00016125807473322588, "loss": 3.8427078247070314, "step": 1580 }, { "epoch": 3.333071484681854, "learning_rate": 0.00016227869545938555, "loss": 3.8103118896484376, "step": 1590 }, { "epoch": 3.3540193768002093, "learning_rate": 0.00016329931618554516, "loss": 3.8182399749755858, "step": 1600 }, { "epoch": 3.374967268918565, "learning_rate": 0.00016431993691170483, "loss": 3.750722122192383, "step": 1610 }, { "epoch": 3.3959151610369207, "learning_rate": 0.0001653405576378645, "loss": 3.9241371154785156, "step": 1620 }, { "epoch": 3.4168630531552764, "learning_rate": 0.00016636117836402414, "loss": 3.827128219604492, "step": 1630 }, { "epoch": 3.4378109452736316, "learning_rate": 0.0001673817990901838, "loss": 3.750970458984375, "step": 1640 }, { "epoch": 3.4587588373919873, "learning_rate": 0.00016840241981634345, "loss": 3.827236557006836, "step": 1650 }, { "epoch": 3.479706729510343, "learning_rate": 0.00016942304054250312, "loss": 3.862264633178711, "step": 1660 }, { "epoch": 3.5006546216286987, "learning_rate": 0.0001704436612686628, "loss": 3.9794548034667967, "step": 1670 }, { "epoch": 3.5216025137470544, "learning_rate": 0.00017146428199482243, "loss": 3.8642444610595703, "step": 1680 }, { "epoch": 3.54255040586541, "learning_rate": 0.0001724849027209821, "loss": 3.8710708618164062, "step": 1690 }, { "epoch": 3.5634982979837653, "learning_rate": 0.00017350552344714176, "loss": 3.7993534088134764, "step": 1700 }, { "epoch": 3.584446190102121, "learning_rate": 0.0001745261441733014, "loss": 3.8951980590820314, "step": 1710 }, { "epoch": 3.6053940822204766, "learning_rate": 0.00017554676489946107, "loss": 3.728221893310547, "step": 1720 }, { "epoch": 3.6263419743388323, "learning_rate": 0.00017656738562562074, "loss": 3.7405670166015623, "step": 1730 }, { "epoch": 3.6472898664571876, "learning_rate": 0.00017758800635178038, "loss": 3.7010948181152346, "step": 1740 }, { "epoch": 3.6682377585755432, "learning_rate": 0.00017860862707794005, "loss": 3.7746726989746096, "step": 1750 }, { "epoch": 3.689185650693899, "learning_rate": 0.00017962924780409972, "loss": 3.842145538330078, "step": 1760 }, { "epoch": 3.7101335428122546, "learning_rate": 0.00018064986853025936, "loss": 3.817458724975586, "step": 1770 }, { "epoch": 3.7310814349306103, "learning_rate": 0.00018167048925641903, "loss": 3.7765247344970705, "step": 1780 }, { "epoch": 3.752029327048966, "learning_rate": 0.0001826911099825787, "loss": 3.6870758056640627, "step": 1790 }, { "epoch": 3.772977219167321, "learning_rate": 0.00018371173070873834, "loss": 3.8823310852050783, "step": 1800 }, { "epoch": 3.793925111285677, "learning_rate": 0.000184732351434898, "loss": 3.7599208831787108, "step": 1810 }, { "epoch": 3.8148730034040326, "learning_rate": 0.00018575297216105762, "loss": 3.8205623626708984, "step": 1820 }, { "epoch": 3.835820895522388, "learning_rate": 0.0001867735928872173, "loss": 3.840204620361328, "step": 1830 }, { "epoch": 3.8567687876407435, "learning_rate": 0.00018779421361337696, "loss": 3.8769672393798826, "step": 1840 }, { "epoch": 3.877716679759099, "learning_rate": 0.0001888148343395366, "loss": 3.7292160034179687, "step": 1850 }, { "epoch": 3.898664571877455, "learning_rate": 0.00018983545506569627, "loss": 3.785298156738281, "step": 1860 }, { "epoch": 3.9196124639958105, "learning_rate": 0.00019085607579185594, "loss": 3.691172790527344, "step": 1870 }, { "epoch": 3.940560356114166, "learning_rate": 0.00019187669651801558, "loss": 3.7312793731689453, "step": 1880 }, { "epoch": 3.9615082482325215, "learning_rate": 0.00019289731724417525, "loss": 3.7419872283935547, "step": 1890 }, { "epoch": 3.982456140350877, "learning_rate": 0.0001939179379703349, "loss": 3.587678909301758, "step": 1900 }, { "epoch": 4.004189578423671, "learning_rate": 0.00019493855869649455, "loss": 3.9609317779541016, "step": 1910 }, { "epoch": 4.025137470542027, "learning_rate": 0.00019595917942265422, "loss": 3.7329071044921873, "step": 1920 }, { "epoch": 4.046085362660382, "learning_rate": 0.0001969798001488139, "loss": 3.7702545166015624, "step": 1930 }, { "epoch": 4.067033254778738, "learning_rate": 0.00019800042087497353, "loss": 3.796523666381836, "step": 1940 }, { "epoch": 4.087981146897094, "learning_rate": 0.0001990210416011332, "loss": 3.643301773071289, "step": 1950 }, { "epoch": 4.108929039015449, "learning_rate": 0.00020004166232729287, "loss": 3.705374526977539, "step": 1960 }, { "epoch": 4.129876931133805, "learning_rate": 0.0002010622830534525, "loss": 3.619226837158203, "step": 1970 }, { "epoch": 4.15082482325216, "learning_rate": 0.00020208290377961218, "loss": 3.862563705444336, "step": 1980 }, { "epoch": 4.1717727153705155, "learning_rate": 0.00020310352450577185, "loss": 3.6324195861816406, "step": 1990 }, { "epoch": 4.192720607488871, "learning_rate": 0.0002041241452319315, "loss": 3.674951171875, "step": 2000 }, { "epoch": 4.213668499607227, "learning_rate": 0.00020514476595809116, "loss": 3.59210205078125, "step": 2010 }, { "epoch": 4.234616391725583, "learning_rate": 0.0002061653866842508, "loss": 3.8746570587158202, "step": 2020 }, { "epoch": 4.255564283843938, "learning_rate": 0.00020718600741041044, "loss": 3.682146453857422, "step": 2030 }, { "epoch": 4.276512175962294, "learning_rate": 0.0002082066281365701, "loss": 3.8217212677001955, "step": 2040 }, { "epoch": 4.29746006808065, "learning_rate": 0.00020922724886272975, "loss": 3.691872787475586, "step": 2050 }, { "epoch": 4.318407960199005, "learning_rate": 0.00021024786958888942, "loss": 3.7160354614257813, "step": 2060 }, { "epoch": 4.339355852317361, "learning_rate": 0.00021126849031504908, "loss": 3.5512325286865236, "step": 2070 }, { "epoch": 4.360303744435716, "learning_rate": 0.00021228911104120873, "loss": 3.623905563354492, "step": 2080 }, { "epoch": 4.3812516365540715, "learning_rate": 0.0002133097317673684, "loss": 3.756671905517578, "step": 2090 }, { "epoch": 4.402199528672427, "learning_rate": 0.00021433035249352806, "loss": 3.71322021484375, "step": 2100 }, { "epoch": 4.423147420790783, "learning_rate": 0.0002153509732196877, "loss": 3.588302993774414, "step": 2110 }, { "epoch": 4.4440953129091385, "learning_rate": 0.00021637159394584737, "loss": 3.666096496582031, "step": 2120 }, { "epoch": 4.465043205027494, "learning_rate": 0.00021739221467200704, "loss": 3.660139465332031, "step": 2130 }, { "epoch": 4.48599109714585, "learning_rate": 0.00021841283539816668, "loss": 3.5749874114990234, "step": 2140 }, { "epoch": 4.5069389892642056, "learning_rate": 0.00021943345612432635, "loss": 3.621977615356445, "step": 2150 }, { "epoch": 4.527886881382561, "learning_rate": 0.000220454076850486, "loss": 3.7179306030273436, "step": 2160 }, { "epoch": 4.548834773500916, "learning_rate": 0.00022147469757664566, "loss": 3.5558433532714844, "step": 2170 }, { "epoch": 4.569782665619272, "learning_rate": 0.00022249531830280533, "loss": 3.754520034790039, "step": 2180 }, { "epoch": 4.590730557737627, "learning_rate": 0.00022351593902896497, "loss": 3.734426498413086, "step": 2190 }, { "epoch": 4.611678449855983, "learning_rate": 0.00022453655975512464, "loss": 3.6508132934570314, "step": 2200 }, { "epoch": 4.632626341974339, "learning_rate": 0.0002255571804812843, "loss": 3.7082672119140625, "step": 2210 }, { "epoch": 4.653574234092694, "learning_rate": 0.00022657780120744395, "loss": 3.600681686401367, "step": 2220 }, { "epoch": 4.67452212621105, "learning_rate": 0.00022759842193360361, "loss": 3.6909461975097657, "step": 2230 }, { "epoch": 4.695470018329406, "learning_rate": 0.00022861904265976328, "loss": 3.557560348510742, "step": 2240 }, { "epoch": 4.7164179104477615, "learning_rate": 0.0002296396633859229, "loss": 3.6415851593017576, "step": 2250 }, { "epoch": 4.737365802566117, "learning_rate": 0.00023066028411208256, "loss": 3.6133026123046874, "step": 2260 }, { "epoch": 4.758313694684473, "learning_rate": 0.00023168090483824223, "loss": 3.5348537445068358, "step": 2270 }, { "epoch": 4.779261586802828, "learning_rate": 0.00023270152556440187, "loss": 3.6589839935302733, "step": 2280 }, { "epoch": 4.800209478921183, "learning_rate": 0.00023372214629056154, "loss": 3.5354270935058594, "step": 2290 }, { "epoch": 4.821157371039539, "learning_rate": 0.00023474276701672118, "loss": 3.536578369140625, "step": 2300 }, { "epoch": 4.842105263157895, "learning_rate": 0.00023576338774288085, "loss": 3.5566326141357423, "step": 2310 }, { "epoch": 4.86305315527625, "learning_rate": 0.00023678400846904052, "loss": 3.475338363647461, "step": 2320 }, { "epoch": 4.884001047394606, "learning_rate": 0.00023780462919520016, "loss": 3.6042369842529296, "step": 2330 }, { "epoch": 4.904948939512962, "learning_rate": 0.00023882524992135983, "loss": 3.621173095703125, "step": 2340 }, { "epoch": 4.925896831631317, "learning_rate": 0.0002398458706475195, "loss": 3.640410232543945, "step": 2350 }, { "epoch": 4.946844723749672, "learning_rate": 0.00024086649137367914, "loss": 3.488922882080078, "step": 2360 }, { "epoch": 4.967792615868028, "learning_rate": 0.0002418871120998388, "loss": 3.4922332763671875, "step": 2370 }, { "epoch": 4.988740507986384, "learning_rate": 0.00024290773282599848, "loss": 3.590007019042969, "step": 2380 }, { "epoch": 5.010473946059178, "learning_rate": 0.00024392835355215812, "loss": 3.6829368591308596, "step": 2390 }, { "epoch": 5.0314218381775335, "learning_rate": 0.0002449489742783178, "loss": 3.651840591430664, "step": 2400 }, { "epoch": 5.052369730295889, "learning_rate": 0.00024596959500447745, "loss": 3.7254043579101563, "step": 2410 }, { "epoch": 5.073317622414245, "learning_rate": 0.0002469902157306371, "loss": 3.594907760620117, "step": 2420 }, { "epoch": 5.094265514532601, "learning_rate": 0.00024801083645679674, "loss": 3.4854148864746093, "step": 2430 }, { "epoch": 5.115213406650955, "learning_rate": 0.0002490314571829564, "loss": 3.58482666015625, "step": 2440 }, { "epoch": 5.136161298769311, "learning_rate": 0.00025005207790911607, "loss": 3.5847278594970704, "step": 2450 }, { "epoch": 5.157109190887667, "learning_rate": 0.0002510726986352757, "loss": 3.5045509338378906, "step": 2460 }, { "epoch": 5.178057083006022, "learning_rate": 0.00025209331936143535, "loss": 3.6352733612060546, "step": 2470 }, { "epoch": 5.199004975124378, "learning_rate": 0.000253113940087595, "loss": 3.555766296386719, "step": 2480 }, { "epoch": 5.219952867242734, "learning_rate": 0.0002541345608137547, "loss": 3.541688919067383, "step": 2490 }, { "epoch": 5.2409007593610895, "learning_rate": 0.00025515518153991436, "loss": 3.5400638580322266, "step": 2500 }, { "epoch": 5.261848651479445, "learning_rate": 0.00025617580226607403, "loss": 3.4929561614990234, "step": 2510 }, { "epoch": 5.282796543597801, "learning_rate": 0.00025719642299223364, "loss": 3.5611968994140626, "step": 2520 }, { "epoch": 5.3037444357161565, "learning_rate": 0.0002582170437183933, "loss": 3.4528472900390623, "step": 2530 }, { "epoch": 5.324692327834511, "learning_rate": 0.000259237664444553, "loss": 3.474958801269531, "step": 2540 }, { "epoch": 5.345640219952867, "learning_rate": 0.00026025828517071265, "loss": 3.6800113677978517, "step": 2550 }, { "epoch": 5.366588112071223, "learning_rate": 0.0002612789058968723, "loss": 3.524998092651367, "step": 2560 }, { "epoch": 5.387536004189578, "learning_rate": 0.00026229952662303193, "loss": 3.3865074157714843, "step": 2570 }, { "epoch": 5.408483896307934, "learning_rate": 0.0002633201473491916, "loss": 3.416782760620117, "step": 2580 }, { "epoch": 5.42943178842629, "learning_rate": 0.00026434076807535127, "loss": 3.4291786193847655, "step": 2590 }, { "epoch": 5.450379680544645, "learning_rate": 0.00026536138880151093, "loss": 3.487574005126953, "step": 2600 }, { "epoch": 5.471327572663001, "learning_rate": 0.0002663820095276706, "loss": 3.5091732025146483, "step": 2610 }, { "epoch": 5.492275464781357, "learning_rate": 0.00026740263025383027, "loss": 4.268975448608399, "step": 2620 }, { "epoch": 5.5132233568997115, "learning_rate": 0.0002684232509799899, "loss": 3.732823944091797, "step": 2630 }, { "epoch": 5.534171249018067, "learning_rate": 0.00026944387170614955, "loss": 3.650152587890625, "step": 2640 }, { "epoch": 5.555119141136423, "learning_rate": 0.0002704644924323092, "loss": 3.6992671966552733, "step": 2650 }, { "epoch": 5.576067033254779, "learning_rate": 0.0002714851131584689, "loss": 3.5627864837646483, "step": 2660 }, { "epoch": 5.597014925373134, "learning_rate": 0.0002725057338846285, "loss": 3.5473575592041016, "step": 2670 }, { "epoch": 5.61796281749149, "learning_rate": 0.00027352635461078817, "loss": 3.7565258026123045, "step": 2680 }, { "epoch": 5.638910709609846, "learning_rate": 0.00027454697533694784, "loss": 3.4811996459960937, "step": 2690 }, { "epoch": 5.659858601728201, "learning_rate": 0.0002755675960631075, "loss": 3.5412361145019533, "step": 2700 }, { "epoch": 5.680806493846557, "learning_rate": 0.0002765882167892671, "loss": 3.6894275665283205, "step": 2710 }, { "epoch": 5.701754385964913, "learning_rate": 0.0002776088375154268, "loss": 3.532870864868164, "step": 2720 }, { "epoch": 5.722702278083268, "learning_rate": 0.00027862945824158646, "loss": 3.4718368530273436, "step": 2730 }, { "epoch": 5.743650170201623, "learning_rate": 0.00027965007896774613, "loss": 3.5320533752441405, "step": 2740 }, { "epoch": 5.764598062319979, "learning_rate": 0.0002806706996939058, "loss": 3.6331645965576174, "step": 2750 }, { "epoch": 5.7855459544383345, "learning_rate": 0.00028169132042006546, "loss": 3.50958137512207, "step": 2760 }, { "epoch": 5.80649384655669, "learning_rate": 0.00028271194114622513, "loss": 3.5480377197265627, "step": 2770 }, { "epoch": 5.827441738675046, "learning_rate": 0.00028373256187238475, "loss": 3.5677505493164063, "step": 2780 }, { "epoch": 5.848389630793402, "learning_rate": 0.0002847531825985444, "loss": 3.5496990203857424, "step": 2790 }, { "epoch": 5.869337522911757, "learning_rate": 0.00028577380332470403, "loss": 3.4990489959716795, "step": 2800 }, { "epoch": 5.890285415030113, "learning_rate": 0.0002867944240508637, "loss": 3.4764991760253907, "step": 2810 }, { "epoch": 5.911233307148468, "learning_rate": 0.00028781504477702337, "loss": 3.4782173156738283, "step": 2820 }, { "epoch": 5.932181199266823, "learning_rate": 0.00028883566550318303, "loss": 3.5007530212402345, "step": 2830 }, { "epoch": 5.953129091385179, "learning_rate": 0.0002898562862293427, "loss": 3.4596179962158202, "step": 2840 }, { "epoch": 5.974076983503535, "learning_rate": 0.0002908769069555023, "loss": 3.4080764770507814, "step": 2850 }, { "epoch": 5.9950248756218905, "learning_rate": 0.000291897527681662, "loss": 3.5865558624267577, "step": 2860 }, { "epoch": 6.016758313694685, "learning_rate": 0.00029291814840782165, "loss": 3.539356231689453, "step": 2870 }, { "epoch": 6.03770620581304, "learning_rate": 0.0002939387691339813, "loss": 3.5746910095214846, "step": 2880 }, { "epoch": 6.058654097931396, "learning_rate": 0.000294959389860141, "loss": 3.477669906616211, "step": 2890 }, { "epoch": 6.079601990049751, "learning_rate": 0.00029598001058630066, "loss": 3.385912322998047, "step": 2900 }, { "epoch": 6.100549882168107, "learning_rate": 0.00029700063131246027, "loss": 3.566743850708008, "step": 2910 }, { "epoch": 6.121497774286462, "learning_rate": 0.00029802125203861994, "loss": 3.4617984771728514, "step": 2920 }, { "epoch": 6.142445666404818, "learning_rate": 0.0002990418727647796, "loss": 3.46124382019043, "step": 2930 }, { "epoch": 6.163393558523174, "learning_rate": 0.0003000624934909393, "loss": 3.5851741790771485, "step": 2940 }, { "epoch": 6.184341450641529, "learning_rate": 0.00030108311421709894, "loss": 3.5333206176757814, "step": 2950 }, { "epoch": 6.205289342759885, "learning_rate": 0.0003021037349432586, "loss": 3.4518871307373047, "step": 2960 }, { "epoch": 6.226237234878241, "learning_rate": 0.0003031243556694182, "loss": 3.5048519134521485, "step": 2970 }, { "epoch": 6.247185126996596, "learning_rate": 0.0003041449763955779, "loss": 3.386810302734375, "step": 2980 }, { "epoch": 6.268133019114952, "learning_rate": 0.00030516559712173756, "loss": 3.3472484588623046, "step": 2990 }, { "epoch": 6.289080911233307, "learning_rate": 0.00030618621784789723, "loss": 3.578289794921875, "step": 3000 }, { "epoch": 6.3100288033516625, "learning_rate": 0.0003072068385740569, "loss": 3.4641948699951173, "step": 3010 }, { "epoch": 6.330976695470018, "learning_rate": 0.00030822745930021657, "loss": 3.42608757019043, "step": 3020 }, { "epoch": 6.351924587588374, "learning_rate": 0.0003092480800263762, "loss": 3.4154186248779297, "step": 3030 }, { "epoch": 6.3728724797067295, "learning_rate": 0.00031026870075253585, "loss": 3.5270923614501952, "step": 3040 }, { "epoch": 6.393820371825085, "learning_rate": 0.0003112893214786955, "loss": 3.4780391693115233, "step": 3050 }, { "epoch": 6.414768263943441, "learning_rate": 0.0003123099422048552, "loss": 3.379390335083008, "step": 3060 }, { "epoch": 6.435716156061797, "learning_rate": 0.00031333056293101486, "loss": 3.405352020263672, "step": 3070 }, { "epoch": 6.456664048180152, "learning_rate": 0.00031435118365717447, "loss": 3.523044204711914, "step": 3080 }, { "epoch": 6.477611940298507, "learning_rate": 0.00031537180438333414, "loss": 3.488709259033203, "step": 3090 }, { "epoch": 6.498559832416863, "learning_rate": 0.0003163924251094938, "loss": 3.3736576080322265, "step": 3100 }, { "epoch": 6.519507724535218, "learning_rate": 0.0003174130458356535, "loss": 3.5457527160644533, "step": 3110 }, { "epoch": 6.540455616653574, "learning_rate": 0.00031843366656181314, "loss": 3.38338623046875, "step": 3120 }, { "epoch": 6.56140350877193, "learning_rate": 0.0003194542872879728, "loss": 3.4261444091796873, "step": 3130 }, { "epoch": 6.5823514008902855, "learning_rate": 0.0003204749080141324, "loss": 3.365514373779297, "step": 3140 }, { "epoch": 6.603299293008641, "learning_rate": 0.0003214955287402921, "loss": 3.3958808898925783, "step": 3150 }, { "epoch": 6.624247185126997, "learning_rate": 0.00032251614946645176, "loss": 3.4493579864501953, "step": 3160 }, { "epoch": 6.6451950772453525, "learning_rate": 0.00032353677019261143, "loss": 3.509814453125, "step": 3170 }, { "epoch": 6.666142969363708, "learning_rate": 0.0003245573909187711, "loss": 3.3270954132080077, "step": 3180 }, { "epoch": 6.687090861482063, "learning_rate": 0.00032557801164493077, "loss": 3.407461929321289, "step": 3190 }, { "epoch": 6.708038753600419, "learning_rate": 0.0003265986323710903, "loss": 3.3440326690673827, "step": 3200 }, { "epoch": 6.728986645718774, "learning_rate": 0.00032761925309725, "loss": 3.3119239807128906, "step": 3210 }, { "epoch": 6.74993453783713, "learning_rate": 0.00032863987382340966, "loss": 3.441579818725586, "step": 3220 }, { "epoch": 6.770882429955486, "learning_rate": 0.00032966049454956933, "loss": 3.448055648803711, "step": 3230 }, { "epoch": 6.791830322073841, "learning_rate": 0.000330681115275729, "loss": 3.2744544982910155, "step": 3240 }, { "epoch": 6.812778214192197, "learning_rate": 0.0003317017360018886, "loss": 3.4412532806396485, "step": 3250 }, { "epoch": 6.833726106310553, "learning_rate": 0.0003327223567280483, "loss": 3.459817123413086, "step": 3260 }, { "epoch": 6.8546739984289085, "learning_rate": 0.00033374297745420795, "loss": 3.3376232147216798, "step": 3270 }, { "epoch": 6.875621890547263, "learning_rate": 0.0003347635981803676, "loss": 3.557674789428711, "step": 3280 }, { "epoch": 6.896569782665619, "learning_rate": 0.0003357842189065273, "loss": 3.290896987915039, "step": 3290 }, { "epoch": 6.917517674783975, "learning_rate": 0.0003368048396326869, "loss": 3.49056396484375, "step": 3300 }, { "epoch": 6.93846556690233, "learning_rate": 0.00033782546035884657, "loss": 3.3487998962402346, "step": 3310 }, { "epoch": 6.959413459020686, "learning_rate": 0.00033884608108500624, "loss": 3.3251983642578127, "step": 3320 }, { "epoch": 6.980361351139042, "learning_rate": 0.0003398667018111659, "loss": 3.463846206665039, "step": 3330 }, { "epoch": 7.002094789211836, "learning_rate": 0.0003408873225373256, "loss": 3.5404449462890626, "step": 3340 }, { "epoch": 7.023042681330191, "learning_rate": 0.00034190794326348524, "loss": 3.317121887207031, "step": 3350 }, { "epoch": 7.043990573448546, "learning_rate": 0.00034292856398964486, "loss": 3.446538543701172, "step": 3360 }, { "epoch": 7.064938465566902, "learning_rate": 0.0003439491847158045, "loss": 3.410959243774414, "step": 3370 }, { "epoch": 7.085886357685258, "learning_rate": 0.0003449698054419642, "loss": 3.4593124389648438, "step": 3380 }, { "epoch": 7.1068342498036134, "learning_rate": 0.00034599042616812386, "loss": 3.4331336975097657, "step": 3390 }, { "epoch": 7.127782141921969, "learning_rate": 0.00034701104689428353, "loss": 3.415497970581055, "step": 3400 }, { "epoch": 7.148730034040325, "learning_rate": 0.0003480316676204432, "loss": 3.358320617675781, "step": 3410 }, { "epoch": 7.1696779261586805, "learning_rate": 0.0003490522883466028, "loss": 3.369782257080078, "step": 3420 }, { "epoch": 7.190625818277036, "learning_rate": 0.0003500729090727625, "loss": 3.3603092193603517, "step": 3430 }, { "epoch": 7.211573710395392, "learning_rate": 0.00035109352979892215, "loss": 3.4065528869628907, "step": 3440 }, { "epoch": 7.232521602513747, "learning_rate": 0.0003521141505250818, "loss": 3.368368148803711, "step": 3450 }, { "epoch": 7.253469494632102, "learning_rate": 0.0003531347712512415, "loss": 3.3687610626220703, "step": 3460 }, { "epoch": 7.274417386750458, "learning_rate": 0.00035415539197740115, "loss": 3.397439956665039, "step": 3470 }, { "epoch": 7.295365278868814, "learning_rate": 0.00035517601270356077, "loss": 3.329518508911133, "step": 3480 }, { "epoch": 7.316313170987169, "learning_rate": 0.00035619663342972044, "loss": 3.3606395721435547, "step": 3490 }, { "epoch": 7.337261063105525, "learning_rate": 0.0003572172541558801, "loss": 3.373159408569336, "step": 3500 }, { "epoch": 7.358208955223881, "learning_rate": 0.00035823787488203977, "loss": 3.460713195800781, "step": 3510 }, { "epoch": 7.379156847342236, "learning_rate": 0.00035925849560819944, "loss": 3.3600276947021483, "step": 3520 }, { "epoch": 7.400104739460592, "learning_rate": 0.0003602791163343591, "loss": 3.3381488800048826, "step": 3530 }, { "epoch": 7.421052631578947, "learning_rate": 0.0003612997370605187, "loss": 3.5098854064941407, "step": 3540 }, { "epoch": 7.442000523697303, "learning_rate": 0.0003623203577866784, "loss": 3.358294677734375, "step": 3550 }, { "epoch": 7.462948415815658, "learning_rate": 0.00036334097851283806, "loss": 3.361553955078125, "step": 3560 }, { "epoch": 7.483896307934014, "learning_rate": 0.00036436159923899773, "loss": 3.3908660888671873, "step": 3570 }, { "epoch": 7.50484420005237, "learning_rate": 0.0003653822199651574, "loss": 3.305834197998047, "step": 3580 }, { "epoch": 7.525792092170725, "learning_rate": 0.000366402840691317, "loss": 3.3551094055175783, "step": 3590 }, { "epoch": 7.546739984289081, "learning_rate": 0.0003674234614174767, "loss": 3.336803436279297, "step": 3600 }, { "epoch": 7.567687876407437, "learning_rate": 0.00036844408214363635, "loss": 3.402811050415039, "step": 3610 }, { "epoch": 7.588635768525792, "learning_rate": 0.000369464702869796, "loss": 3.3823123931884767, "step": 3620 }, { "epoch": 7.609583660644148, "learning_rate": 0.00037048532359595563, "loss": 3.3616653442382813, "step": 3630 }, { "epoch": 7.630531552762504, "learning_rate": 0.00037150594432211524, "loss": 3.409253692626953, "step": 3640 }, { "epoch": 7.6514794448808585, "learning_rate": 0.0003725265650482749, "loss": 3.1992008209228517, "step": 3650 }, { "epoch": 7.672427336999214, "learning_rate": 0.0003735471857744346, "loss": 3.2639488220214843, "step": 3660 }, { "epoch": 7.69337522911757, "learning_rate": 0.00037456780650059425, "loss": 3.4038814544677733, "step": 3670 }, { "epoch": 7.714323121235926, "learning_rate": 0.0003755884272267539, "loss": 3.382715606689453, "step": 3680 }, { "epoch": 7.735271013354281, "learning_rate": 0.0003766090479529136, "loss": 3.380691146850586, "step": 3690 }, { "epoch": 7.756218905472637, "learning_rate": 0.0003776296686790732, "loss": 3.3293548583984376, "step": 3700 }, { "epoch": 7.777166797590993, "learning_rate": 0.00037865028940523287, "loss": 3.2755306243896483, "step": 3710 }, { "epoch": 7.798114689709348, "learning_rate": 0.00037967091013139253, "loss": 3.3747108459472654, "step": 3720 }, { "epoch": 7.819062581827704, "learning_rate": 0.0003806915308575522, "loss": 3.325161361694336, "step": 3730 }, { "epoch": 7.840010473946059, "learning_rate": 0.00038171215158371187, "loss": 3.3385120391845704, "step": 3740 }, { "epoch": 7.8609583660644144, "learning_rate": 0.00038273277230987154, "loss": 3.365946960449219, "step": 3750 }, { "epoch": 7.88190625818277, "learning_rate": 0.00038375339303603115, "loss": 3.2765518188476563, "step": 3760 }, { "epoch": 7.902854150301126, "learning_rate": 0.0003847740137621908, "loss": 3.4135189056396484, "step": 3770 }, { "epoch": 7.9238020424194815, "learning_rate": 0.0003857946344883505, "loss": 3.3337100982666015, "step": 3780 }, { "epoch": 7.944749934537837, "learning_rate": 0.00038681525521451016, "loss": 3.469867706298828, "step": 3790 }, { "epoch": 7.965697826656193, "learning_rate": 0.0003878358759406698, "loss": 3.4340835571289063, "step": 3800 }, { "epoch": 7.9866457187745485, "learning_rate": 0.00038885649666682944, "loss": 3.3163192749023436, "step": 3810 }, { "epoch": 8.008379156847342, "learning_rate": 0.0003898771173929891, "loss": 3.4269264221191404, "step": 3820 }, { "epoch": 8.029327048965698, "learning_rate": 0.0003908977381191488, "loss": 3.366756057739258, "step": 3830 }, { "epoch": 8.050274941084053, "learning_rate": 0.00039191835884530845, "loss": 3.5194732666015627, "step": 3840 }, { "epoch": 8.071222833202409, "learning_rate": 0.0003929389795714681, "loss": 3.2884559631347656, "step": 3850 }, { "epoch": 8.092170725320765, "learning_rate": 0.0003939596002976278, "loss": 3.3115074157714846, "step": 3860 }, { "epoch": 8.11311861743912, "learning_rate": 0.0003949802210237874, "loss": 3.342890167236328, "step": 3870 }, { "epoch": 8.134066509557476, "learning_rate": 0.00039600084174994706, "loss": 3.337261962890625, "step": 3880 }, { "epoch": 8.155014401675832, "learning_rate": 0.00039702146247610673, "loss": 3.3896888732910155, "step": 3890 }, { "epoch": 8.175962293794187, "learning_rate": 0.0003980420832022664, "loss": 3.314004898071289, "step": 3900 }, { "epoch": 8.196910185912543, "learning_rate": 0.00039906270392842607, "loss": 3.253472900390625, "step": 3910 }, { "epoch": 8.217858078030899, "learning_rate": 0.00040008332465458574, "loss": 3.2534595489501954, "step": 3920 }, { "epoch": 8.238805970149254, "learning_rate": 0.00040110394538074535, "loss": 3.3420372009277344, "step": 3930 }, { "epoch": 8.25975386226761, "learning_rate": 0.000402124566106905, "loss": 3.3110313415527344, "step": 3940 }, { "epoch": 8.280701754385966, "learning_rate": 0.0004031451868330647, "loss": 3.3015865325927733, "step": 3950 }, { "epoch": 8.30164964650432, "learning_rate": 0.00040416580755922436, "loss": 3.207544708251953, "step": 3960 }, { "epoch": 8.322597538622675, "learning_rate": 0.000405186428285384, "loss": 3.2784183502197264, "step": 3970 }, { "epoch": 8.343545430741031, "learning_rate": 0.0004062070490115437, "loss": 3.29312744140625, "step": 3980 }, { "epoch": 8.364493322859387, "learning_rate": 0.0004072276697377033, "loss": 3.349509048461914, "step": 3990 }, { "epoch": 8.385441214977742, "learning_rate": 0.000408248290463863, "loss": 3.311314010620117, "step": 4000 }, { "epoch": 8.406389107096098, "learning_rate": 0.00040926891119002264, "loss": 3.2704097747802736, "step": 4010 }, { "epoch": 8.427336999214454, "learning_rate": 0.0004102895319161823, "loss": 3.3431529998779297, "step": 4020 }, { "epoch": 8.44828489133281, "learning_rate": 0.000411310152642342, "loss": 3.2444534301757812, "step": 4030 }, { "epoch": 8.469232783451165, "learning_rate": 0.0004123307733685016, "loss": 3.2499820709228517, "step": 4040 }, { "epoch": 8.49018067556952, "learning_rate": 0.00041335139409466126, "loss": 3.4333778381347657, "step": 4050 }, { "epoch": 8.511128567687877, "learning_rate": 0.0004143720148208209, "loss": 3.598118209838867, "step": 4060 }, { "epoch": 8.532076459806232, "learning_rate": 0.00041539263554698055, "loss": 3.1703567504882812, "step": 4070 }, { "epoch": 8.553024351924588, "learning_rate": 0.0004164132562731402, "loss": 3.308456802368164, "step": 4080 }, { "epoch": 8.573972244042944, "learning_rate": 0.00041743387699929983, "loss": 3.262325668334961, "step": 4090 }, { "epoch": 8.5949201361613, "learning_rate": 0.0004184544977254595, "loss": 3.277301788330078, "step": 4100 }, { "epoch": 8.615868028279655, "learning_rate": 0.00041947511845161916, "loss": 3.245453643798828, "step": 4110 }, { "epoch": 8.63681592039801, "learning_rate": 0.00042049573917777883, "loss": 3.246595764160156, "step": 4120 }, { "epoch": 8.657763812516366, "learning_rate": 0.0004215163599039385, "loss": 3.310033416748047, "step": 4130 }, { "epoch": 8.678711704634722, "learning_rate": 0.00042253698063009817, "loss": 3.301158905029297, "step": 4140 }, { "epoch": 8.699659596753076, "learning_rate": 0.0004235576013562578, "loss": 3.2295578002929686, "step": 4150 }, { "epoch": 8.720607488871432, "learning_rate": 0.00042457822208241745, "loss": 3.26501579284668, "step": 4160 }, { "epoch": 8.741555380989787, "learning_rate": 0.0004255988428085771, "loss": 3.346723937988281, "step": 4170 }, { "epoch": 8.762503273108143, "learning_rate": 0.0004266194635347368, "loss": 3.2818328857421877, "step": 4180 }, { "epoch": 8.783451165226499, "learning_rate": 0.00042764008426089646, "loss": 3.3082435607910154, "step": 4190 }, { "epoch": 8.804399057344854, "learning_rate": 0.0004286607049870561, "loss": 3.2165481567382814, "step": 4200 }, { "epoch": 8.82534694946321, "learning_rate": 0.00042968132571321574, "loss": 3.217595672607422, "step": 4210 }, { "epoch": 8.846294841581566, "learning_rate": 0.0004307019464393754, "loss": 3.2202774047851563, "step": 4220 }, { "epoch": 8.867242733699921, "learning_rate": 0.0004317225671655351, "loss": 3.3156604766845703, "step": 4230 }, { "epoch": 8.888190625818277, "learning_rate": 0.00043274318789169474, "loss": 3.2440589904785155, "step": 4240 }, { "epoch": 8.909138517936633, "learning_rate": 0.0004337638086178544, "loss": 3.354948043823242, "step": 4250 }, { "epoch": 8.930086410054988, "learning_rate": 0.0004347844293440141, "loss": 3.2932735443115235, "step": 4260 }, { "epoch": 8.951034302173344, "learning_rate": 0.0004358050500701737, "loss": 3.1642740249633787, "step": 4270 }, { "epoch": 8.9719821942917, "learning_rate": 0.00043682567079633336, "loss": 3.1953109741210937, "step": 4280 }, { "epoch": 8.992930086410055, "learning_rate": 0.00043784629152249303, "loss": 3.1374893188476562, "step": 4290 }, { "epoch": 9.014663524482849, "learning_rate": 0.0004388669122486527, "loss": 3.4447471618652346, "step": 4300 }, { "epoch": 9.035611416601204, "learning_rate": 0.00043988753297481237, "loss": 3.278203582763672, "step": 4310 }, { "epoch": 9.05655930871956, "learning_rate": 0.000440908153700972, "loss": 3.2842811584472655, "step": 4320 }, { "epoch": 9.077507200837916, "learning_rate": 0.00044192877442713165, "loss": 3.2456493377685547, "step": 4330 }, { "epoch": 9.098455092956272, "learning_rate": 0.0004429493951532913, "loss": 3.18524112701416, "step": 4340 }, { "epoch": 9.119402985074627, "learning_rate": 0.000443970015879451, "loss": 3.2943866729736326, "step": 4350 }, { "epoch": 9.140350877192983, "learning_rate": 0.00044499063660561065, "loss": 3.325389862060547, "step": 4360 }, { "epoch": 9.161298769311339, "learning_rate": 0.0004460112573317703, "loss": 3.1400611877441404, "step": 4370 }, { "epoch": 9.182246661429694, "learning_rate": 0.00044703187805792994, "loss": 3.272686004638672, "step": 4380 }, { "epoch": 9.20319455354805, "learning_rate": 0.0004480524987840896, "loss": 3.284004974365234, "step": 4390 }, { "epoch": 9.224142445666406, "learning_rate": 0.0004490731195102493, "loss": 3.2489898681640623, "step": 4400 }, { "epoch": 9.24509033778476, "learning_rate": 0.00045009374023640894, "loss": 3.2408329010009767, "step": 4410 }, { "epoch": 9.266038229903115, "learning_rate": 0.0004511143609625686, "loss": 3.2516738891601564, "step": 4420 }, { "epoch": 9.286986122021471, "learning_rate": 0.0004521349816887283, "loss": 3.1824373245239257, "step": 4430 }, { "epoch": 9.307934014139827, "learning_rate": 0.0004531556024148879, "loss": 3.2249637603759767, "step": 4440 }, { "epoch": 9.328881906258182, "learning_rate": 0.00045417622314104756, "loss": 3.174178695678711, "step": 4450 }, { "epoch": 9.349829798376538, "learning_rate": 0.00045519684386720723, "loss": 3.188156318664551, "step": 4460 }, { "epoch": 9.370777690494894, "learning_rate": 0.0004562174645933669, "loss": 3.228466796875, "step": 4470 }, { "epoch": 9.39172558261325, "learning_rate": 0.00045723808531952657, "loss": 3.2763172149658204, "step": 4480 }, { "epoch": 9.412673474731605, "learning_rate": 0.0004582587060456861, "loss": 3.266025161743164, "step": 4490 }, { "epoch": 9.43362136684996, "learning_rate": 0.0004592793267718458, "loss": 3.2390396118164064, "step": 4500 }, { "epoch": 9.454569258968316, "learning_rate": 0.00046029994749800546, "loss": 3.23939208984375, "step": 4510 }, { "epoch": 9.475517151086672, "learning_rate": 0.00046132056822416513, "loss": 3.219116973876953, "step": 4520 }, { "epoch": 9.496465043205028, "learning_rate": 0.0004623411889503248, "loss": 3.277789306640625, "step": 4530 }, { "epoch": 9.517412935323383, "learning_rate": 0.00046336180967648447, "loss": 3.284175491333008, "step": 4540 }, { "epoch": 9.538360827441739, "learning_rate": 0.0004643824304026441, "loss": 3.208120346069336, "step": 4550 }, { "epoch": 9.559308719560095, "learning_rate": 0.00046540305112880375, "loss": 3.271435546875, "step": 4560 }, { "epoch": 9.58025661167845, "learning_rate": 0.0004664236718549634, "loss": 3.2989322662353517, "step": 4570 }, { "epoch": 9.601204503796806, "learning_rate": 0.0004674442925811231, "loss": 3.1721576690673827, "step": 4580 }, { "epoch": 9.622152395915162, "learning_rate": 0.00046846491330728275, "loss": 3.1907968521118164, "step": 4590 }, { "epoch": 9.643100288033516, "learning_rate": 0.00046948553403344237, "loss": 3.3379592895507812, "step": 4600 }, { "epoch": 9.664048180151871, "learning_rate": 0.00047050615475960204, "loss": 3.2489646911621093, "step": 4610 }, { "epoch": 9.684996072270227, "learning_rate": 0.0004715267754857617, "loss": 3.3012271881103517, "step": 4620 }, { "epoch": 9.705943964388583, "learning_rate": 0.00047254739621192137, "loss": 3.235492706298828, "step": 4630 }, { "epoch": 9.726891856506938, "learning_rate": 0.00047356801693808104, "loss": 3.2170711517333985, "step": 4640 }, { "epoch": 9.747839748625294, "learning_rate": 0.0004745886376642407, "loss": 3.1358510971069338, "step": 4650 }, { "epoch": 9.76878764074365, "learning_rate": 0.0004756092583904003, "loss": 3.154219055175781, "step": 4660 }, { "epoch": 9.789735532862005, "learning_rate": 0.00047662987911656, "loss": 3.3397506713867187, "step": 4670 }, { "epoch": 9.810683424980361, "learning_rate": 0.00047765049984271966, "loss": 3.2498783111572265, "step": 4680 }, { "epoch": 9.831631317098717, "learning_rate": 0.00047867112056887933, "loss": 3.2211677551269533, "step": 4690 }, { "epoch": 9.852579209217073, "learning_rate": 0.000479691741295039, "loss": 3.2689888000488283, "step": 4700 }, { "epoch": 9.873527101335428, "learning_rate": 0.00048071236202119866, "loss": 3.1732282638549805, "step": 4710 }, { "epoch": 9.894474993453784, "learning_rate": 0.0004817329827473583, "loss": 3.1548320770263674, "step": 4720 }, { "epoch": 9.91542288557214, "learning_rate": 0.00048275360347351795, "loss": 3.2713703155517577, "step": 4730 }, { "epoch": 9.936370777690495, "learning_rate": 0.0004837742241996776, "loss": 3.1592134475708007, "step": 4740 }, { "epoch": 9.957318669808851, "learning_rate": 0.0004847948449258373, "loss": 3.213180923461914, "step": 4750 }, { "epoch": 9.978266561927207, "learning_rate": 0.00048581546565199695, "loss": 3.2513309478759767, "step": 4760 }, { "epoch": 9.999214454045562, "learning_rate": 0.0004868360863781566, "loss": 3.3612499237060547, "step": 4770 }, { "epoch": 10.020947892118356, "learning_rate": 0.00048785670710431623, "loss": 3.2473423004150392, "step": 4780 }, { "epoch": 10.041895784236711, "learning_rate": 0.000488877327830476, "loss": 3.3008792877197264, "step": 4790 }, { "epoch": 10.062843676355067, "learning_rate": 0.0004898979485566356, "loss": 3.286971664428711, "step": 4800 }, { "epoch": 10.083791568473423, "learning_rate": 0.0004909185692827952, "loss": 3.300416946411133, "step": 4810 }, { "epoch": 10.104739460591778, "learning_rate": 0.0004919391900089549, "loss": 3.1919363021850584, "step": 4820 }, { "epoch": 10.125687352710134, "learning_rate": 0.0004929598107351145, "loss": 3.2064422607421874, "step": 4830 }, { "epoch": 10.14663524482849, "learning_rate": 0.0004939804314612742, "loss": 3.1432363510131838, "step": 4840 }, { "epoch": 10.167583136946845, "learning_rate": 0.0004950010521874339, "loss": 3.17150764465332, "step": 4850 }, { "epoch": 10.188531029065201, "learning_rate": 0.0004960216729135935, "loss": 3.2499244689941404, "step": 4860 }, { "epoch": 10.209478921183555, "learning_rate": 0.0004970422936397532, "loss": 3.1550519943237303, "step": 4870 }, { "epoch": 10.23042681330191, "learning_rate": 0.0004980629143659128, "loss": 3.1220830917358398, "step": 4880 }, { "epoch": 10.251374705420266, "learning_rate": 0.0004990835350920725, "loss": 3.2074295043945313, "step": 4890 }, { "epoch": 10.272322597538622, "learning_rate": 0.0005001041558182321, "loss": 3.198388862609863, "step": 4900 }, { "epoch": 10.293270489656978, "learning_rate": 0.0005011247765443918, "loss": 3.218120574951172, "step": 4910 }, { "epoch": 10.314218381775333, "learning_rate": 0.0005021453972705514, "loss": 3.2131175994873047, "step": 4920 }, { "epoch": 10.33516627389369, "learning_rate": 0.0005031660179967111, "loss": 3.1987491607666017, "step": 4930 }, { "epoch": 10.356114166012045, "learning_rate": 0.0005041866387228707, "loss": 3.175269889831543, "step": 4940 }, { "epoch": 10.3770620581304, "learning_rate": 0.0005052072594490304, "loss": 3.183473014831543, "step": 4950 }, { "epoch": 10.398009950248756, "learning_rate": 0.00050622788017519, "loss": 3.0571062088012697, "step": 4960 }, { "epoch": 10.418957842367112, "learning_rate": 0.0005072485009013497, "loss": 3.325624465942383, "step": 4970 }, { "epoch": 10.439905734485468, "learning_rate": 0.0005082691216275094, "loss": 3.249886703491211, "step": 4980 }, { "epoch": 10.460853626603823, "learning_rate": 0.000509289742353669, "loss": 3.18145866394043, "step": 4990 }, { "epoch": 10.481801518722179, "learning_rate": 0.0005103103630798287, "loss": 3.1750720977783202, "step": 5000 }, { "epoch": 10.502749410840535, "learning_rate": 0.0005098008169087462, "loss": 3.298409271240234, "step": 5010 }, { "epoch": 10.52369730295889, "learning_rate": 0.0005092927940452339, "loss": 3.0969064712524412, "step": 5020 }, { "epoch": 10.544645195077246, "learning_rate": 0.0005087862869144114, "loss": 3.3151134490966796, "step": 5030 }, { "epoch": 10.565593087195602, "learning_rate": 0.0005082812879940277, "loss": 3.2290550231933595, "step": 5040 }, { "epoch": 10.586540979313957, "learning_rate": 0.0005077777898139921, "loss": 3.140799713134766, "step": 5050 }, { "epoch": 10.607488871432313, "learning_rate": 0.0005072757849559103, "loss": 3.1083478927612305, "step": 5060 }, { "epoch": 10.628436763550667, "learning_rate": 0.0005067752660526248, "loss": 3.1976173400878904, "step": 5070 }, { "epoch": 10.649384655669023, "learning_rate": 0.0005062762257877613, "loss": 3.2580982208251954, "step": 5080 }, { "epoch": 10.670332547787378, "learning_rate": 0.0005057786568952791, "loss": 3.280201721191406, "step": 5090 }, { "epoch": 10.691280439905734, "learning_rate": 0.000505282552159027, "loss": 3.141094779968262, "step": 5100 }, { "epoch": 10.71222833202409, "learning_rate": 0.000504787904412304, "loss": 3.095392608642578, "step": 5110 }, { "epoch": 10.733176224142445, "learning_rate": 0.000504294706537424, "loss": 3.2083145141601563, "step": 5120 }, { "epoch": 10.754124116260801, "learning_rate": 0.0005038029514652858, "loss": 3.289701461791992, "step": 5130 }, { "epoch": 10.775072008379157, "learning_rate": 0.0005033126321749477, "loss": 3.2425827026367187, "step": 5140 }, { "epoch": 10.796019900497512, "learning_rate": 0.000502823741693206, "loss": 3.277665710449219, "step": 5150 }, { "epoch": 10.816967792615868, "learning_rate": 0.0005023362730941793, "loss": 3.1909582138061525, "step": 5160 }, { "epoch": 10.837915684734224, "learning_rate": 0.0005018502194988955, "loss": 3.170912170410156, "step": 5170 }, { "epoch": 10.85886357685258, "learning_rate": 0.0005013655740748848, "loss": 3.2589969635009766, "step": 5180 }, { "epoch": 10.879811468970935, "learning_rate": 0.0005008823300357761, "loss": 3.1849817276000976, "step": 5190 }, { "epoch": 10.90075936108929, "learning_rate": 0.0005004004806408972, "loss": 3.1448366165161135, "step": 5200 }, { "epoch": 10.921707253207646, "learning_rate": 0.0004999200191948814, "loss": 3.122829055786133, "step": 5210 }, { "epoch": 10.942655145326002, "learning_rate": 0.0004994409390472751, "loss": 3.1754734039306642, "step": 5220 }, { "epoch": 10.963603037444358, "learning_rate": 0.0004989632335921523, "loss": 3.1149194717407225, "step": 5230 }, { "epoch": 10.984550929562714, "learning_rate": 0.0004984868962677315, "loss": 3.100501823425293, "step": 5240 }, { "epoch": 11.006284367635507, "learning_rate": 0.0004980119205559973, "loss": 3.330778121948242, "step": 5250 }, { "epoch": 11.027232259753863, "learning_rate": 0.0004975382999823259, "loss": 3.1559074401855467, "step": 5260 }, { "epoch": 11.048180151872218, "learning_rate": 0.0004970660281151141, "loss": 3.172486114501953, "step": 5270 }, { "epoch": 11.069128043990574, "learning_rate": 0.0004965950985654126, "loss": 3.1997749328613283, "step": 5280 }, { "epoch": 11.09007593610893, "learning_rate": 0.0004961255049865635, "loss": 3.2853694915771485, "step": 5290 }, { "epoch": 11.111023828227285, "learning_rate": 0.0004956572410738401, "loss": 3.145161819458008, "step": 5300 }, { "epoch": 11.131971720345641, "learning_rate": 0.000495190300564092, "loss": 3.126105308532715, "step": 5310 }, { "epoch": 11.152919612463997, "learning_rate": 0.0004947246772353933, "loss": 3.2012374877929686, "step": 5320 }, { "epoch": 11.17386750458235, "learning_rate": 0.0004942603649066942, "loss": 3.1729455947875977, "step": 5330 }, { "epoch": 11.194815396700706, "learning_rate": 0.0004937973574374762, "loss": 3.148386001586914, "step": 5340 }, { "epoch": 11.215763288819062, "learning_rate": 0.0004933356487274114, "loss": 3.065207290649414, "step": 5350 }, { "epoch": 11.236711180937418, "learning_rate": 0.0004928752327160248, "loss": 3.150010108947754, "step": 5360 }, { "epoch": 11.257659073055773, "learning_rate": 0.0004924161033823598, "loss": 3.2157524108886717, "step": 5370 }, { "epoch": 11.278606965174129, "learning_rate": 0.0004919582547446482, "loss": 3.0866676330566407, "step": 5380 }, { "epoch": 11.299554857292485, "learning_rate": 0.0004915016808599824, "loss": 3.2173648834228517, "step": 5390 }, { "epoch": 11.32050274941084, "learning_rate": 0.0004910463758239914, "loss": 3.1369649887084963, "step": 5400 }, { "epoch": 11.341450641529196, "learning_rate": 0.0004905923337705201, "loss": 3.0868097305297852, "step": 5410 }, { "epoch": 11.362398533647552, "learning_rate": 0.0004901395488713123, "loss": 3.2488777160644533, "step": 5420 }, { "epoch": 11.383346425765907, "learning_rate": 0.0004896880153356963, "loss": 3.121846008300781, "step": 5430 }, { "epoch": 11.404294317884263, "learning_rate": 0.000489237727410273, "loss": 3.1137548446655274, "step": 5440 }, { "epoch": 11.425242210002619, "learning_rate": 0.0004887886793786093, "loss": 3.0799299240112306, "step": 5450 }, { "epoch": 11.446190102120974, "learning_rate": 0.0004883408655609327, "loss": 3.0961063385009764, "step": 5460 }, { "epoch": 11.46713799423933, "learning_rate": 0.0004878942803138293, "loss": 3.0329910278320313, "step": 5470 }, { "epoch": 11.488085886357686, "learning_rate": 0.0004874489180299454, "loss": 3.173397445678711, "step": 5480 }, { "epoch": 11.509033778476041, "learning_rate": 0.00048700477313769213, "loss": 3.1852407455444336, "step": 5490 }, { "epoch": 11.529981670594397, "learning_rate": 0.00048656184010095185, "loss": 3.2040431976318358, "step": 5500 }, { "epoch": 11.550929562712753, "learning_rate": 0.00048612011341878916, "loss": 3.189468002319336, "step": 5510 }, { "epoch": 11.571877454831107, "learning_rate": 0.0004856795876251634, "loss": 3.2286914825439452, "step": 5520 }, { "epoch": 11.592825346949462, "learning_rate": 0.00048524025728864493, "loss": 3.034438896179199, "step": 5530 }, { "epoch": 11.613773239067818, "learning_rate": 0.0004848021170121335, "loss": 3.085980987548828, "step": 5540 }, { "epoch": 11.634721131186174, "learning_rate": 0.0004843651614325803, "loss": 3.1111934661865233, "step": 5550 }, { "epoch": 11.65566902330453, "learning_rate": 0.00048392938522071163, "loss": 3.159061050415039, "step": 5560 }, { "epoch": 11.676616915422885, "learning_rate": 0.0004834947830807563, "loss": 3.0602264404296875, "step": 5570 }, { "epoch": 11.69756480754124, "learning_rate": 0.00048306134975017523, "loss": 3.120003890991211, "step": 5580 }, { "epoch": 11.718512699659597, "learning_rate": 0.0004826290799993939, "loss": 3.1456703186035155, "step": 5590 }, { "epoch": 11.739460591777952, "learning_rate": 0.0004821979686315372, "loss": 3.112548828125, "step": 5600 }, { "epoch": 11.760408483896308, "learning_rate": 0.00048176801048216693, "loss": 3.1137924194335938, "step": 5610 }, { "epoch": 11.781356376014664, "learning_rate": 0.0004813392004190223, "loss": 3.146605110168457, "step": 5620 }, { "epoch": 11.80230426813302, "learning_rate": 0.00048091153334176224, "loss": 3.1099647521972655, "step": 5630 }, { "epoch": 11.823252160251375, "learning_rate": 0.00048048500418171097, "loss": 3.099277305603027, "step": 5640 }, { "epoch": 11.84420005236973, "learning_rate": 0.0004800596079016053, "loss": 3.085763931274414, "step": 5650 }, { "epoch": 11.865147944488086, "learning_rate": 0.0004796353394953452, "loss": 3.188782501220703, "step": 5660 }, { "epoch": 11.886095836606442, "learning_rate": 0.0004792121939877459, "loss": 3.176821708679199, "step": 5670 }, { "epoch": 11.907043728724798, "learning_rate": 0.00047879016643429336, "loss": 3.067020797729492, "step": 5680 }, { "epoch": 11.927991620843153, "learning_rate": 0.00047836925192090116, "loss": 3.1511611938476562, "step": 5690 }, { "epoch": 11.948939512961509, "learning_rate": 0.0004779494455636703, "loss": 3.0738733291625975, "step": 5700 }, { "epoch": 11.969887405079863, "learning_rate": 0.00047753074250865145, "loss": 3.237213897705078, "step": 5710 }, { "epoch": 11.990835297198219, "learning_rate": 0.00047711313793160877, "loss": 3.052178382873535, "step": 5720 }, { "epoch": 12.012568735271014, "learning_rate": 0.000476696627037787, "loss": 3.2651294708251952, "step": 5730 }, { "epoch": 12.03351662738937, "learning_rate": 0.0004762812050616797, "loss": 3.162643241882324, "step": 5740 }, { "epoch": 12.054464519507725, "learning_rate": 0.0004758668672668006, "loss": 3.1709291458129885, "step": 5750 }, { "epoch": 12.07541241162608, "learning_rate": 0.00047545360894545664, "loss": 3.063345527648926, "step": 5760 }, { "epoch": 12.096360303744436, "learning_rate": 0.0004750414254185235, "loss": 3.093794250488281, "step": 5770 }, { "epoch": 12.117308195862792, "learning_rate": 0.0004746303120352226, "loss": 3.1082719802856444, "step": 5780 }, { "epoch": 12.138256087981146, "learning_rate": 0.00047422026417290146, "loss": 3.1271081924438477, "step": 5790 }, { "epoch": 12.159203980099502, "learning_rate": 0.0004738112772368146, "loss": 3.141692543029785, "step": 5800 }, { "epoch": 12.180151872217857, "learning_rate": 0.00047340334665990787, "loss": 3.1134639739990235, "step": 5810 }, { "epoch": 12.201099764336213, "learning_rate": 0.0004729964679026039, "loss": 3.03677921295166, "step": 5820 }, { "epoch": 12.222047656454569, "learning_rate": 0.0004725906364525903, "loss": 3.2071063995361326, "step": 5830 }, { "epoch": 12.242995548572924, "learning_rate": 0.0004721858478246089, "loss": 3.173069953918457, "step": 5840 }, { "epoch": 12.26394344069128, "learning_rate": 0.0004717820975602482, "loss": 3.048240089416504, "step": 5850 }, { "epoch": 12.284891332809636, "learning_rate": 0.0004713793812277367, "loss": 3.041463088989258, "step": 5860 }, { "epoch": 12.305839224927992, "learning_rate": 0.00047097769442173856, "loss": 3.0645767211914063, "step": 5870 }, { "epoch": 12.326787117046347, "learning_rate": 0.00047057703276315164, "loss": 2.982158088684082, "step": 5880 }, { "epoch": 12.347735009164703, "learning_rate": 0.0004701773918989065, "loss": 3.111321449279785, "step": 5890 }, { "epoch": 12.368682901283059, "learning_rate": 0.00046977876750176805, "loss": 3.141143798828125, "step": 5900 }, { "epoch": 12.389630793401414, "learning_rate": 0.0004693811552701385, "loss": 3.0916566848754883, "step": 5910 }, { "epoch": 12.41057868551977, "learning_rate": 0.0004689845509278626, "loss": 3.0807928085327148, "step": 5920 }, { "epoch": 12.431526577638126, "learning_rate": 0.00046858895022403474, "loss": 3.0439529418945312, "step": 5930 }, { "epoch": 12.452474469756481, "learning_rate": 0.000468194348932807, "loss": 3.0425508499145506, "step": 5940 }, { "epoch": 12.473422361874837, "learning_rate": 0.00046780074285319984, "loss": 3.187800407409668, "step": 5950 }, { "epoch": 12.494370253993193, "learning_rate": 0.0004674081278089144, "loss": 3.0460309982299805, "step": 5960 }, { "epoch": 12.515318146111547, "learning_rate": 0.00046701649964814616, "loss": 3.2187931060791017, "step": 5970 }, { "epoch": 12.536266038229904, "learning_rate": 0.0004666258542434007, "loss": 3.109378433227539, "step": 5980 }, { "epoch": 12.557213930348258, "learning_rate": 0.000466236187491311, "loss": 3.0655149459838866, "step": 5990 }, { "epoch": 12.578161822466614, "learning_rate": 0.00046584749531245617, "loss": 3.132980728149414, "step": 6000 }, { "epoch": 12.59910971458497, "learning_rate": 0.0004654597736511823, "loss": 3.0032047271728515, "step": 6010 }, { "epoch": 12.620057606703325, "learning_rate": 0.0004650730184754247, "loss": 3.0569095611572266, "step": 6020 }, { "epoch": 12.64100549882168, "learning_rate": 0.0004646872257765318, "loss": 3.1891340255737304, "step": 6030 }, { "epoch": 12.661953390940036, "learning_rate": 0.00046430239156909045, "loss": 3.036951446533203, "step": 6040 }, { "epoch": 12.682901283058392, "learning_rate": 0.00046391851189075343, "loss": 3.077804374694824, "step": 6050 }, { "epoch": 12.703849175176748, "learning_rate": 0.00046353558280206746, "loss": 3.1198028564453124, "step": 6060 }, { "epoch": 12.724797067295103, "learning_rate": 0.00046315360038630404, "loss": 3.1289579391479494, "step": 6070 }, { "epoch": 12.745744959413459, "learning_rate": 0.0004627725607492909, "loss": 3.047295570373535, "step": 6080 }, { "epoch": 12.766692851531815, "learning_rate": 0.00046239246001924503, "loss": 3.071992111206055, "step": 6090 }, { "epoch": 12.78764074365017, "learning_rate": 0.000462013294346608, "loss": 3.050577735900879, "step": 6100 }, { "epoch": 12.808588635768526, "learning_rate": 0.00046163505990388167, "loss": 3.0774341583251954, "step": 6110 }, { "epoch": 12.829536527886882, "learning_rate": 0.00046125775288546623, "loss": 3.120297431945801, "step": 6120 }, { "epoch": 12.850484420005237, "learning_rate": 0.00046088136950749937, "loss": 3.1301042556762697, "step": 6130 }, { "epoch": 12.871432312123593, "learning_rate": 0.0004605059060076967, "loss": 3.0827388763427734, "step": 6140 }, { "epoch": 12.892380204241949, "learning_rate": 0.0004601313586451939, "loss": 3.136738967895508, "step": 6150 }, { "epoch": 12.913328096360305, "learning_rate": 0.00045975772370039034, "loss": 3.0567752838134767, "step": 6160 }, { "epoch": 12.93427598847866, "learning_rate": 0.0004593849974747937, "loss": 3.053047752380371, "step": 6170 }, { "epoch": 12.955223880597014, "learning_rate": 0.0004590131762908664, "loss": 3.0607650756835936, "step": 6180 }, { "epoch": 12.97617177271537, "learning_rate": 0.00045864225649187287, "loss": 3.1021827697753905, "step": 6190 }, { "epoch": 12.997119664833725, "learning_rate": 0.000458272234441729, "loss": 3.016301727294922, "step": 6200 }, { "epoch": 13.01885310290652, "learning_rate": 0.00045790310652485205, "loss": 3.1855663299560546, "step": 6210 }, { "epoch": 13.039800995024876, "learning_rate": 0.0004575348691460124, "loss": 3.0596897125244142, "step": 6220 }, { "epoch": 13.060748887143232, "learning_rate": 0.00045716751873018654, "loss": 3.061813735961914, "step": 6230 }, { "epoch": 13.081696779261588, "learning_rate": 0.00045680105172241103, "loss": 3.106767463684082, "step": 6240 }, { "epoch": 13.102644671379942, "learning_rate": 0.0004564354645876384, "loss": 3.101357269287109, "step": 6250 }, { "epoch": 13.123592563498297, "learning_rate": 0.00045607075381059363, "loss": 3.085792350769043, "step": 6260 }, { "epoch": 13.144540455616653, "learning_rate": 0.00045570691589563234, "loss": 3.071797752380371, "step": 6270 }, { "epoch": 13.165488347735009, "learning_rate": 0.0004553439473666, "loss": 3.087900161743164, "step": 6280 }, { "epoch": 13.186436239853364, "learning_rate": 0.0004549818447666924, "loss": 3.0981624603271483, "step": 6290 }, { "epoch": 13.20738413197172, "learning_rate": 0.00045462060465831743, "loss": 2.995559501647949, "step": 6300 }, { "epoch": 13.228332024090076, "learning_rate": 0.0004542602236229581, "loss": 3.0986444473266603, "step": 6310 }, { "epoch": 13.249279916208431, "learning_rate": 0.00045390069826103653, "loss": 3.1423923492431642, "step": 6320 }, { "epoch": 13.270227808326787, "learning_rate": 0.00045354202519177925, "loss": 2.981964111328125, "step": 6330 }, { "epoch": 13.291175700445143, "learning_rate": 0.0004531842010530839, "loss": 3.062668800354004, "step": 6340 }, { "epoch": 13.312123592563498, "learning_rate": 0.0004528272225013865, "loss": 3.1759321212768556, "step": 6350 }, { "epoch": 13.333071484681854, "learning_rate": 0.00045247108621153056, "loss": 3.0892358779907227, "step": 6360 }, { "epoch": 13.35401937680021, "learning_rate": 0.0004521157888766368, "loss": 3.0303468704223633, "step": 6370 }, { "epoch": 13.374967268918565, "learning_rate": 0.00045176132720797443, "loss": 3.042502021789551, "step": 6380 }, { "epoch": 13.395915161036921, "learning_rate": 0.0004514076979348328, "loss": 3.109409713745117, "step": 6390 }, { "epoch": 13.416863053155277, "learning_rate": 0.0004510548978043951, "loss": 3.0352380752563475, "step": 6400 }, { "epoch": 13.437810945273633, "learning_rate": 0.00045070292358161265, "loss": 2.9740083694458006, "step": 6410 }, { "epoch": 13.458758837391988, "learning_rate": 0.0004503517720490801, "loss": 3.1181098937988283, "step": 6420 }, { "epoch": 13.479706729510344, "learning_rate": 0.000450001440006912, "loss": 3.089175987243652, "step": 6430 }, { "epoch": 13.500654621628698, "learning_rate": 0.00044965192427262043, "loss": 3.0885658264160156, "step": 6440 }, { "epoch": 13.521602513747053, "learning_rate": 0.0004493032216809934, "loss": 3.0794023513793944, "step": 6450 }, { "epoch": 13.54255040586541, "learning_rate": 0.00044895532908397455, "loss": 3.0824106216430662, "step": 6460 }, { "epoch": 13.563498297983765, "learning_rate": 0.00044860824335054384, "loss": 3.0646196365356446, "step": 6470 }, { "epoch": 13.58444619010212, "learning_rate": 0.00044826196136659916, "loss": 3.044062614440918, "step": 6480 }, { "epoch": 13.605394082220476, "learning_rate": 0.00044791648003483884, "loss": 3.0133747100830077, "step": 6490 }, { "epoch": 13.626341974338832, "learning_rate": 0.0004475717962746455, "loss": 3.070328712463379, "step": 6500 }, { "epoch": 13.647289866457188, "learning_rate": 0.0004472279070219706, "loss": 3.0279052734375, "step": 6510 }, { "epoch": 13.668237758575543, "learning_rate": 0.00044688480922922, "loss": 3.147620964050293, "step": 6520 }, { "epoch": 13.689185650693899, "learning_rate": 0.00044654249986514057, "loss": 3.1151987075805665, "step": 6530 }, { "epoch": 13.710133542812255, "learning_rate": 0.0004462009759147076, "loss": 3.072108268737793, "step": 6540 }, { "epoch": 13.73108143493061, "learning_rate": 0.0004458602343790135, "loss": 3.134627342224121, "step": 6550 }, { "epoch": 13.752029327048966, "learning_rate": 0.00044552027227515704, "loss": 3.032268524169922, "step": 6560 }, { "epoch": 13.772977219167322, "learning_rate": 0.00044518108663613355, "loss": 3.065017509460449, "step": 6570 }, { "epoch": 13.793925111285677, "learning_rate": 0.00044484267451072644, "loss": 3.0611106872558596, "step": 6580 }, { "epoch": 13.814873003404033, "learning_rate": 0.0004445050329633992, "loss": 3.2091243743896483, "step": 6590 }, { "epoch": 13.835820895522389, "learning_rate": 0.0004441681590741884, "loss": 3.058238983154297, "step": 6600 }, { "epoch": 13.856768787640744, "learning_rate": 0.0004438320499385977, "loss": 3.073333168029785, "step": 6610 }, { "epoch": 13.8777166797591, "learning_rate": 0.00044349670266749286, "loss": 3.037291145324707, "step": 6620 }, { "epoch": 13.898664571877454, "learning_rate": 0.0004431621143869969, "loss": 3.0411745071411134, "step": 6630 }, { "epoch": 13.91961246399581, "learning_rate": 0.00044282828223838727, "loss": 3.148990440368652, "step": 6640 }, { "epoch": 13.940560356114165, "learning_rate": 0.0004424952033779929, "loss": 3.014286994934082, "step": 6650 }, { "epoch": 13.961508248232521, "learning_rate": 0.00044216287497709253, "loss": 3.0138343811035155, "step": 6660 }, { "epoch": 13.982456140350877, "learning_rate": 0.0004418312942218139, "loss": 2.9974302291870116, "step": 6670 }, { "epoch": 14.004189578423672, "learning_rate": 0.0004415004583130336, "loss": 3.1527809143066405, "step": 6680 }, { "epoch": 14.025137470542028, "learning_rate": 0.0004411703644662778, "loss": 2.9581697463989256, "step": 6690 }, { "epoch": 14.046085362660381, "learning_rate": 0.00044084100991162385, "loss": 3.0720396041870117, "step": 6700 }, { "epoch": 14.067033254778737, "learning_rate": 0.00044051239189360286, "loss": 3.0470098495483398, "step": 6710 }, { "epoch": 14.087981146897093, "learning_rate": 0.00044018450767110235, "loss": 3.0677566528320312, "step": 6720 }, { "epoch": 14.108929039015448, "learning_rate": 0.0004398573545172709, "loss": 3.110503005981445, "step": 6730 }, { "epoch": 14.129876931133804, "learning_rate": 0.0004395309297194223, "loss": 2.9687520980834963, "step": 6740 }, { "epoch": 14.15082482325216, "learning_rate": 0.0004392052305789416, "loss": 2.985172080993652, "step": 6750 }, { "epoch": 14.171772715370516, "learning_rate": 0.0004388802544111908, "loss": 3.0017110824584963, "step": 6760 }, { "epoch": 14.192720607488871, "learning_rate": 0.0004385559985454165, "loss": 3.041835403442383, "step": 6770 }, { "epoch": 14.213668499607227, "learning_rate": 0.0004382324603246575, "loss": 3.0984907150268555, "step": 6780 }, { "epoch": 14.234616391725583, "learning_rate": 0.0004379096371056532, "loss": 3.0736331939697266, "step": 6790 }, { "epoch": 14.255564283843938, "learning_rate": 0.000437587526258753, "loss": 2.9994585037231447, "step": 6800 }, { "epoch": 14.276512175962294, "learning_rate": 0.0004372661251678265, "loss": 3.1013252258300783, "step": 6810 }, { "epoch": 14.29746006808065, "learning_rate": 0.00043694543123017407, "loss": 3.102655220031738, "step": 6820 }, { "epoch": 14.318407960199005, "learning_rate": 0.0004366254418564382, "loss": 3.1105621337890623, "step": 6830 }, { "epoch": 14.339355852317361, "learning_rate": 0.0004363061544705161, "loss": 3.1368709564208985, "step": 6840 }, { "epoch": 14.360303744435717, "learning_rate": 0.0004359875665094723, "loss": 3.052720069885254, "step": 6850 }, { "epoch": 14.381251636554072, "learning_rate": 0.00043566967542345227, "loss": 2.977310562133789, "step": 6860 }, { "epoch": 14.402199528672428, "learning_rate": 0.00043535247867559673, "loss": 3.111159896850586, "step": 6870 }, { "epoch": 14.423147420790784, "learning_rate": 0.00043503597374195665, "loss": 2.959975814819336, "step": 6880 }, { "epoch": 14.444095312909138, "learning_rate": 0.0004347201581114088, "loss": 3.125636100769043, "step": 6890 }, { "epoch": 14.465043205027493, "learning_rate": 0.0004344050292855724, "loss": 2.9577571868896486, "step": 6900 }, { "epoch": 14.485991097145849, "learning_rate": 0.00043409058477872554, "loss": 3.101388931274414, "step": 6910 }, { "epoch": 14.506938989264205, "learning_rate": 0.00043377682211772343, "loss": 3.0661073684692384, "step": 6920 }, { "epoch": 14.52788688138256, "learning_rate": 0.0004334637388419161, "loss": 2.974909019470215, "step": 6930 }, { "epoch": 14.548834773500916, "learning_rate": 0.0004331513325030681, "loss": 3.0314458847045898, "step": 6940 }, { "epoch": 14.569782665619272, "learning_rate": 0.0004328396006652773, "loss": 3.1744915008544923, "step": 6950 }, { "epoch": 14.590730557737627, "learning_rate": 0.00043252854090489564, "loss": 3.06768741607666, "step": 6960 }, { "epoch": 14.611678449855983, "learning_rate": 0.00043221815081044985, "loss": 3.161996269226074, "step": 6970 }, { "epoch": 14.632626341974339, "learning_rate": 0.00043190842798256285, "loss": 3.102631378173828, "step": 6980 }, { "epoch": 14.653574234092694, "learning_rate": 0.00043159937003387584, "loss": 3.004058074951172, "step": 6990 }, { "epoch": 14.67452212621105, "learning_rate": 0.00043129097458897135, "loss": 3.135270118713379, "step": 7000 }, { "epoch": 14.695470018329406, "learning_rate": 0.000430983239284296, "loss": 3.0084003448486327, "step": 7010 }, { "epoch": 14.716417910447761, "learning_rate": 0.0004306761617680849, "loss": 2.9995773315429686, "step": 7020 }, { "epoch": 14.737365802566117, "learning_rate": 0.00043036973970028583, "loss": 3.000468444824219, "step": 7030 }, { "epoch": 14.758313694684473, "learning_rate": 0.00043006397075248464, "loss": 3.0801364898681642, "step": 7040 }, { "epoch": 14.779261586802829, "learning_rate": 0.00042975885260783056, "loss": 3.0909229278564454, "step": 7050 }, { "epoch": 14.800209478921184, "learning_rate": 0.00042945438296096303, "loss": 2.9928516387939452, "step": 7060 }, { "epoch": 14.82115737103954, "learning_rate": 0.0004291505595179379, "loss": 3.036148262023926, "step": 7070 }, { "epoch": 14.842105263157894, "learning_rate": 0.0004288473799961553, "loss": 3.0693193435668946, "step": 7080 }, { "epoch": 14.863053155276251, "learning_rate": 0.0004285448421242875, "loss": 3.0427278518676757, "step": 7090 }, { "epoch": 14.884001047394605, "learning_rate": 0.00042824294364220724, "loss": 2.9749155044555664, "step": 7100 }, { "epoch": 14.90494893951296, "learning_rate": 0.0004279416823009172, "loss": 2.9990673065185547, "step": 7110 }, { "epoch": 14.925896831631317, "learning_rate": 0.0004276410558624791, "loss": 3.091754913330078, "step": 7120 }, { "epoch": 14.946844723749672, "learning_rate": 0.0004273410620999446, "loss": 3.003107452392578, "step": 7130 }, { "epoch": 14.967792615868028, "learning_rate": 0.0004270416987972853, "loss": 3.023390007019043, "step": 7140 }, { "epoch": 14.988740507986384, "learning_rate": 0.00042674296374932424, "loss": 3.068536376953125, "step": 7150 }, { "epoch": 15.010473946059177, "learning_rate": 0.0004264448547616681, "loss": 3.245321273803711, "step": 7160 }, { "epoch": 15.031421838177533, "learning_rate": 0.00042614736965063864, "loss": 2.9358680725097654, "step": 7170 }, { "epoch": 15.052369730295888, "learning_rate": 0.0004258505062432064, "loss": 3.0279872894287108, "step": 7180 }, { "epoch": 15.073317622414244, "learning_rate": 0.0004255542623769234, "loss": 2.96344108581543, "step": 7190 }, { "epoch": 15.0942655145326, "learning_rate": 0.00042525863589985727, "loss": 3.1603927612304688, "step": 7200 }, { "epoch": 15.115213406650955, "learning_rate": 0.00042496362467052564, "loss": 3.0409677505493162, "step": 7210 }, { "epoch": 15.136161298769311, "learning_rate": 0.00042466922655783073, "loss": 3.154404067993164, "step": 7220 }, { "epoch": 15.157109190887667, "learning_rate": 0.00042437543944099504, "loss": 2.999993324279785, "step": 7230 }, { "epoch": 15.178057083006022, "learning_rate": 0.00042408226120949674, "loss": 2.962456703186035, "step": 7240 }, { "epoch": 15.199004975124378, "learning_rate": 0.00042378968976300647, "loss": 3.050062561035156, "step": 7250 }, { "epoch": 15.219952867242734, "learning_rate": 0.00042349772301132377, "loss": 3.058196258544922, "step": 7260 }, { "epoch": 15.24090075936109, "learning_rate": 0.0004232063588743146, "loss": 3.0242469787597654, "step": 7270 }, { "epoch": 15.261848651479445, "learning_rate": 0.00042291559528184904, "loss": 2.929056930541992, "step": 7280 }, { "epoch": 15.2827965435978, "learning_rate": 0.0004226254301737393, "loss": 3.0346649169921873, "step": 7290 }, { "epoch": 15.303744435716157, "learning_rate": 0.0004223358614996787, "loss": 3.079379081726074, "step": 7300 }, { "epoch": 15.324692327834512, "learning_rate": 0.00042204688721918075, "loss": 2.984081268310547, "step": 7310 }, { "epoch": 15.345640219952868, "learning_rate": 0.0004217585053015187, "loss": 3.025343322753906, "step": 7320 }, { "epoch": 15.366588112071224, "learning_rate": 0.0004214707137256656, "loss": 3.016037940979004, "step": 7330 }, { "epoch": 15.38753600418958, "learning_rate": 0.0004211835104802349, "loss": 3.1470058441162108, "step": 7340 }, { "epoch": 15.408483896307935, "learning_rate": 0.00042089689356342115, "loss": 3.007353591918945, "step": 7350 }, { "epoch": 15.429431788426289, "learning_rate": 0.0004206108609829418, "loss": 3.0402362823486326, "step": 7360 }, { "epoch": 15.450379680544645, "learning_rate": 0.00042032541075597875, "loss": 3.018893241882324, "step": 7370 }, { "epoch": 15.471327572663, "learning_rate": 0.0004200405409091207, "loss": 3.0513105392456055, "step": 7380 }, { "epoch": 15.492275464781356, "learning_rate": 0.00041975624947830593, "loss": 3.0438756942749023, "step": 7390 }, { "epoch": 15.513223356899712, "learning_rate": 0.00041947253450876515, "loss": 2.977249526977539, "step": 7400 }, { "epoch": 15.534171249018067, "learning_rate": 0.00041918939405496546, "loss": 2.956187629699707, "step": 7410 }, { "epoch": 15.555119141136423, "learning_rate": 0.00041890682618055396, "loss": 3.0582775115966796, "step": 7420 }, { "epoch": 15.576067033254779, "learning_rate": 0.0004186248289583023, "loss": 2.9948537826538084, "step": 7430 }, { "epoch": 15.597014925373134, "learning_rate": 0.00041834340047005144, "loss": 2.9837194442749024, "step": 7440 }, { "epoch": 15.61796281749149, "learning_rate": 0.0004180625388066569, "loss": 3.0729391098022463, "step": 7450 }, { "epoch": 15.638910709609846, "learning_rate": 0.00041778224206793433, "loss": 3.054386329650879, "step": 7460 }, { "epoch": 15.659858601728201, "learning_rate": 0.00041750250836260536, "loss": 3.102676582336426, "step": 7470 }, { "epoch": 15.680806493846557, "learning_rate": 0.0004172233358082443, "loss": 2.986006164550781, "step": 7480 }, { "epoch": 15.701754385964913, "learning_rate": 0.00041694472253122467, "loss": 3.0711380004882813, "step": 7490 }, { "epoch": 15.722702278083268, "learning_rate": 0.00041666666666666664, "loss": 2.9737503051757814, "step": 7500 }, { "epoch": 15.743650170201624, "learning_rate": 0.0004163891663583843, "loss": 3.030619812011719, "step": 7510 }, { "epoch": 15.76459806231998, "learning_rate": 0.00041611221975883396, "loss": 3.0626684188842774, "step": 7520 }, { "epoch": 15.785545954438335, "learning_rate": 0.00041583582502906203, "loss": 2.9612255096435547, "step": 7530 }, { "epoch": 15.806493846556691, "learning_rate": 0.0004155599803386543, "loss": 2.9540287017822267, "step": 7540 }, { "epoch": 15.827441738675045, "learning_rate": 0.0004152846838656846, "loss": 2.9890960693359374, "step": 7550 }, { "epoch": 15.8483896307934, "learning_rate": 0.00041500993379666443, "loss": 2.998134803771973, "step": 7560 }, { "epoch": 15.869337522911756, "learning_rate": 0.0004147357283264927, "loss": 3.046440315246582, "step": 7570 }, { "epoch": 15.890285415030112, "learning_rate": 0.000414462065658406, "loss": 2.9803043365478517, "step": 7580 }, { "epoch": 15.911233307148468, "learning_rate": 0.0004141889440039292, "loss": 3.0623497009277343, "step": 7590 }, { "epoch": 15.932181199266823, "learning_rate": 0.00041391636158282614, "loss": 2.926837921142578, "step": 7600 }, { "epoch": 15.953129091385179, "learning_rate": 0.00041364431662305114, "loss": 2.932399368286133, "step": 7610 }, { "epoch": 15.974076983503535, "learning_rate": 0.0004133728073607005, "loss": 2.996663284301758, "step": 7620 }, { "epoch": 15.99502487562189, "learning_rate": 0.00041310183203996446, "loss": 3.0678241729736326, "step": 7630 }, { "epoch": 16.016758313694684, "learning_rate": 0.0004128313889130795, "loss": 3.262166213989258, "step": 7640 }, { "epoch": 16.03770620581304, "learning_rate": 0.0004125614762402809, "loss": 3.072698402404785, "step": 7650 }, { "epoch": 16.058654097931395, "learning_rate": 0.00041229209228975627, "loss": 2.960147476196289, "step": 7660 }, { "epoch": 16.079601990049753, "learning_rate": 0.000412023235337598, "loss": 2.987987518310547, "step": 7670 }, { "epoch": 16.100549882168107, "learning_rate": 0.00041175490366775766, "loss": 2.9958822250366213, "step": 7680 }, { "epoch": 16.121497774286464, "learning_rate": 0.0004114870955719997, "loss": 3.0043949127197265, "step": 7690 }, { "epoch": 16.142445666404818, "learning_rate": 0.00041121980934985563, "loss": 3.013554573059082, "step": 7700 }, { "epoch": 16.163393558523175, "learning_rate": 0.000410953043308579, "loss": 3.0146947860717774, "step": 7710 }, { "epoch": 16.18434145064153, "learning_rate": 0.0004106867957631001, "loss": 2.9756107330322266, "step": 7720 }, { "epoch": 16.205289342759883, "learning_rate": 0.00041042106503598165, "loss": 2.998594284057617, "step": 7730 }, { "epoch": 16.22623723487824, "learning_rate": 0.0004101558494573738, "loss": 2.968126678466797, "step": 7740 }, { "epoch": 16.247185126996595, "learning_rate": 0.0004098911473649706, "loss": 2.938851737976074, "step": 7750 }, { "epoch": 16.268133019114952, "learning_rate": 0.0004096269571039658, "loss": 3.0778596878051756, "step": 7760 }, { "epoch": 16.289080911233306, "learning_rate": 0.00040936327702701005, "loss": 2.871398162841797, "step": 7770 }, { "epoch": 16.310028803351663, "learning_rate": 0.00040910010549416687, "loss": 2.9972572326660156, "step": 7780 }, { "epoch": 16.330976695470017, "learning_rate": 0.0004088374408728706, "loss": 3.1002374649047852, "step": 7790 }, { "epoch": 16.351924587588375, "learning_rate": 0.0004085752815378834, "loss": 2.9723093032836916, "step": 7800 }, { "epoch": 16.37287247970673, "learning_rate": 0.0004083136258712532, "loss": 3.0089197158813477, "step": 7810 }, { "epoch": 16.393820371825086, "learning_rate": 0.0004080524722622717, "loss": 2.9960916519165037, "step": 7820 }, { "epoch": 16.41476826394344, "learning_rate": 0.00040779181910743294, "loss": 2.948496437072754, "step": 7830 }, { "epoch": 16.435716156061797, "learning_rate": 0.0004075316648103914, "loss": 2.986690139770508, "step": 7840 }, { "epoch": 16.45666404818015, "learning_rate": 0.0004072720077819216, "loss": 3.064560317993164, "step": 7850 }, { "epoch": 16.47761194029851, "learning_rate": 0.0004070128464398768, "loss": 3.0366847991943358, "step": 7860 }, { "epoch": 16.498559832416863, "learning_rate": 0.0004067541792091489, "loss": 3.0212535858154297, "step": 7870 }, { "epoch": 16.51950772453522, "learning_rate": 0.0004064960045216279, "loss": 2.966229057312012, "step": 7880 }, { "epoch": 16.540455616653574, "learning_rate": 0.0004062383208161624, "loss": 2.9718713760375977, "step": 7890 }, { "epoch": 16.56140350877193, "learning_rate": 0.0004059811265385193, "loss": 2.938900947570801, "step": 7900 }, { "epoch": 16.582351400890285, "learning_rate": 0.00040572442014134516, "loss": 3.0135732650756837, "step": 7910 }, { "epoch": 16.60329929300864, "learning_rate": 0.00040546820008412654, "loss": 3.016792869567871, "step": 7920 }, { "epoch": 16.624247185126997, "learning_rate": 0.0004052124648331515, "loss": 2.961100387573242, "step": 7930 }, { "epoch": 16.64519507724535, "learning_rate": 0.00040495721286147086, "loss": 2.9855838775634767, "step": 7940 }, { "epoch": 16.666142969363708, "learning_rate": 0.00040470244264886006, "loss": 3.0260711669921876, "step": 7950 }, { "epoch": 16.687090861482062, "learning_rate": 0.00040444815268178097, "loss": 2.9670747756958007, "step": 7960 }, { "epoch": 16.70803875360042, "learning_rate": 0.00040419434145334414, "loss": 2.9868255615234376, "step": 7970 }, { "epoch": 16.728986645718773, "learning_rate": 0.00040394100746327154, "loss": 2.993141746520996, "step": 7980 }, { "epoch": 16.74993453783713, "learning_rate": 0.0004036881492178589, "loss": 2.991754722595215, "step": 7990 }, { "epoch": 16.770882429955485, "learning_rate": 0.00040343576522993926, "loss": 3.0531938552856444, "step": 8000 }, { "epoch": 16.791830322073842, "learning_rate": 0.00040318385401884554, "loss": 2.8399303436279295, "step": 8010 }, { "epoch": 16.812778214192196, "learning_rate": 0.00040293241411037484, "loss": 3.0251434326171873, "step": 8020 }, { "epoch": 16.833726106310554, "learning_rate": 0.00040268144403675154, "loss": 2.9438486099243164, "step": 8030 }, { "epoch": 16.854673998428908, "learning_rate": 0.0004024309423365915, "loss": 2.9782060623168944, "step": 8040 }, { "epoch": 16.875621890547265, "learning_rate": 0.0004021809075548668, "loss": 2.972634696960449, "step": 8050 }, { "epoch": 16.89656978266562, "learning_rate": 0.0004019313382428694, "loss": 2.979868507385254, "step": 8060 }, { "epoch": 16.917517674783976, "learning_rate": 0.00040168223295817656, "loss": 2.990520477294922, "step": 8070 }, { "epoch": 16.93846556690233, "learning_rate": 0.00040143359026461554, "loss": 3.0180835723876953, "step": 8080 }, { "epoch": 16.959413459020688, "learning_rate": 0.000401185408732229, "loss": 3.0260868072509766, "step": 8090 }, { "epoch": 16.98036135113904, "learning_rate": 0.0004009376869372401, "loss": 2.9574857711791993, "step": 8100 }, { "epoch": 17.002094789211835, "learning_rate": 0.00040069042346201864, "loss": 3.0914968490600585, "step": 8110 }, { "epoch": 17.023042681330192, "learning_rate": 0.00040044361689504655, "loss": 3.062566947937012, "step": 8120 }, { "epoch": 17.043990573448546, "learning_rate": 0.0004001972658308847, "loss": 2.976962661743164, "step": 8130 }, { "epoch": 17.064938465566904, "learning_rate": 0.0003999513688701383, "loss": 2.9750572204589845, "step": 8140 }, { "epoch": 17.085886357685258, "learning_rate": 0.00039970592461942457, "loss": 2.987382698059082, "step": 8150 }, { "epoch": 17.106834249803615, "learning_rate": 0.00039946093169133874, "loss": 3.001695442199707, "step": 8160 }, { "epoch": 17.12778214192197, "learning_rate": 0.0003992163887044217, "loss": 2.961598777770996, "step": 8170 }, { "epoch": 17.148730034040323, "learning_rate": 0.0003989722942831268, "loss": 2.957429313659668, "step": 8180 }, { "epoch": 17.16967792615868, "learning_rate": 0.0003987286470577879, "loss": 3.0364120483398436, "step": 8190 }, { "epoch": 17.190625818277034, "learning_rate": 0.0003984854456645864, "loss": 3.0673593521118163, "step": 8200 }, { "epoch": 17.211573710395392, "learning_rate": 0.0003982426887455199, "loss": 2.9361265182495115, "step": 8210 }, { "epoch": 17.232521602513746, "learning_rate": 0.00039800037494836985, "loss": 3.001542854309082, "step": 8220 }, { "epoch": 17.253469494632103, "learning_rate": 0.00039775850292667005, "loss": 2.95641975402832, "step": 8230 }, { "epoch": 17.274417386750457, "learning_rate": 0.0003975170713396753, "loss": 2.8832208633422853, "step": 8240 }, { "epoch": 17.295365278868815, "learning_rate": 0.0003972760788523301, "loss": 2.9211734771728515, "step": 8250 }, { "epoch": 17.31631317098717, "learning_rate": 0.0003970355241352378, "loss": 2.9908830642700197, "step": 8260 }, { "epoch": 17.337261063105526, "learning_rate": 0.00039679540586462953, "loss": 2.991852951049805, "step": 8270 }, { "epoch": 17.35820895522388, "learning_rate": 0.00039655572272233384, "loss": 3.0602521896362305, "step": 8280 }, { "epoch": 17.379156847342237, "learning_rate": 0.0003963164733957462, "loss": 2.975466728210449, "step": 8290 }, { "epoch": 17.40010473946059, "learning_rate": 0.00039607765657779864, "loss": 3.0946418762207033, "step": 8300 }, { "epoch": 17.42105263157895, "learning_rate": 0.0003958392709669304, "loss": 2.993026924133301, "step": 8310 }, { "epoch": 17.442000523697303, "learning_rate": 0.00039560131526705723, "loss": 3.0121936798095703, "step": 8320 }, { "epoch": 17.46294841581566, "learning_rate": 0.0003953637881875425, "loss": 3.0414730072021485, "step": 8330 }, { "epoch": 17.483896307934014, "learning_rate": 0.0003951266884431675, "loss": 3.0235416412353517, "step": 8340 }, { "epoch": 17.50484420005237, "learning_rate": 0.00039489001475410214, "loss": 2.9818603515625, "step": 8350 }, { "epoch": 17.525792092170725, "learning_rate": 0.00039465376584587626, "loss": 2.994624137878418, "step": 8360 }, { "epoch": 17.54673998428908, "learning_rate": 0.00039441794044935054, "loss": 3.0084808349609373, "step": 8370 }, { "epoch": 17.567687876407437, "learning_rate": 0.00039418253730068797, "loss": 2.8967424392700196, "step": 8380 }, { "epoch": 17.58863576852579, "learning_rate": 0.0003939475551413253, "loss": 2.9253704071044924, "step": 8390 }, { "epoch": 17.609583660644148, "learning_rate": 0.000393712992717945, "loss": 3.0752674102783204, "step": 8400 }, { "epoch": 17.630531552762502, "learning_rate": 0.0003934788487824469, "loss": 2.925820159912109, "step": 8410 }, { "epoch": 17.65147944488086, "learning_rate": 0.0003932451220919205, "loss": 2.9983007431030275, "step": 8420 }, { "epoch": 17.672427336999213, "learning_rate": 0.0003930118114086172, "loss": 2.884238624572754, "step": 8430 }, { "epoch": 17.69337522911757, "learning_rate": 0.00039277891549992266, "loss": 2.854781723022461, "step": 8440 }, { "epoch": 17.714323121235925, "learning_rate": 0.0003925464331383298, "loss": 2.9886890411376954, "step": 8450 }, { "epoch": 17.735271013354282, "learning_rate": 0.00039231436310141113, "loss": 3.0236677169799804, "step": 8460 }, { "epoch": 17.756218905472636, "learning_rate": 0.00039208270417179214, "loss": 2.951685905456543, "step": 8470 }, { "epoch": 17.777166797590993, "learning_rate": 0.0003918514551371243, "loss": 2.971786880493164, "step": 8480 }, { "epoch": 17.798114689709347, "learning_rate": 0.0003916206147900585, "loss": 2.983307647705078, "step": 8490 }, { "epoch": 17.819062581827705, "learning_rate": 0.00039139018192821845, "loss": 2.9782459259033205, "step": 8500 }, { "epoch": 17.84001047394606, "learning_rate": 0.00039116015535417445, "loss": 3.02642765045166, "step": 8510 }, { "epoch": 17.860958366064416, "learning_rate": 0.00039093053387541745, "loss": 3.011845588684082, "step": 8520 }, { "epoch": 17.88190625818277, "learning_rate": 0.00039070131630433274, "loss": 2.939919090270996, "step": 8530 }, { "epoch": 17.902854150301128, "learning_rate": 0.00039047250145817424, "loss": 2.996026039123535, "step": 8540 }, { "epoch": 17.92380204241948, "learning_rate": 0.00039024408815903914, "loss": 2.903793716430664, "step": 8550 }, { "epoch": 17.94474993453784, "learning_rate": 0.0003900160752338421, "loss": 2.9599498748779296, "step": 8560 }, { "epoch": 17.965697826656193, "learning_rate": 0.00038978846151429, "loss": 2.841645050048828, "step": 8570 }, { "epoch": 17.986645718774547, "learning_rate": 0.0003895612458368572, "loss": 2.885163497924805, "step": 8580 }, { "epoch": 18.008379156847344, "learning_rate": 0.00038933442704275974, "loss": 2.9911325454711912, "step": 8590 }, { "epoch": 18.029327048965698, "learning_rate": 0.0003891080039779314, "loss": 3.027914619445801, "step": 8600 }, { "epoch": 18.050274941084055, "learning_rate": 0.0003888819754929986, "loss": 2.9694196701049806, "step": 8610 }, { "epoch": 18.07122283320241, "learning_rate": 0.0003886563404432558, "loss": 2.9900096893310546, "step": 8620 }, { "epoch": 18.092170725320763, "learning_rate": 0.0003884310976886414, "loss": 2.987308692932129, "step": 8630 }, { "epoch": 18.11311861743912, "learning_rate": 0.0003882062460937135, "loss": 2.934325408935547, "step": 8640 }, { "epoch": 18.134066509557474, "learning_rate": 0.0003879817845276255, "loss": 2.9511764526367186, "step": 8650 }, { "epoch": 18.15501440167583, "learning_rate": 0.0003877577118641029, "loss": 2.9479455947875977, "step": 8660 }, { "epoch": 18.175962293794186, "learning_rate": 0.00038753402698141903, "loss": 3.0447383880615235, "step": 8670 }, { "epoch": 18.196910185912543, "learning_rate": 0.0003873107287623715, "loss": 2.917817497253418, "step": 8680 }, { "epoch": 18.217858078030897, "learning_rate": 0.00038708781609425905, "loss": 2.8964914321899413, "step": 8690 }, { "epoch": 18.238805970149254, "learning_rate": 0.000386865287868858, "loss": 2.967067527770996, "step": 8700 }, { "epoch": 18.25975386226761, "learning_rate": 0.0003866431429823993, "loss": 2.991856002807617, "step": 8710 }, { "epoch": 18.280701754385966, "learning_rate": 0.00038642138033554525, "loss": 2.96053524017334, "step": 8720 }, { "epoch": 18.30164964650432, "learning_rate": 0.00038619999883336703, "loss": 2.9373369216918945, "step": 8730 }, { "epoch": 18.322597538622677, "learning_rate": 0.0003859789973853217, "loss": 2.9254953384399416, "step": 8740 }, { "epoch": 18.34354543074103, "learning_rate": 0.0003857583749052298, "loss": 3.008597564697266, "step": 8750 }, { "epoch": 18.36449332285939, "learning_rate": 0.0003855381303112527, "loss": 2.9206886291503906, "step": 8760 }, { "epoch": 18.385441214977742, "learning_rate": 0.0003853182625258708, "loss": 2.9910358428955077, "step": 8770 }, { "epoch": 18.4063891070961, "learning_rate": 0.0003850987704758608, "loss": 2.894259452819824, "step": 8780 }, { "epoch": 18.427336999214454, "learning_rate": 0.00038487965309227413, "loss": 3.0239398956298826, "step": 8790 }, { "epoch": 18.44828489133281, "learning_rate": 0.0003846609093104148, "loss": 3.023584747314453, "step": 8800 }, { "epoch": 18.469232783451165, "learning_rate": 0.00038444253806981784, "loss": 2.9302574157714845, "step": 8810 }, { "epoch": 18.49018067556952, "learning_rate": 0.00038422453831422784, "loss": 2.968699264526367, "step": 8820 }, { "epoch": 18.511128567687877, "learning_rate": 0.0003840069089915771, "loss": 2.904973793029785, "step": 8830 }, { "epoch": 18.53207645980623, "learning_rate": 0.00038378964905396454, "loss": 2.9212614059448243, "step": 8840 }, { "epoch": 18.553024351924588, "learning_rate": 0.00038357275745763475, "loss": 2.9412769317626952, "step": 8850 }, { "epoch": 18.573972244042942, "learning_rate": 0.0003833562331629563, "loss": 2.926407814025879, "step": 8860 }, { "epoch": 18.5949201361613, "learning_rate": 0.0003831400751344014, "loss": 2.87393741607666, "step": 8870 }, { "epoch": 18.615868028279653, "learning_rate": 0.00038292428234052486, "loss": 2.93045597076416, "step": 8880 }, { "epoch": 18.63681592039801, "learning_rate": 0.0003827088537539434, "loss": 2.8498041152954103, "step": 8890 }, { "epoch": 18.657763812516365, "learning_rate": 0.00038249378835131535, "loss": 2.980220603942871, "step": 8900 }, { "epoch": 18.678711704634722, "learning_rate": 0.0003822790851133196, "loss": 2.9403018951416016, "step": 8910 }, { "epoch": 18.699659596753076, "learning_rate": 0.00038206474302463617, "loss": 2.932261848449707, "step": 8920 }, { "epoch": 18.720607488871433, "learning_rate": 0.00038185076107392544, "loss": 3.0572792053222657, "step": 8930 }, { "epoch": 18.741555380989787, "learning_rate": 0.0003816371382538082, "loss": 2.8232454299926757, "step": 8940 }, { "epoch": 18.762503273108145, "learning_rate": 0.0003814238735608459, "loss": 2.9247211456298827, "step": 8950 }, { "epoch": 18.7834511652265, "learning_rate": 0.0003812109659955207, "loss": 2.9309356689453123, "step": 8960 }, { "epoch": 18.804399057344856, "learning_rate": 0.00038099841456221617, "loss": 2.9406196594238283, "step": 8970 }, { "epoch": 18.82534694946321, "learning_rate": 0.0003807862182691969, "loss": 2.9863868713378907, "step": 8980 }, { "epoch": 18.846294841581567, "learning_rate": 0.00038057437612859003, "loss": 2.9020156860351562, "step": 8990 }, { "epoch": 18.86724273369992, "learning_rate": 0.0003803628871563653, "loss": 2.8909185409545897, "step": 9000 }, { "epoch": 18.888190625818275, "learning_rate": 0.0003801517503723161, "loss": 2.905278205871582, "step": 9010 }, { "epoch": 18.909138517936633, "learning_rate": 0.00037994096480004037, "loss": 2.971329689025879, "step": 9020 }, { "epoch": 18.930086410054987, "learning_rate": 0.0003797305294669214, "loss": 2.9014846801757814, "step": 9030 }, { "epoch": 18.951034302173344, "learning_rate": 0.00037952044340410954, "loss": 2.919228363037109, "step": 9040 }, { "epoch": 18.971982194291698, "learning_rate": 0.00037931070564650276, "loss": 2.9611515045166015, "step": 9050 }, { "epoch": 18.992930086410055, "learning_rate": 0.0003791013152327286, "loss": 3.06106014251709, "step": 9060 }, { "epoch": 19.01466352448285, "learning_rate": 0.00037889227120512545, "loss": 3.0117502212524414, "step": 9070 }, { "epoch": 19.035611416601206, "learning_rate": 0.0003786835726097239, "loss": 2.908797836303711, "step": 9080 }, { "epoch": 19.05655930871956, "learning_rate": 0.00037847521849622895, "loss": 2.977317047119141, "step": 9090 }, { "epoch": 19.077507200837914, "learning_rate": 0.0003782672079180015, "loss": 2.982106018066406, "step": 9100 }, { "epoch": 19.09845509295627, "learning_rate": 0.0003780595399320404, "loss": 2.9336454391479494, "step": 9110 }, { "epoch": 19.119402985074625, "learning_rate": 0.00037785221359896444, "loss": 2.9511249542236326, "step": 9120 }, { "epoch": 19.140350877192983, "learning_rate": 0.00037764522798299443, "loss": 2.9214210510253906, "step": 9130 }, { "epoch": 19.161298769311337, "learning_rate": 0.0003774385821519358, "loss": 2.9141027450561525, "step": 9140 }, { "epoch": 19.182246661429694, "learning_rate": 0.0003772322751771605, "loss": 3.0709518432617187, "step": 9150 }, { "epoch": 19.203194553548048, "learning_rate": 0.00037702630613358986, "loss": 2.9491186141967773, "step": 9160 }, { "epoch": 19.224142445666406, "learning_rate": 0.0003768206740996769, "loss": 2.9979949951171876, "step": 9170 }, { "epoch": 19.24509033778476, "learning_rate": 0.00037661537815738915, "loss": 2.9300355911254883, "step": 9180 }, { "epoch": 19.266038229903117, "learning_rate": 0.00037641041739219143, "loss": 2.95788631439209, "step": 9190 }, { "epoch": 19.28698612202147, "learning_rate": 0.00037620579089302876, "loss": 2.960785675048828, "step": 9200 }, { "epoch": 19.30793401413983, "learning_rate": 0.0003760014977523091, "loss": 2.9440664291381835, "step": 9210 }, { "epoch": 19.328881906258182, "learning_rate": 0.00037579753706588697, "loss": 2.9864282608032227, "step": 9220 }, { "epoch": 19.34982979837654, "learning_rate": 0.00037559390793304604, "loss": 2.830784225463867, "step": 9230 }, { "epoch": 19.370777690494894, "learning_rate": 0.00037539060945648286, "loss": 2.981433868408203, "step": 9240 }, { "epoch": 19.39172558261325, "learning_rate": 0.00037518764074229014, "loss": 2.8778303146362303, "step": 9250 }, { "epoch": 19.412673474731605, "learning_rate": 0.00037498500089994, "loss": 2.896072006225586, "step": 9260 }, { "epoch": 19.433621366849962, "learning_rate": 0.00037478268904226795, "loss": 2.962319564819336, "step": 9270 }, { "epoch": 19.454569258968316, "learning_rate": 0.00037458070428545635, "loss": 2.9110170364379884, "step": 9280 }, { "epoch": 19.47551715108667, "learning_rate": 0.00037437904574901817, "loss": 2.928105926513672, "step": 9290 }, { "epoch": 19.496465043205028, "learning_rate": 0.00037417771255578104, "loss": 2.91275577545166, "step": 9300 }, { "epoch": 19.51741293532338, "learning_rate": 0.00037397670383187097, "loss": 2.964938163757324, "step": 9310 }, { "epoch": 19.53836082744174, "learning_rate": 0.0003737760187066967, "loss": 2.953006935119629, "step": 9320 }, { "epoch": 19.559308719560093, "learning_rate": 0.00037357565631293365, "loss": 3.108722686767578, "step": 9330 }, { "epoch": 19.58025661167845, "learning_rate": 0.00037337561578650833, "loss": 3.078016471862793, "step": 9340 }, { "epoch": 19.601204503796804, "learning_rate": 0.00037317589626658255, "loss": 3.006759262084961, "step": 9350 }, { "epoch": 19.622152395915162, "learning_rate": 0.0003729764968955379, "loss": 2.9581228256225587, "step": 9360 }, { "epoch": 19.643100288033516, "learning_rate": 0.00037277741681896045, "loss": 3.199405860900879, "step": 9370 }, { "epoch": 19.664048180151873, "learning_rate": 0.0003725786551856251, "loss": 3.039951133728027, "step": 9380 }, { "epoch": 19.684996072270227, "learning_rate": 0.0003723802111474804, "loss": 2.99322566986084, "step": 9390 }, { "epoch": 19.705943964388585, "learning_rate": 0.0003721820838596335, "loss": 2.9919605255126953, "step": 9400 }, { "epoch": 19.72689185650694, "learning_rate": 0.00037198427248033485, "loss": 2.9185922622680662, "step": 9410 }, { "epoch": 19.747839748625296, "learning_rate": 0.00037178677617096337, "loss": 2.927593994140625, "step": 9420 }, { "epoch": 19.76878764074365, "learning_rate": 0.0003715895940960111, "loss": 2.9274175643920897, "step": 9430 }, { "epoch": 19.789735532862007, "learning_rate": 0.000371392725423069, "loss": 2.9191694259643555, "step": 9440 }, { "epoch": 19.81068342498036, "learning_rate": 0.00037119616932281165, "loss": 3.002157974243164, "step": 9450 }, { "epoch": 19.83163131709872, "learning_rate": 0.00037099992496898276, "loss": 2.9765802383422852, "step": 9460 }, { "epoch": 19.852579209217073, "learning_rate": 0.00037080399153838065, "loss": 2.983877182006836, "step": 9470 }, { "epoch": 19.873527101335426, "learning_rate": 0.00037060836821084373, "loss": 2.9292572021484373, "step": 9480 }, { "epoch": 19.894474993453784, "learning_rate": 0.00037041305416923604, "loss": 2.9650571823120115, "step": 9490 }, { "epoch": 19.915422885572138, "learning_rate": 0.0003702180485994327, "loss": 2.9420921325683596, "step": 9500 }, { "epoch": 19.936370777690495, "learning_rate": 0.00037002335069030614, "loss": 2.8849225997924806, "step": 9510 }, { "epoch": 19.95731866980885, "learning_rate": 0.0003698289596337116, "loss": 2.882498550415039, "step": 9520 }, { "epoch": 19.978266561927207, "learning_rate": 0.00036963487462447303, "loss": 2.9787607192993164, "step": 9530 }, { "epoch": 19.99921445404556, "learning_rate": 0.0003694410948603691, "loss": 2.93823299407959, "step": 9540 }, { "epoch": 20.020947892118354, "learning_rate": 0.00036924761954211944, "loss": 3.033322334289551, "step": 9550 }, { "epoch": 20.04189578423671, "learning_rate": 0.0003690544478733707, "loss": 2.9187992095947264, "step": 9560 }, { "epoch": 20.062843676355065, "learning_rate": 0.0003688615790606828, "loss": 2.8914941787719726, "step": 9570 }, { "epoch": 20.083791568473423, "learning_rate": 0.000368669012313515, "loss": 2.8893537521362305, "step": 9580 }, { "epoch": 20.104739460591777, "learning_rate": 0.0003684767468442126, "loss": 2.906744384765625, "step": 9590 }, { "epoch": 20.125687352710134, "learning_rate": 0.0003682847818679935, "loss": 2.8719203948974608, "step": 9600 }, { "epoch": 20.146635244828488, "learning_rate": 0.0003680931166029342, "loss": 2.956478500366211, "step": 9610 }, { "epoch": 20.167583136946845, "learning_rate": 0.000367901750269957, "loss": 3.0235532760620116, "step": 9620 }, { "epoch": 20.1885310290652, "learning_rate": 0.00036771068209281657, "loss": 2.9067535400390625, "step": 9630 }, { "epoch": 20.209478921183557, "learning_rate": 0.0003675199112980863, "loss": 3.0424097061157225, "step": 9640 }, { "epoch": 20.23042681330191, "learning_rate": 0.0003673294371151458, "loss": 2.873898506164551, "step": 9650 }, { "epoch": 20.251374705420268, "learning_rate": 0.0003671392587761674, "loss": 2.9879985809326173, "step": 9660 }, { "epoch": 20.272322597538622, "learning_rate": 0.0003669493755161031, "loss": 2.9691110610961915, "step": 9670 }, { "epoch": 20.29327048965698, "learning_rate": 0.00036675978657267204, "loss": 2.8997966766357424, "step": 9680 }, { "epoch": 20.314218381775333, "learning_rate": 0.00036657049118634733, "loss": 3.0071743011474608, "step": 9690 }, { "epoch": 20.33516627389369, "learning_rate": 0.0003663814886003432, "loss": 2.9313344955444336, "step": 9700 }, { "epoch": 20.356114166012045, "learning_rate": 0.00036619277806060276, "loss": 2.9554468154907227, "step": 9710 }, { "epoch": 20.377062058130402, "learning_rate": 0.0003660043588157846, "loss": 2.9395862579345704, "step": 9720 }, { "epoch": 20.398009950248756, "learning_rate": 0.00036581623011725114, "loss": 2.8985124588012696, "step": 9730 }, { "epoch": 20.41895784236711, "learning_rate": 0.0003656283912190554, "loss": 2.92258243560791, "step": 9740 }, { "epoch": 20.439905734485468, "learning_rate": 0.00036544084137792883, "loss": 2.9244316101074217, "step": 9750 }, { "epoch": 20.46085362660382, "learning_rate": 0.00036525357985326903, "loss": 2.9558000564575195, "step": 9760 }, { "epoch": 20.48180151872218, "learning_rate": 0.0003650666059071275, "loss": 2.84803466796875, "step": 9770 }, { "epoch": 20.502749410840533, "learning_rate": 0.00036487991880419725, "loss": 2.9493398666381836, "step": 9780 }, { "epoch": 20.52369730295889, "learning_rate": 0.00036469351781180073, "loss": 2.925904083251953, "step": 9790 }, { "epoch": 20.544645195077244, "learning_rate": 0.00036450740219987765, "loss": 2.8513689041137695, "step": 9800 }, { "epoch": 20.5655930871956, "learning_rate": 0.0003643215712409734, "loss": 2.9981321334838866, "step": 9810 }, { "epoch": 20.586540979313956, "learning_rate": 0.00036413602421022653, "loss": 2.8937055587768556, "step": 9820 }, { "epoch": 20.607488871432313, "learning_rate": 0.0003639507603853572, "loss": 3.000600814819336, "step": 9830 }, { "epoch": 20.628436763550667, "learning_rate": 0.00036376577904665525, "loss": 2.8795480728149414, "step": 9840 }, { "epoch": 20.649384655669024, "learning_rate": 0.00036358107947696876, "loss": 2.8654504776000977, "step": 9850 }, { "epoch": 20.67033254778738, "learning_rate": 0.0003633966609616919, "loss": 2.9193105697631836, "step": 9860 }, { "epoch": 20.691280439905736, "learning_rate": 0.00036321252278875344, "loss": 3.000343894958496, "step": 9870 }, { "epoch": 20.71222833202409, "learning_rate": 0.00036302866424860566, "loss": 2.9843284606933596, "step": 9880 }, { "epoch": 20.733176224142447, "learning_rate": 0.00036284508463421217, "loss": 2.9358942031860353, "step": 9890 }, { "epoch": 20.7541241162608, "learning_rate": 0.0003626617832410371, "loss": 2.9408638000488283, "step": 9900 }, { "epoch": 20.77507200837916, "learning_rate": 0.00036247875936703335, "loss": 2.9035417556762697, "step": 9910 }, { "epoch": 20.796019900497512, "learning_rate": 0.00036229601231263145, "loss": 2.8692134857177733, "step": 9920 }, { "epoch": 20.81696779261587, "learning_rate": 0.0003621135413807282, "loss": 2.8154270172119142, "step": 9930 }, { "epoch": 20.837915684734224, "learning_rate": 0.0003619313458766758, "loss": 2.905722427368164, "step": 9940 }, { "epoch": 20.858863576852578, "learning_rate": 0.0003617494251082704, "loss": 2.9300207138061523, "step": 9950 }, { "epoch": 20.879811468970935, "learning_rate": 0.0003615677783857413, "loss": 2.9284923553466795, "step": 9960 }, { "epoch": 20.90075936108929, "learning_rate": 0.0003613864050217397, "loss": 2.8830732345581054, "step": 9970 }, { "epoch": 20.921707253207646, "learning_rate": 0.0003612053043313283, "loss": 2.922040557861328, "step": 9980 }, { "epoch": 20.942655145326, "learning_rate": 0.0003610244756319697, "loss": 2.919090461730957, "step": 9990 }, { "epoch": 20.963603037444358, "learning_rate": 0.00036084391824351607, "loss": 2.9119571685791015, "step": 10000 }, { "epoch": 20.98455092956271, "learning_rate": 0.00036066363148819854, "loss": 3.0065305709838865, "step": 10010 }, { "epoch": 21.006284367635505, "learning_rate": 0.000360483614690616, "loss": 3.088846206665039, "step": 10020 }, { "epoch": 21.027232259753863, "learning_rate": 0.00036030386717772494, "loss": 2.887124443054199, "step": 10030 }, { "epoch": 21.048180151872216, "learning_rate": 0.0003601243882788286, "loss": 2.9657873153686523, "step": 10040 }, { "epoch": 21.069128043990574, "learning_rate": 0.0003599451773255667, "loss": 2.8971757888793945, "step": 10050 }, { "epoch": 21.090075936108928, "learning_rate": 0.00035976623365190465, "loss": 2.8915260314941404, "step": 10060 }, { "epoch": 21.111023828227285, "learning_rate": 0.0003595875565941235, "loss": 2.9463220596313477, "step": 10070 }, { "epoch": 21.13197172034564, "learning_rate": 0.00035940914549080944, "loss": 2.9156425476074217, "step": 10080 }, { "epoch": 21.152919612463997, "learning_rate": 0.0003592309996828435, "loss": 2.996392250061035, "step": 10090 }, { "epoch": 21.17386750458235, "learning_rate": 0.0003590531185133913, "loss": 2.899692344665527, "step": 10100 }, { "epoch": 21.194815396700708, "learning_rate": 0.0003588755013278929, "loss": 2.9242908477783205, "step": 10110 }, { "epoch": 21.215763288819062, "learning_rate": 0.00035869814747405306, "loss": 2.846599578857422, "step": 10120 }, { "epoch": 21.23671118093742, "learning_rate": 0.00035852105630183027, "loss": 2.9692026138305665, "step": 10130 }, { "epoch": 21.257659073055773, "learning_rate": 0.0003583442271634278, "loss": 2.921228790283203, "step": 10140 }, { "epoch": 21.27860696517413, "learning_rate": 0.000358167659413283, "loss": 2.906037139892578, "step": 10150 }, { "epoch": 21.299554857292485, "learning_rate": 0.00035799135240805765, "loss": 2.8459890365600584, "step": 10160 }, { "epoch": 21.320502749410842, "learning_rate": 0.0003578153055066282, "loss": 2.839315986633301, "step": 10170 }, { "epoch": 21.341450641529196, "learning_rate": 0.00035763951807007597, "loss": 2.918286895751953, "step": 10180 }, { "epoch": 21.36239853364755, "learning_rate": 0.0003574639894616771, "loss": 2.9180910110473635, "step": 10190 }, { "epoch": 21.383346425765907, "learning_rate": 0.0003572887190468934, "loss": 2.96252498626709, "step": 10200 }, { "epoch": 21.40429431788426, "learning_rate": 0.00035711370619336214, "loss": 2.873885726928711, "step": 10210 }, { "epoch": 21.42524221000262, "learning_rate": 0.00035693895027088694, "loss": 2.8597929000854494, "step": 10220 }, { "epoch": 21.446190102120973, "learning_rate": 0.00035676445065142793, "loss": 2.9235706329345703, "step": 10230 }, { "epoch": 21.46713799423933, "learning_rate": 0.0003565902067090925, "loss": 2.843309783935547, "step": 10240 }, { "epoch": 21.488085886357684, "learning_rate": 0.0003564162178201257, "loss": 2.9598644256591795, "step": 10250 }, { "epoch": 21.50903377847604, "learning_rate": 0.0003562424833629007, "loss": 2.8366893768310546, "step": 10260 }, { "epoch": 21.529981670594395, "learning_rate": 0.0003560690027179101, "loss": 2.927451515197754, "step": 10270 }, { "epoch": 21.550929562712753, "learning_rate": 0.00035589577526775603, "loss": 2.8992708206176756, "step": 10280 }, { "epoch": 21.571877454831107, "learning_rate": 0.000355722800397141, "loss": 2.8801607131958007, "step": 10290 }, { "epoch": 21.592825346949464, "learning_rate": 0.00035555007749285897, "loss": 2.929705047607422, "step": 10300 }, { "epoch": 21.613773239067818, "learning_rate": 0.00035537760594378607, "loss": 2.9493310928344725, "step": 10310 }, { "epoch": 21.634721131186176, "learning_rate": 0.00035520538514087155, "loss": 2.9074274063110352, "step": 10320 }, { "epoch": 21.65566902330453, "learning_rate": 0.0003550334144771289, "loss": 2.8912160873413084, "step": 10330 }, { "epoch": 21.676616915422887, "learning_rate": 0.00035486169334762637, "loss": 2.8537384033203126, "step": 10340 }, { "epoch": 21.69756480754124, "learning_rate": 0.00035469022114947857, "loss": 2.899664878845215, "step": 10350 }, { "epoch": 21.7185126996596, "learning_rate": 0.00035451899728183736, "loss": 2.9585891723632813, "step": 10360 }, { "epoch": 21.739460591777952, "learning_rate": 0.00035434802114588305, "loss": 2.8878129959106444, "step": 10370 }, { "epoch": 21.760408483896306, "learning_rate": 0.00035417729214481556, "loss": 2.9733861923217773, "step": 10380 }, { "epoch": 21.781356376014664, "learning_rate": 0.0003540068096838456, "loss": 2.877838134765625, "step": 10390 }, { "epoch": 21.802304268133017, "learning_rate": 0.0003538365731701862, "loss": 2.874703598022461, "step": 10400 }, { "epoch": 21.823252160251375, "learning_rate": 0.0003536665820130437, "loss": 2.8807294845581053, "step": 10410 }, { "epoch": 21.84420005236973, "learning_rate": 0.00035349683562360966, "loss": 2.9425344467163086, "step": 10420 }, { "epoch": 21.865147944488086, "learning_rate": 0.0003533273334150517, "loss": 2.9000774383544923, "step": 10430 }, { "epoch": 21.88609583660644, "learning_rate": 0.0003531580748025054, "loss": 2.8818313598632814, "step": 10440 }, { "epoch": 21.907043728724798, "learning_rate": 0.00035298905920306563, "loss": 2.9183849334716796, "step": 10450 }, { "epoch": 21.92799162084315, "learning_rate": 0.00035282028603577823, "loss": 2.843509292602539, "step": 10460 }, { "epoch": 21.94893951296151, "learning_rate": 0.0003526517547216315, "loss": 2.9971471786499024, "step": 10470 }, { "epoch": 21.969887405079863, "learning_rate": 0.000352483464683548, "loss": 2.851757621765137, "step": 10480 }, { "epoch": 21.99083529719822, "learning_rate": 0.0003523154153463761, "loss": 2.889949417114258, "step": 10490 }, { "epoch": 22.012568735271014, "learning_rate": 0.00035214760613688187, "loss": 2.9378170013427733, "step": 10500 }, { "epoch": 22.033516627389368, "learning_rate": 0.0003519800364837407, "loss": 2.9217357635498047, "step": 10510 }, { "epoch": 22.054464519507725, "learning_rate": 0.0003518127058175293, "loss": 2.9642236709594725, "step": 10520 }, { "epoch": 22.07541241162608, "learning_rate": 0.00035164561357071755, "loss": 2.9043264389038086, "step": 10530 }, { "epoch": 22.096360303744436, "learning_rate": 0.0003514787591776602, "loss": 2.9355424880981444, "step": 10540 }, { "epoch": 22.11730819586279, "learning_rate": 0.0003513121420745892, "loss": 2.876350975036621, "step": 10550 }, { "epoch": 22.138256087981148, "learning_rate": 0.0003511457616996052, "loss": 2.9557785034179687, "step": 10560 }, { "epoch": 22.1592039800995, "learning_rate": 0.0003509796174926703, "loss": 2.9274904251098635, "step": 10570 }, { "epoch": 22.18015187221786, "learning_rate": 0.00035081370889559934, "loss": 2.873898506164551, "step": 10580 }, { "epoch": 22.201099764336213, "learning_rate": 0.0003506480353520526, "loss": 3.015602684020996, "step": 10590 }, { "epoch": 22.22204765645457, "learning_rate": 0.0003504825963075276, "loss": 2.836939239501953, "step": 10600 }, { "epoch": 22.242995548572924, "learning_rate": 0.00035031739120935175, "loss": 2.9317630767822265, "step": 10610 }, { "epoch": 22.263943440691282, "learning_rate": 0.0003501524195066741, "loss": 2.8972990036010744, "step": 10620 }, { "epoch": 22.284891332809636, "learning_rate": 0.0003499876806504578, "loss": 2.9171611785888674, "step": 10630 }, { "epoch": 22.305839224927993, "learning_rate": 0.00034982317409347263, "loss": 2.9115921020507813, "step": 10640 }, { "epoch": 22.326787117046347, "learning_rate": 0.00034965889929028707, "loss": 3.029188537597656, "step": 10650 }, { "epoch": 22.3477350091647, "learning_rate": 0.000349494855697261, "loss": 2.900659370422363, "step": 10660 }, { "epoch": 22.36868290128306, "learning_rate": 0.0003493310427725377, "loss": 2.8941118240356447, "step": 10670 }, { "epoch": 22.389630793401412, "learning_rate": 0.0003491674599760369, "loss": 2.913021278381348, "step": 10680 }, { "epoch": 22.41057868551977, "learning_rate": 0.0003490041067694469, "loss": 2.907943916320801, "step": 10690 }, { "epoch": 22.431526577638124, "learning_rate": 0.00034884098261621724, "loss": 2.8749153137207033, "step": 10700 }, { "epoch": 22.45247446975648, "learning_rate": 0.00034867808698155125, "loss": 2.836786460876465, "step": 10710 }, { "epoch": 22.473422361874835, "learning_rate": 0.0003485154193323988, "loss": 2.9680070877075195, "step": 10720 }, { "epoch": 22.494370253993193, "learning_rate": 0.00034835297913744903, "loss": 2.839517593383789, "step": 10730 }, { "epoch": 22.515318146111547, "learning_rate": 0.0003481907658671227, "loss": 2.9294412612915037, "step": 10740 }, { "epoch": 22.536266038229904, "learning_rate": 0.0003480287789935653, "loss": 2.931773376464844, "step": 10750 }, { "epoch": 22.557213930348258, "learning_rate": 0.00034786701799063976, "loss": 2.8776823043823243, "step": 10760 }, { "epoch": 22.578161822466615, "learning_rate": 0.00034770548233391924, "loss": 2.8429998397827148, "step": 10770 }, { "epoch": 22.59910971458497, "learning_rate": 0.0003475441715006799, "loss": 2.8952512741088867, "step": 10780 }, { "epoch": 22.620057606703327, "learning_rate": 0.0003473830849698938, "loss": 2.8576940536499023, "step": 10790 }, { "epoch": 22.64100549882168, "learning_rate": 0.0003472222222222222, "loss": 2.9290803909301757, "step": 10800 }, { "epoch": 22.661953390940038, "learning_rate": 0.00034706158274000796, "loss": 2.861796569824219, "step": 10810 }, { "epoch": 22.682901283058392, "learning_rate": 0.00034690116600726885, "loss": 2.9451555252075194, "step": 10820 }, { "epoch": 22.70384917517675, "learning_rate": 0.0003467409715096907, "loss": 2.8637598037719725, "step": 10830 }, { "epoch": 22.724797067295103, "learning_rate": 0.00034658099873462027, "loss": 2.919582176208496, "step": 10840 }, { "epoch": 22.745744959413457, "learning_rate": 0.0003464212471710583, "loss": 2.9625146865844725, "step": 10850 }, { "epoch": 22.766692851531815, "learning_rate": 0.0003462617163096529, "loss": 2.8601587295532225, "step": 10860 }, { "epoch": 22.78764074365017, "learning_rate": 0.00034610240564269265, "loss": 2.8949390411376954, "step": 10870 }, { "epoch": 22.808588635768526, "learning_rate": 0.0003459433146640997, "loss": 2.8205642700195312, "step": 10880 }, { "epoch": 22.82953652788688, "learning_rate": 0.00034578444286942307, "loss": 2.93542366027832, "step": 10890 }, { "epoch": 22.850484420005237, "learning_rate": 0.00034562578975583187, "loss": 2.8913852691650392, "step": 10900 }, { "epoch": 22.87143231212359, "learning_rate": 0.00034546735482210894, "loss": 2.9478212356567384, "step": 10910 }, { "epoch": 22.89238020424195, "learning_rate": 0.0003453091375686437, "loss": 2.854338455200195, "step": 10920 }, { "epoch": 22.913328096360303, "learning_rate": 0.00034515113749742586, "loss": 2.919601058959961, "step": 10930 }, { "epoch": 22.93427598847866, "learning_rate": 0.00034499335411203894, "loss": 2.88704776763916, "step": 10940 }, { "epoch": 22.955223880597014, "learning_rate": 0.00034483578691765326, "loss": 2.906253433227539, "step": 10950 }, { "epoch": 22.97617177271537, "learning_rate": 0.00034467843542102, "loss": 2.8981559753417967, "step": 10960 }, { "epoch": 22.997119664833725, "learning_rate": 0.0003445212991304641, "loss": 2.9073596954345704, "step": 10970 }, { "epoch": 23.01885310290652, "learning_rate": 0.00034436437755587827, "loss": 2.960616874694824, "step": 10980 }, { "epoch": 23.039800995024876, "learning_rate": 0.00034420767020871656, "loss": 2.9549840927124023, "step": 10990 }, { "epoch": 23.06074888714323, "learning_rate": 0.00034405117660198765, "loss": 2.9256917953491213, "step": 11000 }, { "epoch": 23.081696779261588, "learning_rate": 0.00034389489625024885, "loss": 2.89876651763916, "step": 11010 }, { "epoch": 23.10264467137994, "learning_rate": 0.00034373882866959936, "loss": 2.828813362121582, "step": 11020 }, { "epoch": 23.1235925634983, "learning_rate": 0.0003435829733776745, "loss": 2.9374326705932616, "step": 11030 }, { "epoch": 23.144540455616653, "learning_rate": 0.00034342732989363903, "loss": 2.928928184509277, "step": 11040 }, { "epoch": 23.16548834773501, "learning_rate": 0.0003432718977381811, "loss": 2.8702091217041015, "step": 11050 }, { "epoch": 23.186436239853364, "learning_rate": 0.0003431166764335058, "loss": 2.937228965759277, "step": 11060 }, { "epoch": 23.207384131971722, "learning_rate": 0.0003429616655033297, "loss": 2.9454578399658202, "step": 11070 }, { "epoch": 23.228332024090076, "learning_rate": 0.00034280686447287373, "loss": 2.8274587631225585, "step": 11080 }, { "epoch": 23.249279916208433, "learning_rate": 0.00034265227286885776, "loss": 2.876905632019043, "step": 11090 }, { "epoch": 23.270227808326787, "learning_rate": 0.00034249789021949435, "loss": 2.8264415740966795, "step": 11100 }, { "epoch": 23.29117570044514, "learning_rate": 0.0003423437160544826, "loss": 2.8484895706176756, "step": 11110 }, { "epoch": 23.3121235925635, "learning_rate": 0.0003421897499050022, "loss": 2.885685920715332, "step": 11120 }, { "epoch": 23.333071484681852, "learning_rate": 0.0003420359913037075, "loss": 2.9073020935058596, "step": 11130 }, { "epoch": 23.35401937680021, "learning_rate": 0.0003418824397847216, "loss": 2.919231986999512, "step": 11140 }, { "epoch": 23.374967268918564, "learning_rate": 0.00034172909488363007, "loss": 2.8528385162353516, "step": 11150 }, { "epoch": 23.39591516103692, "learning_rate": 0.00034157595613747545, "loss": 2.8856670379638674, "step": 11160 }, { "epoch": 23.416863053155275, "learning_rate": 0.00034142302308475133, "loss": 2.851297950744629, "step": 11170 }, { "epoch": 23.437810945273633, "learning_rate": 0.0003412702952653962, "loss": 2.8231760025024415, "step": 11180 }, { "epoch": 23.458758837391986, "learning_rate": 0.00034111777222078796, "loss": 2.9219854354858397, "step": 11190 }, { "epoch": 23.479706729510344, "learning_rate": 0.00034096545349373804, "loss": 2.977204132080078, "step": 11200 }, { "epoch": 23.500654621628698, "learning_rate": 0.0003408133386284857, "loss": 2.8231952667236326, "step": 11210 }, { "epoch": 23.521602513747055, "learning_rate": 0.0003406614271706919, "loss": 2.891893196105957, "step": 11220 }, { "epoch": 23.54255040586541, "learning_rate": 0.0003405097186674344, "loss": 2.8958648681640624, "step": 11230 }, { "epoch": 23.563498297983767, "learning_rate": 0.00034035821266720136, "loss": 2.8495506286621093, "step": 11240 }, { "epoch": 23.58444619010212, "learning_rate": 0.0003402069087198858, "loss": 2.935627746582031, "step": 11250 }, { "epoch": 23.605394082220478, "learning_rate": 0.00034005580637678053, "loss": 2.840359687805176, "step": 11260 }, { "epoch": 23.626341974338832, "learning_rate": 0.00033990490519057183, "loss": 2.9121625900268553, "step": 11270 }, { "epoch": 23.64728986645719, "learning_rate": 0.0003397542047153345, "loss": 2.897580146789551, "step": 11280 }, { "epoch": 23.668237758575543, "learning_rate": 0.0003396037045065257, "loss": 2.894269561767578, "step": 11290 }, { "epoch": 23.6891856506939, "learning_rate": 0.0003394534041209802, "loss": 2.923667335510254, "step": 11300 }, { "epoch": 23.710133542812255, "learning_rate": 0.0003393033031169043, "loss": 2.9479984283447265, "step": 11310 }, { "epoch": 23.73108143493061, "learning_rate": 0.0003391534010538705, "loss": 2.9156826019287108, "step": 11320 }, { "epoch": 23.752029327048966, "learning_rate": 0.00033900369749281225, "loss": 2.9133535385131837, "step": 11330 }, { "epoch": 23.77297721916732, "learning_rate": 0.00033885419199601845, "loss": 2.912689971923828, "step": 11340 }, { "epoch": 23.793925111285677, "learning_rate": 0.000338704884127128, "loss": 2.921385955810547, "step": 11350 }, { "epoch": 23.81487300340403, "learning_rate": 0.00033855577345112453, "loss": 2.9245376586914062, "step": 11360 }, { "epoch": 23.83582089552239, "learning_rate": 0.0003384068595343312, "loss": 2.921660232543945, "step": 11370 }, { "epoch": 23.856768787640743, "learning_rate": 0.00033825814194440504, "loss": 2.8597009658813475, "step": 11380 }, { "epoch": 23.8777166797591, "learning_rate": 0.0003381096202503321, "loss": 2.9699087142944336, "step": 11390 }, { "epoch": 23.898664571877454, "learning_rate": 0.00033796129402242193, "loss": 2.868007850646973, "step": 11400 }, { "epoch": 23.91961246399581, "learning_rate": 0.0003378131628323024, "loss": 2.8257036209106445, "step": 11410 }, { "epoch": 23.940560356114165, "learning_rate": 0.0003376652262529146, "loss": 2.8747650146484376, "step": 11420 }, { "epoch": 23.961508248232523, "learning_rate": 0.00033751748385850753, "loss": 2.8721830368041994, "step": 11430 }, { "epoch": 23.982456140350877, "learning_rate": 0.00033736993522463316, "loss": 2.8383148193359373, "step": 11440 }, { "epoch": 24.00418957842367, "learning_rate": 0.00033722257992814113, "loss": 2.9513004302978514, "step": 11450 }, { "epoch": 24.025137470542028, "learning_rate": 0.0003370754175471737, "loss": 2.9344108581542967, "step": 11460 }, { "epoch": 24.04608536266038, "learning_rate": 0.0003369284476611607, "loss": 2.9109573364257812, "step": 11470 }, { "epoch": 24.06703325477874, "learning_rate": 0.00033678166985081433, "loss": 2.8260976791381838, "step": 11480 }, { "epoch": 24.087981146897093, "learning_rate": 0.0003366350836981245, "loss": 2.790972137451172, "step": 11490 }, { "epoch": 24.10892903901545, "learning_rate": 0.0003364886887863534, "loss": 2.8716499328613283, "step": 11500 }, { "epoch": 24.129876931133804, "learning_rate": 0.0003363424847000309, "loss": 2.8708847045898436, "step": 11510 }, { "epoch": 24.15082482325216, "learning_rate": 0.0003361964710249494, "loss": 2.8762466430664064, "step": 11520 }, { "epoch": 24.171772715370516, "learning_rate": 0.00033605064734815865, "loss": 2.8436599731445313, "step": 11530 }, { "epoch": 24.192720607488873, "learning_rate": 0.0003359050132579615, "loss": 2.885796546936035, "step": 11540 }, { "epoch": 24.213668499607227, "learning_rate": 0.00033575956834390843, "loss": 2.8459619522094726, "step": 11550 }, { "epoch": 24.234616391725584, "learning_rate": 0.00033561431219679297, "loss": 2.8883172988891603, "step": 11560 }, { "epoch": 24.25556428384394, "learning_rate": 0.00033546924440864666, "loss": 2.8706939697265623, "step": 11570 }, { "epoch": 24.276512175962292, "learning_rate": 0.0003353243645727346, "loss": 2.8278776168823243, "step": 11580 }, { "epoch": 24.29746006808065, "learning_rate": 0.0003351796722835502, "loss": 2.8596363067626953, "step": 11590 }, { "epoch": 24.318407960199004, "learning_rate": 0.00033503516713681087, "loss": 2.8937658309936523, "step": 11600 }, { "epoch": 24.33935585231736, "learning_rate": 0.00033489084872945283, "loss": 2.9479068756103515, "step": 11610 }, { "epoch": 24.360303744435715, "learning_rate": 0.0003347467166596268, "loss": 2.913376235961914, "step": 11620 }, { "epoch": 24.381251636554072, "learning_rate": 0.0003346027705266929, "loss": 2.7945356369018555, "step": 11630 }, { "epoch": 24.402199528672426, "learning_rate": 0.0003344590099312164, "loss": 2.908190155029297, "step": 11640 }, { "epoch": 24.423147420790784, "learning_rate": 0.00033431543447496275, "loss": 2.8602962493896484, "step": 11650 }, { "epoch": 24.444095312909138, "learning_rate": 0.000334172043760893, "loss": 2.8535890579223633, "step": 11660 }, { "epoch": 24.465043205027495, "learning_rate": 0.0003340288373931593, "loss": 2.8150957107543944, "step": 11670 }, { "epoch": 24.48599109714585, "learning_rate": 0.0003338858149771002, "loss": 2.847452735900879, "step": 11680 }, { "epoch": 24.506938989264206, "learning_rate": 0.0003337429761192361, "loss": 2.7802717208862306, "step": 11690 }, { "epoch": 24.52788688138256, "learning_rate": 0.00033360032042726483, "loss": 2.9678937911987306, "step": 11700 }, { "epoch": 24.548834773500918, "learning_rate": 0.000333457847510057, "loss": 2.8469560623168944, "step": 11710 }, { "epoch": 24.56978266561927, "learning_rate": 0.0003333155569776514, "loss": 2.916895866394043, "step": 11720 }, { "epoch": 24.59073055773763, "learning_rate": 0.00033317344844125064, "loss": 2.8457548141479494, "step": 11730 }, { "epoch": 24.611678449855983, "learning_rate": 0.00033303152151321696, "loss": 2.872743606567383, "step": 11740 }, { "epoch": 24.63262634197434, "learning_rate": 0.00033288977580706714, "loss": 2.890146255493164, "step": 11750 }, { "epoch": 24.653574234092694, "learning_rate": 0.0003327482109374687, "loss": 2.847947883605957, "step": 11760 }, { "epoch": 24.67452212621105, "learning_rate": 0.00033260682652023517, "loss": 2.9236717224121094, "step": 11770 }, { "epoch": 24.695470018329406, "learning_rate": 0.0003324656221723217, "loss": 2.9201459884643555, "step": 11780 }, { "epoch": 24.71641791044776, "learning_rate": 0.000332324597511821, "loss": 2.8557527542114256, "step": 11790 }, { "epoch": 24.737365802566117, "learning_rate": 0.00033218375215795864, "loss": 2.875984001159668, "step": 11800 }, { "epoch": 24.75831369468447, "learning_rate": 0.00033204308573108897, "loss": 2.830782890319824, "step": 11810 }, { "epoch": 24.77926158680283, "learning_rate": 0.00033190259785269066, "loss": 2.844138526916504, "step": 11820 }, { "epoch": 24.800209478921182, "learning_rate": 0.0003317622881453626, "loss": 2.8771383285522463, "step": 11830 }, { "epoch": 24.82115737103954, "learning_rate": 0.0003316221562328194, "loss": 2.863381767272949, "step": 11840 }, { "epoch": 24.842105263157894, "learning_rate": 0.0003314822017398875, "loss": 2.8934911727905273, "step": 11850 }, { "epoch": 24.86305315527625, "learning_rate": 0.00033134242429250053, "loss": 2.7928911209106446, "step": 11860 }, { "epoch": 24.884001047394605, "learning_rate": 0.00033120282351769556, "loss": 2.8646501541137694, "step": 11870 }, { "epoch": 24.904948939512963, "learning_rate": 0.0003310633990436084, "loss": 2.8573431015014648, "step": 11880 }, { "epoch": 24.925896831631317, "learning_rate": 0.00033092415049947006, "loss": 2.9596303939819335, "step": 11890 }, { "epoch": 24.946844723749674, "learning_rate": 0.00033078507751560195, "loss": 2.809922790527344, "step": 11900 }, { "epoch": 24.967792615868028, "learning_rate": 0.00033064617972341235, "loss": 2.829710578918457, "step": 11910 }, { "epoch": 24.988740507986385, "learning_rate": 0.0003305074567553919, "loss": 2.837497520446777, "step": 11920 }, { "epoch": 25.01047394605918, "learning_rate": 0.0003303689082451096, "loss": 2.998362922668457, "step": 11930 }, { "epoch": 25.031421838177533, "learning_rate": 0.00033023053382720904, "loss": 2.8903406143188475, "step": 11940 }, { "epoch": 25.05236973029589, "learning_rate": 0.0003300923331374039, "loss": 2.88183536529541, "step": 11950 }, { "epoch": 25.073317622414244, "learning_rate": 0.00032995430581247417, "loss": 2.8853179931640627, "step": 11960 }, { "epoch": 25.0942655145326, "learning_rate": 0.0003298164514902622, "loss": 2.8495412826538087, "step": 11970 }, { "epoch": 25.115213406650955, "learning_rate": 0.0003296787698096686, "loss": 2.8417972564697265, "step": 11980 }, { "epoch": 25.136161298769313, "learning_rate": 0.0003295412604106482, "loss": 2.8254583358764647, "step": 11990 }, { "epoch": 25.157109190887667, "learning_rate": 0.00032940392293420614, "loss": 2.8476821899414064, "step": 12000 }, { "epoch": 25.178057083006024, "learning_rate": 0.00032926675702239425, "loss": 2.8646284103393556, "step": 12010 }, { "epoch": 25.199004975124378, "learning_rate": 0.00032912976231830646, "loss": 2.8645925521850586, "step": 12020 }, { "epoch": 25.219952867242732, "learning_rate": 0.0003289929384660757, "loss": 2.9142387390136717, "step": 12030 }, { "epoch": 25.24090075936109, "learning_rate": 0.0003288562851108693, "loss": 2.911361312866211, "step": 12040 }, { "epoch": 25.261848651479443, "learning_rate": 0.0003287198018988856, "loss": 2.810334014892578, "step": 12050 }, { "epoch": 25.2827965435978, "learning_rate": 0.00032858348847734985, "loss": 2.8691171646118163, "step": 12060 }, { "epoch": 25.303744435716155, "learning_rate": 0.00032844734449451055, "loss": 2.8615827560424805, "step": 12070 }, { "epoch": 25.324692327834512, "learning_rate": 0.00032831136959963553, "loss": 2.8075706481933596, "step": 12080 }, { "epoch": 25.345640219952866, "learning_rate": 0.00032817556344300823, "loss": 2.836076354980469, "step": 12090 }, { "epoch": 25.366588112071224, "learning_rate": 0.0003280399256759237, "loss": 2.873185729980469, "step": 12100 }, { "epoch": 25.387536004189577, "learning_rate": 0.0003279044559506852, "loss": 2.8835927963256838, "step": 12110 }, { "epoch": 25.408483896307935, "learning_rate": 0.0003277691539206003, "loss": 2.884838676452637, "step": 12120 }, { "epoch": 25.42943178842629, "learning_rate": 0.0003276340192399769, "loss": 2.9353681564331056, "step": 12130 }, { "epoch": 25.450379680544646, "learning_rate": 0.00032749905156412, "loss": 2.87127571105957, "step": 12140 }, { "epoch": 25.471327572663, "learning_rate": 0.0003273642505493275, "loss": 2.848041534423828, "step": 12150 }, { "epoch": 25.492275464781358, "learning_rate": 0.0003272296158528871, "loss": 2.8736820220947266, "step": 12160 }, { "epoch": 25.51322335689971, "learning_rate": 0.000327095147133072, "loss": 2.889766502380371, "step": 12170 }, { "epoch": 25.53417124901807, "learning_rate": 0.00032696084404913777, "loss": 2.8456445693969727, "step": 12180 }, { "epoch": 25.555119141136423, "learning_rate": 0.00032682670626131837, "loss": 2.8694175720214843, "step": 12190 }, { "epoch": 25.57606703325478, "learning_rate": 0.0003266927334308229, "loss": 2.863827705383301, "step": 12200 }, { "epoch": 25.597014925373134, "learning_rate": 0.0003265589252198317, "loss": 2.8949514389038087, "step": 12210 }, { "epoch": 25.617962817491488, "learning_rate": 0.0003264252812914928, "loss": 2.870989990234375, "step": 12220 }, { "epoch": 25.638910709609846, "learning_rate": 0.0003262918013099186, "loss": 2.8301280975341796, "step": 12230 }, { "epoch": 25.6598586017282, "learning_rate": 0.00032615848494018204, "loss": 2.7910818099975585, "step": 12240 }, { "epoch": 25.680806493846557, "learning_rate": 0.0003260253318483131, "loss": 2.8830698013305662, "step": 12250 }, { "epoch": 25.70175438596491, "learning_rate": 0.0003258923417012957, "loss": 2.886226844787598, "step": 12260 }, { "epoch": 25.72270227808327, "learning_rate": 0.00032575951416706354, "loss": 2.9646997451782227, "step": 12270 }, { "epoch": 25.743650170201622, "learning_rate": 0.0003256268489144972, "loss": 2.896713066101074, "step": 12280 }, { "epoch": 25.76459806231998, "learning_rate": 0.0003254943456134202, "loss": 2.8680368423461915, "step": 12290 }, { "epoch": 25.785545954438334, "learning_rate": 0.0003253620039345959, "loss": 2.866026496887207, "step": 12300 }, { "epoch": 25.80649384655669, "learning_rate": 0.0003252298235497241, "loss": 2.862067985534668, "step": 12310 }, { "epoch": 25.827441738675045, "learning_rate": 0.0003250978041314371, "loss": 2.8973188400268555, "step": 12320 }, { "epoch": 25.848389630793402, "learning_rate": 0.000324965945353297, "loss": 2.9389106750488283, "step": 12330 }, { "epoch": 25.869337522911756, "learning_rate": 0.0003248342468897917, "loss": 2.9147424697875977, "step": 12340 }, { "epoch": 25.890285415030114, "learning_rate": 0.00032470270841633195, "loss": 2.894465446472168, "step": 12350 }, { "epoch": 25.911233307148468, "learning_rate": 0.00032457132960924783, "loss": 2.9301485061645507, "step": 12360 }, { "epoch": 25.932181199266825, "learning_rate": 0.00032444011014578535, "loss": 2.8576644897460937, "step": 12370 }, { "epoch": 25.95312909138518, "learning_rate": 0.00032430904970410314, "loss": 2.836701202392578, "step": 12380 }, { "epoch": 25.974076983503537, "learning_rate": 0.0003241781479632693, "loss": 2.8457481384277346, "step": 12390 }, { "epoch": 25.99502487562189, "learning_rate": 0.0003240474046032579, "loss": 2.829239082336426, "step": 12400 }, { "epoch": 26.016758313694684, "learning_rate": 0.00032391681930494566, "loss": 3.0122323989868165, "step": 12410 }, { "epoch": 26.03770620581304, "learning_rate": 0.000323786391750109, "loss": 2.8899608612060548, "step": 12420 }, { "epoch": 26.058654097931395, "learning_rate": 0.0003236561216214202, "loss": 2.9380813598632813, "step": 12430 }, { "epoch": 26.079601990049753, "learning_rate": 0.000323526008602445, "loss": 2.9364286422729493, "step": 12440 }, { "epoch": 26.100549882168107, "learning_rate": 0.0003233960523776387, "loss": 2.8298776626586912, "step": 12450 }, { "epoch": 26.121497774286464, "learning_rate": 0.0003232662526323429, "loss": 2.868173027038574, "step": 12460 }, { "epoch": 26.142445666404818, "learning_rate": 0.0003231366090527828, "loss": 2.8364093780517576, "step": 12470 }, { "epoch": 26.163393558523175, "learning_rate": 0.00032300712132606366, "loss": 2.917738342285156, "step": 12480 }, { "epoch": 26.18434145064153, "learning_rate": 0.0003228777891401678, "loss": 2.8115827560424806, "step": 12490 }, { "epoch": 26.205289342759883, "learning_rate": 0.0003227486121839514, "loss": 2.8544151306152346, "step": 12500 }, { "epoch": 26.22623723487824, "learning_rate": 0.00032261959014714107, "loss": 2.890985870361328, "step": 12510 }, { "epoch": 26.247185126996595, "learning_rate": 0.0003224907227203312, "loss": 2.8269269943237303, "step": 12520 }, { "epoch": 26.268133019114952, "learning_rate": 0.0003223620095949806, "loss": 2.8392301559448243, "step": 12530 }, { "epoch": 26.289080911233306, "learning_rate": 0.00032223345046340936, "loss": 2.8283065795898437, "step": 12540 }, { "epoch": 26.310028803351663, "learning_rate": 0.00032210504501879576, "loss": 2.9033248901367186, "step": 12550 }, { "epoch": 26.330976695470017, "learning_rate": 0.0003219767929551733, "loss": 2.8192907333374024, "step": 12560 }, { "epoch": 26.351924587588375, "learning_rate": 0.00032184869396742754, "loss": 2.8758308410644533, "step": 12570 }, { "epoch": 26.37287247970673, "learning_rate": 0.00032172074775129323, "loss": 2.8491661071777346, "step": 12580 }, { "epoch": 26.393820371825086, "learning_rate": 0.00032159295400335114, "loss": 2.862008285522461, "step": 12590 }, { "epoch": 26.41476826394344, "learning_rate": 0.00032146531242102476, "loss": 2.854539489746094, "step": 12600 }, { "epoch": 26.435716156061797, "learning_rate": 0.0003213378227025779, "loss": 2.9059074401855467, "step": 12610 }, { "epoch": 26.45666404818015, "learning_rate": 0.00032121048454711114, "loss": 2.8347517013549806, "step": 12620 }, { "epoch": 26.47761194029851, "learning_rate": 0.00032108329765455926, "loss": 2.8621740341186523, "step": 12630 }, { "epoch": 26.498559832416863, "learning_rate": 0.00032095626172568784, "loss": 2.8287914276123045, "step": 12640 }, { "epoch": 26.51950772453522, "learning_rate": 0.00032082937646209084, "loss": 2.8201780319213867, "step": 12650 }, { "epoch": 26.540455616653574, "learning_rate": 0.0003207026415661871, "loss": 2.853387451171875, "step": 12660 }, { "epoch": 26.56140350877193, "learning_rate": 0.0003205760567412178, "loss": 2.8255029678344727, "step": 12670 }, { "epoch": 26.582351400890285, "learning_rate": 0.00032044962169124335, "loss": 2.8133966445922853, "step": 12680 }, { "epoch": 26.60329929300864, "learning_rate": 0.0003203233361211406, "loss": 2.8209064483642576, "step": 12690 }, { "epoch": 26.624247185126997, "learning_rate": 0.00032019719973659996, "loss": 2.839722442626953, "step": 12700 }, { "epoch": 26.64519507724535, "learning_rate": 0.00032007121224412224, "loss": 2.8414018630981444, "step": 12710 }, { "epoch": 26.666142969363708, "learning_rate": 0.0003199453733510162, "loss": 2.8677789688110353, "step": 12720 }, { "epoch": 26.687090861482062, "learning_rate": 0.00031981968276539543, "loss": 2.9177148818969725, "step": 12730 }, { "epoch": 26.70803875360042, "learning_rate": 0.0003196941401961754, "loss": 2.8555475234985352, "step": 12740 }, { "epoch": 26.728986645718773, "learning_rate": 0.000319568745353071, "loss": 2.8636154174804687, "step": 12750 }, { "epoch": 26.74993453783713, "learning_rate": 0.0003194434979465935, "loss": 2.810639190673828, "step": 12760 }, { "epoch": 26.770882429955485, "learning_rate": 0.0003193183976880476, "loss": 2.9356500625610353, "step": 12770 }, { "epoch": 26.791830322073842, "learning_rate": 0.00031919344428952895, "loss": 2.848637580871582, "step": 12780 }, { "epoch": 26.812778214192196, "learning_rate": 0.0003190686374639211, "loss": 2.8234004974365234, "step": 12790 }, { "epoch": 26.833726106310554, "learning_rate": 0.00031894397692489295, "loss": 2.8002485275268554, "step": 12800 }, { "epoch": 26.854673998428908, "learning_rate": 0.0003188194623868958, "loss": 2.841193199157715, "step": 12810 }, { "epoch": 26.875621890547265, "learning_rate": 0.00031869509356516063, "loss": 2.8377119064331056, "step": 12820 }, { "epoch": 26.89656978266562, "learning_rate": 0.00031857087017569556, "loss": 2.797208786010742, "step": 12830 }, { "epoch": 26.917517674783976, "learning_rate": 0.0003184467919352828, "loss": 2.778369140625, "step": 12840 }, { "epoch": 26.93846556690233, "learning_rate": 0.0003183228585614763, "loss": 2.8303714752197267, "step": 12850 }, { "epoch": 26.959413459020688, "learning_rate": 0.0003181990697725988, "loss": 2.805090141296387, "step": 12860 }, { "epoch": 26.98036135113904, "learning_rate": 0.0003180754252877392, "loss": 2.7620264053344727, "step": 12870 }, { "epoch": 27.002094789211835, "learning_rate": 0.0003179519248267498, "loss": 2.9018489837646486, "step": 12880 }, { "epoch": 27.023042681330192, "learning_rate": 0.000317828568110244, "loss": 2.8413219451904297, "step": 12890 }, { "epoch": 27.043990573448546, "learning_rate": 0.000317705354859593, "loss": 2.868427276611328, "step": 12900 }, { "epoch": 27.064938465566904, "learning_rate": 0.0003175822847969239, "loss": 2.84520263671875, "step": 12910 }, { "epoch": 27.085886357685258, "learning_rate": 0.00031745935764511645, "loss": 2.865756607055664, "step": 12920 }, { "epoch": 27.106834249803615, "learning_rate": 0.0003173365731278007, "loss": 2.8851186752319338, "step": 12930 }, { "epoch": 27.12778214192197, "learning_rate": 0.00031721393096935445, "loss": 2.8631362915039062, "step": 12940 }, { "epoch": 27.148730034040323, "learning_rate": 0.00031709143089490063, "loss": 2.8974273681640623, "step": 12950 }, { "epoch": 27.16967792615868, "learning_rate": 0.00031696907263030445, "loss": 2.8190950393676757, "step": 12960 }, { "epoch": 27.190625818277034, "learning_rate": 0.00031684685590217115, "loss": 2.861093521118164, "step": 12970 }, { "epoch": 27.211573710395392, "learning_rate": 0.00031672478043784336, "loss": 2.925172233581543, "step": 12980 }, { "epoch": 27.232521602513746, "learning_rate": 0.0003166028459653984, "loss": 2.7551206588745116, "step": 12990 }, { "epoch": 27.253469494632103, "learning_rate": 0.0003164810522136458, "loss": 2.8190824508666994, "step": 13000 }, { "epoch": 27.274417386750457, "learning_rate": 0.0003163593989121249, "loss": 2.884243965148926, "step": 13010 }, { "epoch": 27.295365278868815, "learning_rate": 0.0003162378857911022, "loss": 2.831955909729004, "step": 13020 }, { "epoch": 27.31631317098717, "learning_rate": 0.00031611651258156884, "loss": 2.891588020324707, "step": 13030 }, { "epoch": 27.337261063105526, "learning_rate": 0.0003159952790152381, "loss": 2.8689960479736327, "step": 13040 }, { "epoch": 27.35820895522388, "learning_rate": 0.0003158741848245431, "loss": 2.844234085083008, "step": 13050 }, { "epoch": 27.379156847342237, "learning_rate": 0.0003157532297426339, "loss": 2.783745765686035, "step": 13060 }, { "epoch": 27.40010473946059, "learning_rate": 0.00031563241350337546, "loss": 2.85959415435791, "step": 13070 }, { "epoch": 27.42105263157895, "learning_rate": 0.00031551173584134514, "loss": 2.828862762451172, "step": 13080 }, { "epoch": 27.442000523697303, "learning_rate": 0.0003153911964918298, "loss": 2.8126575469970705, "step": 13090 }, { "epoch": 27.46294841581566, "learning_rate": 0.0003152707951908239, "loss": 2.8336280822753905, "step": 13100 }, { "epoch": 27.483896307934014, "learning_rate": 0.0003151505316750269, "loss": 2.8542291641235353, "step": 13110 }, { "epoch": 27.50484420005237, "learning_rate": 0.0003150304056818405, "loss": 2.8555719375610353, "step": 13120 }, { "epoch": 27.525792092170725, "learning_rate": 0.00031491041694936697, "loss": 2.8440032958984376, "step": 13130 }, { "epoch": 27.54673998428908, "learning_rate": 0.000314790565216406, "loss": 2.899538040161133, "step": 13140 }, { "epoch": 27.567687876407437, "learning_rate": 0.0003146708502224526, "loss": 2.823881149291992, "step": 13150 }, { "epoch": 27.58863576852579, "learning_rate": 0.0003145512717076948, "loss": 2.8198898315429686, "step": 13160 }, { "epoch": 27.609583660644148, "learning_rate": 0.00031443182941301147, "loss": 2.8212156295776367, "step": 13170 }, { "epoch": 27.630531552762502, "learning_rate": 0.0003143125230799694, "loss": 2.7753381729125977, "step": 13180 }, { "epoch": 27.65147944488086, "learning_rate": 0.00031419335245082134, "loss": 2.812895393371582, "step": 13190 }, { "epoch": 27.672427336999213, "learning_rate": 0.00031407431726850375, "loss": 2.8747041702270506, "step": 13200 }, { "epoch": 27.69337522911757, "learning_rate": 0.00031395541727663413, "loss": 2.8663089752197264, "step": 13210 }, { "epoch": 27.714323121235925, "learning_rate": 0.0003138366522195088, "loss": 2.8993961334228517, "step": 13220 }, { "epoch": 27.735271013354282, "learning_rate": 0.0003137180218421011, "loss": 2.9394744873046874, "step": 13230 }, { "epoch": 27.756218905472636, "learning_rate": 0.0003135995258900582, "loss": 2.8471282958984374, "step": 13240 }, { "epoch": 27.777166797590993, "learning_rate": 0.0003134811641096994, "loss": 2.7851446151733397, "step": 13250 }, { "epoch": 27.798114689709347, "learning_rate": 0.00031336293624801393, "loss": 2.819938850402832, "step": 13260 }, { "epoch": 27.819062581827705, "learning_rate": 0.00031324484205265824, "loss": 2.8013900756835937, "step": 13270 }, { "epoch": 27.84001047394606, "learning_rate": 0.000313126881271954, "loss": 2.850057601928711, "step": 13280 }, { "epoch": 27.860958366064416, "learning_rate": 0.0003130090536548859, "loss": 2.7631250381469727, "step": 13290 }, { "epoch": 27.88190625818277, "learning_rate": 0.00031289135895109924, "loss": 2.8360868453979493, "step": 13300 }, { "epoch": 27.902854150301128, "learning_rate": 0.00031277379691089786, "loss": 2.804159927368164, "step": 13310 }, { "epoch": 27.92380204241948, "learning_rate": 0.00031265636728524174, "loss": 2.8401294708251954, "step": 13320 }, { "epoch": 27.94474993453784, "learning_rate": 0.000312539069825745, "loss": 2.850791168212891, "step": 13330 }, { "epoch": 27.965697826656193, "learning_rate": 0.00031242190428467325, "loss": 2.862323188781738, "step": 13340 }, { "epoch": 27.986645718774547, "learning_rate": 0.0003123048704149423, "loss": 2.8848134994506838, "step": 13350 }, { "epoch": 28.008379156847344, "learning_rate": 0.0003121879679701147, "loss": 2.9553651809692383, "step": 13360 }, { "epoch": 28.029327048965698, "learning_rate": 0.00031207119670439884, "loss": 2.893220138549805, "step": 13370 }, { "epoch": 28.050274941084055, "learning_rate": 0.00031195455637264574, "loss": 2.8204929351806642, "step": 13380 }, { "epoch": 28.07122283320241, "learning_rate": 0.00031183804673034756, "loss": 2.8356761932373047, "step": 13390 }, { "epoch": 28.092170725320763, "learning_rate": 0.0003117216675336353, "loss": 2.800448989868164, "step": 13400 }, { "epoch": 28.11311861743912, "learning_rate": 0.00031160541853927627, "loss": 2.8977182388305662, "step": 13410 }, { "epoch": 28.134066509557474, "learning_rate": 0.0003114892995046725, "loss": 2.8017560958862306, "step": 13420 }, { "epoch": 28.15501440167583, "learning_rate": 0.00031137331018785835, "loss": 2.7457189559936523, "step": 13430 }, { "epoch": 28.175962293794186, "learning_rate": 0.00031125745034749834, "loss": 2.8290485382080077, "step": 13440 }, { "epoch": 28.196910185912543, "learning_rate": 0.00031114171974288516, "loss": 2.8317813873291016, "step": 13450 }, { "epoch": 28.217858078030897, "learning_rate": 0.00031102611813393753, "loss": 2.7843399047851562, "step": 13460 }, { "epoch": 28.238805970149254, "learning_rate": 0.0003109106452811981, "loss": 2.8257192611694335, "step": 13470 }, { "epoch": 28.25975386226761, "learning_rate": 0.00031079530094583135, "loss": 2.8432809829711916, "step": 13480 }, { "epoch": 28.280701754385966, "learning_rate": 0.0003106800848896216, "loss": 2.882096862792969, "step": 13490 }, { "epoch": 28.30164964650432, "learning_rate": 0.0003105649968749708, "loss": 2.87137508392334, "step": 13500 }, { "epoch": 28.322597538622677, "learning_rate": 0.0003104500366648965, "loss": 2.8303447723388673, "step": 13510 }, { "epoch": 28.34354543074103, "learning_rate": 0.0003103352040230302, "loss": 2.955478477478027, "step": 13520 }, { "epoch": 28.36449332285939, "learning_rate": 0.00031022049871361445, "loss": 2.7974782943725587, "step": 13530 }, { "epoch": 28.385441214977742, "learning_rate": 0.0003101059205015017, "loss": 2.882868766784668, "step": 13540 }, { "epoch": 28.4063891070961, "learning_rate": 0.0003099914691521518, "loss": 2.9435708999633787, "step": 13550 }, { "epoch": 28.427336999214454, "learning_rate": 0.00030987714443163, "loss": 2.8506664276123046, "step": 13560 }, { "epoch": 28.44828489133281, "learning_rate": 0.00030976294610660516, "loss": 2.8492944717407225, "step": 13570 }, { "epoch": 28.469232783451165, "learning_rate": 0.00030964887394434754, "loss": 2.8658618927001953, "step": 13580 }, { "epoch": 28.49018067556952, "learning_rate": 0.000309534927712727, "loss": 2.8701282501220704, "step": 13590 }, { "epoch": 28.511128567687877, "learning_rate": 0.0003094211071802107, "loss": 2.8161798477172852, "step": 13600 }, { "epoch": 28.53207645980623, "learning_rate": 0.00030930741211586155, "loss": 2.768409538269043, "step": 13610 }, { "epoch": 28.553024351924588, "learning_rate": 0.0003091938422893358, "loss": 2.84487361907959, "step": 13620 }, { "epoch": 28.573972244042942, "learning_rate": 0.00030908039747088155, "loss": 2.8081539154052733, "step": 13630 }, { "epoch": 28.5949201361613, "learning_rate": 0.00030896707743133635, "loss": 2.8049062728881835, "step": 13640 }, { "epoch": 28.615868028279653, "learning_rate": 0.0003088538819421255, "loss": 2.8450254440307616, "step": 13650 }, { "epoch": 28.63681592039801, "learning_rate": 0.00030874081077526003, "loss": 2.8079158782958986, "step": 13660 }, { "epoch": 28.657763812516365, "learning_rate": 0.00030862786370333505, "loss": 2.8801244735717773, "step": 13670 }, { "epoch": 28.678711704634722, "learning_rate": 0.00030851504049952727, "loss": 2.8432153701782226, "step": 13680 }, { "epoch": 28.699659596753076, "learning_rate": 0.00030840234093759347, "loss": 2.913180923461914, "step": 13690 }, { "epoch": 28.720607488871433, "learning_rate": 0.0003082897647918688, "loss": 2.857924461364746, "step": 13700 }, { "epoch": 28.741555380989787, "learning_rate": 0.0003081773118372642, "loss": 2.7912296295166015, "step": 13710 }, { "epoch": 28.762503273108145, "learning_rate": 0.00030806498184926523, "loss": 2.8504261016845702, "step": 13720 }, { "epoch": 28.7834511652265, "learning_rate": 0.0003079527746039298, "loss": 2.8378028869628906, "step": 13730 }, { "epoch": 28.804399057344856, "learning_rate": 0.00030784068987788624, "loss": 2.803904914855957, "step": 13740 }, { "epoch": 28.82534694946321, "learning_rate": 0.00030772872744833183, "loss": 2.839299774169922, "step": 13750 }, { "epoch": 28.846294841581567, "learning_rate": 0.00030761688709303036, "loss": 2.7884681701660154, "step": 13760 }, { "epoch": 28.86724273369992, "learning_rate": 0.0003075051685903109, "loss": 2.88138427734375, "step": 13770 }, { "epoch": 28.888190625818275, "learning_rate": 0.00030739357171906536, "loss": 2.81328125, "step": 13780 }, { "epoch": 28.909138517936633, "learning_rate": 0.0003072820962587471, "loss": 2.8141046524047852, "step": 13790 }, { "epoch": 28.930086410054987, "learning_rate": 0.00030717074198936904, "loss": 2.7672204971313477, "step": 13800 }, { "epoch": 28.951034302173344, "learning_rate": 0.0003070595086915015, "loss": 2.8781991958618165, "step": 13810 }, { "epoch": 28.971982194291698, "learning_rate": 0.00030694839614627076, "loss": 2.7781099319458007, "step": 13820 }, { "epoch": 28.992930086410055, "learning_rate": 0.0003068374041353571, "loss": 2.879766082763672, "step": 13830 }, { "epoch": 29.01466352448285, "learning_rate": 0.000306726532440993, "loss": 2.8991397857666015, "step": 13840 }, { "epoch": 29.035611416601206, "learning_rate": 0.0003066157808459613, "loss": 2.8512521743774415, "step": 13850 }, { "epoch": 29.05655930871956, "learning_rate": 0.0003065051491335936, "loss": 2.833390235900879, "step": 13860 }, { "epoch": 29.077507200837914, "learning_rate": 0.0003063946370877681, "loss": 2.8554765701293947, "step": 13870 }, { "epoch": 29.09845509295627, "learning_rate": 0.0003062842444929085, "loss": 2.7805418014526366, "step": 13880 }, { "epoch": 29.119402985074625, "learning_rate": 0.00030617397113398125, "loss": 3.09820671081543, "step": 13890 }, { "epoch": 29.140350877192983, "learning_rate": 0.00030606381679649483, "loss": 2.900446128845215, "step": 13900 }, { "epoch": 29.161298769311337, "learning_rate": 0.00030595378126649727, "loss": 2.852696418762207, "step": 13910 }, { "epoch": 29.182246661429694, "learning_rate": 0.0003058438643305747, "loss": 3.0798343658447265, "step": 13920 }, { "epoch": 29.203194553548048, "learning_rate": 0.00030573406577584955, "loss": 2.8329389572143553, "step": 13930 }, { "epoch": 29.224142445666406, "learning_rate": 0.000305624385389979, "loss": 2.7638198852539064, "step": 13940 }, { "epoch": 29.24509033778476, "learning_rate": 0.0003055148229611527, "loss": 2.7774702072143556, "step": 13950 }, { "epoch": 29.266038229903117, "learning_rate": 0.00030540537827809176, "loss": 2.884586524963379, "step": 13960 }, { "epoch": 29.28698612202147, "learning_rate": 0.0003052960511300467, "loss": 2.858045196533203, "step": 13970 }, { "epoch": 29.30793401413983, "learning_rate": 0.0003051868413067956, "loss": 2.8505125045776367, "step": 13980 }, { "epoch": 29.328881906258182, "learning_rate": 0.00030507774859864277, "loss": 2.840318298339844, "step": 13990 }, { "epoch": 29.34982979837654, "learning_rate": 0.0003049687727964166, "loss": 2.871793746948242, "step": 14000 }, { "epoch": 29.370777690494894, "learning_rate": 0.00030485991369146834, "loss": 2.814739990234375, "step": 14010 }, { "epoch": 29.39172558261325, "learning_rate": 0.00030475117107567015, "loss": 2.8241125106811524, "step": 14020 }, { "epoch": 29.412673474731605, "learning_rate": 0.0003046425447414135, "loss": 2.802973747253418, "step": 14030 }, { "epoch": 29.433621366849962, "learning_rate": 0.0003045340344816073, "loss": 2.829861068725586, "step": 14040 }, { "epoch": 29.454569258968316, "learning_rate": 0.0003044256400896769, "loss": 2.823344612121582, "step": 14050 }, { "epoch": 29.47551715108667, "learning_rate": 0.0003043173613595614, "loss": 2.811284065246582, "step": 14060 }, { "epoch": 29.496465043205028, "learning_rate": 0.0003042091980857131, "loss": 2.8590465545654298, "step": 14070 }, { "epoch": 29.51741293532338, "learning_rate": 0.0003041011500630949, "loss": 2.8229595184326173, "step": 14080 }, { "epoch": 29.53836082744174, "learning_rate": 0.00030399321708717947, "loss": 2.8343103408813475, "step": 14090 }, { "epoch": 29.559308719560093, "learning_rate": 0.00030388539895394697, "loss": 2.804738235473633, "step": 14100 }, { "epoch": 29.58025661167845, "learning_rate": 0.00030377769545988394, "loss": 2.8719600677490233, "step": 14110 }, { "epoch": 29.601204503796804, "learning_rate": 0.00030367010640198143, "loss": 2.777914810180664, "step": 14120 }, { "epoch": 29.622152395915162, "learning_rate": 0.0003035626315777333, "loss": 2.837109375, "step": 14130 }, { "epoch": 29.643100288033516, "learning_rate": 0.00030345527078513493, "loss": 2.8141595840454103, "step": 14140 }, { "epoch": 29.664048180151873, "learning_rate": 0.0003033480238226813, "loss": 2.8648092269897463, "step": 14150 }, { "epoch": 29.684996072270227, "learning_rate": 0.0003032408904893656, "loss": 2.7934087753295898, "step": 14160 }, { "epoch": 29.705943964388585, "learning_rate": 0.00030313387058467756, "loss": 2.834004783630371, "step": 14170 }, { "epoch": 29.72689185650694, "learning_rate": 0.0003030269639086021, "loss": 2.8099668502807615, "step": 14180 }, { "epoch": 29.747839748625296, "learning_rate": 0.0003029201702616173, "loss": 2.830114555358887, "step": 14190 }, { "epoch": 29.76878764074365, "learning_rate": 0.0003028134894446933, "loss": 2.780957794189453, "step": 14200 }, { "epoch": 29.789735532862007, "learning_rate": 0.00030270692125929034, "loss": 2.829334831237793, "step": 14210 }, { "epoch": 29.81068342498036, "learning_rate": 0.00030260046550735763, "loss": 2.840847969055176, "step": 14220 }, { "epoch": 29.83163131709872, "learning_rate": 0.0003024941219913316, "loss": 2.855925369262695, "step": 14230 }, { "epoch": 29.852579209217073, "learning_rate": 0.00030238789051413416, "loss": 2.8478092193603515, "step": 14240 }, { "epoch": 29.873527101335426, "learning_rate": 0.00030228177087917153, "loss": 2.8140996932983398, "step": 14250 }, { "epoch": 29.894474993453784, "learning_rate": 0.00030217576289033235, "loss": 2.803069496154785, "step": 14260 }, { "epoch": 29.915422885572138, "learning_rate": 0.00030206986635198654, "loss": 2.7434965133666993, "step": 14270 }, { "epoch": 29.936370777690495, "learning_rate": 0.00030196408106898356, "loss": 2.859099006652832, "step": 14280 }, { "epoch": 29.95731866980885, "learning_rate": 0.0003018584068466507, "loss": 2.9088722229003907, "step": 14290 }, { "epoch": 29.978266561927207, "learning_rate": 0.0003017528434907922, "loss": 2.7880224227905273, "step": 14300 }, { "epoch": 29.99921445404556, "learning_rate": 0.00030164739080768704, "loss": 2.8113405227661135, "step": 14310 }, { "epoch": 30.020947892118354, "learning_rate": 0.0003015420486040879, "loss": 2.893621826171875, "step": 14320 }, { "epoch": 30.04189578423671, "learning_rate": 0.00030143681668721935, "loss": 2.8216567993164063, "step": 14330 }, { "epoch": 30.062843676355065, "learning_rate": 0.00030133169486477694, "loss": 2.8155281066894533, "step": 14340 }, { "epoch": 30.083791568473423, "learning_rate": 0.0003012266829449249, "loss": 2.8872468948364256, "step": 14350 }, { "epoch": 30.104739460591777, "learning_rate": 0.00030112178073629544, "loss": 2.820456886291504, "step": 14360 }, { "epoch": 30.125687352710134, "learning_rate": 0.0003010169880479867, "loss": 2.869482231140137, "step": 14370 }, { "epoch": 30.146635244828488, "learning_rate": 0.0003009123046895618, "loss": 2.8011972427368166, "step": 14380 }, { "epoch": 30.167583136946845, "learning_rate": 0.00030080773047104687, "loss": 2.8537342071533205, "step": 14390 }, { "epoch": 30.1885310290652, "learning_rate": 0.0003007032652029301, "loss": 2.808944892883301, "step": 14400 }, { "epoch": 30.209478921183557, "learning_rate": 0.00030059890869615983, "loss": 2.833651542663574, "step": 14410 }, { "epoch": 30.23042681330191, "learning_rate": 0.0003004946607621435, "loss": 2.860894203186035, "step": 14420 }, { "epoch": 30.251374705420268, "learning_rate": 0.0003003905212127461, "loss": 2.835972213745117, "step": 14430 }, { "epoch": 30.272322597538622, "learning_rate": 0.00030028648986028843, "loss": 2.857589912414551, "step": 14440 }, { "epoch": 30.29327048965698, "learning_rate": 0.00030018256651754633, "loss": 2.845281219482422, "step": 14450 }, { "epoch": 30.314218381775333, "learning_rate": 0.00030007875099774864, "loss": 2.7922155380249025, "step": 14460 }, { "epoch": 30.33516627389369, "learning_rate": 0.0002999750431145761, "loss": 2.846644973754883, "step": 14470 }, { "epoch": 30.356114166012045, "learning_rate": 0.0002998714426821599, "loss": 2.83693904876709, "step": 14480 }, { "epoch": 30.377062058130402, "learning_rate": 0.00029976794951508027, "loss": 2.8328250885009765, "step": 14490 }, { "epoch": 30.398009950248756, "learning_rate": 0.00029966456342836505, "loss": 2.8287097930908205, "step": 14500 }, { "epoch": 30.41895784236711, "learning_rate": 0.0002995612842374884, "loss": 2.818513298034668, "step": 14510 }, { "epoch": 30.439905734485468, "learning_rate": 0.0002994581117583693, "loss": 2.804762077331543, "step": 14520 }, { "epoch": 30.46085362660382, "learning_rate": 0.00029935504580737006, "loss": 2.8560808181762694, "step": 14530 }, { "epoch": 30.48180151872218, "learning_rate": 0.00029925208620129546, "loss": 2.7961631774902345, "step": 14540 }, { "epoch": 30.502749410840533, "learning_rate": 0.0002991492327573909, "loss": 2.8281347274780275, "step": 14550 }, { "epoch": 30.52369730295889, "learning_rate": 0.0002990464852933409, "loss": 2.813071060180664, "step": 14560 }, { "epoch": 30.544645195077244, "learning_rate": 0.0002989438436272684, "loss": 2.765872001647949, "step": 14570 }, { "epoch": 30.5655930871956, "learning_rate": 0.00029884130757773275, "loss": 2.7835336685180665, "step": 14580 }, { "epoch": 30.586540979313956, "learning_rate": 0.0002987388769637288, "loss": 2.855548286437988, "step": 14590 }, { "epoch": 30.607488871432313, "learning_rate": 0.00029863655160468534, "loss": 2.804723358154297, "step": 14600 }, { "epoch": 30.628436763550667, "learning_rate": 0.0002985343313204637, "loss": 2.8737287521362305, "step": 14610 }, { "epoch": 30.649384655669024, "learning_rate": 0.0002984322159313568, "loss": 2.871350860595703, "step": 14620 }, { "epoch": 30.67033254778738, "learning_rate": 0.00029833020525808714, "loss": 2.780613327026367, "step": 14630 }, { "epoch": 30.691280439905736, "learning_rate": 0.00029822829912180636, "loss": 2.8216100692749024, "step": 14640 }, { "epoch": 30.71222833202409, "learning_rate": 0.0002981264973440931, "loss": 2.789328956604004, "step": 14650 }, { "epoch": 30.733176224142447, "learning_rate": 0.00029802479974695223, "loss": 2.7879051208496093, "step": 14660 }, { "epoch": 30.7541241162608, "learning_rate": 0.00029792320615281337, "loss": 2.762567710876465, "step": 14670 }, { "epoch": 30.77507200837916, "learning_rate": 0.00029782171638452937, "loss": 2.8410247802734374, "step": 14680 }, { "epoch": 30.796019900497512, "learning_rate": 0.0002977203302653755, "loss": 2.7910200119018556, "step": 14690 }, { "epoch": 30.81696779261587, "learning_rate": 0.0002976190476190476, "loss": 2.8463191986083984, "step": 14700 }, { "epoch": 30.837915684734224, "learning_rate": 0.0002975178682696613, "loss": 2.816401481628418, "step": 14710 }, { "epoch": 30.858863576852578, "learning_rate": 0.0002974167920417504, "loss": 2.850655746459961, "step": 14720 }, { "epoch": 30.879811468970935, "learning_rate": 0.00029731581876026557, "loss": 2.844277191162109, "step": 14730 }, { "epoch": 30.90075936108929, "learning_rate": 0.00029721494825057357, "loss": 2.8203685760498045, "step": 14740 }, { "epoch": 30.921707253207646, "learning_rate": 0.00029711418033845523, "loss": 2.848883628845215, "step": 14750 }, { "epoch": 30.942655145326, "learning_rate": 0.0002970135148501047, "loss": 2.7703632354736327, "step": 14760 }, { "epoch": 30.963603037444358, "learning_rate": 0.00029691295161212816, "loss": 2.8733938217163084, "step": 14770 }, { "epoch": 30.98455092956271, "learning_rate": 0.0002968124904515423, "loss": 2.8676376342773438, "step": 14780 }, { "epoch": 31.006284367635505, "learning_rate": 0.00029671213119577346, "loss": 2.8960426330566404, "step": 14790 }, { "epoch": 31.027232259753863, "learning_rate": 0.00029661187367265593, "loss": 2.8205034255981447, "step": 14800 }, { "epoch": 31.048180151872216, "learning_rate": 0.0002965117177104311, "loss": 2.8493398666381835, "step": 14810 }, { "epoch": 31.069128043990574, "learning_rate": 0.0002964116631377459, "loss": 2.808573913574219, "step": 14820 }, { "epoch": 31.090075936108928, "learning_rate": 0.000296311709783652, "loss": 2.770844078063965, "step": 14830 }, { "epoch": 31.111023828227285, "learning_rate": 0.00029621185747760406, "loss": 2.7819324493408204, "step": 14840 }, { "epoch": 31.13197172034564, "learning_rate": 0.0002961121060494589, "loss": 2.7976245880126953, "step": 14850 }, { "epoch": 31.152919612463997, "learning_rate": 0.00029601245532947417, "loss": 2.8540115356445312, "step": 14860 }, { "epoch": 31.17386750458235, "learning_rate": 0.0002959129051483069, "loss": 2.7655929565429687, "step": 14870 }, { "epoch": 31.194815396700708, "learning_rate": 0.00029581345533701285, "loss": 2.847081184387207, "step": 14880 }, { "epoch": 31.215763288819062, "learning_rate": 0.0002957141057270448, "loss": 2.82701416015625, "step": 14890 }, { "epoch": 31.23671118093742, "learning_rate": 0.0002956148561502513, "loss": 2.8076833724975585, "step": 14900 }, { "epoch": 31.257659073055773, "learning_rate": 0.00029551570643887603, "loss": 2.7729957580566404, "step": 14910 }, { "epoch": 31.27860696517413, "learning_rate": 0.00029541665642555606, "loss": 2.8175632476806642, "step": 14920 }, { "epoch": 31.299554857292485, "learning_rate": 0.00029531770594332096, "loss": 2.781933403015137, "step": 14930 }, { "epoch": 31.320502749410842, "learning_rate": 0.0002952188548255915, "loss": 2.826693534851074, "step": 14940 }, { "epoch": 31.341450641529196, "learning_rate": 0.00029512010290617854, "loss": 2.7952367782592775, "step": 14950 }, { "epoch": 31.36239853364755, "learning_rate": 0.0002950214500192816, "loss": 2.7863574981689454, "step": 14960 }, { "epoch": 31.383346425765907, "learning_rate": 0.00029492289599948834, "loss": 2.8061588287353514, "step": 14970 }, { "epoch": 31.40429431788426, "learning_rate": 0.0002948244406817725, "loss": 2.8176244735717773, "step": 14980 }, { "epoch": 31.42524221000262, "learning_rate": 0.00029472608390149343, "loss": 2.8314136505126952, "step": 14990 }, { "epoch": 31.446190102120973, "learning_rate": 0.00029462782549439473, "loss": 2.802597999572754, "step": 15000 } ], "max_steps": 15000, "num_train_epochs": 32, "total_flos": 4124974225514526720, "trial_name": null, "trial_params": null }