{ "best_metric": null, "best_model_checkpoint": null, "epoch": 14.942528735632184, "global_step": 46800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.031928480204342274, "learning_rate": 1.995742869306088e-05, "loss": 1.0424302673339845, "step": 100 }, { "epoch": 0.06385696040868455, "learning_rate": 1.9914857386121755e-05, "loss": 0.5240679550170898, "step": 200 }, { "epoch": 0.09578544061302682, "learning_rate": 1.9872286079182633e-05, "loss": 0.5014432144165039, "step": 300 }, { "epoch": 0.1277139208173691, "learning_rate": 1.9829714772243508e-05, "loss": 0.48912925720214845, "step": 400 }, { "epoch": 0.15964240102171137, "learning_rate": 1.9787143465304387e-05, "loss": 0.48597183227539065, "step": 500 }, { "epoch": 0.19157088122605365, "learning_rate": 1.974457215836526e-05, "loss": 0.4988706970214844, "step": 600 }, { "epoch": 0.22349936143039592, "learning_rate": 1.970200085142614e-05, "loss": 0.48521129608154295, "step": 700 }, { "epoch": 0.2554278416347382, "learning_rate": 1.9659429544487015e-05, "loss": 0.4737409210205078, "step": 800 }, { "epoch": 0.28735632183908044, "learning_rate": 1.9616858237547893e-05, "loss": 0.478267822265625, "step": 900 }, { "epoch": 0.31928480204342274, "learning_rate": 1.957428693060877e-05, "loss": 0.46842235565185547, "step": 1000 }, { "epoch": 0.351213282247765, "learning_rate": 1.953171562366965e-05, "loss": 0.47340740203857423, "step": 1100 }, { "epoch": 0.3831417624521073, "learning_rate": 1.9489144316730525e-05, "loss": 0.48022716522216796, "step": 1200 }, { "epoch": 0.41507024265644954, "learning_rate": 1.9446573009791403e-05, "loss": 0.4702701950073242, "step": 1300 }, { "epoch": 0.44699872286079184, "learning_rate": 1.9404001702852278e-05, "loss": 0.47483158111572266, "step": 1400 }, { "epoch": 0.4789272030651341, "learning_rate": 1.9361430395913157e-05, "loss": 0.45973930358886717, "step": 1500 }, { "epoch": 0.5108556832694764, "learning_rate": 1.9318859088974035e-05, "loss": 0.43192115783691404, "step": 1600 }, { "epoch": 0.5427841634738186, "learning_rate": 1.927628778203491e-05, "loss": 0.44062965393066406, "step": 1700 }, { "epoch": 0.5747126436781609, "learning_rate": 1.923371647509579e-05, "loss": 0.460767822265625, "step": 1800 }, { "epoch": 0.6066411238825032, "learning_rate": 1.9191145168156663e-05, "loss": 0.4533885192871094, "step": 1900 }, { "epoch": 0.6385696040868455, "learning_rate": 1.914857386121754e-05, "loss": 0.4435346221923828, "step": 2000 }, { "epoch": 0.6704980842911877, "learning_rate": 1.9106002554278417e-05, "loss": 0.4584750747680664, "step": 2100 }, { "epoch": 0.70242656449553, "learning_rate": 1.9063431247339295e-05, "loss": 0.43961280822753906, "step": 2200 }, { "epoch": 0.7343550446998723, "learning_rate": 1.902085994040017e-05, "loss": 0.43362281799316404, "step": 2300 }, { "epoch": 0.7662835249042146, "learning_rate": 1.8978288633461048e-05, "loss": 0.4443118286132812, "step": 2400 }, { "epoch": 0.7982120051085568, "learning_rate": 1.8935717326521927e-05, "loss": 0.4438581848144531, "step": 2500 }, { "epoch": 0.8301404853128991, "learning_rate": 1.88931460195828e-05, "loss": 0.4310215759277344, "step": 2600 }, { "epoch": 0.8620689655172413, "learning_rate": 1.885057471264368e-05, "loss": 0.4399332809448242, "step": 2700 }, { "epoch": 0.8939974457215837, "learning_rate": 1.8808003405704555e-05, "loss": 0.4511014175415039, "step": 2800 }, { "epoch": 0.9259259259259259, "learning_rate": 1.8765432098765433e-05, "loss": 0.44327774047851565, "step": 2900 }, { "epoch": 0.9578544061302682, "learning_rate": 1.872286079182631e-05, "loss": 0.43171138763427735, "step": 3000 }, { "epoch": 0.9897828863346104, "learning_rate": 1.868028948488719e-05, "loss": 0.4460626983642578, "step": 3100 }, { "epoch": 1.0, "eval_loss": 0.3910863995552063, "step": 3132 }, { "epoch": 1.0217113665389528, "learning_rate": 1.8637718177948065e-05, "loss": 0.43170944213867185, "step": 3200 }, { "epoch": 1.053639846743295, "learning_rate": 1.8595146871008943e-05, "loss": 0.42756095886230466, "step": 3300 }, { "epoch": 1.0855683269476373, "learning_rate": 1.8552575564069818e-05, "loss": 0.423099479675293, "step": 3400 }, { "epoch": 1.1174968071519795, "learning_rate": 1.8510004257130697e-05, "loss": 0.42962390899658204, "step": 3500 }, { "epoch": 1.1494252873563218, "learning_rate": 1.846743295019157e-05, "loss": 0.4373976898193359, "step": 3600 }, { "epoch": 1.181353767560664, "learning_rate": 1.842486164325245e-05, "loss": 0.4381977462768555, "step": 3700 }, { "epoch": 1.2132822477650063, "learning_rate": 1.8382290336313325e-05, "loss": 0.4158343887329102, "step": 3800 }, { "epoch": 1.2452107279693487, "learning_rate": 1.8339719029374203e-05, "loss": 0.4281564712524414, "step": 3900 }, { "epoch": 1.277139208173691, "learning_rate": 1.829714772243508e-05, "loss": 0.41806602478027344, "step": 4000 }, { "epoch": 1.3090676883780332, "learning_rate": 1.8254576415495957e-05, "loss": 0.4173674011230469, "step": 4100 }, { "epoch": 1.3409961685823755, "learning_rate": 1.8212005108556835e-05, "loss": 0.43225109100341796, "step": 4200 }, { "epoch": 1.3729246487867177, "learning_rate": 1.816943380161771e-05, "loss": 0.421945686340332, "step": 4300 }, { "epoch": 1.40485312899106, "learning_rate": 1.812686249467859e-05, "loss": 0.42005214691162107, "step": 4400 }, { "epoch": 1.4367816091954024, "learning_rate": 1.8084291187739463e-05, "loss": 0.40552581787109376, "step": 4500 }, { "epoch": 1.4687100893997447, "learning_rate": 1.804171988080034e-05, "loss": 0.41495433807373044, "step": 4600 }, { "epoch": 1.500638569604087, "learning_rate": 1.7999148573861217e-05, "loss": 0.4268427658081055, "step": 4700 }, { "epoch": 1.5325670498084292, "learning_rate": 1.7956577266922095e-05, "loss": 0.4178670120239258, "step": 4800 }, { "epoch": 1.5644955300127714, "learning_rate": 1.7914005959982973e-05, "loss": 0.4088083267211914, "step": 4900 }, { "epoch": 1.5964240102171137, "learning_rate": 1.787143465304385e-05, "loss": 0.4090264129638672, "step": 5000 }, { "epoch": 1.628352490421456, "learning_rate": 1.7828863346104727e-05, "loss": 0.39727077484130857, "step": 5100 }, { "epoch": 1.6602809706257982, "learning_rate": 1.7786292039165605e-05, "loss": 0.40877700805664063, "step": 5200 }, { "epoch": 1.6922094508301404, "learning_rate": 1.774372073222648e-05, "loss": 0.4055898666381836, "step": 5300 }, { "epoch": 1.7241379310344827, "learning_rate": 1.770114942528736e-05, "loss": 0.40825370788574217, "step": 5400 }, { "epoch": 1.756066411238825, "learning_rate": 1.7658578118348237e-05, "loss": 0.39790542602539064, "step": 5500 }, { "epoch": 1.7879948914431671, "learning_rate": 1.761600681140911e-05, "loss": 0.4129365158081055, "step": 5600 }, { "epoch": 1.8199233716475096, "learning_rate": 1.757343550446999e-05, "loss": 0.4137036895751953, "step": 5700 }, { "epoch": 1.8518518518518519, "learning_rate": 1.7530864197530865e-05, "loss": 0.41169502258300783, "step": 5800 }, { "epoch": 1.883780332056194, "learning_rate": 1.7488292890591743e-05, "loss": 0.3932318115234375, "step": 5900 }, { "epoch": 1.9157088122605364, "learning_rate": 1.7445721583652618e-05, "loss": 0.40273929595947267, "step": 6000 }, { "epoch": 1.9476372924648788, "learning_rate": 1.7403150276713497e-05, "loss": 0.4069852066040039, "step": 6100 }, { "epoch": 1.979565772669221, "learning_rate": 1.736057896977437e-05, "loss": 0.40934764862060546, "step": 6200 }, { "epoch": 2.0, "eval_loss": 0.3667986989021301, "step": 6264 }, { "epoch": 2.0114942528735633, "learning_rate": 1.731800766283525e-05, "loss": 0.40322193145751956, "step": 6300 }, { "epoch": 2.0434227330779056, "learning_rate": 1.727543635589613e-05, "loss": 0.3913343048095703, "step": 6400 }, { "epoch": 2.075351213282248, "learning_rate": 1.7232865048957003e-05, "loss": 0.4131240081787109, "step": 6500 }, { "epoch": 2.10727969348659, "learning_rate": 1.719029374201788e-05, "loss": 0.394369010925293, "step": 6600 }, { "epoch": 2.1392081736909323, "learning_rate": 1.7147722435078757e-05, "loss": 0.3991780471801758, "step": 6700 }, { "epoch": 2.1711366538952745, "learning_rate": 1.7105151128139635e-05, "loss": 0.3874116134643555, "step": 6800 }, { "epoch": 2.203065134099617, "learning_rate": 1.706257982120051e-05, "loss": 0.387044792175293, "step": 6900 }, { "epoch": 2.234993614303959, "learning_rate": 1.702000851426139e-05, "loss": 0.39865818023681643, "step": 7000 }, { "epoch": 2.2669220945083013, "learning_rate": 1.6977437207322267e-05, "loss": 0.3999287414550781, "step": 7100 }, { "epoch": 2.2988505747126435, "learning_rate": 1.6934865900383145e-05, "loss": 0.4086351013183594, "step": 7200 }, { "epoch": 2.330779054916986, "learning_rate": 1.689229459344402e-05, "loss": 0.3991414642333984, "step": 7300 }, { "epoch": 2.362707535121328, "learning_rate": 1.68497232865049e-05, "loss": 0.3910430145263672, "step": 7400 }, { "epoch": 2.3946360153256707, "learning_rate": 1.6807151979565773e-05, "loss": 0.39477340698242186, "step": 7500 }, { "epoch": 2.4265644955300125, "learning_rate": 1.676458067262665e-05, "loss": 0.38976318359375, "step": 7600 }, { "epoch": 2.458492975734355, "learning_rate": 1.6722009365687527e-05, "loss": 0.38246253967285154, "step": 7700 }, { "epoch": 2.4904214559386975, "learning_rate": 1.6679438058748405e-05, "loss": 0.38488063812255857, "step": 7800 }, { "epoch": 2.5223499361430397, "learning_rate": 1.6636866751809283e-05, "loss": 0.3952900695800781, "step": 7900 }, { "epoch": 2.554278416347382, "learning_rate": 1.6594295444870158e-05, "loss": 0.3940334701538086, "step": 8000 }, { "epoch": 2.586206896551724, "learning_rate": 1.6551724137931037e-05, "loss": 0.3789644622802734, "step": 8100 }, { "epoch": 2.6181353767560664, "learning_rate": 1.650915283099191e-05, "loss": 0.38175716400146487, "step": 8200 }, { "epoch": 2.6500638569604087, "learning_rate": 1.646658152405279e-05, "loss": 0.3957417678833008, "step": 8300 }, { "epoch": 2.681992337164751, "learning_rate": 1.6424010217113665e-05, "loss": 0.3931695556640625, "step": 8400 }, { "epoch": 2.713920817369093, "learning_rate": 1.6381438910174543e-05, "loss": 0.3700098419189453, "step": 8500 }, { "epoch": 2.7458492975734354, "learning_rate": 1.6338867603235418e-05, "loss": 0.3986296463012695, "step": 8600 }, { "epoch": 2.7777777777777777, "learning_rate": 1.6296296296296297e-05, "loss": 0.377045783996582, "step": 8700 }, { "epoch": 2.80970625798212, "learning_rate": 1.6253724989357175e-05, "loss": 0.38455604553222655, "step": 8800 }, { "epoch": 2.841634738186462, "learning_rate": 1.6211153682418053e-05, "loss": 0.39248775482177733, "step": 8900 }, { "epoch": 2.873563218390805, "learning_rate": 1.6168582375478928e-05, "loss": 0.3952408599853516, "step": 9000 }, { "epoch": 2.9054916985951467, "learning_rate": 1.6126011068539807e-05, "loss": 0.38378463745117186, "step": 9100 }, { "epoch": 2.9374201787994894, "learning_rate": 1.608343976160068e-05, "loss": 0.3779494857788086, "step": 9200 }, { "epoch": 2.969348659003831, "learning_rate": 1.604086845466156e-05, "loss": 0.39154972076416017, "step": 9300 }, { "epoch": 3.0, "eval_loss": 0.3532629609107971, "step": 9396 }, { "epoch": 3.001277139208174, "learning_rate": 1.599829714772244e-05, "loss": 0.38505748748779295, "step": 9400 }, { "epoch": 3.033205619412516, "learning_rate": 1.5955725840783313e-05, "loss": 0.37334869384765623, "step": 9500 }, { "epoch": 3.0651340996168583, "learning_rate": 1.591315453384419e-05, "loss": 0.36475982666015627, "step": 9600 }, { "epoch": 3.0970625798212006, "learning_rate": 1.5870583226905067e-05, "loss": 0.3732810592651367, "step": 9700 }, { "epoch": 3.128991060025543, "learning_rate": 1.5828011919965945e-05, "loss": 0.36443687438964845, "step": 9800 }, { "epoch": 3.160919540229885, "learning_rate": 1.578544061302682e-05, "loss": 0.38134773254394533, "step": 9900 }, { "epoch": 3.1928480204342273, "learning_rate": 1.57428693060877e-05, "loss": 0.3660939407348633, "step": 10000 }, { "epoch": 3.2247765006385696, "learning_rate": 1.5700297999148573e-05, "loss": 0.38626991271972655, "step": 10100 }, { "epoch": 3.256704980842912, "learning_rate": 1.565772669220945e-05, "loss": 0.36326351165771487, "step": 10200 }, { "epoch": 3.288633461047254, "learning_rate": 1.561515538527033e-05, "loss": 0.3856014633178711, "step": 10300 }, { "epoch": 3.3205619412515963, "learning_rate": 1.5572584078331205e-05, "loss": 0.38314430236816405, "step": 10400 }, { "epoch": 3.3524904214559386, "learning_rate": 1.5530012771392083e-05, "loss": 0.3787594223022461, "step": 10500 }, { "epoch": 3.384418901660281, "learning_rate": 1.5487441464452958e-05, "loss": 0.37935165405273436, "step": 10600 }, { "epoch": 3.416347381864623, "learning_rate": 1.5444870157513837e-05, "loss": 0.3672695541381836, "step": 10700 }, { "epoch": 3.4482758620689653, "learning_rate": 1.540229885057471e-05, "loss": 0.3799928283691406, "step": 10800 }, { "epoch": 3.480204342273308, "learning_rate": 1.5359727543635593e-05, "loss": 0.381710090637207, "step": 10900 }, { "epoch": 3.51213282247765, "learning_rate": 1.531715623669647e-05, "loss": 0.35085960388183596, "step": 11000 }, { "epoch": 3.5440613026819925, "learning_rate": 1.5274584929757347e-05, "loss": 0.37504680633544923, "step": 11100 }, { "epoch": 3.5759897828863347, "learning_rate": 1.5232013622818223e-05, "loss": 0.36862171173095704, "step": 11200 }, { "epoch": 3.607918263090677, "learning_rate": 1.51894423158791e-05, "loss": 0.3727375793457031, "step": 11300 }, { "epoch": 3.6398467432950192, "learning_rate": 1.5146871008939977e-05, "loss": 0.37135467529296873, "step": 11400 }, { "epoch": 3.6717752234993615, "learning_rate": 1.5104299702000853e-05, "loss": 0.3693832778930664, "step": 11500 }, { "epoch": 3.7037037037037037, "learning_rate": 1.506172839506173e-05, "loss": 0.36289344787597655, "step": 11600 }, { "epoch": 3.735632183908046, "learning_rate": 1.5019157088122607e-05, "loss": 0.3692543411254883, "step": 11700 }, { "epoch": 3.767560664112388, "learning_rate": 1.4976585781183483e-05, "loss": 0.3623777770996094, "step": 11800 }, { "epoch": 3.7994891443167305, "learning_rate": 1.493401447424436e-05, "loss": 0.3629803848266602, "step": 11900 }, { "epoch": 3.8314176245210727, "learning_rate": 1.4891443167305237e-05, "loss": 0.3619497680664063, "step": 12000 }, { "epoch": 3.863346104725415, "learning_rate": 1.4848871860366115e-05, "loss": 0.35669734954833987, "step": 12100 }, { "epoch": 3.895274584929757, "learning_rate": 1.4806300553426992e-05, "loss": 0.359210205078125, "step": 12200 }, { "epoch": 3.9272030651340994, "learning_rate": 1.4763729246487868e-05, "loss": 0.3655078887939453, "step": 12300 }, { "epoch": 3.959131545338442, "learning_rate": 1.4721157939548745e-05, "loss": 0.3666878890991211, "step": 12400 }, { "epoch": 3.991060025542784, "learning_rate": 1.4678586632609622e-05, "loss": 0.3504248809814453, "step": 12500 }, { "epoch": 4.0, "eval_loss": 0.3464440107345581, "step": 12528 }, { "epoch": 4.022988505747127, "learning_rate": 1.4636015325670498e-05, "loss": 0.35299652099609374, "step": 12600 }, { "epoch": 4.054916985951468, "learning_rate": 1.4593444018731375e-05, "loss": 0.3569066619873047, "step": 12700 }, { "epoch": 4.086845466155811, "learning_rate": 1.4550872711792252e-05, "loss": 0.3493093490600586, "step": 12800 }, { "epoch": 4.118773946360153, "learning_rate": 1.4508301404853132e-05, "loss": 0.3634659957885742, "step": 12900 }, { "epoch": 4.150702426564496, "learning_rate": 1.4465730097914008e-05, "loss": 0.37093032836914064, "step": 13000 }, { "epoch": 4.182630906768837, "learning_rate": 1.4423158790974885e-05, "loss": 0.37300064086914064, "step": 13100 }, { "epoch": 4.21455938697318, "learning_rate": 1.4380587484035762e-05, "loss": 0.358408203125, "step": 13200 }, { "epoch": 4.246487867177523, "learning_rate": 1.4338016177096638e-05, "loss": 0.3569377517700195, "step": 13300 }, { "epoch": 4.278416347381865, "learning_rate": 1.4295444870157515e-05, "loss": 0.3640250015258789, "step": 13400 }, { "epoch": 4.310344827586207, "learning_rate": 1.4252873563218392e-05, "loss": 0.3725627517700195, "step": 13500 }, { "epoch": 4.342273307790549, "learning_rate": 1.421030225627927e-05, "loss": 0.3584425354003906, "step": 13600 }, { "epoch": 4.374201787994892, "learning_rate": 1.4167730949340147e-05, "loss": 0.37500988006591796, "step": 13700 }, { "epoch": 4.406130268199234, "learning_rate": 1.4125159642401023e-05, "loss": 0.35521396636962893, "step": 13800 }, { "epoch": 4.438058748403576, "learning_rate": 1.40825883354619e-05, "loss": 0.35183258056640626, "step": 13900 }, { "epoch": 4.469987228607918, "learning_rate": 1.4040017028522777e-05, "loss": 0.35960380554199217, "step": 14000 }, { "epoch": 4.501915708812261, "learning_rate": 1.3997445721583653e-05, "loss": 0.3571435546875, "step": 14100 }, { "epoch": 4.533844189016603, "learning_rate": 1.395487441464453e-05, "loss": 0.35867115020751955, "step": 14200 }, { "epoch": 4.565772669220945, "learning_rate": 1.3912303107705407e-05, "loss": 0.34017555236816405, "step": 14300 }, { "epoch": 4.597701149425287, "learning_rate": 1.3869731800766283e-05, "loss": 0.3735758209228516, "step": 14400 }, { "epoch": 4.62962962962963, "learning_rate": 1.3827160493827162e-05, "loss": 0.36705623626708983, "step": 14500 }, { "epoch": 4.661558109833972, "learning_rate": 1.3784589186888038e-05, "loss": 0.3525060272216797, "step": 14600 }, { "epoch": 4.693486590038314, "learning_rate": 1.3742017879948915e-05, "loss": 0.3783878326416016, "step": 14700 }, { "epoch": 4.725415070242656, "learning_rate": 1.3699446573009792e-05, "loss": 0.3659477615356445, "step": 14800 }, { "epoch": 4.757343550446999, "learning_rate": 1.365687526607067e-05, "loss": 0.3537181091308594, "step": 14900 }, { "epoch": 4.789272030651341, "learning_rate": 1.3614303959131547e-05, "loss": 0.36351833343505857, "step": 15000 }, { "epoch": 4.821200510855683, "learning_rate": 1.3571732652192425e-05, "loss": 0.3483928298950195, "step": 15100 }, { "epoch": 4.853128991060025, "learning_rate": 1.3529161345253302e-05, "loss": 0.35997581481933594, "step": 15200 }, { "epoch": 4.885057471264368, "learning_rate": 1.3486590038314178e-05, "loss": 0.36364505767822264, "step": 15300 }, { "epoch": 4.91698595146871, "learning_rate": 1.3444018731375055e-05, "loss": 0.3437914276123047, "step": 15400 }, { "epoch": 4.948914431673052, "learning_rate": 1.3401447424435932e-05, "loss": 0.3517444610595703, "step": 15500 }, { "epoch": 4.980842911877395, "learning_rate": 1.3358876117496808e-05, "loss": 0.3623521041870117, "step": 15600 }, { "epoch": 5.0, "eval_loss": 0.33480075001716614, "step": 15660 }, { "epoch": 5.012771392081737, "learning_rate": 1.3316304810557685e-05, "loss": 0.3527105712890625, "step": 15700 }, { "epoch": 5.044699872286079, "learning_rate": 1.3273733503618562e-05, "loss": 0.3470969009399414, "step": 15800 }, { "epoch": 5.076628352490421, "learning_rate": 1.3231162196679438e-05, "loss": 0.34361572265625, "step": 15900 }, { "epoch": 5.108556832694764, "learning_rate": 1.3188590889740317e-05, "loss": 0.35975650787353514, "step": 16000 }, { "epoch": 5.140485312899106, "learning_rate": 1.3146019582801193e-05, "loss": 0.34139663696289063, "step": 16100 }, { "epoch": 5.172413793103448, "learning_rate": 1.310344827586207e-05, "loss": 0.34681896209716795, "step": 16200 }, { "epoch": 5.20434227330779, "learning_rate": 1.3060876968922947e-05, "loss": 0.34409351348876954, "step": 16300 }, { "epoch": 5.236270753512133, "learning_rate": 1.3018305661983823e-05, "loss": 0.3398979949951172, "step": 16400 }, { "epoch": 5.268199233716475, "learning_rate": 1.29757343550447e-05, "loss": 0.3437363815307617, "step": 16500 }, { "epoch": 5.300127713920817, "learning_rate": 1.2933163048105577e-05, "loss": 0.33456321716308596, "step": 16600 }, { "epoch": 5.33205619412516, "learning_rate": 1.2890591741166453e-05, "loss": 0.3437419128417969, "step": 16700 }, { "epoch": 5.363984674329502, "learning_rate": 1.2848020434227333e-05, "loss": 0.3656898880004883, "step": 16800 }, { "epoch": 5.395913154533845, "learning_rate": 1.280544912728821e-05, "loss": 0.33624221801757814, "step": 16900 }, { "epoch": 5.427841634738186, "learning_rate": 1.2762877820349087e-05, "loss": 0.35640827178955076, "step": 17000 }, { "epoch": 5.459770114942529, "learning_rate": 1.2720306513409963e-05, "loss": 0.34689849853515625, "step": 17100 }, { "epoch": 5.491698595146871, "learning_rate": 1.267773520647084e-05, "loss": 0.3492275238037109, "step": 17200 }, { "epoch": 5.5236270753512136, "learning_rate": 1.2635163899531717e-05, "loss": 0.3490084457397461, "step": 17300 }, { "epoch": 5.555555555555555, "learning_rate": 1.2592592592592593e-05, "loss": 0.33231266021728517, "step": 17400 }, { "epoch": 5.587484035759898, "learning_rate": 1.2550021285653472e-05, "loss": 0.3501285552978516, "step": 17500 }, { "epoch": 5.61941251596424, "learning_rate": 1.2507449978714348e-05, "loss": 0.3469379425048828, "step": 17600 }, { "epoch": 5.6513409961685825, "learning_rate": 1.2464878671775225e-05, "loss": 0.3600951766967773, "step": 17700 }, { "epoch": 5.683269476372924, "learning_rate": 1.2422307364836102e-05, "loss": 0.3439628982543945, "step": 17800 }, { "epoch": 5.715197956577267, "learning_rate": 1.2379736057896978e-05, "loss": 0.34368560791015623, "step": 17900 }, { "epoch": 5.747126436781609, "learning_rate": 1.2337164750957855e-05, "loss": 0.3624436950683594, "step": 18000 }, { "epoch": 5.7790549169859515, "learning_rate": 1.2294593444018732e-05, "loss": 0.3493986129760742, "step": 18100 }, { "epoch": 5.810983397190293, "learning_rate": 1.2252022137079608e-05, "loss": 0.33745758056640623, "step": 18200 }, { "epoch": 5.842911877394636, "learning_rate": 1.2209450830140485e-05, "loss": 0.3514177703857422, "step": 18300 }, { "epoch": 5.874840357598979, "learning_rate": 1.2166879523201363e-05, "loss": 0.3409608459472656, "step": 18400 }, { "epoch": 5.9067688378033205, "learning_rate": 1.212430821626224e-05, "loss": 0.3416262054443359, "step": 18500 }, { "epoch": 5.938697318007663, "learning_rate": 1.2081736909323117e-05, "loss": 0.33906620025634765, "step": 18600 }, { "epoch": 5.970625798212005, "learning_rate": 1.2039165602383993e-05, "loss": 0.3565713119506836, "step": 18700 }, { "epoch": 6.0, "eval_loss": 0.32608646154403687, "step": 18792 }, { "epoch": 6.002554278416348, "learning_rate": 1.1996594295444872e-05, "loss": 0.3423441314697266, "step": 18800 }, { "epoch": 6.0344827586206895, "learning_rate": 1.1954022988505748e-05, "loss": 0.3439017105102539, "step": 18900 }, { "epoch": 6.066411238825032, "learning_rate": 1.1911451681566627e-05, "loss": 0.33277828216552735, "step": 19000 }, { "epoch": 6.098339719029374, "learning_rate": 1.1868880374627503e-05, "loss": 0.3349918365478516, "step": 19100 }, { "epoch": 6.130268199233717, "learning_rate": 1.182630906768838e-05, "loss": 0.3411783981323242, "step": 19200 }, { "epoch": 6.1621966794380585, "learning_rate": 1.1783737760749257e-05, "loss": 0.34938507080078124, "step": 19300 }, { "epoch": 6.194125159642401, "learning_rate": 1.1741166453810133e-05, "loss": 0.3302972412109375, "step": 19400 }, { "epoch": 6.226053639846743, "learning_rate": 1.169859514687101e-05, "loss": 0.3324479293823242, "step": 19500 }, { "epoch": 6.257982120051086, "learning_rate": 1.1656023839931887e-05, "loss": 0.3325730133056641, "step": 19600 }, { "epoch": 6.2899106002554275, "learning_rate": 1.1613452532992763e-05, "loss": 0.32390750885009767, "step": 19700 }, { "epoch": 6.32183908045977, "learning_rate": 1.157088122605364e-05, "loss": 0.3410587692260742, "step": 19800 }, { "epoch": 6.353767560664112, "learning_rate": 1.1528309919114518e-05, "loss": 0.33700084686279297, "step": 19900 }, { "epoch": 6.385696040868455, "learning_rate": 1.1485738612175395e-05, "loss": 0.343173942565918, "step": 20000 }, { "epoch": 6.417624521072797, "learning_rate": 1.1443167305236272e-05, "loss": 0.34580535888671876, "step": 20100 }, { "epoch": 6.449553001277139, "learning_rate": 1.1400595998297148e-05, "loss": 0.3282489395141602, "step": 20200 }, { "epoch": 6.481481481481482, "learning_rate": 1.1358024691358025e-05, "loss": 0.32179367065429687, "step": 20300 }, { "epoch": 6.513409961685824, "learning_rate": 1.1315453384418902e-05, "loss": 0.34018295288085937, "step": 20400 }, { "epoch": 6.545338441890166, "learning_rate": 1.1272882077479778e-05, "loss": 0.3345378494262695, "step": 20500 }, { "epoch": 6.577266922094508, "learning_rate": 1.1230310770540655e-05, "loss": 0.34897972106933595, "step": 20600 }, { "epoch": 6.609195402298851, "learning_rate": 1.1187739463601532e-05, "loss": 0.3494709014892578, "step": 20700 }, { "epoch": 6.641123882503193, "learning_rate": 1.1145168156662412e-05, "loss": 0.3467377090454102, "step": 20800 }, { "epoch": 6.673052362707535, "learning_rate": 1.1102596849723288e-05, "loss": 0.3386357116699219, "step": 20900 }, { "epoch": 6.704980842911877, "learning_rate": 1.1060025542784165e-05, "loss": 0.3346070098876953, "step": 21000 }, { "epoch": 6.73690932311622, "learning_rate": 1.1017454235845042e-05, "loss": 0.3389591598510742, "step": 21100 }, { "epoch": 6.768837803320562, "learning_rate": 1.0974882928905918e-05, "loss": 0.33276222229003904, "step": 21200 }, { "epoch": 6.800766283524904, "learning_rate": 1.0932311621966795e-05, "loss": 0.3417454528808594, "step": 21300 }, { "epoch": 6.832694763729246, "learning_rate": 1.0889740315027673e-05, "loss": 0.3352804183959961, "step": 21400 }, { "epoch": 6.864623243933589, "learning_rate": 1.084716900808855e-05, "loss": 0.341392822265625, "step": 21500 }, { "epoch": 6.896551724137931, "learning_rate": 1.0804597701149427e-05, "loss": 0.3257337188720703, "step": 21600 }, { "epoch": 6.928480204342273, "learning_rate": 1.0762026394210303e-05, "loss": 0.340169792175293, "step": 21700 }, { "epoch": 6.960408684546616, "learning_rate": 1.071945508727118e-05, "loss": 0.3393547821044922, "step": 21800 }, { "epoch": 6.992337164750958, "learning_rate": 1.0676883780332057e-05, "loss": 0.3345566940307617, "step": 21900 }, { "epoch": 7.0, "eval_loss": 0.3236748278141022, "step": 21924 }, { "epoch": 7.0242656449553005, "learning_rate": 1.0634312473392933e-05, "loss": 0.341474609375, "step": 22000 }, { "epoch": 7.056194125159642, "learning_rate": 1.059174116645381e-05, "loss": 0.33665103912353517, "step": 22100 }, { "epoch": 7.088122605363985, "learning_rate": 1.0549169859514687e-05, "loss": 0.33666282653808594, "step": 22200 }, { "epoch": 7.120051085568327, "learning_rate": 1.0506598552575565e-05, "loss": 0.32267608642578127, "step": 22300 }, { "epoch": 7.1519795657726695, "learning_rate": 1.0464027245636442e-05, "loss": 0.32230213165283206, "step": 22400 }, { "epoch": 7.183908045977011, "learning_rate": 1.0421455938697318e-05, "loss": 0.3335090637207031, "step": 22500 }, { "epoch": 7.215836526181354, "learning_rate": 1.0378884631758195e-05, "loss": 0.3402565002441406, "step": 22600 }, { "epoch": 7.247765006385696, "learning_rate": 1.0336313324819072e-05, "loss": 0.33222633361816406, "step": 22700 }, { "epoch": 7.2796934865900385, "learning_rate": 1.029374201787995e-05, "loss": 0.3273036956787109, "step": 22800 }, { "epoch": 7.31162196679438, "learning_rate": 1.0251170710940828e-05, "loss": 0.34040824890136717, "step": 22900 }, { "epoch": 7.343550446998723, "learning_rate": 1.0208599404001705e-05, "loss": 0.33351837158203124, "step": 23000 }, { "epoch": 7.375478927203065, "learning_rate": 1.0166028097062582e-05, "loss": 0.3304040145874023, "step": 23100 }, { "epoch": 7.407407407407407, "learning_rate": 1.0123456790123458e-05, "loss": 0.3389059829711914, "step": 23200 }, { "epoch": 7.439335887611749, "learning_rate": 1.0080885483184335e-05, "loss": 0.35038864135742187, "step": 23300 }, { "epoch": 7.471264367816092, "learning_rate": 1.0038314176245212e-05, "loss": 0.32688159942626954, "step": 23400 }, { "epoch": 7.503192848020435, "learning_rate": 9.995742869306088e-06, "loss": 0.3443561935424805, "step": 23500 }, { "epoch": 7.535121328224776, "learning_rate": 9.953171562366965e-06, "loss": 0.33195884704589845, "step": 23600 }, { "epoch": 7.567049808429119, "learning_rate": 9.910600255427842e-06, "loss": 0.3439883041381836, "step": 23700 }, { "epoch": 7.598978288633461, "learning_rate": 9.86802894848872e-06, "loss": 0.32301868438720704, "step": 23800 }, { "epoch": 7.630906768837804, "learning_rate": 9.825457641549597e-06, "loss": 0.3437799072265625, "step": 23900 }, { "epoch": 7.662835249042145, "learning_rate": 9.782886334610473e-06, "loss": 0.3296714401245117, "step": 24000 }, { "epoch": 7.694763729246488, "learning_rate": 9.74031502767135e-06, "loss": 0.3370498275756836, "step": 24100 }, { "epoch": 7.72669220945083, "learning_rate": 9.697743720732228e-06, "loss": 0.3219729232788086, "step": 24200 }, { "epoch": 7.758620689655173, "learning_rate": 9.655172413793105e-06, "loss": 0.3258438491821289, "step": 24300 }, { "epoch": 7.790549169859514, "learning_rate": 9.612601106853982e-06, "loss": 0.33826839447021484, "step": 24400 }, { "epoch": 7.822477650063857, "learning_rate": 9.570029799914858e-06, "loss": 0.3263700866699219, "step": 24500 }, { "epoch": 7.854406130268199, "learning_rate": 9.527458492975735e-06, "loss": 0.3350722122192383, "step": 24600 }, { "epoch": 7.886334610472542, "learning_rate": 9.484887186036612e-06, "loss": 0.334184455871582, "step": 24700 }, { "epoch": 7.918263090676884, "learning_rate": 9.442315879097488e-06, "loss": 0.3267938232421875, "step": 24800 }, { "epoch": 7.950191570881226, "learning_rate": 9.399744572158365e-06, "loss": 0.3196305465698242, "step": 24900 }, { "epoch": 7.982120051085568, "learning_rate": 9.357173265219243e-06, "loss": 0.3230043029785156, "step": 25000 }, { "epoch": 8.0, "eval_loss": 0.32138851284980774, "step": 25056 }, { "epoch": 8.01404853128991, "learning_rate": 9.31460195828012e-06, "loss": 0.3258121109008789, "step": 25100 }, { "epoch": 8.045977011494253, "learning_rate": 9.272030651340997e-06, "loss": 0.32868377685546873, "step": 25200 }, { "epoch": 8.077905491698596, "learning_rate": 9.229459344401875e-06, "loss": 0.31930324554443357, "step": 25300 }, { "epoch": 8.109833971902937, "learning_rate": 9.186888037462752e-06, "loss": 0.3216040420532227, "step": 25400 }, { "epoch": 8.14176245210728, "learning_rate": 9.144316730523628e-06, "loss": 0.3392459869384766, "step": 25500 }, { "epoch": 8.173690932311622, "learning_rate": 9.101745423584505e-06, "loss": 0.3244002914428711, "step": 25600 }, { "epoch": 8.205619412515965, "learning_rate": 9.059174116645382e-06, "loss": 0.329796142578125, "step": 25700 }, { "epoch": 8.237547892720306, "learning_rate": 9.016602809706258e-06, "loss": 0.310886344909668, "step": 25800 }, { "epoch": 8.269476372924649, "learning_rate": 8.974031502767135e-06, "loss": 0.3182815361022949, "step": 25900 }, { "epoch": 8.301404853128991, "learning_rate": 8.931460195828012e-06, "loss": 0.3267630386352539, "step": 26000 }, { "epoch": 8.333333333333334, "learning_rate": 8.888888888888888e-06, "loss": 0.31576622009277344, "step": 26100 }, { "epoch": 8.365261813537675, "learning_rate": 8.846317581949767e-06, "loss": 0.33119094848632813, "step": 26200 }, { "epoch": 8.397190293742018, "learning_rate": 8.803746275010643e-06, "loss": 0.3301513671875, "step": 26300 }, { "epoch": 8.42911877394636, "learning_rate": 8.76117496807152e-06, "loss": 0.3321194839477539, "step": 26400 }, { "epoch": 8.461047254150703, "learning_rate": 8.718603661132398e-06, "loss": 0.332429313659668, "step": 26500 }, { "epoch": 8.492975734355046, "learning_rate": 8.676032354193275e-06, "loss": 0.3258559036254883, "step": 26600 }, { "epoch": 8.524904214559387, "learning_rate": 8.633461047254152e-06, "loss": 0.32335933685302737, "step": 26700 }, { "epoch": 8.55683269476373, "learning_rate": 8.590889740315028e-06, "loss": 0.3441028594970703, "step": 26800 }, { "epoch": 8.588761174968072, "learning_rate": 8.548318433375905e-06, "loss": 0.3302944564819336, "step": 26900 }, { "epoch": 8.620689655172415, "learning_rate": 8.505747126436782e-06, "loss": 0.3203293228149414, "step": 27000 }, { "epoch": 8.652618135376756, "learning_rate": 8.463175819497658e-06, "loss": 0.3180072593688965, "step": 27100 }, { "epoch": 8.684546615581098, "learning_rate": 8.420604512558537e-06, "loss": 0.3143235015869141, "step": 27200 }, { "epoch": 8.71647509578544, "learning_rate": 8.378033205619413e-06, "loss": 0.31695037841796875, "step": 27300 }, { "epoch": 8.748403575989784, "learning_rate": 8.33546189868029e-06, "loss": 0.31933542251586916, "step": 27400 }, { "epoch": 8.780332056194124, "learning_rate": 8.292890591741167e-06, "loss": 0.3094404983520508, "step": 27500 }, { "epoch": 8.812260536398467, "learning_rate": 8.250319284802043e-06, "loss": 0.3251906204223633, "step": 27600 }, { "epoch": 8.84418901660281, "learning_rate": 8.207747977862922e-06, "loss": 0.31949323654174805, "step": 27700 }, { "epoch": 8.876117496807153, "learning_rate": 8.165176670923798e-06, "loss": 0.32407459259033206, "step": 27800 }, { "epoch": 8.908045977011493, "learning_rate": 8.122605363984675e-06, "loss": 0.34080780029296875, "step": 27900 }, { "epoch": 8.939974457215836, "learning_rate": 8.080034057045552e-06, "loss": 0.3133597183227539, "step": 28000 }, { "epoch": 8.971902937420179, "learning_rate": 8.037462750106428e-06, "loss": 0.31053606033325193, "step": 28100 }, { "epoch": 9.0, "eval_loss": 0.3119257688522339, "step": 28188 }, { "epoch": 9.003831417624522, "learning_rate": 7.994891443167307e-06, "loss": 0.3158924293518066, "step": 28200 }, { "epoch": 9.035759897828862, "learning_rate": 7.952320136228183e-06, "loss": 0.3117160987854004, "step": 28300 }, { "epoch": 9.067688378033205, "learning_rate": 7.90974882928906e-06, "loss": 0.32216869354248046, "step": 28400 }, { "epoch": 9.099616858237548, "learning_rate": 7.867177522349937e-06, "loss": 0.314890079498291, "step": 28500 }, { "epoch": 9.13154533844189, "learning_rate": 7.824606215410813e-06, "loss": 0.3113277626037598, "step": 28600 }, { "epoch": 9.163473818646233, "learning_rate": 7.78203490847169e-06, "loss": 0.324998779296875, "step": 28700 }, { "epoch": 9.195402298850574, "learning_rate": 7.739463601532567e-06, "loss": 0.32509784698486327, "step": 28800 }, { "epoch": 9.227330779054917, "learning_rate": 7.696892294593445e-06, "loss": 0.3238474273681641, "step": 28900 }, { "epoch": 9.25925925925926, "learning_rate": 7.654320987654322e-06, "loss": 0.33168052673339843, "step": 29000 }, { "epoch": 9.291187739463602, "learning_rate": 7.611749680715198e-06, "loss": 0.3050485992431641, "step": 29100 }, { "epoch": 9.323116219667943, "learning_rate": 7.569178373776076e-06, "loss": 0.31826154708862303, "step": 29200 }, { "epoch": 9.355044699872286, "learning_rate": 7.5266070668369525e-06, "loss": 0.31972583770751956, "step": 29300 }, { "epoch": 9.386973180076629, "learning_rate": 7.48403575989783e-06, "loss": 0.3287076568603516, "step": 29400 }, { "epoch": 9.418901660280971, "learning_rate": 7.441464452958707e-06, "loss": 0.30761892318725587, "step": 29500 }, { "epoch": 9.450830140485312, "learning_rate": 7.398893146019583e-06, "loss": 0.3097171401977539, "step": 29600 }, { "epoch": 9.482758620689655, "learning_rate": 7.35632183908046e-06, "loss": 0.30806644439697267, "step": 29700 }, { "epoch": 9.514687100893997, "learning_rate": 7.3137505321413375e-06, "loss": 0.3174296760559082, "step": 29800 }, { "epoch": 9.54661558109834, "learning_rate": 7.271179225202214e-06, "loss": 0.31353973388671874, "step": 29900 }, { "epoch": 9.578544061302683, "learning_rate": 7.228607918263091e-06, "loss": 0.30578601837158204, "step": 30000 }, { "epoch": 9.610472541507024, "learning_rate": 7.1860366113239675e-06, "loss": 0.3045210838317871, "step": 30100 }, { "epoch": 9.642401021711366, "learning_rate": 7.143465304384846e-06, "loss": 0.3149559211730957, "step": 30200 }, { "epoch": 9.67432950191571, "learning_rate": 7.1008939974457225e-06, "loss": 0.3207520294189453, "step": 30300 }, { "epoch": 9.706257982120052, "learning_rate": 7.058322690506599e-06, "loss": 0.32308254241943357, "step": 30400 }, { "epoch": 9.738186462324393, "learning_rate": 7.015751383567476e-06, "loss": 0.3183433723449707, "step": 30500 }, { "epoch": 9.770114942528735, "learning_rate": 6.973180076628353e-06, "loss": 0.3200767135620117, "step": 30600 }, { "epoch": 9.802043422733078, "learning_rate": 6.93060876968923e-06, "loss": 0.3113987922668457, "step": 30700 }, { "epoch": 9.83397190293742, "learning_rate": 6.888037462750107e-06, "loss": 0.33167327880859376, "step": 30800 }, { "epoch": 9.865900383141762, "learning_rate": 6.845466155810983e-06, "loss": 0.3172581100463867, "step": 30900 }, { "epoch": 9.897828863346104, "learning_rate": 6.802894848871861e-06, "loss": 0.3124402046203613, "step": 31000 }, { "epoch": 9.929757343550447, "learning_rate": 6.760323541932738e-06, "loss": 0.30687171936035157, "step": 31100 }, { "epoch": 9.96168582375479, "learning_rate": 6.717752234993615e-06, "loss": 0.304738712310791, "step": 31200 }, { "epoch": 9.99361430395913, "learning_rate": 6.6751809280544925e-06, "loss": 0.3069480514526367, "step": 31300 }, { "epoch": 10.0, "eval_loss": 0.31222018599510193, "step": 31320 }, { "epoch": 10.025542784163473, "learning_rate": 6.632609621115369e-06, "loss": 0.30982017517089844, "step": 31400 }, { "epoch": 10.057471264367816, "learning_rate": 6.590038314176246e-06, "loss": 0.29576118469238283, "step": 31500 }, { "epoch": 10.089399744572159, "learning_rate": 6.5474670072371225e-06, "loss": 0.318297004699707, "step": 31600 }, { "epoch": 10.121328224776502, "learning_rate": 6.504895700297999e-06, "loss": 0.2947650337219238, "step": 31700 }, { "epoch": 10.153256704980842, "learning_rate": 6.462324393358877e-06, "loss": 0.31912431716918943, "step": 31800 }, { "epoch": 10.185185185185185, "learning_rate": 6.419753086419753e-06, "loss": 0.31549993515014646, "step": 31900 }, { "epoch": 10.217113665389528, "learning_rate": 6.37718177948063e-06, "loss": 0.3185459327697754, "step": 32000 }, { "epoch": 10.24904214559387, "learning_rate": 6.334610472541508e-06, "loss": 0.3061997413635254, "step": 32100 }, { "epoch": 10.280970625798211, "learning_rate": 6.292039165602385e-06, "loss": 0.30356922149658205, "step": 32200 }, { "epoch": 10.312899106002554, "learning_rate": 6.249467858663262e-06, "loss": 0.30677152633666993, "step": 32300 }, { "epoch": 10.344827586206897, "learning_rate": 6.206896551724138e-06, "loss": 0.3148806190490723, "step": 32400 }, { "epoch": 10.37675606641124, "learning_rate": 6.164325244785016e-06, "loss": 0.3114926528930664, "step": 32500 }, { "epoch": 10.40868454661558, "learning_rate": 6.1217539378458925e-06, "loss": 0.31096630096435546, "step": 32600 }, { "epoch": 10.440613026819923, "learning_rate": 6.079182630906769e-06, "loss": 0.3131294822692871, "step": 32700 }, { "epoch": 10.472541507024266, "learning_rate": 6.036611323967646e-06, "loss": 0.31513975143432615, "step": 32800 }, { "epoch": 10.504469987228608, "learning_rate": 5.9940400170285225e-06, "loss": 0.3035664939880371, "step": 32900 }, { "epoch": 10.53639846743295, "learning_rate": 5.9514687100894e-06, "loss": 0.30514934539794925, "step": 33000 }, { "epoch": 10.568326947637292, "learning_rate": 5.9088974031502775e-06, "loss": 0.3236639404296875, "step": 33100 }, { "epoch": 10.600255427841635, "learning_rate": 5.866326096211154e-06, "loss": 0.32729095458984375, "step": 33200 }, { "epoch": 10.632183908045977, "learning_rate": 5.823754789272032e-06, "loss": 0.30390745162963867, "step": 33300 }, { "epoch": 10.66411238825032, "learning_rate": 5.781183482332908e-06, "loss": 0.30320255279541014, "step": 33400 }, { "epoch": 10.696040868454661, "learning_rate": 5.738612175393785e-06, "loss": 0.31617319107055664, "step": 33500 }, { "epoch": 10.727969348659004, "learning_rate": 5.696040868454662e-06, "loss": 0.31349088668823244, "step": 33600 }, { "epoch": 10.759897828863346, "learning_rate": 5.653469561515539e-06, "loss": 0.3055162620544434, "step": 33700 }, { "epoch": 10.79182630906769, "learning_rate": 5.610898254576416e-06, "loss": 0.29683116912841795, "step": 33800 }, { "epoch": 10.82375478927203, "learning_rate": 5.5683269476372925e-06, "loss": 0.2972592926025391, "step": 33900 }, { "epoch": 10.855683269476373, "learning_rate": 5.525755640698169e-06, "loss": 0.30813514709472656, "step": 34000 }, { "epoch": 10.887611749680715, "learning_rate": 5.4831843337590475e-06, "loss": 0.311496696472168, "step": 34100 }, { "epoch": 10.919540229885058, "learning_rate": 5.440613026819924e-06, "loss": 0.3147770881652832, "step": 34200 }, { "epoch": 10.951468710089399, "learning_rate": 5.398041719880801e-06, "loss": 0.2977629852294922, "step": 34300 }, { "epoch": 10.983397190293742, "learning_rate": 5.3554704129416775e-06, "loss": 0.30722129821777344, "step": 34400 }, { "epoch": 11.0, "eval_loss": 0.30927398800849915, "step": 34452 }, { "epoch": 11.015325670498084, "learning_rate": 5.312899106002555e-06, "loss": 0.30775304794311525, "step": 34500 }, { "epoch": 11.047254150702427, "learning_rate": 5.270327799063432e-06, "loss": 0.31844793319702147, "step": 34600 }, { "epoch": 11.079182630906768, "learning_rate": 5.227756492124308e-06, "loss": 0.3163930511474609, "step": 34700 }, { "epoch": 11.11111111111111, "learning_rate": 5.185185185185185e-06, "loss": 0.3178179359436035, "step": 34800 }, { "epoch": 11.143039591315453, "learning_rate": 5.1426138782460625e-06, "loss": 0.3107015609741211, "step": 34900 }, { "epoch": 11.174968071519796, "learning_rate": 5.100042571306939e-06, "loss": 0.31198001861572267, "step": 35000 }, { "epoch": 11.206896551724139, "learning_rate": 5.057471264367817e-06, "loss": 0.30287263870239256, "step": 35100 }, { "epoch": 11.23882503192848, "learning_rate": 5.014899957428694e-06, "loss": 0.3046586036682129, "step": 35200 }, { "epoch": 11.270753512132822, "learning_rate": 4.972328650489571e-06, "loss": 0.29926385879516604, "step": 35300 }, { "epoch": 11.302681992337165, "learning_rate": 4.9297573435504475e-06, "loss": 0.31370662689208983, "step": 35400 }, { "epoch": 11.334610472541508, "learning_rate": 4.887186036611324e-06, "loss": 0.3177505874633789, "step": 35500 }, { "epoch": 11.366538952745849, "learning_rate": 4.844614729672202e-06, "loss": 0.3028862190246582, "step": 35600 }, { "epoch": 11.398467432950191, "learning_rate": 4.802043422733078e-06, "loss": 0.2965104293823242, "step": 35700 }, { "epoch": 11.430395913154534, "learning_rate": 4.759472115793956e-06, "loss": 0.31520273208618166, "step": 35800 }, { "epoch": 11.462324393358877, "learning_rate": 4.7169008088548325e-06, "loss": 0.2961687469482422, "step": 35900 }, { "epoch": 11.494252873563218, "learning_rate": 4.674329501915709e-06, "loss": 0.30442037582397463, "step": 36000 }, { "epoch": 11.52618135376756, "learning_rate": 4.631758194976586e-06, "loss": 0.3178094863891602, "step": 36100 }, { "epoch": 11.558109833971903, "learning_rate": 4.589186888037463e-06, "loss": 0.3036604118347168, "step": 36200 }, { "epoch": 11.590038314176246, "learning_rate": 4.54661558109834e-06, "loss": 0.3115557861328125, "step": 36300 }, { "epoch": 11.621966794380587, "learning_rate": 4.5040442741592175e-06, "loss": 0.30345108032226564, "step": 36400 }, { "epoch": 11.65389527458493, "learning_rate": 4.461472967220094e-06, "loss": 0.30634918212890627, "step": 36500 }, { "epoch": 11.685823754789272, "learning_rate": 4.418901660280971e-06, "loss": 0.3051659774780273, "step": 36600 }, { "epoch": 11.717752234993615, "learning_rate": 4.3763303533418475e-06, "loss": 0.3069002342224121, "step": 36700 }, { "epoch": 11.749680715197957, "learning_rate": 4.333759046402725e-06, "loss": 0.30947404861450195, "step": 36800 }, { "epoch": 11.781609195402298, "learning_rate": 4.291187739463602e-06, "loss": 0.3073232650756836, "step": 36900 }, { "epoch": 11.813537675606641, "learning_rate": 4.248616432524479e-06, "loss": 0.3001542472839355, "step": 37000 }, { "epoch": 11.845466155810984, "learning_rate": 4.206045125585356e-06, "loss": 0.31570695877075194, "step": 37100 }, { "epoch": 11.877394636015326, "learning_rate": 4.1634738186462325e-06, "loss": 0.311122932434082, "step": 37200 }, { "epoch": 11.909323116219667, "learning_rate": 4.12090251170711e-06, "loss": 0.301647891998291, "step": 37300 }, { "epoch": 11.94125159642401, "learning_rate": 4.078331204767987e-06, "loss": 0.3205462646484375, "step": 37400 }, { "epoch": 11.973180076628353, "learning_rate": 4.035759897828863e-06, "loss": 0.29651784896850586, "step": 37500 }, { "epoch": 12.0, "eval_loss": 0.3090454041957855, "step": 37584 }, { "epoch": 12.005108556832695, "learning_rate": 3.993188590889741e-06, "loss": 0.31057783126831057, "step": 37600 }, { "epoch": 12.037037037037036, "learning_rate": 3.9506172839506175e-06, "loss": 0.305908145904541, "step": 37700 }, { "epoch": 12.068965517241379, "learning_rate": 3.908045977011495e-06, "loss": 0.3145115280151367, "step": 37800 }, { "epoch": 12.100893997445722, "learning_rate": 3.865474670072372e-06, "loss": 0.3086430168151855, "step": 37900 }, { "epoch": 12.132822477650064, "learning_rate": 3.822903363133248e-06, "loss": 0.3069817733764648, "step": 38000 }, { "epoch": 12.164750957854405, "learning_rate": 3.7803320561941254e-06, "loss": 0.3044874954223633, "step": 38100 }, { "epoch": 12.196679438058748, "learning_rate": 3.737760749255002e-06, "loss": 0.3003558731079102, "step": 38200 }, { "epoch": 12.22860791826309, "learning_rate": 3.6951894423158796e-06, "loss": 0.2989999961853027, "step": 38300 }, { "epoch": 12.260536398467433, "learning_rate": 3.6526181353767567e-06, "loss": 0.30233255386352537, "step": 38400 }, { "epoch": 12.292464878671776, "learning_rate": 3.6100468284376333e-06, "loss": 0.30123531341552734, "step": 38500 }, { "epoch": 12.324393358876117, "learning_rate": 3.56747552149851e-06, "loss": 0.3095419502258301, "step": 38600 }, { "epoch": 12.35632183908046, "learning_rate": 3.524904214559387e-06, "loss": 0.32312957763671873, "step": 38700 }, { "epoch": 12.388250319284802, "learning_rate": 3.4823329076202646e-06, "loss": 0.31157236099243163, "step": 38800 }, { "epoch": 12.420178799489145, "learning_rate": 3.4397616006811412e-06, "loss": 0.3003998374938965, "step": 38900 }, { "epoch": 12.452107279693486, "learning_rate": 3.3971902937420183e-06, "loss": 0.3086379051208496, "step": 39000 }, { "epoch": 12.484035759897829, "learning_rate": 3.354618986802895e-06, "loss": 0.28147794723510744, "step": 39100 }, { "epoch": 12.515964240102171, "learning_rate": 3.3120476798637717e-06, "loss": 0.29010528564453125, "step": 39200 }, { "epoch": 12.547892720306514, "learning_rate": 3.269476372924649e-06, "loss": 0.30009984970092773, "step": 39300 }, { "epoch": 12.579821200510855, "learning_rate": 3.2269050659855262e-06, "loss": 0.3059814834594727, "step": 39400 }, { "epoch": 12.611749680715198, "learning_rate": 3.184333759046403e-06, "loss": 0.2934641456604004, "step": 39500 }, { "epoch": 12.64367816091954, "learning_rate": 3.14176245210728e-06, "loss": 0.30141252517700196, "step": 39600 }, { "epoch": 12.675606641123883, "learning_rate": 3.0991911451681567e-06, "loss": 0.3053057289123535, "step": 39700 }, { "epoch": 12.707535121328224, "learning_rate": 3.056619838229034e-06, "loss": 0.2935024261474609, "step": 39800 }, { "epoch": 12.739463601532567, "learning_rate": 3.014048531289911e-06, "loss": 0.2974138069152832, "step": 39900 }, { "epoch": 12.77139208173691, "learning_rate": 2.971477224350788e-06, "loss": 0.29385158538818357, "step": 40000 }, { "epoch": 12.803320561941252, "learning_rate": 2.9289059174116646e-06, "loss": 0.309177303314209, "step": 40100 }, { "epoch": 12.835249042145595, "learning_rate": 2.8863346104725417e-06, "loss": 0.30561391830444334, "step": 40200 }, { "epoch": 12.867177522349936, "learning_rate": 2.8437633035334187e-06, "loss": 0.30923063278198243, "step": 40300 }, { "epoch": 12.899106002554278, "learning_rate": 2.801191996594296e-06, "loss": 0.30836896896362304, "step": 40400 }, { "epoch": 12.931034482758621, "learning_rate": 2.7586206896551725e-06, "loss": 0.31578615188598635, "step": 40500 }, { "epoch": 12.962962962962964, "learning_rate": 2.7160493827160496e-06, "loss": 0.31291526794433594, "step": 40600 }, { "epoch": 12.994891443167305, "learning_rate": 2.6734780757769262e-06, "loss": 0.3064906311035156, "step": 40700 }, { "epoch": 13.0, "eval_loss": 0.30278804898262024, "step": 40716 }, { "epoch": 13.026819923371647, "learning_rate": 2.6309067688378037e-06, "loss": 0.2991274642944336, "step": 40800 }, { "epoch": 13.05874840357599, "learning_rate": 2.5883354618986804e-06, "loss": 0.3081726837158203, "step": 40900 }, { "epoch": 13.090676883780333, "learning_rate": 2.5457641549595575e-06, "loss": 0.28322860717773435, "step": 41000 }, { "epoch": 13.122605363984674, "learning_rate": 2.503192848020434e-06, "loss": 0.3077671813964844, "step": 41100 }, { "epoch": 13.154533844189016, "learning_rate": 2.4606215410813112e-06, "loss": 0.3038086128234863, "step": 41200 }, { "epoch": 13.186462324393359, "learning_rate": 2.4180502341421883e-06, "loss": 0.3015581703186035, "step": 41300 }, { "epoch": 13.218390804597702, "learning_rate": 2.3754789272030654e-06, "loss": 0.30243860244750975, "step": 41400 }, { "epoch": 13.250319284802043, "learning_rate": 2.332907620263942e-06, "loss": 0.3104331398010254, "step": 41500 }, { "epoch": 13.282247765006385, "learning_rate": 2.290336313324819e-06, "loss": 0.30330204010009765, "step": 41600 }, { "epoch": 13.314176245210728, "learning_rate": 2.2477650063856962e-06, "loss": 0.2972829818725586, "step": 41700 }, { "epoch": 13.34610472541507, "learning_rate": 2.205193699446573e-06, "loss": 0.300672607421875, "step": 41800 }, { "epoch": 13.378033205619413, "learning_rate": 2.1626223925074504e-06, "loss": 0.30183706283569334, "step": 41900 }, { "epoch": 13.409961685823754, "learning_rate": 2.120051085568327e-06, "loss": 0.3096357536315918, "step": 42000 }, { "epoch": 13.441890166028097, "learning_rate": 2.077479778629204e-06, "loss": 0.3032659912109375, "step": 42100 }, { "epoch": 13.47381864623244, "learning_rate": 2.0349084716900813e-06, "loss": 0.2949547386169434, "step": 42200 }, { "epoch": 13.505747126436782, "learning_rate": 1.992337164750958e-06, "loss": 0.2889937973022461, "step": 42300 }, { "epoch": 13.537675606641123, "learning_rate": 1.949765857811835e-06, "loss": 0.30804216384887695, "step": 42400 }, { "epoch": 13.569604086845466, "learning_rate": 1.9071945508727119e-06, "loss": 0.30334890365600586, "step": 42500 }, { "epoch": 13.601532567049809, "learning_rate": 1.864623243933589e-06, "loss": 0.2941057586669922, "step": 42600 }, { "epoch": 13.633461047254151, "learning_rate": 1.8220519369944658e-06, "loss": 0.2950305938720703, "step": 42700 }, { "epoch": 13.665389527458492, "learning_rate": 1.7794806300553427e-06, "loss": 0.3061481285095215, "step": 42800 }, { "epoch": 13.697318007662835, "learning_rate": 1.7369093231162198e-06, "loss": 0.2870841217041016, "step": 42900 }, { "epoch": 13.729246487867178, "learning_rate": 1.6943380161770967e-06, "loss": 0.28978254318237306, "step": 43000 }, { "epoch": 13.76117496807152, "learning_rate": 1.6517667092379737e-06, "loss": 0.30341114044189454, "step": 43100 }, { "epoch": 13.793103448275861, "learning_rate": 1.6091954022988506e-06, "loss": 0.307810001373291, "step": 43200 }, { "epoch": 13.825031928480204, "learning_rate": 1.5666240953597275e-06, "loss": 0.30963399887084964, "step": 43300 }, { "epoch": 13.856960408684547, "learning_rate": 1.5240527884206046e-06, "loss": 0.3031142807006836, "step": 43400 }, { "epoch": 13.88888888888889, "learning_rate": 1.4814814814814815e-06, "loss": 0.3144196891784668, "step": 43500 }, { "epoch": 13.920817369093232, "learning_rate": 1.4389101745423588e-06, "loss": 0.28602792739868166, "step": 43600 }, { "epoch": 13.952745849297573, "learning_rate": 1.3963388676032354e-06, "loss": 0.2984015083312988, "step": 43700 }, { "epoch": 13.984674329501916, "learning_rate": 1.3537675606641127e-06, "loss": 0.29513154983520506, "step": 43800 }, { "epoch": 14.0, "eval_loss": 0.3065377175807953, "step": 43848 }, { "epoch": 14.016602809706258, "learning_rate": 1.3111962537249896e-06, "loss": 0.28543767929077146, "step": 43900 }, { "epoch": 14.048531289910601, "learning_rate": 1.2686249467858662e-06, "loss": 0.2963153839111328, "step": 44000 }, { "epoch": 14.080459770114942, "learning_rate": 1.2260536398467433e-06, "loss": 0.3030729293823242, "step": 44100 }, { "epoch": 14.112388250319285, "learning_rate": 1.1834823329076204e-06, "loss": 0.2889740180969238, "step": 44200 }, { "epoch": 14.144316730523627, "learning_rate": 1.1409110259684973e-06, "loss": 0.2915242385864258, "step": 44300 }, { "epoch": 14.17624521072797, "learning_rate": 1.0983397190293744e-06, "loss": 0.28075174331665037, "step": 44400 }, { "epoch": 14.20817369093231, "learning_rate": 1.0557684120902512e-06, "loss": 0.3046562194824219, "step": 44500 }, { "epoch": 14.240102171136654, "learning_rate": 1.0131971051511283e-06, "loss": 0.28045236587524414, "step": 44600 }, { "epoch": 14.272030651340996, "learning_rate": 9.706257982120052e-07, "loss": 0.2948748970031738, "step": 44700 }, { "epoch": 14.303959131545339, "learning_rate": 9.280544912728822e-07, "loss": 0.2956666946411133, "step": 44800 }, { "epoch": 14.33588761174968, "learning_rate": 8.854831843337592e-07, "loss": 0.2919887924194336, "step": 44900 }, { "epoch": 14.367816091954023, "learning_rate": 8.429118773946361e-07, "loss": 0.29414264678955077, "step": 45000 }, { "epoch": 14.399744572158365, "learning_rate": 8.003405704555131e-07, "loss": 0.2997250938415527, "step": 45100 }, { "epoch": 14.431673052362708, "learning_rate": 7.5776926351639e-07, "loss": 0.2927609634399414, "step": 45200 }, { "epoch": 14.46360153256705, "learning_rate": 7.15197956577267e-07, "loss": 0.30317195892333987, "step": 45300 }, { "epoch": 14.495530012771392, "learning_rate": 6.72626649638144e-07, "loss": 0.29728498458862307, "step": 45400 }, { "epoch": 14.527458492975734, "learning_rate": 6.300553426990209e-07, "loss": 0.2906969451904297, "step": 45500 }, { "epoch": 14.559386973180077, "learning_rate": 5.874840357598978e-07, "loss": 0.3137422752380371, "step": 45600 }, { "epoch": 14.59131545338442, "learning_rate": 5.449127288207749e-07, "loss": 0.3111775016784668, "step": 45700 }, { "epoch": 14.62324393358876, "learning_rate": 5.023414218816518e-07, "loss": 0.30036439895629885, "step": 45800 }, { "epoch": 14.655172413793103, "learning_rate": 4.5977011494252875e-07, "loss": 0.2988995361328125, "step": 45900 }, { "epoch": 14.687100893997446, "learning_rate": 4.171988080034058e-07, "loss": 0.306041259765625, "step": 46000 }, { "epoch": 14.719029374201789, "learning_rate": 3.7462750106428265e-07, "loss": 0.30521770477294924, "step": 46100 }, { "epoch": 14.75095785440613, "learning_rate": 3.320561941251597e-07, "loss": 0.28645307540893555, "step": 46200 }, { "epoch": 14.782886334610472, "learning_rate": 2.894848871860366e-07, "loss": 0.29830142974853513, "step": 46300 }, { "epoch": 14.814814814814815, "learning_rate": 2.469135802469136e-07, "loss": 0.29040414810180665, "step": 46400 }, { "epoch": 14.846743295019158, "learning_rate": 2.0434227330779057e-07, "loss": 0.2852139472961426, "step": 46500 }, { "epoch": 14.878671775223498, "learning_rate": 1.6177096636866753e-07, "loss": 0.291912841796875, "step": 46600 }, { "epoch": 14.910600255427841, "learning_rate": 1.1919965942954449e-07, "loss": 0.2825672149658203, "step": 46700 }, { "epoch": 14.942528735632184, "learning_rate": 7.662835249042146e-08, "loss": 0.3074253273010254, "step": 46800 } ], "max_steps": 46980, "num_train_epochs": 15, "total_flos": 76612640706201600, "trial_name": null, "trial_params": null }