{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9998841193901369, "global_step": 302000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.9983445627162415e-05, "loss": 2.7723, "step": 100 }, { "epoch": 0.0, "learning_rate": 4.9966891254324834e-05, "loss": 2.7712, "step": 200 }, { "epoch": 0.0, "learning_rate": 4.9950336881487246e-05, "loss": 2.7831, "step": 300 }, { "epoch": 0.0, "learning_rate": 4.9933782508649665e-05, "loss": 2.7613, "step": 400 }, { "epoch": 0.0, "learning_rate": 4.991722813581208e-05, "loss": 2.7686, "step": 500 }, { "epoch": 0.0, "learning_rate": 4.990067376297449e-05, "loss": 2.7701, "step": 600 }, { "epoch": 0.0, "learning_rate": 4.988411939013691e-05, "loss": 2.7748, "step": 700 }, { "epoch": 0.0, "learning_rate": 4.986756501729932e-05, "loss": 2.7806, "step": 800 }, { "epoch": 0.0, "learning_rate": 4.985101064446174e-05, "loss": 2.7774, "step": 900 }, { "epoch": 0.0, "learning_rate": 4.983445627162415e-05, "loss": 2.7915, "step": 1000 }, { "epoch": 0.0, "learning_rate": 4.9817901898786564e-05, "loss": 2.7917, "step": 1100 }, { "epoch": 0.0, "learning_rate": 4.9801347525948983e-05, "loss": 2.7785, "step": 1200 }, { "epoch": 0.0, "learning_rate": 4.9784793153111396e-05, "loss": 2.8012, "step": 1300 }, { "epoch": 0.0, "learning_rate": 4.9768238780273815e-05, "loss": 2.7804, "step": 1400 }, { "epoch": 0.0, "learning_rate": 4.975168440743623e-05, "loss": 2.7963, "step": 1500 }, { "epoch": 0.01, "learning_rate": 4.9735130034598646e-05, "loss": 2.7811, "step": 1600 }, { "epoch": 0.01, "learning_rate": 4.971857566176106e-05, "loss": 2.7846, "step": 1700 }, { "epoch": 0.01, "learning_rate": 4.970202128892347e-05, "loss": 2.7903, "step": 1800 }, { "epoch": 0.01, "learning_rate": 4.968546691608589e-05, "loss": 2.7826, "step": 1900 }, { "epoch": 0.01, "learning_rate": 4.96689125432483e-05, "loss": 2.7807, "step": 2000 }, { "epoch": 0.01, "learning_rate": 4.965235817041072e-05, "loss": 2.7934, "step": 2100 }, { "epoch": 0.01, "learning_rate": 4.963580379757313e-05, "loss": 2.7766, "step": 2200 }, { "epoch": 0.01, "learning_rate": 4.9619249424735545e-05, "loss": 2.7896, "step": 2300 }, { "epoch": 0.01, "learning_rate": 4.9602695051897964e-05, "loss": 2.7883, "step": 2400 }, { "epoch": 0.01, "learning_rate": 4.958614067906038e-05, "loss": 2.8038, "step": 2500 }, { "epoch": 0.01, "learning_rate": 4.9569586306222796e-05, "loss": 2.7987, "step": 2600 }, { "epoch": 0.01, "learning_rate": 4.955303193338521e-05, "loss": 2.7908, "step": 2700 }, { "epoch": 0.01, "learning_rate": 4.953647756054762e-05, "loss": 2.7882, "step": 2800 }, { "epoch": 0.01, "learning_rate": 4.951992318771004e-05, "loss": 2.7946, "step": 2900 }, { "epoch": 0.01, "learning_rate": 4.950336881487245e-05, "loss": 2.8016, "step": 3000 }, { "epoch": 0.01, "learning_rate": 4.948681444203487e-05, "loss": 2.8027, "step": 3100 }, { "epoch": 0.01, "learning_rate": 4.947026006919728e-05, "loss": 2.7752, "step": 3200 }, { "epoch": 0.01, "learning_rate": 4.9453705696359695e-05, "loss": 2.7759, "step": 3300 }, { "epoch": 0.01, "learning_rate": 4.9437151323522114e-05, "loss": 2.7988, "step": 3400 }, { "epoch": 0.01, "learning_rate": 4.9420596950684526e-05, "loss": 2.791, "step": 3500 }, { "epoch": 0.01, "learning_rate": 4.9404042577846945e-05, "loss": 2.8068, "step": 3600 }, { "epoch": 0.01, "learning_rate": 4.938748820500936e-05, "loss": 2.8004, "step": 3700 }, { "epoch": 0.01, "learning_rate": 4.937093383217177e-05, "loss": 2.7909, "step": 3800 }, { "epoch": 0.01, "learning_rate": 4.935437945933419e-05, "loss": 2.7892, "step": 3900 }, { "epoch": 0.01, "learning_rate": 4.93378250864966e-05, "loss": 2.789, "step": 4000 }, { "epoch": 0.01, "learning_rate": 4.932127071365902e-05, "loss": 2.7978, "step": 4100 }, { "epoch": 0.01, "learning_rate": 4.930471634082143e-05, "loss": 2.8031, "step": 4200 }, { "epoch": 0.01, "learning_rate": 4.9288161967983845e-05, "loss": 2.8041, "step": 4300 }, { "epoch": 0.01, "learning_rate": 4.9271607595146264e-05, "loss": 2.8039, "step": 4400 }, { "epoch": 0.01, "learning_rate": 4.925505322230867e-05, "loss": 2.7947, "step": 4500 }, { "epoch": 0.02, "learning_rate": 4.923849884947109e-05, "loss": 2.7976, "step": 4600 }, { "epoch": 0.02, "learning_rate": 4.92219444766335e-05, "loss": 2.7924, "step": 4700 }, { "epoch": 0.02, "learning_rate": 4.920539010379592e-05, "loss": 2.7834, "step": 4800 }, { "epoch": 0.02, "learning_rate": 4.918883573095833e-05, "loss": 2.7912, "step": 4900 }, { "epoch": 0.02, "learning_rate": 4.9172281358120744e-05, "loss": 2.8047, "step": 5000 }, { "epoch": 0.02, "learning_rate": 4.915572698528316e-05, "loss": 2.7892, "step": 5100 }, { "epoch": 0.02, "learning_rate": 4.9139172612445575e-05, "loss": 2.7825, "step": 5200 }, { "epoch": 0.02, "learning_rate": 4.9122618239607995e-05, "loss": 2.7918, "step": 5300 }, { "epoch": 0.02, "learning_rate": 4.910606386677041e-05, "loss": 2.7886, "step": 5400 }, { "epoch": 0.02, "learning_rate": 4.9089509493932826e-05, "loss": 2.79, "step": 5500 }, { "epoch": 0.02, "learning_rate": 4.907295512109524e-05, "loss": 2.7986, "step": 5600 }, { "epoch": 0.02, "learning_rate": 4.905640074825765e-05, "loss": 2.8048, "step": 5700 }, { "epoch": 0.02, "learning_rate": 4.903984637542007e-05, "loss": 2.7946, "step": 5800 }, { "epoch": 0.02, "learning_rate": 4.902329200258248e-05, "loss": 2.8066, "step": 5900 }, { "epoch": 0.02, "learning_rate": 4.90067376297449e-05, "loss": 2.7966, "step": 6000 }, { "epoch": 0.02, "learning_rate": 4.899018325690731e-05, "loss": 2.7911, "step": 6100 }, { "epoch": 0.02, "learning_rate": 4.8973628884069725e-05, "loss": 2.793, "step": 6200 }, { "epoch": 0.02, "learning_rate": 4.8957074511232144e-05, "loss": 2.7782, "step": 6300 }, { "epoch": 0.02, "learning_rate": 4.8940520138394556e-05, "loss": 2.8033, "step": 6400 }, { "epoch": 0.02, "learning_rate": 4.8923965765556976e-05, "loss": 2.8095, "step": 6500 }, { "epoch": 0.02, "learning_rate": 4.890741139271939e-05, "loss": 2.7841, "step": 6600 }, { "epoch": 0.02, "learning_rate": 4.88908570198818e-05, "loss": 2.8143, "step": 6700 }, { "epoch": 0.02, "learning_rate": 4.887430264704422e-05, "loss": 2.7956, "step": 6800 }, { "epoch": 0.02, "learning_rate": 4.885774827420663e-05, "loss": 2.8094, "step": 6900 }, { "epoch": 0.02, "learning_rate": 4.884119390136905e-05, "loss": 2.7828, "step": 7000 }, { "epoch": 0.02, "learning_rate": 4.882463952853146e-05, "loss": 2.8003, "step": 7100 }, { "epoch": 0.02, "learning_rate": 4.8808085155693875e-05, "loss": 2.7788, "step": 7200 }, { "epoch": 0.02, "learning_rate": 4.8791530782856294e-05, "loss": 2.812, "step": 7300 }, { "epoch": 0.02, "learning_rate": 4.8774976410018706e-05, "loss": 2.7785, "step": 7400 }, { "epoch": 0.02, "learning_rate": 4.8758422037181125e-05, "loss": 2.7923, "step": 7500 }, { "epoch": 0.03, "learning_rate": 4.874186766434354e-05, "loss": 2.8001, "step": 7600 }, { "epoch": 0.03, "learning_rate": 4.872531329150595e-05, "loss": 2.7898, "step": 7700 }, { "epoch": 0.03, "learning_rate": 4.870875891866837e-05, "loss": 2.8008, "step": 7800 }, { "epoch": 0.03, "learning_rate": 4.869220454583078e-05, "loss": 2.7907, "step": 7900 }, { "epoch": 0.03, "learning_rate": 4.86756501729932e-05, "loss": 2.791, "step": 8000 }, { "epoch": 0.03, "learning_rate": 4.865909580015561e-05, "loss": 2.7839, "step": 8100 }, { "epoch": 0.03, "learning_rate": 4.8642541427318025e-05, "loss": 2.7939, "step": 8200 }, { "epoch": 0.03, "learning_rate": 4.8625987054480444e-05, "loss": 2.7828, "step": 8300 }, { "epoch": 0.03, "learning_rate": 4.8609432681642856e-05, "loss": 2.7884, "step": 8400 }, { "epoch": 0.03, "learning_rate": 4.8592878308805275e-05, "loss": 2.798, "step": 8500 }, { "epoch": 0.03, "learning_rate": 4.857632393596769e-05, "loss": 2.791, "step": 8600 }, { "epoch": 0.03, "learning_rate": 4.8559769563130106e-05, "loss": 2.8015, "step": 8700 }, { "epoch": 0.03, "learning_rate": 4.854321519029252e-05, "loss": 2.7992, "step": 8800 }, { "epoch": 0.03, "learning_rate": 4.852666081745493e-05, "loss": 2.794, "step": 8900 }, { "epoch": 0.03, "learning_rate": 4.851010644461735e-05, "loss": 2.7896, "step": 9000 }, { "epoch": 0.03, "learning_rate": 4.849355207177976e-05, "loss": 2.792, "step": 9100 }, { "epoch": 0.03, "learning_rate": 4.847699769894218e-05, "loss": 2.7891, "step": 9200 }, { "epoch": 0.03, "learning_rate": 4.846044332610459e-05, "loss": 2.7891, "step": 9300 }, { "epoch": 0.03, "learning_rate": 4.8443888953267006e-05, "loss": 2.8021, "step": 9400 }, { "epoch": 0.03, "learning_rate": 4.8427334580429425e-05, "loss": 2.8079, "step": 9500 }, { "epoch": 0.03, "learning_rate": 4.841078020759184e-05, "loss": 2.8038, "step": 9600 }, { "epoch": 0.03, "learning_rate": 4.8394225834754256e-05, "loss": 2.7942, "step": 9700 }, { "epoch": 0.03, "learning_rate": 4.837767146191667e-05, "loss": 2.805, "step": 9800 }, { "epoch": 0.03, "learning_rate": 4.836111708907908e-05, "loss": 2.8073, "step": 9900 }, { "epoch": 0.03, "learning_rate": 4.83445627162415e-05, "loss": 2.8066, "step": 10000 }, { "epoch": 0.03, "learning_rate": 4.832800834340391e-05, "loss": 2.7873, "step": 10100 }, { "epoch": 0.03, "learning_rate": 4.831145397056633e-05, "loss": 2.7863, "step": 10200 }, { "epoch": 0.03, "learning_rate": 4.829489959772874e-05, "loss": 2.8084, "step": 10300 }, { "epoch": 0.03, "learning_rate": 4.8278345224891155e-05, "loss": 2.7936, "step": 10400 }, { "epoch": 0.03, "learning_rate": 4.8261790852053574e-05, "loss": 2.8019, "step": 10500 }, { "epoch": 0.04, "learning_rate": 4.824523647921599e-05, "loss": 2.7848, "step": 10600 }, { "epoch": 0.04, "learning_rate": 4.8228682106378406e-05, "loss": 2.8021, "step": 10700 }, { "epoch": 0.04, "learning_rate": 4.821212773354082e-05, "loss": 2.8008, "step": 10800 }, { "epoch": 0.04, "learning_rate": 4.819557336070323e-05, "loss": 2.7934, "step": 10900 }, { "epoch": 0.04, "learning_rate": 4.817901898786565e-05, "loss": 2.7936, "step": 11000 }, { "epoch": 0.04, "learning_rate": 4.816246461502806e-05, "loss": 2.8028, "step": 11100 }, { "epoch": 0.04, "learning_rate": 4.814591024219048e-05, "loss": 2.7983, "step": 11200 }, { "epoch": 0.04, "learning_rate": 4.812935586935289e-05, "loss": 2.7971, "step": 11300 }, { "epoch": 0.04, "learning_rate": 4.8112801496515305e-05, "loss": 2.7895, "step": 11400 }, { "epoch": 0.04, "learning_rate": 4.8096247123677724e-05, "loss": 2.8001, "step": 11500 }, { "epoch": 0.04, "learning_rate": 4.8079692750840136e-05, "loss": 2.8016, "step": 11600 }, { "epoch": 0.04, "learning_rate": 4.8063138378002555e-05, "loss": 2.8056, "step": 11700 }, { "epoch": 0.04, "learning_rate": 4.804658400516497e-05, "loss": 2.792, "step": 11800 }, { "epoch": 0.04, "learning_rate": 4.803002963232739e-05, "loss": 2.7795, "step": 11900 }, { "epoch": 0.04, "learning_rate": 4.80134752594898e-05, "loss": 2.7853, "step": 12000 }, { "epoch": 0.04, "learning_rate": 4.799692088665221e-05, "loss": 2.7869, "step": 12100 }, { "epoch": 0.04, "learning_rate": 4.798036651381463e-05, "loss": 2.796, "step": 12200 }, { "epoch": 0.04, "learning_rate": 4.796381214097704e-05, "loss": 2.7972, "step": 12300 }, { "epoch": 0.04, "learning_rate": 4.794725776813946e-05, "loss": 2.8135, "step": 12400 }, { "epoch": 0.04, "learning_rate": 4.7930703395301874e-05, "loss": 2.7929, "step": 12500 }, { "epoch": 0.04, "learning_rate": 4.7914149022464286e-05, "loss": 2.7916, "step": 12600 }, { "epoch": 0.04, "learning_rate": 4.7897594649626705e-05, "loss": 2.7987, "step": 12700 }, { "epoch": 0.04, "learning_rate": 4.788104027678912e-05, "loss": 2.7871, "step": 12800 }, { "epoch": 0.04, "learning_rate": 4.7864485903951536e-05, "loss": 2.799, "step": 12900 }, { "epoch": 0.04, "learning_rate": 4.784793153111395e-05, "loss": 2.7973, "step": 13000 }, { "epoch": 0.04, "learning_rate": 4.783137715827636e-05, "loss": 2.7883, "step": 13100 }, { "epoch": 0.04, "learning_rate": 4.781482278543878e-05, "loss": 2.7874, "step": 13200 }, { "epoch": 0.04, "learning_rate": 4.7798268412601185e-05, "loss": 2.7897, "step": 13300 }, { "epoch": 0.04, "learning_rate": 4.7781714039763604e-05, "loss": 2.7865, "step": 13400 }, { "epoch": 0.04, "learning_rate": 4.776515966692602e-05, "loss": 2.8049, "step": 13500 }, { "epoch": 0.05, "learning_rate": 4.7748605294088436e-05, "loss": 2.8022, "step": 13600 }, { "epoch": 0.05, "learning_rate": 4.773205092125085e-05, "loss": 2.7926, "step": 13700 }, { "epoch": 0.05, "learning_rate": 4.771549654841326e-05, "loss": 2.8083, "step": 13800 }, { "epoch": 0.05, "learning_rate": 4.769894217557568e-05, "loss": 2.7993, "step": 13900 }, { "epoch": 0.05, "learning_rate": 4.768238780273809e-05, "loss": 2.7907, "step": 14000 }, { "epoch": 0.05, "learning_rate": 4.766583342990051e-05, "loss": 2.786, "step": 14100 }, { "epoch": 0.05, "learning_rate": 4.764927905706292e-05, "loss": 2.7756, "step": 14200 }, { "epoch": 0.05, "learning_rate": 4.7632724684225335e-05, "loss": 2.8032, "step": 14300 }, { "epoch": 0.05, "learning_rate": 4.7616170311387754e-05, "loss": 2.807, "step": 14400 }, { "epoch": 0.05, "learning_rate": 4.7599615938550166e-05, "loss": 2.8051, "step": 14500 }, { "epoch": 0.05, "learning_rate": 4.7583061565712586e-05, "loss": 2.7948, "step": 14600 }, { "epoch": 0.05, "learning_rate": 4.7566507192875e-05, "loss": 2.7929, "step": 14700 }, { "epoch": 0.05, "learning_rate": 4.754995282003741e-05, "loss": 2.8025, "step": 14800 }, { "epoch": 0.05, "learning_rate": 4.753339844719983e-05, "loss": 2.7916, "step": 14900 }, { "epoch": 0.05, "learning_rate": 4.751684407436224e-05, "loss": 2.8024, "step": 15000 }, { "epoch": 0.05, "learning_rate": 4.750028970152466e-05, "loss": 2.8043, "step": 15100 }, { "epoch": 0.05, "learning_rate": 4.748373532868707e-05, "loss": 2.8092, "step": 15200 }, { "epoch": 0.05, "learning_rate": 4.746718095584949e-05, "loss": 2.7981, "step": 15300 }, { "epoch": 0.05, "learning_rate": 4.7450626583011904e-05, "loss": 2.7903, "step": 15400 }, { "epoch": 0.05, "learning_rate": 4.7434072210174316e-05, "loss": 2.7969, "step": 15500 }, { "epoch": 0.05, "learning_rate": 4.7417517837336735e-05, "loss": 2.8021, "step": 15600 }, { "epoch": 0.05, "learning_rate": 4.740096346449915e-05, "loss": 2.7899, "step": 15700 }, { "epoch": 0.05, "learning_rate": 4.7384409091661567e-05, "loss": 2.7982, "step": 15800 }, { "epoch": 0.05, "learning_rate": 4.736785471882398e-05, "loss": 2.789, "step": 15900 }, { "epoch": 0.05, "learning_rate": 4.735130034598639e-05, "loss": 2.7935, "step": 16000 }, { "epoch": 0.05, "learning_rate": 4.733474597314881e-05, "loss": 2.8002, "step": 16100 }, { "epoch": 0.05, "learning_rate": 4.731819160031122e-05, "loss": 2.8021, "step": 16200 }, { "epoch": 0.05, "learning_rate": 4.730163722747364e-05, "loss": 2.7968, "step": 16300 }, { "epoch": 0.05, "learning_rate": 4.7285082854636054e-05, "loss": 2.7927, "step": 16400 }, { "epoch": 0.05, "learning_rate": 4.7268528481798466e-05, "loss": 2.7902, "step": 16500 }, { "epoch": 0.05, "learning_rate": 4.7251974108960885e-05, "loss": 2.7975, "step": 16600 }, { "epoch": 0.06, "learning_rate": 4.72354197361233e-05, "loss": 2.8031, "step": 16700 }, { "epoch": 0.06, "learning_rate": 4.7218865363285716e-05, "loss": 2.7943, "step": 16800 }, { "epoch": 0.06, "learning_rate": 4.720231099044813e-05, "loss": 2.7947, "step": 16900 }, { "epoch": 0.06, "learning_rate": 4.718575661761054e-05, "loss": 2.8, "step": 17000 }, { "epoch": 0.06, "learning_rate": 4.716920224477296e-05, "loss": 2.8024, "step": 17100 }, { "epoch": 0.06, "learning_rate": 4.715264787193537e-05, "loss": 2.804, "step": 17200 }, { "epoch": 0.06, "learning_rate": 4.713609349909779e-05, "loss": 2.7841, "step": 17300 }, { "epoch": 0.06, "learning_rate": 4.71195391262602e-05, "loss": 2.792, "step": 17400 }, { "epoch": 0.06, "learning_rate": 4.7102984753422616e-05, "loss": 2.793, "step": 17500 }, { "epoch": 0.06, "learning_rate": 4.7086430380585035e-05, "loss": 2.7858, "step": 17600 }, { "epoch": 0.06, "learning_rate": 4.706987600774745e-05, "loss": 2.8024, "step": 17700 }, { "epoch": 0.06, "learning_rate": 4.7053321634909866e-05, "loss": 2.8041, "step": 17800 }, { "epoch": 0.06, "learning_rate": 4.703676726207228e-05, "loss": 2.7926, "step": 17900 }, { "epoch": 0.06, "learning_rate": 4.702021288923469e-05, "loss": 2.7944, "step": 18000 }, { "epoch": 0.06, "learning_rate": 4.700365851639711e-05, "loss": 2.7989, "step": 18100 }, { "epoch": 0.06, "learning_rate": 4.698710414355952e-05, "loss": 2.7909, "step": 18200 }, { "epoch": 0.06, "learning_rate": 4.697054977072194e-05, "loss": 2.7963, "step": 18300 }, { "epoch": 0.06, "learning_rate": 4.695399539788435e-05, "loss": 2.7873, "step": 18400 }, { "epoch": 0.06, "learning_rate": 4.693744102504677e-05, "loss": 2.7833, "step": 18500 }, { "epoch": 0.06, "learning_rate": 4.6920886652209184e-05, "loss": 2.7892, "step": 18600 }, { "epoch": 0.06, "learning_rate": 4.69043322793716e-05, "loss": 2.7939, "step": 18700 }, { "epoch": 0.06, "learning_rate": 4.6887777906534016e-05, "loss": 2.7966, "step": 18800 }, { "epoch": 0.06, "learning_rate": 4.687122353369643e-05, "loss": 2.7968, "step": 18900 }, { "epoch": 0.06, "learning_rate": 4.685466916085885e-05, "loss": 2.786, "step": 19000 }, { "epoch": 0.06, "learning_rate": 4.683811478802126e-05, "loss": 2.8032, "step": 19100 }, { "epoch": 0.06, "learning_rate": 4.682156041518367e-05, "loss": 2.8071, "step": 19200 }, { "epoch": 0.06, "learning_rate": 4.680500604234609e-05, "loss": 2.8007, "step": 19300 }, { "epoch": 0.06, "learning_rate": 4.67884516695085e-05, "loss": 2.7902, "step": 19400 }, { "epoch": 0.06, "learning_rate": 4.677189729667092e-05, "loss": 2.7954, "step": 19500 }, { "epoch": 0.06, "learning_rate": 4.6755342923833334e-05, "loss": 2.7869, "step": 19600 }, { "epoch": 0.07, "learning_rate": 4.6738788550995746e-05, "loss": 2.7838, "step": 19700 }, { "epoch": 0.07, "learning_rate": 4.6722234178158165e-05, "loss": 2.7894, "step": 19800 }, { "epoch": 0.07, "learning_rate": 4.670567980532058e-05, "loss": 2.7915, "step": 19900 }, { "epoch": 0.07, "learning_rate": 4.6689125432483e-05, "loss": 2.7971, "step": 20000 }, { "epoch": 0.07, "learning_rate": 4.667257105964541e-05, "loss": 2.7916, "step": 20100 }, { "epoch": 0.07, "learning_rate": 4.665601668680782e-05, "loss": 2.7978, "step": 20200 }, { "epoch": 0.07, "learning_rate": 4.663946231397024e-05, "loss": 2.7959, "step": 20300 }, { "epoch": 0.07, "learning_rate": 4.662290794113265e-05, "loss": 2.7885, "step": 20400 }, { "epoch": 0.07, "learning_rate": 4.660635356829507e-05, "loss": 2.7849, "step": 20500 }, { "epoch": 0.07, "learning_rate": 4.6589799195457484e-05, "loss": 2.8034, "step": 20600 }, { "epoch": 0.07, "learning_rate": 4.6573244822619896e-05, "loss": 2.7967, "step": 20700 }, { "epoch": 0.07, "learning_rate": 4.6556690449782315e-05, "loss": 2.7981, "step": 20800 }, { "epoch": 0.07, "learning_rate": 4.654013607694473e-05, "loss": 2.8194, "step": 20900 }, { "epoch": 0.07, "learning_rate": 4.6523581704107146e-05, "loss": 2.798, "step": 21000 }, { "epoch": 0.07, "learning_rate": 4.650702733126956e-05, "loss": 2.7977, "step": 21100 }, { "epoch": 0.07, "learning_rate": 4.649047295843197e-05, "loss": 2.8072, "step": 21200 }, { "epoch": 0.07, "learning_rate": 4.647391858559439e-05, "loss": 2.8019, "step": 21300 }, { "epoch": 0.07, "learning_rate": 4.64573642127568e-05, "loss": 2.7966, "step": 21400 }, { "epoch": 0.07, "learning_rate": 4.644080983991922e-05, "loss": 2.7915, "step": 21500 }, { "epoch": 0.07, "learning_rate": 4.6424255467081634e-05, "loss": 2.7733, "step": 21600 }, { "epoch": 0.07, "learning_rate": 4.640770109424405e-05, "loss": 2.7924, "step": 21700 }, { "epoch": 0.07, "learning_rate": 4.6391146721406465e-05, "loss": 2.7882, "step": 21800 }, { "epoch": 0.07, "learning_rate": 4.637459234856888e-05, "loss": 2.7788, "step": 21900 }, { "epoch": 0.07, "learning_rate": 4.6358037975731296e-05, "loss": 2.782, "step": 22000 }, { "epoch": 0.07, "learning_rate": 4.63414836028937e-05, "loss": 2.7981, "step": 22100 }, { "epoch": 0.07, "learning_rate": 4.632492923005612e-05, "loss": 2.7761, "step": 22200 }, { "epoch": 0.07, "learning_rate": 4.630837485721853e-05, "loss": 2.8116, "step": 22300 }, { "epoch": 0.07, "learning_rate": 4.629182048438095e-05, "loss": 2.792, "step": 22400 }, { "epoch": 0.07, "learning_rate": 4.6275266111543364e-05, "loss": 2.7842, "step": 22500 }, { "epoch": 0.07, "learning_rate": 4.6258711738705776e-05, "loss": 2.789, "step": 22600 }, { "epoch": 0.08, "learning_rate": 4.6242157365868195e-05, "loss": 2.8023, "step": 22700 }, { "epoch": 0.08, "learning_rate": 4.622560299303061e-05, "loss": 2.805, "step": 22800 }, { "epoch": 0.08, "learning_rate": 4.620904862019303e-05, "loss": 2.8074, "step": 22900 }, { "epoch": 0.08, "learning_rate": 4.619249424735544e-05, "loss": 2.8029, "step": 23000 }, { "epoch": 0.08, "learning_rate": 4.617593987451785e-05, "loss": 2.8212, "step": 23100 }, { "epoch": 0.08, "learning_rate": 4.615938550168027e-05, "loss": 2.8042, "step": 23200 }, { "epoch": 0.08, "learning_rate": 4.614283112884268e-05, "loss": 2.7906, "step": 23300 }, { "epoch": 0.08, "learning_rate": 4.61262767560051e-05, "loss": 2.7914, "step": 23400 }, { "epoch": 0.08, "learning_rate": 4.6109722383167514e-05, "loss": 2.802, "step": 23500 }, { "epoch": 0.08, "learning_rate": 4.6093168010329926e-05, "loss": 2.7898, "step": 23600 }, { "epoch": 0.08, "learning_rate": 4.6076613637492345e-05, "loss": 2.7944, "step": 23700 }, { "epoch": 0.08, "learning_rate": 4.606005926465476e-05, "loss": 2.7828, "step": 23800 }, { "epoch": 0.08, "learning_rate": 4.6043504891817176e-05, "loss": 2.7855, "step": 23900 }, { "epoch": 0.08, "learning_rate": 4.602695051897959e-05, "loss": 2.798, "step": 24000 }, { "epoch": 0.08, "learning_rate": 4.6010396146142e-05, "loss": 2.8217, "step": 24100 }, { "epoch": 0.08, "learning_rate": 4.599384177330442e-05, "loss": 2.8046, "step": 24200 }, { "epoch": 0.08, "learning_rate": 4.597728740046683e-05, "loss": 2.7894, "step": 24300 }, { "epoch": 0.08, "learning_rate": 4.596073302762925e-05, "loss": 2.7988, "step": 24400 }, { "epoch": 0.08, "learning_rate": 4.5944178654791664e-05, "loss": 2.8025, "step": 24500 }, { "epoch": 0.08, "learning_rate": 4.5927624281954076e-05, "loss": 2.8177, "step": 24600 }, { "epoch": 0.08, "learning_rate": 4.5911069909116495e-05, "loss": 2.7866, "step": 24700 }, { "epoch": 0.08, "learning_rate": 4.589451553627891e-05, "loss": 2.7887, "step": 24800 }, { "epoch": 0.08, "learning_rate": 4.5877961163441326e-05, "loss": 2.7962, "step": 24900 }, { "epoch": 0.08, "learning_rate": 4.586140679060374e-05, "loss": 2.803, "step": 25000 }, { "epoch": 0.08, "learning_rate": 4.584485241776616e-05, "loss": 2.7793, "step": 25100 }, { "epoch": 0.08, "learning_rate": 4.582829804492857e-05, "loss": 2.797, "step": 25200 }, { "epoch": 0.08, "learning_rate": 4.581174367209098e-05, "loss": 2.7889, "step": 25300 }, { "epoch": 0.08, "learning_rate": 4.57951892992534e-05, "loss": 2.7864, "step": 25400 }, { "epoch": 0.08, "learning_rate": 4.577863492641581e-05, "loss": 2.7977, "step": 25500 }, { "epoch": 0.08, "learning_rate": 4.576208055357823e-05, "loss": 2.7741, "step": 25600 }, { "epoch": 0.09, "learning_rate": 4.5745526180740645e-05, "loss": 2.792, "step": 25700 }, { "epoch": 0.09, "learning_rate": 4.572897180790306e-05, "loss": 2.7956, "step": 25800 }, { "epoch": 0.09, "learning_rate": 4.5712417435065476e-05, "loss": 2.7899, "step": 25900 }, { "epoch": 0.09, "learning_rate": 4.569586306222789e-05, "loss": 2.8051, "step": 26000 }, { "epoch": 0.09, "learning_rate": 4.567930868939031e-05, "loss": 2.7869, "step": 26100 }, { "epoch": 0.09, "learning_rate": 4.566275431655272e-05, "loss": 2.7706, "step": 26200 }, { "epoch": 0.09, "learning_rate": 4.564619994371513e-05, "loss": 2.7979, "step": 26300 }, { "epoch": 0.09, "learning_rate": 4.562964557087755e-05, "loss": 2.7957, "step": 26400 }, { "epoch": 0.09, "learning_rate": 4.561309119803996e-05, "loss": 2.8008, "step": 26500 }, { "epoch": 0.09, "learning_rate": 4.559653682520238e-05, "loss": 2.7969, "step": 26600 }, { "epoch": 0.09, "learning_rate": 4.5579982452364794e-05, "loss": 2.7943, "step": 26700 }, { "epoch": 0.09, "learning_rate": 4.5563428079527207e-05, "loss": 2.8, "step": 26800 }, { "epoch": 0.09, "learning_rate": 4.5546873706689626e-05, "loss": 2.8088, "step": 26900 }, { "epoch": 0.09, "learning_rate": 4.553031933385204e-05, "loss": 2.8057, "step": 27000 }, { "epoch": 0.09, "learning_rate": 4.551376496101446e-05, "loss": 2.7967, "step": 27100 }, { "epoch": 0.09, "learning_rate": 4.549721058817687e-05, "loss": 2.7814, "step": 27200 }, { "epoch": 0.09, "learning_rate": 4.548065621533928e-05, "loss": 2.7779, "step": 27300 }, { "epoch": 0.09, "learning_rate": 4.54641018425017e-05, "loss": 2.7828, "step": 27400 }, { "epoch": 0.09, "learning_rate": 4.544754746966411e-05, "loss": 2.7916, "step": 27500 }, { "epoch": 0.09, "learning_rate": 4.543099309682653e-05, "loss": 2.7926, "step": 27600 }, { "epoch": 0.09, "learning_rate": 4.5414438723988944e-05, "loss": 2.8087, "step": 27700 }, { "epoch": 0.09, "learning_rate": 4.5397884351151356e-05, "loss": 2.7749, "step": 27800 }, { "epoch": 0.09, "learning_rate": 4.5381329978313775e-05, "loss": 2.8012, "step": 27900 }, { "epoch": 0.09, "learning_rate": 4.536477560547619e-05, "loss": 2.8014, "step": 28000 }, { "epoch": 0.09, "learning_rate": 4.534822123263861e-05, "loss": 2.7832, "step": 28100 }, { "epoch": 0.09, "learning_rate": 4.533166685980102e-05, "loss": 2.7901, "step": 28200 }, { "epoch": 0.09, "learning_rate": 4.531511248696344e-05, "loss": 2.785, "step": 28300 }, { "epoch": 0.09, "learning_rate": 4.529855811412585e-05, "loss": 2.8014, "step": 28400 }, { "epoch": 0.09, "learning_rate": 4.528200374128826e-05, "loss": 2.7929, "step": 28500 }, { "epoch": 0.09, "learning_rate": 4.526544936845068e-05, "loss": 2.79, "step": 28600 }, { "epoch": 0.1, "learning_rate": 4.5248894995613094e-05, "loss": 2.7956, "step": 28700 }, { "epoch": 0.1, "learning_rate": 4.523234062277551e-05, "loss": 2.7963, "step": 28800 }, { "epoch": 0.1, "learning_rate": 4.5215786249937925e-05, "loss": 2.7736, "step": 28900 }, { "epoch": 0.1, "learning_rate": 4.519923187710034e-05, "loss": 2.789, "step": 29000 }, { "epoch": 0.1, "learning_rate": 4.5182677504262756e-05, "loss": 2.7964, "step": 29100 }, { "epoch": 0.1, "learning_rate": 4.516612313142517e-05, "loss": 2.7872, "step": 29200 }, { "epoch": 0.1, "learning_rate": 4.514956875858759e-05, "loss": 2.7959, "step": 29300 }, { "epoch": 0.1, "learning_rate": 4.513301438575e-05, "loss": 2.787, "step": 29400 }, { "epoch": 0.1, "learning_rate": 4.511646001291241e-05, "loss": 2.7883, "step": 29500 }, { "epoch": 0.1, "learning_rate": 4.509990564007483e-05, "loss": 2.8037, "step": 29600 }, { "epoch": 0.1, "learning_rate": 4.5083351267237243e-05, "loss": 2.7884, "step": 29700 }, { "epoch": 0.1, "learning_rate": 4.506679689439966e-05, "loss": 2.778, "step": 29800 }, { "epoch": 0.1, "learning_rate": 4.5050242521562075e-05, "loss": 2.7886, "step": 29900 }, { "epoch": 0.1, "learning_rate": 4.503368814872449e-05, "loss": 2.7929, "step": 30000 }, { "epoch": 0.1, "learning_rate": 4.5017133775886906e-05, "loss": 2.782, "step": 30100 }, { "epoch": 0.1, "learning_rate": 4.500057940304932e-05, "loss": 2.8149, "step": 30200 }, { "epoch": 0.1, "learning_rate": 4.498402503021174e-05, "loss": 2.7956, "step": 30300 }, { "epoch": 0.1, "learning_rate": 4.496747065737415e-05, "loss": 2.7935, "step": 30400 }, { "epoch": 0.1, "learning_rate": 4.495091628453656e-05, "loss": 2.798, "step": 30500 }, { "epoch": 0.1, "learning_rate": 4.493436191169898e-05, "loss": 2.8016, "step": 30600 }, { "epoch": 0.1, "learning_rate": 4.491780753886139e-05, "loss": 2.792, "step": 30700 }, { "epoch": 0.1, "learning_rate": 4.490125316602381e-05, "loss": 2.804, "step": 30800 }, { "epoch": 0.1, "learning_rate": 4.4884698793186224e-05, "loss": 2.7958, "step": 30900 }, { "epoch": 0.1, "learning_rate": 4.486814442034864e-05, "loss": 2.7974, "step": 31000 }, { "epoch": 0.1, "learning_rate": 4.485159004751105e-05, "loss": 2.8045, "step": 31100 }, { "epoch": 0.1, "learning_rate": 4.483503567467346e-05, "loss": 2.7946, "step": 31200 }, { "epoch": 0.1, "learning_rate": 4.481848130183588e-05, "loss": 2.8163, "step": 31300 }, { "epoch": 0.1, "learning_rate": 4.480192692899829e-05, "loss": 2.7927, "step": 31400 }, { "epoch": 0.1, "learning_rate": 4.478537255616071e-05, "loss": 2.7713, "step": 31500 }, { "epoch": 0.1, "learning_rate": 4.4768818183323124e-05, "loss": 2.7841, "step": 31600 }, { "epoch": 0.1, "learning_rate": 4.475226381048554e-05, "loss": 2.7755, "step": 31700 }, { "epoch": 0.11, "learning_rate": 4.4735709437647955e-05, "loss": 2.806, "step": 31800 }, { "epoch": 0.11, "learning_rate": 4.471915506481037e-05, "loss": 2.7856, "step": 31900 }, { "epoch": 0.11, "learning_rate": 4.4702600691972786e-05, "loss": 2.787, "step": 32000 }, { "epoch": 0.11, "learning_rate": 4.46860463191352e-05, "loss": 2.8002, "step": 32100 }, { "epoch": 0.11, "learning_rate": 4.466949194629762e-05, "loss": 2.808, "step": 32200 }, { "epoch": 0.11, "learning_rate": 4.465293757346003e-05, "loss": 2.792, "step": 32300 }, { "epoch": 0.11, "learning_rate": 4.463638320062244e-05, "loss": 2.7954, "step": 32400 }, { "epoch": 0.11, "learning_rate": 4.461982882778486e-05, "loss": 2.7955, "step": 32500 }, { "epoch": 0.11, "learning_rate": 4.4603274454947274e-05, "loss": 2.7894, "step": 32600 }, { "epoch": 0.11, "learning_rate": 4.458672008210969e-05, "loss": 2.7965, "step": 32700 }, { "epoch": 0.11, "learning_rate": 4.4570165709272105e-05, "loss": 2.7941, "step": 32800 }, { "epoch": 0.11, "learning_rate": 4.455361133643452e-05, "loss": 2.7885, "step": 32900 }, { "epoch": 0.11, "learning_rate": 4.4537056963596936e-05, "loss": 2.8039, "step": 33000 }, { "epoch": 0.11, "learning_rate": 4.452050259075935e-05, "loss": 2.7836, "step": 33100 }, { "epoch": 0.11, "learning_rate": 4.450394821792177e-05, "loss": 2.7978, "step": 33200 }, { "epoch": 0.11, "learning_rate": 4.448739384508418e-05, "loss": 2.7908, "step": 33300 }, { "epoch": 0.11, "learning_rate": 4.447083947224659e-05, "loss": 2.7943, "step": 33400 }, { "epoch": 0.11, "learning_rate": 4.445428509940901e-05, "loss": 2.791, "step": 33500 }, { "epoch": 0.11, "learning_rate": 4.443773072657142e-05, "loss": 2.7931, "step": 33600 }, { "epoch": 0.11, "learning_rate": 4.442117635373384e-05, "loss": 2.786, "step": 33700 }, { "epoch": 0.11, "learning_rate": 4.4404621980896255e-05, "loss": 2.7929, "step": 33800 }, { "epoch": 0.11, "learning_rate": 4.438806760805867e-05, "loss": 2.7952, "step": 33900 }, { "epoch": 0.11, "learning_rate": 4.4371513235221086e-05, "loss": 2.7895, "step": 34000 }, { "epoch": 0.11, "learning_rate": 4.43549588623835e-05, "loss": 2.8006, "step": 34100 }, { "epoch": 0.11, "learning_rate": 4.433840448954592e-05, "loss": 2.7977, "step": 34200 }, { "epoch": 0.11, "learning_rate": 4.432185011670833e-05, "loss": 2.7985, "step": 34300 }, { "epoch": 0.11, "learning_rate": 4.430529574387074e-05, "loss": 2.79, "step": 34400 }, { "epoch": 0.11, "learning_rate": 4.428874137103316e-05, "loss": 2.8015, "step": 34500 }, { "epoch": 0.11, "learning_rate": 4.427218699819557e-05, "loss": 2.8001, "step": 34600 }, { "epoch": 0.11, "learning_rate": 4.425563262535799e-05, "loss": 2.7898, "step": 34700 }, { "epoch": 0.12, "learning_rate": 4.4239078252520404e-05, "loss": 2.7813, "step": 34800 }, { "epoch": 0.12, "learning_rate": 4.422252387968282e-05, "loss": 2.7819, "step": 34900 }, { "epoch": 0.12, "learning_rate": 4.4205969506845236e-05, "loss": 2.8012, "step": 35000 }, { "epoch": 0.12, "learning_rate": 4.418941513400765e-05, "loss": 2.7991, "step": 35100 }, { "epoch": 0.12, "learning_rate": 4.417286076117007e-05, "loss": 2.7944, "step": 35200 }, { "epoch": 0.12, "learning_rate": 4.415630638833248e-05, "loss": 2.7826, "step": 35300 }, { "epoch": 0.12, "learning_rate": 4.41397520154949e-05, "loss": 2.7942, "step": 35400 }, { "epoch": 0.12, "learning_rate": 4.412319764265731e-05, "loss": 2.802, "step": 35500 }, { "epoch": 0.12, "learning_rate": 4.410664326981972e-05, "loss": 2.8046, "step": 35600 }, { "epoch": 0.12, "learning_rate": 4.409008889698214e-05, "loss": 2.797, "step": 35700 }, { "epoch": 0.12, "learning_rate": 4.4073534524144554e-05, "loss": 2.7863, "step": 35800 }, { "epoch": 0.12, "learning_rate": 4.405698015130697e-05, "loss": 2.7878, "step": 35900 }, { "epoch": 0.12, "learning_rate": 4.4040425778469385e-05, "loss": 2.7917, "step": 36000 }, { "epoch": 0.12, "learning_rate": 4.40238714056318e-05, "loss": 2.8027, "step": 36100 }, { "epoch": 0.12, "learning_rate": 4.400731703279422e-05, "loss": 2.781, "step": 36200 }, { "epoch": 0.12, "learning_rate": 4.399076265995663e-05, "loss": 2.7845, "step": 36300 }, { "epoch": 0.12, "learning_rate": 4.397420828711905e-05, "loss": 2.794, "step": 36400 }, { "epoch": 0.12, "learning_rate": 4.395765391428146e-05, "loss": 2.8011, "step": 36500 }, { "epoch": 0.12, "learning_rate": 4.394109954144387e-05, "loss": 2.7792, "step": 36600 }, { "epoch": 0.12, "learning_rate": 4.392454516860629e-05, "loss": 2.796, "step": 36700 }, { "epoch": 0.12, "learning_rate": 4.3907990795768704e-05, "loss": 2.7843, "step": 36800 }, { "epoch": 0.12, "learning_rate": 4.389143642293112e-05, "loss": 2.8001, "step": 36900 }, { "epoch": 0.12, "learning_rate": 4.3874882050093535e-05, "loss": 2.7942, "step": 37000 }, { "epoch": 0.12, "learning_rate": 4.385832767725595e-05, "loss": 2.8005, "step": 37100 }, { "epoch": 0.12, "learning_rate": 4.3841773304418366e-05, "loss": 2.8104, "step": 37200 }, { "epoch": 0.12, "learning_rate": 4.382521893158078e-05, "loss": 2.8071, "step": 37300 }, { "epoch": 0.12, "learning_rate": 4.38086645587432e-05, "loss": 2.7935, "step": 37400 }, { "epoch": 0.12, "learning_rate": 4.379211018590561e-05, "loss": 2.7958, "step": 37500 }, { "epoch": 0.12, "learning_rate": 4.377555581306802e-05, "loss": 2.7935, "step": 37600 }, { "epoch": 0.12, "learning_rate": 4.375900144023044e-05, "loss": 2.7938, "step": 37700 }, { "epoch": 0.13, "learning_rate": 4.3742447067392853e-05, "loss": 2.7942, "step": 37800 }, { "epoch": 0.13, "learning_rate": 4.372589269455527e-05, "loss": 2.789, "step": 37900 }, { "epoch": 0.13, "learning_rate": 4.3709338321717685e-05, "loss": 2.801, "step": 38000 }, { "epoch": 0.13, "learning_rate": 4.3692783948880104e-05, "loss": 2.7911, "step": 38100 }, { "epoch": 0.13, "learning_rate": 4.3676229576042516e-05, "loss": 2.799, "step": 38200 }, { "epoch": 0.13, "learning_rate": 4.365967520320493e-05, "loss": 2.7882, "step": 38300 }, { "epoch": 0.13, "learning_rate": 4.364312083036735e-05, "loss": 2.8009, "step": 38400 }, { "epoch": 0.13, "learning_rate": 4.362656645752976e-05, "loss": 2.8013, "step": 38500 }, { "epoch": 0.13, "learning_rate": 4.361001208469218e-05, "loss": 2.8038, "step": 38600 }, { "epoch": 0.13, "learning_rate": 4.359345771185459e-05, "loss": 2.8046, "step": 38700 }, { "epoch": 0.13, "learning_rate": 4.3576903339017e-05, "loss": 2.7774, "step": 38800 }, { "epoch": 0.13, "learning_rate": 4.356034896617942e-05, "loss": 2.7825, "step": 38900 }, { "epoch": 0.13, "learning_rate": 4.3543794593341834e-05, "loss": 2.7915, "step": 39000 }, { "epoch": 0.13, "learning_rate": 4.3527240220504254e-05, "loss": 2.7815, "step": 39100 }, { "epoch": 0.13, "learning_rate": 4.3510685847666666e-05, "loss": 2.7868, "step": 39200 }, { "epoch": 0.13, "learning_rate": 4.349413147482908e-05, "loss": 2.7939, "step": 39300 }, { "epoch": 0.13, "learning_rate": 4.34775771019915e-05, "loss": 2.792, "step": 39400 }, { "epoch": 0.13, "learning_rate": 4.346102272915391e-05, "loss": 2.796, "step": 39500 }, { "epoch": 0.13, "learning_rate": 4.344446835631633e-05, "loss": 2.8059, "step": 39600 }, { "epoch": 0.13, "learning_rate": 4.342791398347874e-05, "loss": 2.7839, "step": 39700 }, { "epoch": 0.13, "learning_rate": 4.341135961064115e-05, "loss": 2.7855, "step": 39800 }, { "epoch": 0.13, "learning_rate": 4.3394805237803565e-05, "loss": 2.7924, "step": 39900 }, { "epoch": 0.13, "learning_rate": 4.337825086496598e-05, "loss": 2.7899, "step": 40000 }, { "epoch": 0.13, "learning_rate": 4.3361696492128396e-05, "loss": 2.7974, "step": 40100 }, { "epoch": 0.13, "learning_rate": 4.334514211929081e-05, "loss": 2.8016, "step": 40200 }, { "epoch": 0.13, "learning_rate": 4.332858774645323e-05, "loss": 2.7829, "step": 40300 }, { "epoch": 0.13, "learning_rate": 4.331203337361564e-05, "loss": 2.7865, "step": 40400 }, { "epoch": 0.13, "learning_rate": 4.329547900077805e-05, "loss": 2.8061, "step": 40500 }, { "epoch": 0.13, "learning_rate": 4.327892462794047e-05, "loss": 2.7854, "step": 40600 }, { "epoch": 0.13, "learning_rate": 4.3262370255102884e-05, "loss": 2.784, "step": 40700 }, { "epoch": 0.14, "learning_rate": 4.32458158822653e-05, "loss": 2.78, "step": 40800 }, { "epoch": 0.14, "learning_rate": 4.3229261509427715e-05, "loss": 2.8, "step": 40900 }, { "epoch": 0.14, "learning_rate": 4.321270713659013e-05, "loss": 2.7893, "step": 41000 }, { "epoch": 0.14, "learning_rate": 4.3196152763752546e-05, "loss": 2.7955, "step": 41100 }, { "epoch": 0.14, "learning_rate": 4.317959839091496e-05, "loss": 2.7765, "step": 41200 }, { "epoch": 0.14, "learning_rate": 4.316304401807738e-05, "loss": 2.7981, "step": 41300 }, { "epoch": 0.14, "learning_rate": 4.314648964523979e-05, "loss": 2.7777, "step": 41400 }, { "epoch": 0.14, "learning_rate": 4.312993527240221e-05, "loss": 2.7891, "step": 41500 }, { "epoch": 0.14, "learning_rate": 4.311338089956462e-05, "loss": 2.7943, "step": 41600 }, { "epoch": 0.14, "learning_rate": 4.309682652672703e-05, "loss": 2.7886, "step": 41700 }, { "epoch": 0.14, "learning_rate": 4.308027215388945e-05, "loss": 2.7857, "step": 41800 }, { "epoch": 0.14, "learning_rate": 4.3063717781051865e-05, "loss": 2.7895, "step": 41900 }, { "epoch": 0.14, "learning_rate": 4.3047163408214284e-05, "loss": 2.7876, "step": 42000 }, { "epoch": 0.14, "learning_rate": 4.3030609035376696e-05, "loss": 2.777, "step": 42100 }, { "epoch": 0.14, "learning_rate": 4.301405466253911e-05, "loss": 2.7937, "step": 42200 }, { "epoch": 0.14, "learning_rate": 4.299750028970153e-05, "loss": 2.8083, "step": 42300 }, { "epoch": 0.14, "learning_rate": 4.298094591686394e-05, "loss": 2.791, "step": 42400 }, { "epoch": 0.14, "learning_rate": 4.296439154402636e-05, "loss": 2.7965, "step": 42500 }, { "epoch": 0.14, "learning_rate": 4.294783717118877e-05, "loss": 2.7972, "step": 42600 }, { "epoch": 0.14, "learning_rate": 4.293128279835118e-05, "loss": 2.778, "step": 42700 }, { "epoch": 0.14, "learning_rate": 4.29147284255136e-05, "loss": 2.7922, "step": 42800 }, { "epoch": 0.14, "learning_rate": 4.2898174052676014e-05, "loss": 2.7884, "step": 42900 }, { "epoch": 0.14, "learning_rate": 4.288161967983843e-05, "loss": 2.7881, "step": 43000 }, { "epoch": 0.14, "learning_rate": 4.2865065307000846e-05, "loss": 2.7983, "step": 43100 }, { "epoch": 0.14, "learning_rate": 4.284851093416326e-05, "loss": 2.802, "step": 43200 }, { "epoch": 0.14, "learning_rate": 4.283195656132568e-05, "loss": 2.7969, "step": 43300 }, { "epoch": 0.14, "learning_rate": 4.281540218848809e-05, "loss": 2.7853, "step": 43400 }, { "epoch": 0.14, "learning_rate": 4.279884781565051e-05, "loss": 2.7943, "step": 43500 }, { "epoch": 0.14, "learning_rate": 4.278229344281292e-05, "loss": 2.7931, "step": 43600 }, { "epoch": 0.14, "learning_rate": 4.276573906997533e-05, "loss": 2.7914, "step": 43700 }, { "epoch": 0.15, "learning_rate": 4.274918469713775e-05, "loss": 2.7944, "step": 43800 }, { "epoch": 0.15, "learning_rate": 4.2732630324300164e-05, "loss": 2.7865, "step": 43900 }, { "epoch": 0.15, "learning_rate": 4.271607595146258e-05, "loss": 2.7895, "step": 44000 }, { "epoch": 0.15, "learning_rate": 4.2699521578624995e-05, "loss": 2.7958, "step": 44100 }, { "epoch": 0.15, "learning_rate": 4.268296720578741e-05, "loss": 2.7943, "step": 44200 }, { "epoch": 0.15, "learning_rate": 4.2666412832949827e-05, "loss": 2.8, "step": 44300 }, { "epoch": 0.15, "learning_rate": 4.264985846011224e-05, "loss": 2.7964, "step": 44400 }, { "epoch": 0.15, "learning_rate": 4.263330408727466e-05, "loss": 2.7872, "step": 44500 }, { "epoch": 0.15, "learning_rate": 4.261674971443707e-05, "loss": 2.811, "step": 44600 }, { "epoch": 0.15, "learning_rate": 4.260019534159949e-05, "loss": 2.7929, "step": 44700 }, { "epoch": 0.15, "learning_rate": 4.25836409687619e-05, "loss": 2.7927, "step": 44800 }, { "epoch": 0.15, "learning_rate": 4.2567086595924314e-05, "loss": 2.7872, "step": 44900 }, { "epoch": 0.15, "learning_rate": 4.255053222308673e-05, "loss": 2.801, "step": 45000 }, { "epoch": 0.15, "learning_rate": 4.2533977850249145e-05, "loss": 2.8082, "step": 45100 }, { "epoch": 0.15, "learning_rate": 4.2517423477411564e-05, "loss": 2.7926, "step": 45200 }, { "epoch": 0.15, "learning_rate": 4.2500869104573976e-05, "loss": 2.7832, "step": 45300 }, { "epoch": 0.15, "learning_rate": 4.248431473173639e-05, "loss": 2.7927, "step": 45400 }, { "epoch": 0.15, "learning_rate": 4.246776035889881e-05, "loss": 2.8016, "step": 45500 }, { "epoch": 0.15, "learning_rate": 4.245120598606122e-05, "loss": 2.8048, "step": 45600 }, { "epoch": 0.15, "learning_rate": 4.243465161322364e-05, "loss": 2.7821, "step": 45700 }, { "epoch": 0.15, "learning_rate": 4.241809724038605e-05, "loss": 2.7953, "step": 45800 }, { "epoch": 0.15, "learning_rate": 4.2401542867548463e-05, "loss": 2.8229, "step": 45900 }, { "epoch": 0.15, "learning_rate": 4.238498849471088e-05, "loss": 2.7984, "step": 46000 }, { "epoch": 0.15, "learning_rate": 4.2368434121873295e-05, "loss": 2.79, "step": 46100 }, { "epoch": 0.15, "learning_rate": 4.2351879749035714e-05, "loss": 2.7909, "step": 46200 }, { "epoch": 0.15, "learning_rate": 4.2335325376198126e-05, "loss": 2.7921, "step": 46300 }, { "epoch": 0.15, "learning_rate": 4.231877100336054e-05, "loss": 2.7781, "step": 46400 }, { "epoch": 0.15, "learning_rate": 4.230221663052296e-05, "loss": 2.7956, "step": 46500 }, { "epoch": 0.15, "learning_rate": 4.228566225768537e-05, "loss": 2.7797, "step": 46600 }, { "epoch": 0.15, "learning_rate": 4.226910788484779e-05, "loss": 2.7934, "step": 46700 }, { "epoch": 0.15, "learning_rate": 4.22525535120102e-05, "loss": 2.7917, "step": 46800 }, { "epoch": 0.16, "learning_rate": 4.223599913917261e-05, "loss": 2.7984, "step": 46900 }, { "epoch": 0.16, "learning_rate": 4.221944476633503e-05, "loss": 2.7823, "step": 47000 }, { "epoch": 0.16, "learning_rate": 4.2202890393497444e-05, "loss": 2.807, "step": 47100 }, { "epoch": 0.16, "learning_rate": 4.2186336020659863e-05, "loss": 2.7884, "step": 47200 }, { "epoch": 0.16, "learning_rate": 4.2169781647822276e-05, "loss": 2.7894, "step": 47300 }, { "epoch": 0.16, "learning_rate": 4.215322727498469e-05, "loss": 2.7958, "step": 47400 }, { "epoch": 0.16, "learning_rate": 4.213667290214711e-05, "loss": 2.7967, "step": 47500 }, { "epoch": 0.16, "learning_rate": 4.212011852930952e-05, "loss": 2.7945, "step": 47600 }, { "epoch": 0.16, "learning_rate": 4.210356415647194e-05, "loss": 2.7861, "step": 47700 }, { "epoch": 0.16, "learning_rate": 4.208700978363435e-05, "loss": 2.7843, "step": 47800 }, { "epoch": 0.16, "learning_rate": 4.207045541079677e-05, "loss": 2.7805, "step": 47900 }, { "epoch": 0.16, "learning_rate": 4.205390103795918e-05, "loss": 2.7883, "step": 48000 }, { "epoch": 0.16, "learning_rate": 4.2037346665121594e-05, "loss": 2.7909, "step": 48100 }, { "epoch": 0.16, "learning_rate": 4.202079229228401e-05, "loss": 2.7922, "step": 48200 }, { "epoch": 0.16, "learning_rate": 4.2004237919446425e-05, "loss": 2.7953, "step": 48300 }, { "epoch": 0.16, "learning_rate": 4.1987683546608844e-05, "loss": 2.7883, "step": 48400 }, { "epoch": 0.16, "learning_rate": 4.197112917377126e-05, "loss": 2.7902, "step": 48500 }, { "epoch": 0.16, "learning_rate": 4.195457480093367e-05, "loss": 2.786, "step": 48600 }, { "epoch": 0.16, "learning_rate": 4.193802042809608e-05, "loss": 2.787, "step": 48700 }, { "epoch": 0.16, "learning_rate": 4.1921466055258494e-05, "loss": 2.7839, "step": 48800 }, { "epoch": 0.16, "learning_rate": 4.190491168242091e-05, "loss": 2.7884, "step": 48900 }, { "epoch": 0.16, "learning_rate": 4.1888357309583325e-05, "loss": 2.7879, "step": 49000 }, { "epoch": 0.16, "learning_rate": 4.1871802936745744e-05, "loss": 2.786, "step": 49100 }, { "epoch": 0.16, "learning_rate": 4.1855248563908156e-05, "loss": 2.7887, "step": 49200 }, { "epoch": 0.16, "learning_rate": 4.183869419107057e-05, "loss": 2.7802, "step": 49300 }, { "epoch": 0.16, "learning_rate": 4.182213981823299e-05, "loss": 2.7641, "step": 49400 }, { "epoch": 0.16, "learning_rate": 4.18055854453954e-05, "loss": 2.7996, "step": 49500 }, { "epoch": 0.16, "learning_rate": 4.178903107255782e-05, "loss": 2.7944, "step": 49600 }, { "epoch": 0.16, "learning_rate": 4.177247669972023e-05, "loss": 2.7959, "step": 49700 }, { "epoch": 0.16, "learning_rate": 4.175592232688264e-05, "loss": 2.7961, "step": 49800 }, { "epoch": 0.17, "learning_rate": 4.173936795404506e-05, "loss": 2.793, "step": 49900 }, { "epoch": 0.17, "learning_rate": 4.1722813581207475e-05, "loss": 2.8039, "step": 50000 }, { "epoch": 0.17, "learning_rate": 4.1706259208369894e-05, "loss": 2.7899, "step": 50100 }, { "epoch": 0.17, "learning_rate": 4.1689704835532306e-05, "loss": 2.7861, "step": 50200 }, { "epoch": 0.17, "learning_rate": 4.167315046269472e-05, "loss": 2.7801, "step": 50300 }, { "epoch": 0.17, "learning_rate": 4.165659608985714e-05, "loss": 2.784, "step": 50400 }, { "epoch": 0.17, "learning_rate": 4.164004171701955e-05, "loss": 2.792, "step": 50500 }, { "epoch": 0.17, "learning_rate": 4.162348734418197e-05, "loss": 2.796, "step": 50600 }, { "epoch": 0.17, "learning_rate": 4.160693297134438e-05, "loss": 2.8011, "step": 50700 }, { "epoch": 0.17, "learning_rate": 4.159037859850679e-05, "loss": 2.7767, "step": 50800 }, { "epoch": 0.17, "learning_rate": 4.157382422566921e-05, "loss": 2.7933, "step": 50900 }, { "epoch": 0.17, "learning_rate": 4.1557269852831624e-05, "loss": 2.7884, "step": 51000 }, { "epoch": 0.17, "learning_rate": 4.154071547999404e-05, "loss": 2.7817, "step": 51100 }, { "epoch": 0.17, "learning_rate": 4.1524161107156456e-05, "loss": 2.7961, "step": 51200 }, { "epoch": 0.17, "learning_rate": 4.1507606734318875e-05, "loss": 2.7861, "step": 51300 }, { "epoch": 0.17, "learning_rate": 4.149105236148129e-05, "loss": 2.7916, "step": 51400 }, { "epoch": 0.17, "learning_rate": 4.14744979886437e-05, "loss": 2.7818, "step": 51500 }, { "epoch": 0.17, "learning_rate": 4.145794361580612e-05, "loss": 2.7878, "step": 51600 }, { "epoch": 0.17, "learning_rate": 4.144138924296853e-05, "loss": 2.776, "step": 51700 }, { "epoch": 0.17, "learning_rate": 4.142483487013095e-05, "loss": 2.7734, "step": 51800 }, { "epoch": 0.17, "learning_rate": 4.140828049729336e-05, "loss": 2.7842, "step": 51900 }, { "epoch": 0.17, "learning_rate": 4.1391726124455774e-05, "loss": 2.7845, "step": 52000 }, { "epoch": 0.17, "learning_rate": 4.137517175161819e-05, "loss": 2.7828, "step": 52100 }, { "epoch": 0.17, "learning_rate": 4.1358617378780605e-05, "loss": 2.7961, "step": 52200 }, { "epoch": 0.17, "learning_rate": 4.1342063005943024e-05, "loss": 2.7807, "step": 52300 }, { "epoch": 0.17, "learning_rate": 4.1325508633105437e-05, "loss": 2.7985, "step": 52400 }, { "epoch": 0.17, "learning_rate": 4.130895426026785e-05, "loss": 2.7775, "step": 52500 }, { "epoch": 0.17, "learning_rate": 4.129239988743027e-05, "loss": 2.7883, "step": 52600 }, { "epoch": 0.17, "learning_rate": 4.127584551459268e-05, "loss": 2.784, "step": 52700 }, { "epoch": 0.17, "learning_rate": 4.12592911417551e-05, "loss": 2.7895, "step": 52800 }, { "epoch": 0.18, "learning_rate": 4.124273676891751e-05, "loss": 2.7774, "step": 52900 }, { "epoch": 0.18, "learning_rate": 4.1226182396079924e-05, "loss": 2.8022, "step": 53000 }, { "epoch": 0.18, "learning_rate": 4.120962802324234e-05, "loss": 2.7842, "step": 53100 }, { "epoch": 0.18, "learning_rate": 4.1193073650404755e-05, "loss": 2.7796, "step": 53200 }, { "epoch": 0.18, "learning_rate": 4.1176519277567174e-05, "loss": 2.7882, "step": 53300 }, { "epoch": 0.18, "learning_rate": 4.1159964904729586e-05, "loss": 2.7793, "step": 53400 }, { "epoch": 0.18, "learning_rate": 4.1143410531892e-05, "loss": 2.7917, "step": 53500 }, { "epoch": 0.18, "learning_rate": 4.112685615905442e-05, "loss": 2.7956, "step": 53600 }, { "epoch": 0.18, "learning_rate": 4.111030178621683e-05, "loss": 2.7784, "step": 53700 }, { "epoch": 0.18, "learning_rate": 4.109374741337925e-05, "loss": 2.7952, "step": 53800 }, { "epoch": 0.18, "learning_rate": 4.107719304054166e-05, "loss": 2.776, "step": 53900 }, { "epoch": 0.18, "learning_rate": 4.1060638667704073e-05, "loss": 2.7925, "step": 54000 }, { "epoch": 0.18, "learning_rate": 4.104408429486649e-05, "loss": 2.7847, "step": 54100 }, { "epoch": 0.18, "learning_rate": 4.1027529922028905e-05, "loss": 2.7895, "step": 54200 }, { "epoch": 0.18, "learning_rate": 4.1010975549191324e-05, "loss": 2.7828, "step": 54300 }, { "epoch": 0.18, "learning_rate": 4.0994421176353736e-05, "loss": 2.7868, "step": 54400 }, { "epoch": 0.18, "learning_rate": 4.0977866803516155e-05, "loss": 2.7786, "step": 54500 }, { "epoch": 0.18, "learning_rate": 4.096131243067857e-05, "loss": 2.7958, "step": 54600 }, { "epoch": 0.18, "learning_rate": 4.094475805784098e-05, "loss": 2.7852, "step": 54700 }, { "epoch": 0.18, "learning_rate": 4.09282036850034e-05, "loss": 2.7888, "step": 54800 }, { "epoch": 0.18, "learning_rate": 4.091164931216581e-05, "loss": 2.7915, "step": 54900 }, { "epoch": 0.18, "learning_rate": 4.089509493932823e-05, "loss": 2.7806, "step": 55000 }, { "epoch": 0.18, "learning_rate": 4.087854056649064e-05, "loss": 2.7953, "step": 55100 }, { "epoch": 0.18, "learning_rate": 4.0861986193653054e-05, "loss": 2.7831, "step": 55200 }, { "epoch": 0.18, "learning_rate": 4.0845431820815473e-05, "loss": 2.7888, "step": 55300 }, { "epoch": 0.18, "learning_rate": 4.0828877447977886e-05, "loss": 2.789, "step": 55400 }, { "epoch": 0.18, "learning_rate": 4.0812323075140305e-05, "loss": 2.7955, "step": 55500 }, { "epoch": 0.18, "learning_rate": 4.079576870230272e-05, "loss": 2.7809, "step": 55600 }, { "epoch": 0.18, "learning_rate": 4.077921432946513e-05, "loss": 2.7905, "step": 55700 }, { "epoch": 0.18, "learning_rate": 4.076265995662755e-05, "loss": 2.7944, "step": 55800 }, { "epoch": 0.19, "learning_rate": 4.074610558378996e-05, "loss": 2.7847, "step": 55900 }, { "epoch": 0.19, "learning_rate": 4.072955121095238e-05, "loss": 2.786, "step": 56000 }, { "epoch": 0.19, "learning_rate": 4.071299683811479e-05, "loss": 2.7725, "step": 56100 }, { "epoch": 0.19, "learning_rate": 4.0696442465277204e-05, "loss": 2.7878, "step": 56200 }, { "epoch": 0.19, "learning_rate": 4.067988809243962e-05, "loss": 2.7796, "step": 56300 }, { "epoch": 0.19, "learning_rate": 4.0663333719602035e-05, "loss": 2.7756, "step": 56400 }, { "epoch": 0.19, "learning_rate": 4.0646779346764454e-05, "loss": 2.7907, "step": 56500 }, { "epoch": 0.19, "learning_rate": 4.063022497392687e-05, "loss": 2.7865, "step": 56600 }, { "epoch": 0.19, "learning_rate": 4.061367060108928e-05, "loss": 2.7901, "step": 56700 }, { "epoch": 0.19, "learning_rate": 4.05971162282517e-05, "loss": 2.8029, "step": 56800 }, { "epoch": 0.19, "learning_rate": 4.058056185541411e-05, "loss": 2.7949, "step": 56900 }, { "epoch": 0.19, "learning_rate": 4.056400748257653e-05, "loss": 2.7748, "step": 57000 }, { "epoch": 0.19, "learning_rate": 4.054745310973894e-05, "loss": 2.8028, "step": 57100 }, { "epoch": 0.19, "learning_rate": 4.0530898736901354e-05, "loss": 2.7769, "step": 57200 }, { "epoch": 0.19, "learning_rate": 4.051434436406377e-05, "loss": 2.7719, "step": 57300 }, { "epoch": 0.19, "learning_rate": 4.0497789991226185e-05, "loss": 2.7808, "step": 57400 }, { "epoch": 0.19, "learning_rate": 4.04812356183886e-05, "loss": 2.7854, "step": 57500 }, { "epoch": 0.19, "learning_rate": 4.046468124555101e-05, "loss": 2.7802, "step": 57600 }, { "epoch": 0.19, "learning_rate": 4.044812687271343e-05, "loss": 2.7759, "step": 57700 }, { "epoch": 0.19, "learning_rate": 4.043157249987584e-05, "loss": 2.7911, "step": 57800 }, { "epoch": 0.19, "learning_rate": 4.041501812703826e-05, "loss": 2.7907, "step": 57900 }, { "epoch": 0.19, "learning_rate": 4.039846375420067e-05, "loss": 2.8072, "step": 58000 }, { "epoch": 0.19, "learning_rate": 4.0381909381363085e-05, "loss": 2.7954, "step": 58100 }, { "epoch": 0.19, "learning_rate": 4.0365355008525504e-05, "loss": 2.7866, "step": 58200 }, { "epoch": 0.19, "learning_rate": 4.0348800635687916e-05, "loss": 2.7845, "step": 58300 }, { "epoch": 0.19, "learning_rate": 4.0332246262850335e-05, "loss": 2.8085, "step": 58400 }, { "epoch": 0.19, "learning_rate": 4.031569189001275e-05, "loss": 2.7801, "step": 58500 }, { "epoch": 0.19, "learning_rate": 4.029913751717516e-05, "loss": 2.7795, "step": 58600 }, { "epoch": 0.19, "learning_rate": 4.028258314433758e-05, "loss": 2.7817, "step": 58700 }, { "epoch": 0.19, "learning_rate": 4.026602877149999e-05, "loss": 2.7872, "step": 58800 }, { "epoch": 0.2, "learning_rate": 4.024947439866241e-05, "loss": 2.7774, "step": 58900 }, { "epoch": 0.2, "learning_rate": 4.023292002582482e-05, "loss": 2.7946, "step": 59000 }, { "epoch": 0.2, "learning_rate": 4.0216365652987234e-05, "loss": 2.7873, "step": 59100 }, { "epoch": 0.2, "learning_rate": 4.019981128014965e-05, "loss": 2.7757, "step": 59200 }, { "epoch": 0.2, "learning_rate": 4.0183256907312066e-05, "loss": 2.7628, "step": 59300 }, { "epoch": 0.2, "learning_rate": 4.0166702534474485e-05, "loss": 2.7944, "step": 59400 }, { "epoch": 0.2, "learning_rate": 4.01501481616369e-05, "loss": 2.7818, "step": 59500 }, { "epoch": 0.2, "learning_rate": 4.013359378879931e-05, "loss": 2.7796, "step": 59600 }, { "epoch": 0.2, "learning_rate": 4.011703941596173e-05, "loss": 2.7803, "step": 59700 }, { "epoch": 0.2, "learning_rate": 4.010048504312414e-05, "loss": 2.779, "step": 59800 }, { "epoch": 0.2, "learning_rate": 4.008393067028656e-05, "loss": 2.7898, "step": 59900 }, { "epoch": 0.2, "learning_rate": 4.006737629744897e-05, "loss": 2.7854, "step": 60000 }, { "epoch": 0.2, "learning_rate": 4.0050821924611384e-05, "loss": 2.7845, "step": 60100 }, { "epoch": 0.2, "learning_rate": 4.00342675517738e-05, "loss": 2.7952, "step": 60200 }, { "epoch": 0.2, "learning_rate": 4.0017713178936215e-05, "loss": 2.7945, "step": 60300 }, { "epoch": 0.2, "learning_rate": 4.0001158806098634e-05, "loss": 2.7772, "step": 60400 }, { "epoch": 0.2, "learning_rate": 3.9984604433261047e-05, "loss": 2.7836, "step": 60500 }, { "epoch": 0.2, "learning_rate": 3.996805006042346e-05, "loss": 2.7879, "step": 60600 }, { "epoch": 0.2, "learning_rate": 3.995149568758588e-05, "loss": 2.78, "step": 60700 }, { "epoch": 0.2, "learning_rate": 3.993494131474829e-05, "loss": 2.7872, "step": 60800 }, { "epoch": 0.2, "learning_rate": 3.991838694191071e-05, "loss": 2.7965, "step": 60900 }, { "epoch": 0.2, "learning_rate": 3.990183256907312e-05, "loss": 2.785, "step": 61000 }, { "epoch": 0.2, "learning_rate": 3.988527819623554e-05, "loss": 2.7935, "step": 61100 }, { "epoch": 0.2, "learning_rate": 3.986872382339795e-05, "loss": 2.7897, "step": 61200 }, { "epoch": 0.2, "learning_rate": 3.9852169450560365e-05, "loss": 2.7843, "step": 61300 }, { "epoch": 0.2, "learning_rate": 3.9835615077722784e-05, "loss": 2.7941, "step": 61400 }, { "epoch": 0.2, "learning_rate": 3.9819060704885196e-05, "loss": 2.7945, "step": 61500 }, { "epoch": 0.2, "learning_rate": 3.9802506332047615e-05, "loss": 2.7803, "step": 61600 }, { "epoch": 0.2, "learning_rate": 3.978595195921003e-05, "loss": 2.7973, "step": 61700 }, { "epoch": 0.2, "learning_rate": 3.976939758637244e-05, "loss": 2.7816, "step": 61800 }, { "epoch": 0.2, "learning_rate": 3.975284321353486e-05, "loss": 2.7564, "step": 61900 }, { "epoch": 0.21, "learning_rate": 3.973628884069727e-05, "loss": 2.7831, "step": 62000 }, { "epoch": 0.21, "learning_rate": 3.971973446785969e-05, "loss": 2.7849, "step": 62100 }, { "epoch": 0.21, "learning_rate": 3.97031800950221e-05, "loss": 2.7755, "step": 62200 }, { "epoch": 0.21, "learning_rate": 3.9686625722184515e-05, "loss": 2.7843, "step": 62300 }, { "epoch": 0.21, "learning_rate": 3.9670071349346934e-05, "loss": 2.7807, "step": 62400 }, { "epoch": 0.21, "learning_rate": 3.9653516976509346e-05, "loss": 2.7897, "step": 62500 }, { "epoch": 0.21, "learning_rate": 3.9636962603671765e-05, "loss": 2.7871, "step": 62600 }, { "epoch": 0.21, "learning_rate": 3.962040823083418e-05, "loss": 2.7933, "step": 62700 }, { "epoch": 0.21, "learning_rate": 3.960385385799659e-05, "loss": 2.794, "step": 62800 }, { "epoch": 0.21, "learning_rate": 3.958729948515901e-05, "loss": 2.7691, "step": 62900 }, { "epoch": 0.21, "learning_rate": 3.957074511232142e-05, "loss": 2.7842, "step": 63000 }, { "epoch": 0.21, "learning_rate": 3.955419073948384e-05, "loss": 2.792, "step": 63100 }, { "epoch": 0.21, "learning_rate": 3.953763636664625e-05, "loss": 2.7882, "step": 63200 }, { "epoch": 0.21, "learning_rate": 3.9521081993808664e-05, "loss": 2.7942, "step": 63300 }, { "epoch": 0.21, "learning_rate": 3.9504527620971083e-05, "loss": 2.7781, "step": 63400 }, { "epoch": 0.21, "learning_rate": 3.9487973248133496e-05, "loss": 2.7821, "step": 63500 }, { "epoch": 0.21, "learning_rate": 3.9471418875295915e-05, "loss": 2.7771, "step": 63600 }, { "epoch": 0.21, "learning_rate": 3.945486450245833e-05, "loss": 2.7866, "step": 63700 }, { "epoch": 0.21, "learning_rate": 3.943831012962074e-05, "loss": 2.769, "step": 63800 }, { "epoch": 0.21, "learning_rate": 3.942175575678316e-05, "loss": 2.7881, "step": 63900 }, { "epoch": 0.21, "learning_rate": 3.940520138394557e-05, "loss": 2.8001, "step": 64000 }, { "epoch": 0.21, "learning_rate": 3.938864701110799e-05, "loss": 2.8044, "step": 64100 }, { "epoch": 0.21, "learning_rate": 3.93720926382704e-05, "loss": 2.7891, "step": 64200 }, { "epoch": 0.21, "learning_rate": 3.935553826543282e-05, "loss": 2.7721, "step": 64300 }, { "epoch": 0.21, "learning_rate": 3.933898389259523e-05, "loss": 2.7929, "step": 64400 }, { "epoch": 0.21, "learning_rate": 3.9322429519757645e-05, "loss": 2.7804, "step": 64500 }, { "epoch": 0.21, "learning_rate": 3.9305875146920064e-05, "loss": 2.7829, "step": 64600 }, { "epoch": 0.21, "learning_rate": 3.928932077408248e-05, "loss": 2.7849, "step": 64700 }, { "epoch": 0.21, "learning_rate": 3.9272766401244896e-05, "loss": 2.777, "step": 64800 }, { "epoch": 0.21, "learning_rate": 3.925621202840731e-05, "loss": 2.7913, "step": 64900 }, { "epoch": 0.22, "learning_rate": 3.923965765556972e-05, "loss": 2.7782, "step": 65000 }, { "epoch": 0.22, "learning_rate": 3.922310328273214e-05, "loss": 2.7918, "step": 65100 }, { "epoch": 0.22, "learning_rate": 3.920654890989455e-05, "loss": 2.8006, "step": 65200 }, { "epoch": 0.22, "learning_rate": 3.918999453705697e-05, "loss": 2.7949, "step": 65300 }, { "epoch": 0.22, "learning_rate": 3.917344016421938e-05, "loss": 2.7864, "step": 65400 }, { "epoch": 0.22, "learning_rate": 3.9156885791381795e-05, "loss": 2.788, "step": 65500 }, { "epoch": 0.22, "learning_rate": 3.9140331418544214e-05, "loss": 2.7924, "step": 65600 }, { "epoch": 0.22, "learning_rate": 3.9123777045706626e-05, "loss": 2.788, "step": 65700 }, { "epoch": 0.22, "learning_rate": 3.9107222672869045e-05, "loss": 2.7858, "step": 65800 }, { "epoch": 0.22, "learning_rate": 3.909066830003146e-05, "loss": 2.7777, "step": 65900 }, { "epoch": 0.22, "learning_rate": 3.907411392719387e-05, "loss": 2.7732, "step": 66000 }, { "epoch": 0.22, "learning_rate": 3.905755955435629e-05, "loss": 2.7837, "step": 66100 }, { "epoch": 0.22, "learning_rate": 3.90410051815187e-05, "loss": 2.7643, "step": 66200 }, { "epoch": 0.22, "learning_rate": 3.9024450808681114e-05, "loss": 2.7805, "step": 66300 }, { "epoch": 0.22, "learning_rate": 3.9007896435843526e-05, "loss": 2.7761, "step": 66400 }, { "epoch": 0.22, "learning_rate": 3.8991342063005945e-05, "loss": 2.7846, "step": 66500 }, { "epoch": 0.22, "learning_rate": 3.897478769016836e-05, "loss": 2.785, "step": 66600 }, { "epoch": 0.22, "learning_rate": 3.895823331733077e-05, "loss": 2.7691, "step": 66700 }, { "epoch": 0.22, "learning_rate": 3.894167894449319e-05, "loss": 2.7877, "step": 66800 }, { "epoch": 0.22, "learning_rate": 3.89251245716556e-05, "loss": 2.7766, "step": 66900 }, { "epoch": 0.22, "learning_rate": 3.890857019881802e-05, "loss": 2.788, "step": 67000 }, { "epoch": 0.22, "learning_rate": 3.889201582598043e-05, "loss": 2.7786, "step": 67100 }, { "epoch": 0.22, "learning_rate": 3.8875461453142844e-05, "loss": 2.7715, "step": 67200 }, { "epoch": 0.22, "learning_rate": 3.885890708030526e-05, "loss": 2.7873, "step": 67300 }, { "epoch": 0.22, "learning_rate": 3.8842352707467675e-05, "loss": 2.7801, "step": 67400 }, { "epoch": 0.22, "learning_rate": 3.8825798334630095e-05, "loss": 2.7794, "step": 67500 }, { "epoch": 0.22, "learning_rate": 3.880924396179251e-05, "loss": 2.7857, "step": 67600 }, { "epoch": 0.22, "learning_rate": 3.879268958895492e-05, "loss": 2.8005, "step": 67700 }, { "epoch": 0.22, "learning_rate": 3.877613521611734e-05, "loss": 2.7799, "step": 67800 }, { "epoch": 0.22, "learning_rate": 3.875958084327975e-05, "loss": 2.7819, "step": 67900 }, { "epoch": 0.23, "learning_rate": 3.874302647044217e-05, "loss": 2.7755, "step": 68000 }, { "epoch": 0.23, "learning_rate": 3.872647209760458e-05, "loss": 2.7761, "step": 68100 }, { "epoch": 0.23, "learning_rate": 3.8709917724767e-05, "loss": 2.7881, "step": 68200 }, { "epoch": 0.23, "learning_rate": 3.869336335192941e-05, "loss": 2.7799, "step": 68300 }, { "epoch": 0.23, "learning_rate": 3.8676808979091825e-05, "loss": 2.7933, "step": 68400 }, { "epoch": 0.23, "learning_rate": 3.8660254606254244e-05, "loss": 2.7883, "step": 68500 }, { "epoch": 0.23, "learning_rate": 3.8643700233416657e-05, "loss": 2.7815, "step": 68600 }, { "epoch": 0.23, "learning_rate": 3.8627145860579076e-05, "loss": 2.7869, "step": 68700 }, { "epoch": 0.23, "learning_rate": 3.861059148774149e-05, "loss": 2.7932, "step": 68800 }, { "epoch": 0.23, "learning_rate": 3.85940371149039e-05, "loss": 2.7952, "step": 68900 }, { "epoch": 0.23, "learning_rate": 3.857748274206632e-05, "loss": 2.7999, "step": 69000 }, { "epoch": 0.23, "learning_rate": 3.856092836922873e-05, "loss": 2.7995, "step": 69100 }, { "epoch": 0.23, "learning_rate": 3.854437399639115e-05, "loss": 2.7774, "step": 69200 }, { "epoch": 0.23, "learning_rate": 3.852781962355356e-05, "loss": 2.7667, "step": 69300 }, { "epoch": 0.23, "learning_rate": 3.8511265250715975e-05, "loss": 2.7726, "step": 69400 }, { "epoch": 0.23, "learning_rate": 3.8494710877878394e-05, "loss": 2.7894, "step": 69500 }, { "epoch": 0.23, "learning_rate": 3.8478156505040806e-05, "loss": 2.7695, "step": 69600 }, { "epoch": 0.23, "learning_rate": 3.8461602132203225e-05, "loss": 2.7866, "step": 69700 }, { "epoch": 0.23, "learning_rate": 3.844504775936564e-05, "loss": 2.7916, "step": 69800 }, { "epoch": 0.23, "learning_rate": 3.842849338652805e-05, "loss": 2.7906, "step": 69900 }, { "epoch": 0.23, "learning_rate": 3.841193901369047e-05, "loss": 2.7799, "step": 70000 }, { "epoch": 0.23, "learning_rate": 3.839538464085288e-05, "loss": 2.7896, "step": 70100 }, { "epoch": 0.23, "learning_rate": 3.83788302680153e-05, "loss": 2.7952, "step": 70200 }, { "epoch": 0.23, "learning_rate": 3.836227589517771e-05, "loss": 2.7928, "step": 70300 }, { "epoch": 0.23, "learning_rate": 3.8345721522340125e-05, "loss": 2.7826, "step": 70400 }, { "epoch": 0.23, "learning_rate": 3.8329167149502544e-05, "loss": 2.7933, "step": 70500 }, { "epoch": 0.23, "learning_rate": 3.8312612776664956e-05, "loss": 2.786, "step": 70600 }, { "epoch": 0.23, "learning_rate": 3.8296058403827375e-05, "loss": 2.7757, "step": 70700 }, { "epoch": 0.23, "learning_rate": 3.827950403098979e-05, "loss": 2.769, "step": 70800 }, { "epoch": 0.23, "learning_rate": 3.8262949658152206e-05, "loss": 2.7921, "step": 70900 }, { "epoch": 0.24, "learning_rate": 3.824639528531462e-05, "loss": 2.7699, "step": 71000 }, { "epoch": 0.24, "learning_rate": 3.822984091247703e-05, "loss": 2.7813, "step": 71100 }, { "epoch": 0.24, "learning_rate": 3.821328653963945e-05, "loss": 2.7869, "step": 71200 }, { "epoch": 0.24, "learning_rate": 3.819673216680186e-05, "loss": 2.7738, "step": 71300 }, { "epoch": 0.24, "learning_rate": 3.818017779396428e-05, "loss": 2.7932, "step": 71400 }, { "epoch": 0.24, "learning_rate": 3.8163623421126693e-05, "loss": 2.7797, "step": 71500 }, { "epoch": 0.24, "learning_rate": 3.8147069048289106e-05, "loss": 2.8008, "step": 71600 }, { "epoch": 0.24, "learning_rate": 3.8130514675451525e-05, "loss": 2.7844, "step": 71700 }, { "epoch": 0.24, "learning_rate": 3.811396030261394e-05, "loss": 2.7728, "step": 71800 }, { "epoch": 0.24, "learning_rate": 3.8097405929776356e-05, "loss": 2.7752, "step": 71900 }, { "epoch": 0.24, "learning_rate": 3.808085155693877e-05, "loss": 2.7797, "step": 72000 }, { "epoch": 0.24, "learning_rate": 3.806429718410118e-05, "loss": 2.7863, "step": 72100 }, { "epoch": 0.24, "learning_rate": 3.80477428112636e-05, "loss": 2.7871, "step": 72200 }, { "epoch": 0.24, "learning_rate": 3.803118843842601e-05, "loss": 2.7935, "step": 72300 }, { "epoch": 0.24, "learning_rate": 3.801463406558843e-05, "loss": 2.7985, "step": 72400 }, { "epoch": 0.24, "learning_rate": 3.799807969275084e-05, "loss": 2.7752, "step": 72500 }, { "epoch": 0.24, "learning_rate": 3.7981525319913255e-05, "loss": 2.7957, "step": 72600 }, { "epoch": 0.24, "learning_rate": 3.7964970947075674e-05, "loss": 2.7873, "step": 72700 }, { "epoch": 0.24, "learning_rate": 3.794841657423809e-05, "loss": 2.7805, "step": 72800 }, { "epoch": 0.24, "learning_rate": 3.7931862201400506e-05, "loss": 2.785, "step": 72900 }, { "epoch": 0.24, "learning_rate": 3.791530782856292e-05, "loss": 2.7834, "step": 73000 }, { "epoch": 0.24, "learning_rate": 3.789875345572533e-05, "loss": 2.7753, "step": 73100 }, { "epoch": 0.24, "learning_rate": 3.788219908288775e-05, "loss": 2.7878, "step": 73200 }, { "epoch": 0.24, "learning_rate": 3.786564471005016e-05, "loss": 2.779, "step": 73300 }, { "epoch": 0.24, "learning_rate": 3.784909033721258e-05, "loss": 2.7965, "step": 73400 }, { "epoch": 0.24, "learning_rate": 3.783253596437499e-05, "loss": 2.778, "step": 73500 }, { "epoch": 0.24, "learning_rate": 3.7815981591537405e-05, "loss": 2.7577, "step": 73600 }, { "epoch": 0.24, "learning_rate": 3.7799427218699824e-05, "loss": 2.785, "step": 73700 }, { "epoch": 0.24, "learning_rate": 3.7782872845862236e-05, "loss": 2.7708, "step": 73800 }, { "epoch": 0.24, "learning_rate": 3.7766318473024655e-05, "loss": 2.7798, "step": 73900 }, { "epoch": 0.25, "learning_rate": 3.774976410018707e-05, "loss": 2.7852, "step": 74000 }, { "epoch": 0.25, "learning_rate": 3.773320972734949e-05, "loss": 2.7719, "step": 74100 }, { "epoch": 0.25, "learning_rate": 3.77166553545119e-05, "loss": 2.7878, "step": 74200 }, { "epoch": 0.25, "learning_rate": 3.770010098167431e-05, "loss": 2.7726, "step": 74300 }, { "epoch": 0.25, "learning_rate": 3.768354660883673e-05, "loss": 2.7892, "step": 74400 }, { "epoch": 0.25, "learning_rate": 3.766699223599914e-05, "loss": 2.7746, "step": 74500 }, { "epoch": 0.25, "learning_rate": 3.765043786316156e-05, "loss": 2.7842, "step": 74600 }, { "epoch": 0.25, "learning_rate": 3.7633883490323974e-05, "loss": 2.7798, "step": 74700 }, { "epoch": 0.25, "learning_rate": 3.7617329117486386e-05, "loss": 2.7729, "step": 74800 }, { "epoch": 0.25, "learning_rate": 3.7600774744648805e-05, "loss": 2.7837, "step": 74900 }, { "epoch": 0.25, "learning_rate": 3.758422037181122e-05, "loss": 2.7797, "step": 75000 }, { "epoch": 0.25, "learning_rate": 3.756766599897363e-05, "loss": 2.7864, "step": 75100 }, { "epoch": 0.25, "learning_rate": 3.755111162613604e-05, "loss": 2.7711, "step": 75200 }, { "epoch": 0.25, "learning_rate": 3.753455725329846e-05, "loss": 2.7977, "step": 75300 }, { "epoch": 0.25, "learning_rate": 3.751800288046087e-05, "loss": 2.7568, "step": 75400 }, { "epoch": 0.25, "learning_rate": 3.7501448507623285e-05, "loss": 2.7896, "step": 75500 }, { "epoch": 0.25, "learning_rate": 3.7484894134785705e-05, "loss": 2.7903, "step": 75600 }, { "epoch": 0.25, "learning_rate": 3.746833976194812e-05, "loss": 2.7901, "step": 75700 }, { "epoch": 0.25, "learning_rate": 3.7451785389110536e-05, "loss": 2.8011, "step": 75800 }, { "epoch": 0.25, "learning_rate": 3.743523101627295e-05, "loss": 2.7866, "step": 75900 }, { "epoch": 0.25, "learning_rate": 3.741867664343536e-05, "loss": 2.7612, "step": 76000 }, { "epoch": 0.25, "learning_rate": 3.740212227059778e-05, "loss": 2.7826, "step": 76100 }, { "epoch": 0.25, "learning_rate": 3.738556789776019e-05, "loss": 2.7816, "step": 76200 }, { "epoch": 0.25, "learning_rate": 3.736901352492261e-05, "loss": 2.7721, "step": 76300 }, { "epoch": 0.25, "learning_rate": 3.735245915208502e-05, "loss": 2.7768, "step": 76400 }, { "epoch": 0.25, "learning_rate": 3.7335904779247435e-05, "loss": 2.7814, "step": 76500 }, { "epoch": 0.25, "learning_rate": 3.7319350406409854e-05, "loss": 2.8019, "step": 76600 }, { "epoch": 0.25, "learning_rate": 3.7302796033572266e-05, "loss": 2.7722, "step": 76700 }, { "epoch": 0.25, "learning_rate": 3.7286241660734686e-05, "loss": 2.774, "step": 76800 }, { "epoch": 0.25, "learning_rate": 3.72696872878971e-05, "loss": 2.7802, "step": 76900 }, { "epoch": 0.25, "learning_rate": 3.725313291505951e-05, "loss": 2.7649, "step": 77000 }, { "epoch": 0.26, "learning_rate": 3.723657854222193e-05, "loss": 2.7955, "step": 77100 }, { "epoch": 0.26, "learning_rate": 3.722002416938434e-05, "loss": 2.7826, "step": 77200 }, { "epoch": 0.26, "learning_rate": 3.720346979654676e-05, "loss": 2.7824, "step": 77300 }, { "epoch": 0.26, "learning_rate": 3.718691542370917e-05, "loss": 2.786, "step": 77400 }, { "epoch": 0.26, "learning_rate": 3.7170361050871585e-05, "loss": 2.7742, "step": 77500 }, { "epoch": 0.26, "learning_rate": 3.7153806678034004e-05, "loss": 2.7978, "step": 77600 }, { "epoch": 0.26, "learning_rate": 3.7137252305196416e-05, "loss": 2.7786, "step": 77700 }, { "epoch": 0.26, "learning_rate": 3.7120697932358835e-05, "loss": 2.7851, "step": 77800 }, { "epoch": 0.26, "learning_rate": 3.710414355952125e-05, "loss": 2.7843, "step": 77900 }, { "epoch": 0.26, "learning_rate": 3.7087589186683667e-05, "loss": 2.7825, "step": 78000 }, { "epoch": 0.26, "learning_rate": 3.707103481384608e-05, "loss": 2.7867, "step": 78100 }, { "epoch": 0.26, "learning_rate": 3.705448044100849e-05, "loss": 2.7857, "step": 78200 }, { "epoch": 0.26, "learning_rate": 3.703792606817091e-05, "loss": 2.7991, "step": 78300 }, { "epoch": 0.26, "learning_rate": 3.702137169533332e-05, "loss": 2.7756, "step": 78400 }, { "epoch": 0.26, "learning_rate": 3.700481732249574e-05, "loss": 2.7812, "step": 78500 }, { "epoch": 0.26, "learning_rate": 3.6988262949658154e-05, "loss": 2.7876, "step": 78600 }, { "epoch": 0.26, "learning_rate": 3.6971708576820566e-05, "loss": 2.7841, "step": 78700 }, { "epoch": 0.26, "learning_rate": 3.6955154203982985e-05, "loss": 2.784, "step": 78800 }, { "epoch": 0.26, "learning_rate": 3.69385998311454e-05, "loss": 2.7897, "step": 78900 }, { "epoch": 0.26, "learning_rate": 3.6922045458307816e-05, "loss": 2.7655, "step": 79000 }, { "epoch": 0.26, "learning_rate": 3.690549108547023e-05, "loss": 2.78, "step": 79100 }, { "epoch": 0.26, "learning_rate": 3.688893671263264e-05, "loss": 2.7796, "step": 79200 }, { "epoch": 0.26, "learning_rate": 3.687238233979506e-05, "loss": 2.7784, "step": 79300 }, { "epoch": 0.26, "learning_rate": 3.685582796695747e-05, "loss": 2.7825, "step": 79400 }, { "epoch": 0.26, "learning_rate": 3.683927359411989e-05, "loss": 2.7776, "step": 79500 }, { "epoch": 0.26, "learning_rate": 3.68227192212823e-05, "loss": 2.7916, "step": 79600 }, { "epoch": 0.26, "learning_rate": 3.6806164848444716e-05, "loss": 2.7847, "step": 79700 }, { "epoch": 0.26, "learning_rate": 3.6789610475607135e-05, "loss": 2.7812, "step": 79800 }, { "epoch": 0.26, "learning_rate": 3.677305610276955e-05, "loss": 2.7757, "step": 79900 }, { "epoch": 0.26, "learning_rate": 3.6756501729931966e-05, "loss": 2.7842, "step": 80000 }, { "epoch": 0.27, "learning_rate": 3.673994735709438e-05, "loss": 2.7696, "step": 80100 }, { "epoch": 0.27, "learning_rate": 3.672339298425679e-05, "loss": 2.788, "step": 80200 }, { "epoch": 0.27, "learning_rate": 3.670683861141921e-05, "loss": 2.7721, "step": 80300 }, { "epoch": 0.27, "learning_rate": 3.669028423858162e-05, "loss": 2.7725, "step": 80400 }, { "epoch": 0.27, "learning_rate": 3.667372986574404e-05, "loss": 2.7692, "step": 80500 }, { "epoch": 0.27, "learning_rate": 3.665717549290645e-05, "loss": 2.7789, "step": 80600 }, { "epoch": 0.27, "learning_rate": 3.6640621120068865e-05, "loss": 2.7867, "step": 80700 }, { "epoch": 0.27, "learning_rate": 3.6624066747231284e-05, "loss": 2.7753, "step": 80800 }, { "epoch": 0.27, "learning_rate": 3.66075123743937e-05, "loss": 2.7854, "step": 80900 }, { "epoch": 0.27, "learning_rate": 3.6590958001556116e-05, "loss": 2.786, "step": 81000 }, { "epoch": 0.27, "learning_rate": 3.657440362871853e-05, "loss": 2.7859, "step": 81100 }, { "epoch": 0.27, "learning_rate": 3.655784925588095e-05, "loss": 2.7829, "step": 81200 }, { "epoch": 0.27, "learning_rate": 3.654129488304336e-05, "loss": 2.7808, "step": 81300 }, { "epoch": 0.27, "learning_rate": 3.652474051020577e-05, "loss": 2.7707, "step": 81400 }, { "epoch": 0.27, "learning_rate": 3.650818613736819e-05, "loss": 2.7821, "step": 81500 }, { "epoch": 0.27, "learning_rate": 3.64916317645306e-05, "loss": 2.7832, "step": 81600 }, { "epoch": 0.27, "learning_rate": 3.647507739169302e-05, "loss": 2.7862, "step": 81700 }, { "epoch": 0.27, "learning_rate": 3.6458523018855434e-05, "loss": 2.7863, "step": 81800 }, { "epoch": 0.27, "learning_rate": 3.6441968646017846e-05, "loss": 2.7911, "step": 81900 }, { "epoch": 0.27, "learning_rate": 3.6425414273180265e-05, "loss": 2.7749, "step": 82000 }, { "epoch": 0.27, "learning_rate": 3.640885990034268e-05, "loss": 2.7949, "step": 82100 }, { "epoch": 0.27, "learning_rate": 3.63923055275051e-05, "loss": 2.7813, "step": 82200 }, { "epoch": 0.27, "learning_rate": 3.637575115466751e-05, "loss": 2.7744, "step": 82300 }, { "epoch": 0.27, "learning_rate": 3.635919678182992e-05, "loss": 2.7751, "step": 82400 }, { "epoch": 0.27, "learning_rate": 3.634264240899234e-05, "loss": 2.7608, "step": 82500 }, { "epoch": 0.27, "learning_rate": 3.632608803615475e-05, "loss": 2.7661, "step": 82600 }, { "epoch": 0.27, "learning_rate": 3.630953366331717e-05, "loss": 2.7813, "step": 82700 }, { "epoch": 0.27, "learning_rate": 3.6292979290479584e-05, "loss": 2.7774, "step": 82800 }, { "epoch": 0.27, "learning_rate": 3.6276424917641996e-05, "loss": 2.7659, "step": 82900 }, { "epoch": 0.27, "learning_rate": 3.6259870544804415e-05, "loss": 2.7797, "step": 83000 }, { "epoch": 0.28, "learning_rate": 3.624331617196683e-05, "loss": 2.7668, "step": 83100 }, { "epoch": 0.28, "learning_rate": 3.6226761799129246e-05, "loss": 2.7911, "step": 83200 }, { "epoch": 0.28, "learning_rate": 3.621020742629166e-05, "loss": 2.7762, "step": 83300 }, { "epoch": 0.28, "learning_rate": 3.619365305345407e-05, "loss": 2.7801, "step": 83400 }, { "epoch": 0.28, "learning_rate": 3.617709868061649e-05, "loss": 2.77, "step": 83500 }, { "epoch": 0.28, "learning_rate": 3.61605443077789e-05, "loss": 2.7831, "step": 83600 }, { "epoch": 0.28, "learning_rate": 3.614398993494132e-05, "loss": 2.7713, "step": 83700 }, { "epoch": 0.28, "learning_rate": 3.6127435562103734e-05, "loss": 2.7808, "step": 83800 }, { "epoch": 0.28, "learning_rate": 3.6110881189266146e-05, "loss": 2.7715, "step": 83900 }, { "epoch": 0.28, "learning_rate": 3.609432681642856e-05, "loss": 2.7768, "step": 84000 }, { "epoch": 0.28, "learning_rate": 3.607777244359097e-05, "loss": 2.7809, "step": 84100 }, { "epoch": 0.28, "learning_rate": 3.606121807075339e-05, "loss": 2.7715, "step": 84200 }, { "epoch": 0.28, "learning_rate": 3.60446636979158e-05, "loss": 2.7804, "step": 84300 }, { "epoch": 0.28, "learning_rate": 3.602810932507822e-05, "loss": 2.7654, "step": 84400 }, { "epoch": 0.28, "learning_rate": 3.601155495224063e-05, "loss": 2.7762, "step": 84500 }, { "epoch": 0.28, "learning_rate": 3.599500057940305e-05, "loss": 2.7809, "step": 84600 }, { "epoch": 0.28, "learning_rate": 3.5978446206565464e-05, "loss": 2.793, "step": 84700 }, { "epoch": 0.28, "learning_rate": 3.5961891833727876e-05, "loss": 2.7787, "step": 84800 }, { "epoch": 0.28, "learning_rate": 3.5945337460890295e-05, "loss": 2.7671, "step": 84900 }, { "epoch": 0.28, "learning_rate": 3.592878308805271e-05, "loss": 2.7884, "step": 85000 }, { "epoch": 0.28, "learning_rate": 3.591222871521513e-05, "loss": 2.7922, "step": 85100 }, { "epoch": 0.28, "learning_rate": 3.589567434237754e-05, "loss": 2.7726, "step": 85200 }, { "epoch": 0.28, "learning_rate": 3.587911996953995e-05, "loss": 2.7708, "step": 85300 }, { "epoch": 0.28, "learning_rate": 3.586256559670237e-05, "loss": 2.7743, "step": 85400 }, { "epoch": 0.28, "learning_rate": 3.584601122386478e-05, "loss": 2.758, "step": 85500 }, { "epoch": 0.28, "learning_rate": 3.58294568510272e-05, "loss": 2.7936, "step": 85600 }, { "epoch": 0.28, "learning_rate": 3.5812902478189614e-05, "loss": 2.7975, "step": 85700 }, { "epoch": 0.28, "learning_rate": 3.5796348105352026e-05, "loss": 2.789, "step": 85800 }, { "epoch": 0.28, "learning_rate": 3.5779793732514445e-05, "loss": 2.7746, "step": 85900 }, { "epoch": 0.28, "learning_rate": 3.576323935967686e-05, "loss": 2.7709, "step": 86000 }, { "epoch": 0.29, "learning_rate": 3.5746684986839277e-05, "loss": 2.7723, "step": 86100 }, { "epoch": 0.29, "learning_rate": 3.573013061400169e-05, "loss": 2.7792, "step": 86200 }, { "epoch": 0.29, "learning_rate": 3.57135762411641e-05, "loss": 2.786, "step": 86300 }, { "epoch": 0.29, "learning_rate": 3.569702186832652e-05, "loss": 2.7819, "step": 86400 }, { "epoch": 0.29, "learning_rate": 3.568046749548893e-05, "loss": 2.7743, "step": 86500 }, { "epoch": 0.29, "learning_rate": 3.566391312265135e-05, "loss": 2.7723, "step": 86600 }, { "epoch": 0.29, "learning_rate": 3.5647358749813764e-05, "loss": 2.7687, "step": 86700 }, { "epoch": 0.29, "learning_rate": 3.5630804376976176e-05, "loss": 2.7795, "step": 86800 }, { "epoch": 0.29, "learning_rate": 3.5614250004138595e-05, "loss": 2.7762, "step": 86900 }, { "epoch": 0.29, "learning_rate": 3.559769563130101e-05, "loss": 2.7657, "step": 87000 }, { "epoch": 0.29, "learning_rate": 3.5581141258463426e-05, "loss": 2.7618, "step": 87100 }, { "epoch": 0.29, "learning_rate": 3.556458688562584e-05, "loss": 2.7804, "step": 87200 }, { "epoch": 0.29, "learning_rate": 3.554803251278825e-05, "loss": 2.7627, "step": 87300 }, { "epoch": 0.29, "learning_rate": 3.553147813995067e-05, "loss": 2.7771, "step": 87400 }, { "epoch": 0.29, "learning_rate": 3.551492376711308e-05, "loss": 2.7767, "step": 87500 }, { "epoch": 0.29, "learning_rate": 3.54983693942755e-05, "loss": 2.7898, "step": 87600 }, { "epoch": 0.29, "learning_rate": 3.548181502143791e-05, "loss": 2.7887, "step": 87700 }, { "epoch": 0.29, "learning_rate": 3.546526064860033e-05, "loss": 2.7804, "step": 87800 }, { "epoch": 0.29, "learning_rate": 3.5448706275762745e-05, "loss": 2.7786, "step": 87900 }, { "epoch": 0.29, "learning_rate": 3.543215190292516e-05, "loss": 2.7599, "step": 88000 }, { "epoch": 0.29, "learning_rate": 3.5415597530087576e-05, "loss": 2.7816, "step": 88100 }, { "epoch": 0.29, "learning_rate": 3.539904315724999e-05, "loss": 2.7937, "step": 88200 }, { "epoch": 0.29, "learning_rate": 3.538248878441241e-05, "loss": 2.7737, "step": 88300 }, { "epoch": 0.29, "learning_rate": 3.536593441157482e-05, "loss": 2.7775, "step": 88400 }, { "epoch": 0.29, "learning_rate": 3.534938003873723e-05, "loss": 2.7798, "step": 88500 }, { "epoch": 0.29, "learning_rate": 3.533282566589965e-05, "loss": 2.7769, "step": 88600 }, { "epoch": 0.29, "learning_rate": 3.531627129306206e-05, "loss": 2.7833, "step": 88700 }, { "epoch": 0.29, "learning_rate": 3.529971692022448e-05, "loss": 2.7742, "step": 88800 }, { "epoch": 0.29, "learning_rate": 3.5283162547386894e-05, "loss": 2.7809, "step": 88900 }, { "epoch": 0.29, "learning_rate": 3.5266608174549307e-05, "loss": 2.7868, "step": 89000 }, { "epoch": 0.29, "learning_rate": 3.5250053801711726e-05, "loss": 2.7771, "step": 89100 }, { "epoch": 0.3, "learning_rate": 3.523349942887414e-05, "loss": 2.772, "step": 89200 }, { "epoch": 0.3, "learning_rate": 3.521694505603656e-05, "loss": 2.7718, "step": 89300 }, { "epoch": 0.3, "learning_rate": 3.520039068319897e-05, "loss": 2.7779, "step": 89400 }, { "epoch": 0.3, "learning_rate": 3.518383631036138e-05, "loss": 2.7683, "step": 89500 }, { "epoch": 0.3, "learning_rate": 3.51672819375238e-05, "loss": 2.7909, "step": 89600 }, { "epoch": 0.3, "learning_rate": 3.515072756468621e-05, "loss": 2.7714, "step": 89700 }, { "epoch": 0.3, "learning_rate": 3.513417319184863e-05, "loss": 2.7661, "step": 89800 }, { "epoch": 0.3, "learning_rate": 3.5117618819011044e-05, "loss": 2.7815, "step": 89900 }, { "epoch": 0.3, "learning_rate": 3.5101064446173456e-05, "loss": 2.7682, "step": 90000 }, { "epoch": 0.3, "learning_rate": 3.5084510073335875e-05, "loss": 2.7697, "step": 90100 }, { "epoch": 0.3, "learning_rate": 3.506795570049829e-05, "loss": 2.7856, "step": 90200 }, { "epoch": 0.3, "learning_rate": 3.505140132766071e-05, "loss": 2.7757, "step": 90300 }, { "epoch": 0.3, "learning_rate": 3.503484695482312e-05, "loss": 2.7874, "step": 90400 }, { "epoch": 0.3, "learning_rate": 3.501829258198553e-05, "loss": 2.7706, "step": 90500 }, { "epoch": 0.3, "learning_rate": 3.500173820914795e-05, "loss": 2.7729, "step": 90600 }, { "epoch": 0.3, "learning_rate": 3.498518383631036e-05, "loss": 2.7839, "step": 90700 }, { "epoch": 0.3, "learning_rate": 3.496862946347278e-05, "loss": 2.7744, "step": 90800 }, { "epoch": 0.3, "learning_rate": 3.4952075090635194e-05, "loss": 2.7835, "step": 90900 }, { "epoch": 0.3, "learning_rate": 3.493552071779761e-05, "loss": 2.7915, "step": 91000 }, { "epoch": 0.3, "learning_rate": 3.4918966344960025e-05, "loss": 2.7731, "step": 91100 }, { "epoch": 0.3, "learning_rate": 3.490241197212244e-05, "loss": 2.7648, "step": 91200 }, { "epoch": 0.3, "learning_rate": 3.4885857599284856e-05, "loss": 2.7816, "step": 91300 }, { "epoch": 0.3, "learning_rate": 3.486930322644727e-05, "loss": 2.7831, "step": 91400 }, { "epoch": 0.3, "learning_rate": 3.485274885360969e-05, "loss": 2.7733, "step": 91500 }, { "epoch": 0.3, "learning_rate": 3.48361944807721e-05, "loss": 2.7652, "step": 91600 }, { "epoch": 0.3, "learning_rate": 3.481964010793451e-05, "loss": 2.7655, "step": 91700 }, { "epoch": 0.3, "learning_rate": 3.480308573509693e-05, "loss": 2.7875, "step": 91800 }, { "epoch": 0.3, "learning_rate": 3.4786531362259343e-05, "loss": 2.7852, "step": 91900 }, { "epoch": 0.3, "learning_rate": 3.476997698942176e-05, "loss": 2.7794, "step": 92000 }, { "epoch": 0.3, "learning_rate": 3.4753422616584175e-05, "loss": 2.7684, "step": 92100 }, { "epoch": 0.31, "learning_rate": 3.473686824374659e-05, "loss": 2.7846, "step": 92200 }, { "epoch": 0.31, "learning_rate": 3.4720313870909006e-05, "loss": 2.7718, "step": 92300 }, { "epoch": 0.31, "learning_rate": 3.470375949807142e-05, "loss": 2.7705, "step": 92400 }, { "epoch": 0.31, "learning_rate": 3.468720512523384e-05, "loss": 2.7682, "step": 92500 }, { "epoch": 0.31, "learning_rate": 3.467065075239625e-05, "loss": 2.7749, "step": 92600 }, { "epoch": 0.31, "learning_rate": 3.465409637955866e-05, "loss": 2.7903, "step": 92700 }, { "epoch": 0.31, "learning_rate": 3.4637542006721074e-05, "loss": 2.7925, "step": 92800 }, { "epoch": 0.31, "learning_rate": 3.4620987633883486e-05, "loss": 2.7762, "step": 92900 }, { "epoch": 0.31, "learning_rate": 3.4604433261045905e-05, "loss": 2.7717, "step": 93000 }, { "epoch": 0.31, "learning_rate": 3.458787888820832e-05, "loss": 2.7867, "step": 93100 }, { "epoch": 0.31, "learning_rate": 3.457132451537074e-05, "loss": 2.7791, "step": 93200 }, { "epoch": 0.31, "learning_rate": 3.455477014253315e-05, "loss": 2.7835, "step": 93300 }, { "epoch": 0.31, "learning_rate": 3.453821576969556e-05, "loss": 2.7655, "step": 93400 }, { "epoch": 0.31, "learning_rate": 3.452166139685798e-05, "loss": 2.7796, "step": 93500 }, { "epoch": 0.31, "learning_rate": 3.450510702402039e-05, "loss": 2.7844, "step": 93600 }, { "epoch": 0.31, "learning_rate": 3.448855265118281e-05, "loss": 2.7739, "step": 93700 }, { "epoch": 0.31, "learning_rate": 3.4471998278345224e-05, "loss": 2.7785, "step": 93800 }, { "epoch": 0.31, "learning_rate": 3.4455443905507636e-05, "loss": 2.7706, "step": 93900 }, { "epoch": 0.31, "learning_rate": 3.4438889532670055e-05, "loss": 2.7815, "step": 94000 }, { "epoch": 0.31, "learning_rate": 3.442233515983247e-05, "loss": 2.7691, "step": 94100 }, { "epoch": 0.31, "learning_rate": 3.4405780786994886e-05, "loss": 2.7764, "step": 94200 }, { "epoch": 0.31, "learning_rate": 3.43892264141573e-05, "loss": 2.7791, "step": 94300 }, { "epoch": 0.31, "learning_rate": 3.437267204131972e-05, "loss": 2.775, "step": 94400 }, { "epoch": 0.31, "learning_rate": 3.435611766848213e-05, "loss": 2.7883, "step": 94500 }, { "epoch": 0.31, "learning_rate": 3.433956329564454e-05, "loss": 2.7737, "step": 94600 }, { "epoch": 0.31, "learning_rate": 3.432300892280696e-05, "loss": 2.7846, "step": 94700 }, { "epoch": 0.31, "learning_rate": 3.4306454549969374e-05, "loss": 2.7905, "step": 94800 }, { "epoch": 0.31, "learning_rate": 3.428990017713179e-05, "loss": 2.768, "step": 94900 }, { "epoch": 0.31, "learning_rate": 3.4273345804294205e-05, "loss": 2.7639, "step": 95000 }, { "epoch": 0.31, "learning_rate": 3.425679143145662e-05, "loss": 2.7801, "step": 95100 }, { "epoch": 0.32, "learning_rate": 3.4240237058619036e-05, "loss": 2.7805, "step": 95200 }, { "epoch": 0.32, "learning_rate": 3.422368268578145e-05, "loss": 2.7744, "step": 95300 }, { "epoch": 0.32, "learning_rate": 3.420712831294387e-05, "loss": 2.7739, "step": 95400 }, { "epoch": 0.32, "learning_rate": 3.419057394010628e-05, "loss": 2.7767, "step": 95500 }, { "epoch": 0.32, "learning_rate": 3.417401956726869e-05, "loss": 2.7806, "step": 95600 }, { "epoch": 0.32, "learning_rate": 3.415746519443111e-05, "loss": 2.7736, "step": 95700 }, { "epoch": 0.32, "learning_rate": 3.414091082159352e-05, "loss": 2.7877, "step": 95800 }, { "epoch": 0.32, "learning_rate": 3.412435644875594e-05, "loss": 2.7752, "step": 95900 }, { "epoch": 0.32, "learning_rate": 3.4107802075918355e-05, "loss": 2.7709, "step": 96000 }, { "epoch": 0.32, "learning_rate": 3.409124770308077e-05, "loss": 2.7841, "step": 96100 }, { "epoch": 0.32, "learning_rate": 3.4074693330243186e-05, "loss": 2.7853, "step": 96200 }, { "epoch": 0.32, "learning_rate": 3.40581389574056e-05, "loss": 2.7739, "step": 96300 }, { "epoch": 0.32, "learning_rate": 3.404158458456802e-05, "loss": 2.7713, "step": 96400 }, { "epoch": 0.32, "learning_rate": 3.402503021173043e-05, "loss": 2.784, "step": 96500 }, { "epoch": 0.32, "learning_rate": 3.400847583889284e-05, "loss": 2.7741, "step": 96600 }, { "epoch": 0.32, "learning_rate": 3.399192146605526e-05, "loss": 2.7843, "step": 96700 }, { "epoch": 0.32, "learning_rate": 3.397536709321767e-05, "loss": 2.7742, "step": 96800 }, { "epoch": 0.32, "learning_rate": 3.395881272038009e-05, "loss": 2.7829, "step": 96900 }, { "epoch": 0.32, "learning_rate": 3.3942258347542504e-05, "loss": 2.7706, "step": 97000 }, { "epoch": 0.32, "learning_rate": 3.3925703974704917e-05, "loss": 2.7828, "step": 97100 }, { "epoch": 0.32, "learning_rate": 3.3909149601867336e-05, "loss": 2.7705, "step": 97200 }, { "epoch": 0.32, "learning_rate": 3.389259522902975e-05, "loss": 2.767, "step": 97300 }, { "epoch": 0.32, "learning_rate": 3.387604085619217e-05, "loss": 2.7753, "step": 97400 }, { "epoch": 0.32, "learning_rate": 3.385948648335458e-05, "loss": 2.7843, "step": 97500 }, { "epoch": 0.32, "learning_rate": 3.3842932110517e-05, "loss": 2.7808, "step": 97600 }, { "epoch": 0.32, "learning_rate": 3.382637773767941e-05, "loss": 2.771, "step": 97700 }, { "epoch": 0.32, "learning_rate": 3.380982336484182e-05, "loss": 2.7802, "step": 97800 }, { "epoch": 0.32, "learning_rate": 3.379326899200424e-05, "loss": 2.7805, "step": 97900 }, { "epoch": 0.32, "learning_rate": 3.3776714619166654e-05, "loss": 2.7804, "step": 98000 }, { "epoch": 0.32, "learning_rate": 3.376016024632907e-05, "loss": 2.771, "step": 98100 }, { "epoch": 0.33, "learning_rate": 3.3743605873491485e-05, "loss": 2.7936, "step": 98200 }, { "epoch": 0.33, "learning_rate": 3.37270515006539e-05, "loss": 2.7788, "step": 98300 }, { "epoch": 0.33, "learning_rate": 3.371049712781632e-05, "loss": 2.7818, "step": 98400 }, { "epoch": 0.33, "learning_rate": 3.369394275497873e-05, "loss": 2.7888, "step": 98500 }, { "epoch": 0.33, "learning_rate": 3.367738838214115e-05, "loss": 2.7812, "step": 98600 }, { "epoch": 0.33, "learning_rate": 3.366083400930356e-05, "loss": 2.7802, "step": 98700 }, { "epoch": 0.33, "learning_rate": 3.364427963646597e-05, "loss": 2.7701, "step": 98800 }, { "epoch": 0.33, "learning_rate": 3.362772526362839e-05, "loss": 2.7758, "step": 98900 }, { "epoch": 0.33, "learning_rate": 3.3611170890790804e-05, "loss": 2.7818, "step": 99000 }, { "epoch": 0.33, "learning_rate": 3.359461651795322e-05, "loss": 2.7653, "step": 99100 }, { "epoch": 0.33, "learning_rate": 3.3578062145115635e-05, "loss": 2.7715, "step": 99200 }, { "epoch": 0.33, "learning_rate": 3.356150777227805e-05, "loss": 2.7815, "step": 99300 }, { "epoch": 0.33, "learning_rate": 3.3544953399440466e-05, "loss": 2.7825, "step": 99400 }, { "epoch": 0.33, "learning_rate": 3.352839902660288e-05, "loss": 2.774, "step": 99500 }, { "epoch": 0.33, "learning_rate": 3.35118446537653e-05, "loss": 2.7639, "step": 99600 }, { "epoch": 0.33, "learning_rate": 3.349529028092771e-05, "loss": 2.7649, "step": 99700 }, { "epoch": 0.33, "learning_rate": 3.347873590809012e-05, "loss": 2.7716, "step": 99800 }, { "epoch": 0.33, "learning_rate": 3.346218153525254e-05, "loss": 2.7764, "step": 99900 }, { "epoch": 0.33, "learning_rate": 3.3445627162414953e-05, "loss": 2.7633, "step": 100000 }, { "epoch": 0.33, "learning_rate": 3.342907278957737e-05, "loss": 2.7705, "step": 100100 }, { "epoch": 0.33, "learning_rate": 3.3412518416739785e-05, "loss": 2.7713, "step": 100200 }, { "epoch": 0.33, "learning_rate": 3.33959640439022e-05, "loss": 2.784, "step": 100300 }, { "epoch": 0.33, "learning_rate": 3.3379409671064616e-05, "loss": 2.7755, "step": 100400 }, { "epoch": 0.33, "learning_rate": 3.336285529822703e-05, "loss": 2.7765, "step": 100500 }, { "epoch": 0.33, "learning_rate": 3.334630092538945e-05, "loss": 2.7594, "step": 100600 }, { "epoch": 0.33, "learning_rate": 3.332974655255186e-05, "loss": 2.7813, "step": 100700 }, { "epoch": 0.33, "learning_rate": 3.331319217971428e-05, "loss": 2.7752, "step": 100800 }, { "epoch": 0.33, "learning_rate": 3.329663780687669e-05, "loss": 2.787, "step": 100900 }, { "epoch": 0.33, "learning_rate": 3.32800834340391e-05, "loss": 2.7651, "step": 101000 }, { "epoch": 0.33, "learning_rate": 3.326352906120152e-05, "loss": 2.7576, "step": 101100 }, { "epoch": 0.34, "learning_rate": 3.3246974688363934e-05, "loss": 2.7713, "step": 101200 }, { "epoch": 0.34, "learning_rate": 3.3230420315526354e-05, "loss": 2.7807, "step": 101300 }, { "epoch": 0.34, "learning_rate": 3.3213865942688766e-05, "loss": 2.7825, "step": 101400 }, { "epoch": 0.34, "learning_rate": 3.319731156985118e-05, "loss": 2.7823, "step": 101500 }, { "epoch": 0.34, "learning_rate": 3.318075719701359e-05, "loss": 2.7741, "step": 101600 }, { "epoch": 0.34, "learning_rate": 3.3164202824176e-05, "loss": 2.789, "step": 101700 }, { "epoch": 0.34, "learning_rate": 3.314764845133842e-05, "loss": 2.7983, "step": 101800 }, { "epoch": 0.34, "learning_rate": 3.3131094078500834e-05, "loss": 2.7856, "step": 101900 }, { "epoch": 0.34, "learning_rate": 3.311453970566325e-05, "loss": 2.7914, "step": 102000 }, { "epoch": 0.34, "learning_rate": 3.3097985332825665e-05, "loss": 2.7627, "step": 102100 }, { "epoch": 0.34, "learning_rate": 3.308143095998808e-05, "loss": 2.7756, "step": 102200 }, { "epoch": 0.34, "learning_rate": 3.3064876587150496e-05, "loss": 2.7775, "step": 102300 }, { "epoch": 0.34, "learning_rate": 3.304832221431291e-05, "loss": 2.7794, "step": 102400 }, { "epoch": 0.34, "learning_rate": 3.303176784147533e-05, "loss": 2.7749, "step": 102500 }, { "epoch": 0.34, "learning_rate": 3.301521346863774e-05, "loss": 2.7621, "step": 102600 }, { "epoch": 0.34, "learning_rate": 3.299865909580015e-05, "loss": 2.7732, "step": 102700 }, { "epoch": 0.34, "learning_rate": 3.298210472296257e-05, "loss": 2.7737, "step": 102800 }, { "epoch": 0.34, "learning_rate": 3.2965550350124984e-05, "loss": 2.7783, "step": 102900 }, { "epoch": 0.34, "learning_rate": 3.29489959772874e-05, "loss": 2.7734, "step": 103000 }, { "epoch": 0.34, "learning_rate": 3.2932441604449815e-05, "loss": 2.7766, "step": 103100 }, { "epoch": 0.34, "learning_rate": 3.291588723161223e-05, "loss": 2.7706, "step": 103200 }, { "epoch": 0.34, "learning_rate": 3.2899332858774646e-05, "loss": 2.7759, "step": 103300 }, { "epoch": 0.34, "learning_rate": 3.288277848593706e-05, "loss": 2.7846, "step": 103400 }, { "epoch": 0.34, "learning_rate": 3.286622411309948e-05, "loss": 2.7774, "step": 103500 }, { "epoch": 0.34, "learning_rate": 3.284966974026189e-05, "loss": 2.7682, "step": 103600 }, { "epoch": 0.34, "learning_rate": 3.28331153674243e-05, "loss": 2.79, "step": 103700 }, { "epoch": 0.34, "learning_rate": 3.281656099458672e-05, "loss": 2.7739, "step": 103800 }, { "epoch": 0.34, "learning_rate": 3.280000662174913e-05, "loss": 2.7842, "step": 103900 }, { "epoch": 0.34, "learning_rate": 3.278345224891155e-05, "loss": 2.784, "step": 104000 }, { "epoch": 0.34, "learning_rate": 3.2766897876073965e-05, "loss": 2.768, "step": 104100 }, { "epoch": 0.34, "learning_rate": 3.2750343503236384e-05, "loss": 2.7755, "step": 104200 }, { "epoch": 0.35, "learning_rate": 3.2733789130398796e-05, "loss": 2.7662, "step": 104300 }, { "epoch": 0.35, "learning_rate": 3.271723475756121e-05, "loss": 2.7739, "step": 104400 }, { "epoch": 0.35, "learning_rate": 3.270068038472363e-05, "loss": 2.7882, "step": 104500 }, { "epoch": 0.35, "learning_rate": 3.268412601188604e-05, "loss": 2.7644, "step": 104600 }, { "epoch": 0.35, "learning_rate": 3.266757163904846e-05, "loss": 2.7605, "step": 104700 }, { "epoch": 0.35, "learning_rate": 3.265101726621087e-05, "loss": 2.7728, "step": 104800 }, { "epoch": 0.35, "learning_rate": 3.263446289337328e-05, "loss": 2.7701, "step": 104900 }, { "epoch": 0.35, "learning_rate": 3.26179085205357e-05, "loss": 2.776, "step": 105000 }, { "epoch": 0.35, "learning_rate": 3.2601354147698114e-05, "loss": 2.7712, "step": 105100 }, { "epoch": 0.35, "learning_rate": 3.258479977486053e-05, "loss": 2.7716, "step": 105200 }, { "epoch": 0.35, "learning_rate": 3.2568245402022946e-05, "loss": 2.7774, "step": 105300 }, { "epoch": 0.35, "learning_rate": 3.255169102918536e-05, "loss": 2.785, "step": 105400 }, { "epoch": 0.35, "learning_rate": 3.253513665634778e-05, "loss": 2.7735, "step": 105500 }, { "epoch": 0.35, "learning_rate": 3.251858228351019e-05, "loss": 2.7628, "step": 105600 }, { "epoch": 0.35, "learning_rate": 3.250202791067261e-05, "loss": 2.7881, "step": 105700 }, { "epoch": 0.35, "learning_rate": 3.248547353783502e-05, "loss": 2.7641, "step": 105800 }, { "epoch": 0.35, "learning_rate": 3.246891916499743e-05, "loss": 2.7703, "step": 105900 }, { "epoch": 0.35, "learning_rate": 3.245236479215985e-05, "loss": 2.7634, "step": 106000 }, { "epoch": 0.35, "learning_rate": 3.2435810419322264e-05, "loss": 2.7717, "step": 106100 }, { "epoch": 0.35, "learning_rate": 3.241925604648468e-05, "loss": 2.7641, "step": 106200 }, { "epoch": 0.35, "learning_rate": 3.2402701673647095e-05, "loss": 2.7868, "step": 106300 }, { "epoch": 0.35, "learning_rate": 3.238614730080951e-05, "loss": 2.7773, "step": 106400 }, { "epoch": 0.35, "learning_rate": 3.2369592927971927e-05, "loss": 2.7773, "step": 106500 }, { "epoch": 0.35, "learning_rate": 3.235303855513434e-05, "loss": 2.7799, "step": 106600 }, { "epoch": 0.35, "learning_rate": 3.233648418229676e-05, "loss": 2.784, "step": 106700 }, { "epoch": 0.35, "learning_rate": 3.231992980945917e-05, "loss": 2.7685, "step": 106800 }, { "epoch": 0.35, "learning_rate": 3.230337543662158e-05, "loss": 2.7823, "step": 106900 }, { "epoch": 0.35, "learning_rate": 3.2286821063784e-05, "loss": 2.7562, "step": 107000 }, { "epoch": 0.35, "learning_rate": 3.2270266690946414e-05, "loss": 2.7664, "step": 107100 }, { "epoch": 0.35, "learning_rate": 3.225371231810883e-05, "loss": 2.7667, "step": 107200 }, { "epoch": 0.36, "learning_rate": 3.2237157945271245e-05, "loss": 2.7852, "step": 107300 }, { "epoch": 0.36, "learning_rate": 3.2220603572433664e-05, "loss": 2.7783, "step": 107400 }, { "epoch": 0.36, "learning_rate": 3.2204049199596076e-05, "loss": 2.7797, "step": 107500 }, { "epoch": 0.36, "learning_rate": 3.218749482675849e-05, "loss": 2.7615, "step": 107600 }, { "epoch": 0.36, "learning_rate": 3.217094045392091e-05, "loss": 2.7653, "step": 107700 }, { "epoch": 0.36, "learning_rate": 3.215438608108332e-05, "loss": 2.7752, "step": 107800 }, { "epoch": 0.36, "learning_rate": 3.213783170824574e-05, "loss": 2.7706, "step": 107900 }, { "epoch": 0.36, "learning_rate": 3.212127733540815e-05, "loss": 2.7725, "step": 108000 }, { "epoch": 0.36, "learning_rate": 3.2104722962570563e-05, "loss": 2.7577, "step": 108100 }, { "epoch": 0.36, "learning_rate": 3.208816858973298e-05, "loss": 2.7666, "step": 108200 }, { "epoch": 0.36, "learning_rate": 3.2071614216895395e-05, "loss": 2.7644, "step": 108300 }, { "epoch": 0.36, "learning_rate": 3.2055059844057814e-05, "loss": 2.7897, "step": 108400 }, { "epoch": 0.36, "learning_rate": 3.2038505471220226e-05, "loss": 2.7704, "step": 108500 }, { "epoch": 0.36, "learning_rate": 3.202195109838264e-05, "loss": 2.7812, "step": 108600 }, { "epoch": 0.36, "learning_rate": 3.200539672554506e-05, "loss": 2.7741, "step": 108700 }, { "epoch": 0.36, "learning_rate": 3.198884235270747e-05, "loss": 2.7787, "step": 108800 }, { "epoch": 0.36, "learning_rate": 3.197228797986989e-05, "loss": 2.7758, "step": 108900 }, { "epoch": 0.36, "learning_rate": 3.19557336070323e-05, "loss": 2.773, "step": 109000 }, { "epoch": 0.36, "learning_rate": 3.193917923419471e-05, "loss": 2.7714, "step": 109100 }, { "epoch": 0.36, "learning_rate": 3.192262486135713e-05, "loss": 2.7783, "step": 109200 }, { "epoch": 0.36, "learning_rate": 3.1906070488519544e-05, "loss": 2.7797, "step": 109300 }, { "epoch": 0.36, "learning_rate": 3.1889516115681963e-05, "loss": 2.775, "step": 109400 }, { "epoch": 0.36, "learning_rate": 3.1872961742844376e-05, "loss": 2.7765, "step": 109500 }, { "epoch": 0.36, "learning_rate": 3.185640737000679e-05, "loss": 2.7783, "step": 109600 }, { "epoch": 0.36, "learning_rate": 3.183985299716921e-05, "loss": 2.7854, "step": 109700 }, { "epoch": 0.36, "learning_rate": 3.182329862433162e-05, "loss": 2.7791, "step": 109800 }, { "epoch": 0.36, "learning_rate": 3.180674425149404e-05, "loss": 2.7679, "step": 109900 }, { "epoch": 0.36, "learning_rate": 3.179018987865645e-05, "loss": 2.7684, "step": 110000 }, { "epoch": 0.36, "learning_rate": 3.177363550581886e-05, "loss": 2.7811, "step": 110100 }, { "epoch": 0.36, "learning_rate": 3.175708113298128e-05, "loss": 2.7736, "step": 110200 }, { "epoch": 0.37, "learning_rate": 3.1740526760143694e-05, "loss": 2.7635, "step": 110300 }, { "epoch": 0.37, "learning_rate": 3.1723972387306106e-05, "loss": 2.7828, "step": 110400 }, { "epoch": 0.37, "learning_rate": 3.170741801446852e-05, "loss": 2.7673, "step": 110500 }, { "epoch": 0.37, "learning_rate": 3.169086364163094e-05, "loss": 2.7755, "step": 110600 }, { "epoch": 0.37, "learning_rate": 3.167430926879335e-05, "loss": 2.7807, "step": 110700 }, { "epoch": 0.37, "learning_rate": 3.165775489595577e-05, "loss": 2.7714, "step": 110800 }, { "epoch": 0.37, "learning_rate": 3.164120052311818e-05, "loss": 2.778, "step": 110900 }, { "epoch": 0.37, "learning_rate": 3.1624646150280594e-05, "loss": 2.7727, "step": 111000 }, { "epoch": 0.37, "learning_rate": 3.160809177744301e-05, "loss": 2.7663, "step": 111100 }, { "epoch": 0.37, "learning_rate": 3.1591537404605425e-05, "loss": 2.7816, "step": 111200 }, { "epoch": 0.37, "learning_rate": 3.1574983031767844e-05, "loss": 2.7668, "step": 111300 }, { "epoch": 0.37, "learning_rate": 3.1558428658930256e-05, "loss": 2.776, "step": 111400 }, { "epoch": 0.37, "learning_rate": 3.154187428609267e-05, "loss": 2.7728, "step": 111500 }, { "epoch": 0.37, "learning_rate": 3.152531991325509e-05, "loss": 2.7592, "step": 111600 }, { "epoch": 0.37, "learning_rate": 3.15087655404175e-05, "loss": 2.7637, "step": 111700 }, { "epoch": 0.37, "learning_rate": 3.149221116757992e-05, "loss": 2.7674, "step": 111800 }, { "epoch": 0.37, "learning_rate": 3.147565679474233e-05, "loss": 2.7669, "step": 111900 }, { "epoch": 0.37, "learning_rate": 3.145910242190474e-05, "loss": 2.7729, "step": 112000 }, { "epoch": 0.37, "learning_rate": 3.144254804906716e-05, "loss": 2.7795, "step": 112100 }, { "epoch": 0.37, "learning_rate": 3.1425993676229575e-05, "loss": 2.7743, "step": 112200 }, { "epoch": 0.37, "learning_rate": 3.1409439303391994e-05, "loss": 2.7723, "step": 112300 }, { "epoch": 0.37, "learning_rate": 3.1392884930554406e-05, "loss": 2.7614, "step": 112400 }, { "epoch": 0.37, "learning_rate": 3.137633055771682e-05, "loss": 2.77, "step": 112500 }, { "epoch": 0.37, "learning_rate": 3.135977618487924e-05, "loss": 2.7759, "step": 112600 }, { "epoch": 0.37, "learning_rate": 3.134322181204165e-05, "loss": 2.7656, "step": 112700 }, { "epoch": 0.37, "learning_rate": 3.132666743920407e-05, "loss": 2.7758, "step": 112800 }, { "epoch": 0.37, "learning_rate": 3.131011306636648e-05, "loss": 2.7651, "step": 112900 }, { "epoch": 0.37, "learning_rate": 3.129355869352889e-05, "loss": 2.7618, "step": 113000 }, { "epoch": 0.37, "learning_rate": 3.127700432069131e-05, "loss": 2.7754, "step": 113100 }, { "epoch": 0.37, "learning_rate": 3.1260449947853724e-05, "loss": 2.7729, "step": 113200 }, { "epoch": 0.38, "learning_rate": 3.124389557501614e-05, "loss": 2.773, "step": 113300 }, { "epoch": 0.38, "learning_rate": 3.1227341202178556e-05, "loss": 2.7728, "step": 113400 }, { "epoch": 0.38, "learning_rate": 3.121078682934097e-05, "loss": 2.7802, "step": 113500 }, { "epoch": 0.38, "learning_rate": 3.119423245650339e-05, "loss": 2.7692, "step": 113600 }, { "epoch": 0.38, "learning_rate": 3.11776780836658e-05, "loss": 2.7749, "step": 113700 }, { "epoch": 0.38, "learning_rate": 3.116112371082822e-05, "loss": 2.7893, "step": 113800 }, { "epoch": 0.38, "learning_rate": 3.114456933799063e-05, "loss": 2.7856, "step": 113900 }, { "epoch": 0.38, "learning_rate": 3.112801496515305e-05, "loss": 2.7725, "step": 114000 }, { "epoch": 0.38, "learning_rate": 3.111146059231546e-05, "loss": 2.7749, "step": 114100 }, { "epoch": 0.38, "learning_rate": 3.1094906219477874e-05, "loss": 2.7739, "step": 114200 }, { "epoch": 0.38, "learning_rate": 3.107835184664029e-05, "loss": 2.7755, "step": 114300 }, { "epoch": 0.38, "learning_rate": 3.1061797473802705e-05, "loss": 2.7738, "step": 114400 }, { "epoch": 0.38, "learning_rate": 3.1045243100965124e-05, "loss": 2.7726, "step": 114500 }, { "epoch": 0.38, "learning_rate": 3.1028688728127537e-05, "loss": 2.7723, "step": 114600 }, { "epoch": 0.38, "learning_rate": 3.101213435528995e-05, "loss": 2.7785, "step": 114700 }, { "epoch": 0.38, "learning_rate": 3.099557998245237e-05, "loss": 2.7531, "step": 114800 }, { "epoch": 0.38, "learning_rate": 3.097902560961478e-05, "loss": 2.7686, "step": 114900 }, { "epoch": 0.38, "learning_rate": 3.09624712367772e-05, "loss": 2.7673, "step": 115000 }, { "epoch": 0.38, "learning_rate": 3.094591686393961e-05, "loss": 2.7584, "step": 115100 }, { "epoch": 0.38, "learning_rate": 3.0929362491102024e-05, "loss": 2.763, "step": 115200 }, { "epoch": 0.38, "learning_rate": 3.091280811826444e-05, "loss": 2.7553, "step": 115300 }, { "epoch": 0.38, "learning_rate": 3.0896253745426855e-05, "loss": 2.7735, "step": 115400 }, { "epoch": 0.38, "learning_rate": 3.0879699372589274e-05, "loss": 2.7773, "step": 115500 }, { "epoch": 0.38, "learning_rate": 3.0863144999751686e-05, "loss": 2.7626, "step": 115600 }, { "epoch": 0.38, "learning_rate": 3.08465906269141e-05, "loss": 2.7775, "step": 115700 }, { "epoch": 0.38, "learning_rate": 3.083003625407652e-05, "loss": 2.7956, "step": 115800 }, { "epoch": 0.38, "learning_rate": 3.081348188123893e-05, "loss": 2.781, "step": 115900 }, { "epoch": 0.38, "learning_rate": 3.079692750840135e-05, "loss": 2.768, "step": 116000 }, { "epoch": 0.38, "learning_rate": 3.078037313556376e-05, "loss": 2.7684, "step": 116100 }, { "epoch": 0.38, "learning_rate": 3.0763818762726173e-05, "loss": 2.7666, "step": 116200 }, { "epoch": 0.39, "learning_rate": 3.074726438988859e-05, "loss": 2.7847, "step": 116300 }, { "epoch": 0.39, "learning_rate": 3.0730710017051005e-05, "loss": 2.7653, "step": 116400 }, { "epoch": 0.39, "learning_rate": 3.0714155644213424e-05, "loss": 2.7693, "step": 116500 }, { "epoch": 0.39, "learning_rate": 3.0697601271375836e-05, "loss": 2.7702, "step": 116600 }, { "epoch": 0.39, "learning_rate": 3.068104689853825e-05, "loss": 2.7828, "step": 116700 }, { "epoch": 0.39, "learning_rate": 3.066449252570067e-05, "loss": 2.7628, "step": 116800 }, { "epoch": 0.39, "learning_rate": 3.064793815286308e-05, "loss": 2.7722, "step": 116900 }, { "epoch": 0.39, "learning_rate": 3.06313837800255e-05, "loss": 2.7868, "step": 117000 }, { "epoch": 0.39, "learning_rate": 3.061482940718791e-05, "loss": 2.7793, "step": 117100 }, { "epoch": 0.39, "learning_rate": 3.059827503435033e-05, "loss": 2.7742, "step": 117200 }, { "epoch": 0.39, "learning_rate": 3.058172066151274e-05, "loss": 2.7836, "step": 117300 }, { "epoch": 0.39, "learning_rate": 3.0565166288675154e-05, "loss": 2.7655, "step": 117400 }, { "epoch": 0.39, "learning_rate": 3.0548611915837573e-05, "loss": 2.7816, "step": 117500 }, { "epoch": 0.39, "learning_rate": 3.0532057542999986e-05, "loss": 2.7872, "step": 117600 }, { "epoch": 0.39, "learning_rate": 3.05155031701624e-05, "loss": 2.7787, "step": 117700 }, { "epoch": 0.39, "learning_rate": 3.0498948797324817e-05, "loss": 2.7825, "step": 117800 }, { "epoch": 0.39, "learning_rate": 3.0482394424487233e-05, "loss": 2.7755, "step": 117900 }, { "epoch": 0.39, "learning_rate": 3.046584005164965e-05, "loss": 2.7712, "step": 118000 }, { "epoch": 0.39, "learning_rate": 3.044928567881206e-05, "loss": 2.7638, "step": 118100 }, { "epoch": 0.39, "learning_rate": 3.0432731305974476e-05, "loss": 2.757, "step": 118200 }, { "epoch": 0.39, "learning_rate": 3.0416176933136892e-05, "loss": 2.7497, "step": 118300 }, { "epoch": 0.39, "learning_rate": 3.0399622560299308e-05, "loss": 2.7538, "step": 118400 }, { "epoch": 0.39, "learning_rate": 3.0383068187461723e-05, "loss": 2.7773, "step": 118500 }, { "epoch": 0.39, "learning_rate": 3.0366513814624135e-05, "loss": 2.7749, "step": 118600 }, { "epoch": 0.39, "learning_rate": 3.034995944178655e-05, "loss": 2.7831, "step": 118700 }, { "epoch": 0.39, "learning_rate": 3.0333405068948967e-05, "loss": 2.7634, "step": 118800 }, { "epoch": 0.39, "learning_rate": 3.0316850696111382e-05, "loss": 2.7701, "step": 118900 }, { "epoch": 0.39, "learning_rate": 3.0300296323273798e-05, "loss": 2.772, "step": 119000 }, { "epoch": 0.39, "learning_rate": 3.028374195043621e-05, "loss": 2.7691, "step": 119100 }, { "epoch": 0.39, "learning_rate": 3.0267187577598626e-05, "loss": 2.7587, "step": 119200 }, { "epoch": 0.39, "learning_rate": 3.0250633204761035e-05, "loss": 2.7718, "step": 119300 }, { "epoch": 0.4, "learning_rate": 3.023407883192345e-05, "loss": 2.777, "step": 119400 }, { "epoch": 0.4, "learning_rate": 3.0217524459085866e-05, "loss": 2.773, "step": 119500 }, { "epoch": 0.4, "learning_rate": 3.0200970086248282e-05, "loss": 2.7589, "step": 119600 }, { "epoch": 0.4, "learning_rate": 3.0184415713410697e-05, "loss": 2.7534, "step": 119700 }, { "epoch": 0.4, "learning_rate": 3.0167861340573113e-05, "loss": 2.7665, "step": 119800 }, { "epoch": 0.4, "learning_rate": 3.0151306967735525e-05, "loss": 2.782, "step": 119900 }, { "epoch": 0.4, "learning_rate": 3.013475259489794e-05, "loss": 2.774, "step": 120000 }, { "epoch": 0.4, "learning_rate": 3.0118198222060357e-05, "loss": 2.7819, "step": 120100 }, { "epoch": 0.4, "learning_rate": 3.0101643849222772e-05, "loss": 2.7786, "step": 120200 }, { "epoch": 0.4, "learning_rate": 3.0085089476385188e-05, "loss": 2.7795, "step": 120300 }, { "epoch": 0.4, "learning_rate": 3.00685351035476e-05, "loss": 2.7588, "step": 120400 }, { "epoch": 0.4, "learning_rate": 3.0051980730710016e-05, "loss": 2.7794, "step": 120500 }, { "epoch": 0.4, "learning_rate": 3.003542635787243e-05, "loss": 2.7647, "step": 120600 }, { "epoch": 0.4, "learning_rate": 3.0018871985034847e-05, "loss": 2.7707, "step": 120700 }, { "epoch": 0.4, "learning_rate": 3.0002317612197263e-05, "loss": 2.7733, "step": 120800 }, { "epoch": 0.4, "learning_rate": 2.9985763239359675e-05, "loss": 2.7605, "step": 120900 }, { "epoch": 0.4, "learning_rate": 2.996920886652209e-05, "loss": 2.7599, "step": 121000 }, { "epoch": 0.4, "learning_rate": 2.9952654493684506e-05, "loss": 2.7687, "step": 121100 }, { "epoch": 0.4, "learning_rate": 2.9936100120846922e-05, "loss": 2.7717, "step": 121200 }, { "epoch": 0.4, "learning_rate": 2.9919545748009338e-05, "loss": 2.7611, "step": 121300 }, { "epoch": 0.4, "learning_rate": 2.9902991375171753e-05, "loss": 2.7734, "step": 121400 }, { "epoch": 0.4, "learning_rate": 2.9886437002334166e-05, "loss": 2.7806, "step": 121500 }, { "epoch": 0.4, "learning_rate": 2.986988262949658e-05, "loss": 2.7623, "step": 121600 }, { "epoch": 0.4, "learning_rate": 2.9853328256658997e-05, "loss": 2.7714, "step": 121700 }, { "epoch": 0.4, "learning_rate": 2.9836773883821412e-05, "loss": 2.7525, "step": 121800 }, { "epoch": 0.4, "learning_rate": 2.9820219510983828e-05, "loss": 2.7666, "step": 121900 }, { "epoch": 0.4, "learning_rate": 2.980366513814624e-05, "loss": 2.7632, "step": 122000 }, { "epoch": 0.4, "learning_rate": 2.9787110765308656e-05, "loss": 2.7677, "step": 122100 }, { "epoch": 0.4, "learning_rate": 2.9770556392471072e-05, "loss": 2.7676, "step": 122200 }, { "epoch": 0.4, "learning_rate": 2.9754002019633487e-05, "loss": 2.7662, "step": 122300 }, { "epoch": 0.41, "learning_rate": 2.9737447646795903e-05, "loss": 2.7562, "step": 122400 }, { "epoch": 0.41, "learning_rate": 2.9720893273958315e-05, "loss": 2.7635, "step": 122500 }, { "epoch": 0.41, "learning_rate": 2.970433890112073e-05, "loss": 2.7863, "step": 122600 }, { "epoch": 0.41, "learning_rate": 2.9687784528283147e-05, "loss": 2.77, "step": 122700 }, { "epoch": 0.41, "learning_rate": 2.9671230155445562e-05, "loss": 2.7743, "step": 122800 }, { "epoch": 0.41, "learning_rate": 2.9654675782607978e-05, "loss": 2.7752, "step": 122900 }, { "epoch": 0.41, "learning_rate": 2.9638121409770393e-05, "loss": 2.7648, "step": 123000 }, { "epoch": 0.41, "learning_rate": 2.9621567036932806e-05, "loss": 2.7812, "step": 123100 }, { "epoch": 0.41, "learning_rate": 2.960501266409522e-05, "loss": 2.778, "step": 123200 }, { "epoch": 0.41, "learning_rate": 2.9588458291257637e-05, "loss": 2.7529, "step": 123300 }, { "epoch": 0.41, "learning_rate": 2.9571903918420053e-05, "loss": 2.765, "step": 123400 }, { "epoch": 0.41, "learning_rate": 2.955534954558247e-05, "loss": 2.7577, "step": 123500 }, { "epoch": 0.41, "learning_rate": 2.953879517274488e-05, "loss": 2.7818, "step": 123600 }, { "epoch": 0.41, "learning_rate": 2.9522240799907296e-05, "loss": 2.7638, "step": 123700 }, { "epoch": 0.41, "learning_rate": 2.9505686427069712e-05, "loss": 2.7702, "step": 123800 }, { "epoch": 0.41, "learning_rate": 2.9489132054232128e-05, "loss": 2.7717, "step": 123900 }, { "epoch": 0.41, "learning_rate": 2.9472577681394543e-05, "loss": 2.7722, "step": 124000 }, { "epoch": 0.41, "learning_rate": 2.9456023308556955e-05, "loss": 2.7712, "step": 124100 }, { "epoch": 0.41, "learning_rate": 2.943946893571937e-05, "loss": 2.7712, "step": 124200 }, { "epoch": 0.41, "learning_rate": 2.9422914562881787e-05, "loss": 2.7813, "step": 124300 }, { "epoch": 0.41, "learning_rate": 2.9406360190044202e-05, "loss": 2.7765, "step": 124400 }, { "epoch": 0.41, "learning_rate": 2.9389805817206618e-05, "loss": 2.7585, "step": 124500 }, { "epoch": 0.41, "learning_rate": 2.9373251444369034e-05, "loss": 2.7706, "step": 124600 }, { "epoch": 0.41, "learning_rate": 2.9356697071531446e-05, "loss": 2.761, "step": 124700 }, { "epoch": 0.41, "learning_rate": 2.934014269869386e-05, "loss": 2.7687, "step": 124800 }, { "epoch": 0.41, "learning_rate": 2.9323588325856277e-05, "loss": 2.7836, "step": 124900 }, { "epoch": 0.41, "learning_rate": 2.9307033953018693e-05, "loss": 2.7614, "step": 125000 }, { "epoch": 0.41, "learning_rate": 2.929047958018111e-05, "loss": 2.7665, "step": 125100 }, { "epoch": 0.41, "learning_rate": 2.927392520734352e-05, "loss": 2.7617, "step": 125200 }, { "epoch": 0.41, "learning_rate": 2.9257370834505936e-05, "loss": 2.7705, "step": 125300 }, { "epoch": 0.42, "learning_rate": 2.9240816461668352e-05, "loss": 2.774, "step": 125400 }, { "epoch": 0.42, "learning_rate": 2.9224262088830768e-05, "loss": 2.7703, "step": 125500 }, { "epoch": 0.42, "learning_rate": 2.9207707715993183e-05, "loss": 2.7724, "step": 125600 }, { "epoch": 0.42, "learning_rate": 2.9191153343155596e-05, "loss": 2.7609, "step": 125700 }, { "epoch": 0.42, "learning_rate": 2.917459897031801e-05, "loss": 2.7766, "step": 125800 }, { "epoch": 0.42, "learning_rate": 2.9158044597480427e-05, "loss": 2.7641, "step": 125900 }, { "epoch": 0.42, "learning_rate": 2.9141490224642843e-05, "loss": 2.7824, "step": 126000 }, { "epoch": 0.42, "learning_rate": 2.9124935851805258e-05, "loss": 2.7699, "step": 126100 }, { "epoch": 0.42, "learning_rate": 2.9108381478967674e-05, "loss": 2.7609, "step": 126200 }, { "epoch": 0.42, "learning_rate": 2.9091827106130086e-05, "loss": 2.7829, "step": 126300 }, { "epoch": 0.42, "learning_rate": 2.9075272733292502e-05, "loss": 2.7655, "step": 126400 }, { "epoch": 0.42, "learning_rate": 2.9058718360454917e-05, "loss": 2.7803, "step": 126500 }, { "epoch": 0.42, "learning_rate": 2.9042163987617333e-05, "loss": 2.7583, "step": 126600 }, { "epoch": 0.42, "learning_rate": 2.902560961477975e-05, "loss": 2.7724, "step": 126700 }, { "epoch": 0.42, "learning_rate": 2.900905524194216e-05, "loss": 2.768, "step": 126800 }, { "epoch": 0.42, "learning_rate": 2.8992500869104577e-05, "loss": 2.7735, "step": 126900 }, { "epoch": 0.42, "learning_rate": 2.8975946496266992e-05, "loss": 2.7553, "step": 127000 }, { "epoch": 0.42, "learning_rate": 2.8959392123429408e-05, "loss": 2.7715, "step": 127100 }, { "epoch": 0.42, "learning_rate": 2.8942837750591824e-05, "loss": 2.7681, "step": 127200 }, { "epoch": 0.42, "learning_rate": 2.8926283377754236e-05, "loss": 2.7614, "step": 127300 }, { "epoch": 0.42, "learning_rate": 2.890972900491665e-05, "loss": 2.7601, "step": 127400 }, { "epoch": 0.42, "learning_rate": 2.8893174632079067e-05, "loss": 2.7688, "step": 127500 }, { "epoch": 0.42, "learning_rate": 2.8876620259241483e-05, "loss": 2.761, "step": 127600 }, { "epoch": 0.42, "learning_rate": 2.88600658864039e-05, "loss": 2.7751, "step": 127700 }, { "epoch": 0.42, "learning_rate": 2.8843511513566314e-05, "loss": 2.7939, "step": 127800 }, { "epoch": 0.42, "learning_rate": 2.8826957140728726e-05, "loss": 2.7713, "step": 127900 }, { "epoch": 0.42, "learning_rate": 2.8810402767891142e-05, "loss": 2.7717, "step": 128000 }, { "epoch": 0.42, "learning_rate": 2.879384839505355e-05, "loss": 2.7638, "step": 128100 }, { "epoch": 0.42, "learning_rate": 2.8777294022215967e-05, "loss": 2.7689, "step": 128200 }, { "epoch": 0.42, "learning_rate": 2.8760739649378382e-05, "loss": 2.7674, "step": 128300 }, { "epoch": 0.43, "learning_rate": 2.8744185276540798e-05, "loss": 2.7731, "step": 128400 }, { "epoch": 0.43, "learning_rate": 2.8727630903703214e-05, "loss": 2.7584, "step": 128500 }, { "epoch": 0.43, "learning_rate": 2.8711076530865626e-05, "loss": 2.7634, "step": 128600 }, { "epoch": 0.43, "learning_rate": 2.869452215802804e-05, "loss": 2.7676, "step": 128700 }, { "epoch": 0.43, "learning_rate": 2.8677967785190457e-05, "loss": 2.7672, "step": 128800 }, { "epoch": 0.43, "learning_rate": 2.8661413412352873e-05, "loss": 2.7591, "step": 128900 }, { "epoch": 0.43, "learning_rate": 2.864485903951529e-05, "loss": 2.7667, "step": 129000 }, { "epoch": 0.43, "learning_rate": 2.86283046666777e-05, "loss": 2.7586, "step": 129100 }, { "epoch": 0.43, "learning_rate": 2.8611750293840116e-05, "loss": 2.7567, "step": 129200 }, { "epoch": 0.43, "learning_rate": 2.8595195921002532e-05, "loss": 2.7603, "step": 129300 }, { "epoch": 0.43, "learning_rate": 2.8578641548164948e-05, "loss": 2.7638, "step": 129400 }, { "epoch": 0.43, "learning_rate": 2.8562087175327363e-05, "loss": 2.7663, "step": 129500 }, { "epoch": 0.43, "learning_rate": 2.854553280248978e-05, "loss": 2.7764, "step": 129600 }, { "epoch": 0.43, "learning_rate": 2.852897842965219e-05, "loss": 2.7759, "step": 129700 }, { "epoch": 0.43, "learning_rate": 2.8512424056814607e-05, "loss": 2.7692, "step": 129800 }, { "epoch": 0.43, "learning_rate": 2.8495869683977022e-05, "loss": 2.7648, "step": 129900 }, { "epoch": 0.43, "learning_rate": 2.8479315311139438e-05, "loss": 2.7646, "step": 130000 }, { "epoch": 0.43, "learning_rate": 2.8462760938301854e-05, "loss": 2.769, "step": 130100 }, { "epoch": 0.43, "learning_rate": 2.8446206565464266e-05, "loss": 2.7663, "step": 130200 }, { "epoch": 0.43, "learning_rate": 2.842965219262668e-05, "loss": 2.7672, "step": 130300 }, { "epoch": 0.43, "learning_rate": 2.8413097819789097e-05, "loss": 2.7752, "step": 130400 }, { "epoch": 0.43, "learning_rate": 2.8396543446951513e-05, "loss": 2.7577, "step": 130500 }, { "epoch": 0.43, "learning_rate": 2.837998907411393e-05, "loss": 2.7692, "step": 130600 }, { "epoch": 0.43, "learning_rate": 2.836343470127634e-05, "loss": 2.7913, "step": 130700 }, { "epoch": 0.43, "learning_rate": 2.8346880328438757e-05, "loss": 2.7795, "step": 130800 }, { "epoch": 0.43, "learning_rate": 2.8330325955601172e-05, "loss": 2.7759, "step": 130900 }, { "epoch": 0.43, "learning_rate": 2.8313771582763588e-05, "loss": 2.7664, "step": 131000 }, { "epoch": 0.43, "learning_rate": 2.8297217209926003e-05, "loss": 2.7526, "step": 131100 }, { "epoch": 0.43, "learning_rate": 2.828066283708842e-05, "loss": 2.7729, "step": 131200 }, { "epoch": 0.43, "learning_rate": 2.826410846425083e-05, "loss": 2.7746, "step": 131300 }, { "epoch": 0.44, "learning_rate": 2.8247554091413247e-05, "loss": 2.7763, "step": 131400 }, { "epoch": 0.44, "learning_rate": 2.8230999718575663e-05, "loss": 2.7723, "step": 131500 }, { "epoch": 0.44, "learning_rate": 2.821444534573808e-05, "loss": 2.7716, "step": 131600 }, { "epoch": 0.44, "learning_rate": 2.8197890972900494e-05, "loss": 2.7718, "step": 131700 }, { "epoch": 0.44, "learning_rate": 2.8181336600062906e-05, "loss": 2.7559, "step": 131800 }, { "epoch": 0.44, "learning_rate": 2.8164782227225322e-05, "loss": 2.7677, "step": 131900 }, { "epoch": 0.44, "learning_rate": 2.8148227854387738e-05, "loss": 2.7637, "step": 132000 }, { "epoch": 0.44, "learning_rate": 2.8131673481550153e-05, "loss": 2.7822, "step": 132100 }, { "epoch": 0.44, "learning_rate": 2.811511910871257e-05, "loss": 2.7664, "step": 132200 }, { "epoch": 0.44, "learning_rate": 2.809856473587498e-05, "loss": 2.7551, "step": 132300 }, { "epoch": 0.44, "learning_rate": 2.8082010363037397e-05, "loss": 2.7682, "step": 132400 }, { "epoch": 0.44, "learning_rate": 2.8065455990199812e-05, "loss": 2.7673, "step": 132500 }, { "epoch": 0.44, "learning_rate": 2.8048901617362228e-05, "loss": 2.7651, "step": 132600 }, { "epoch": 0.44, "learning_rate": 2.8032347244524644e-05, "loss": 2.7609, "step": 132700 }, { "epoch": 0.44, "learning_rate": 2.801579287168706e-05, "loss": 2.7732, "step": 132800 }, { "epoch": 0.44, "learning_rate": 2.799923849884947e-05, "loss": 2.7616, "step": 132900 }, { "epoch": 0.44, "learning_rate": 2.7982684126011887e-05, "loss": 2.7718, "step": 133000 }, { "epoch": 0.44, "learning_rate": 2.7966129753174303e-05, "loss": 2.773, "step": 133100 }, { "epoch": 0.44, "learning_rate": 2.794957538033672e-05, "loss": 2.7462, "step": 133200 }, { "epoch": 0.44, "learning_rate": 2.7933021007499134e-05, "loss": 2.7611, "step": 133300 }, { "epoch": 0.44, "learning_rate": 2.7916466634661546e-05, "loss": 2.7546, "step": 133400 }, { "epoch": 0.44, "learning_rate": 2.7899912261823962e-05, "loss": 2.7648, "step": 133500 }, { "epoch": 0.44, "learning_rate": 2.7883357888986378e-05, "loss": 2.7588, "step": 133600 }, { "epoch": 0.44, "learning_rate": 2.7866803516148793e-05, "loss": 2.7626, "step": 133700 }, { "epoch": 0.44, "learning_rate": 2.785024914331121e-05, "loss": 2.7613, "step": 133800 }, { "epoch": 0.44, "learning_rate": 2.783369477047362e-05, "loss": 2.7572, "step": 133900 }, { "epoch": 0.44, "learning_rate": 2.7817140397636037e-05, "loss": 2.7587, "step": 134000 }, { "epoch": 0.44, "learning_rate": 2.7800586024798453e-05, "loss": 2.7708, "step": 134100 }, { "epoch": 0.44, "learning_rate": 2.7784031651960868e-05, "loss": 2.7658, "step": 134200 }, { "epoch": 0.44, "learning_rate": 2.7767477279123284e-05, "loss": 2.771, "step": 134300 }, { "epoch": 0.44, "learning_rate": 2.77509229062857e-05, "loss": 2.7721, "step": 134400 }, { "epoch": 0.45, "learning_rate": 2.7734368533448112e-05, "loss": 2.7608, "step": 134500 }, { "epoch": 0.45, "learning_rate": 2.7717814160610527e-05, "loss": 2.7673, "step": 134600 }, { "epoch": 0.45, "learning_rate": 2.7701259787772943e-05, "loss": 2.7665, "step": 134700 }, { "epoch": 0.45, "learning_rate": 2.768470541493536e-05, "loss": 2.7544, "step": 134800 }, { "epoch": 0.45, "learning_rate": 2.7668151042097774e-05, "loss": 2.7729, "step": 134900 }, { "epoch": 0.45, "learning_rate": 2.7651596669260187e-05, "loss": 2.7816, "step": 135000 }, { "epoch": 0.45, "learning_rate": 2.7635042296422602e-05, "loss": 2.7751, "step": 135100 }, { "epoch": 0.45, "learning_rate": 2.7618487923585018e-05, "loss": 2.7694, "step": 135200 }, { "epoch": 0.45, "learning_rate": 2.7601933550747434e-05, "loss": 2.7621, "step": 135300 }, { "epoch": 0.45, "learning_rate": 2.758537917790985e-05, "loss": 2.7717, "step": 135400 }, { "epoch": 0.45, "learning_rate": 2.756882480507226e-05, "loss": 2.7587, "step": 135500 }, { "epoch": 0.45, "learning_rate": 2.7552270432234677e-05, "loss": 2.7672, "step": 135600 }, { "epoch": 0.45, "learning_rate": 2.7535716059397093e-05, "loss": 2.7724, "step": 135700 }, { "epoch": 0.45, "learning_rate": 2.751916168655951e-05, "loss": 2.7722, "step": 135800 }, { "epoch": 0.45, "learning_rate": 2.7502607313721924e-05, "loss": 2.7614, "step": 135900 }, { "epoch": 0.45, "learning_rate": 2.748605294088434e-05, "loss": 2.7678, "step": 136000 }, { "epoch": 0.45, "learning_rate": 2.7469498568046752e-05, "loss": 2.7522, "step": 136100 }, { "epoch": 0.45, "learning_rate": 2.7452944195209168e-05, "loss": 2.761, "step": 136200 }, { "epoch": 0.45, "learning_rate": 2.7436389822371583e-05, "loss": 2.7681, "step": 136300 }, { "epoch": 0.45, "learning_rate": 2.7419835449534e-05, "loss": 2.7675, "step": 136400 }, { "epoch": 0.45, "learning_rate": 2.7403281076696415e-05, "loss": 2.7725, "step": 136500 }, { "epoch": 0.45, "learning_rate": 2.7386726703858827e-05, "loss": 2.7524, "step": 136600 }, { "epoch": 0.45, "learning_rate": 2.7370172331021243e-05, "loss": 2.7623, "step": 136700 }, { "epoch": 0.45, "learning_rate": 2.7353617958183658e-05, "loss": 2.7596, "step": 136800 }, { "epoch": 0.45, "learning_rate": 2.7337063585346067e-05, "loss": 2.7668, "step": 136900 }, { "epoch": 0.45, "learning_rate": 2.7320509212508483e-05, "loss": 2.7637, "step": 137000 }, { "epoch": 0.45, "learning_rate": 2.73039548396709e-05, "loss": 2.7606, "step": 137100 }, { "epoch": 0.45, "learning_rate": 2.7287400466833314e-05, "loss": 2.7677, "step": 137200 }, { "epoch": 0.45, "learning_rate": 2.7270846093995726e-05, "loss": 2.7726, "step": 137300 }, { "epoch": 0.45, "learning_rate": 2.7254291721158142e-05, "loss": 2.7585, "step": 137400 }, { "epoch": 0.46, "learning_rate": 2.7237737348320558e-05, "loss": 2.7671, "step": 137500 }, { "epoch": 0.46, "learning_rate": 2.7221182975482973e-05, "loss": 2.7632, "step": 137600 }, { "epoch": 0.46, "learning_rate": 2.720462860264539e-05, "loss": 2.7683, "step": 137700 }, { "epoch": 0.46, "learning_rate": 2.7188074229807805e-05, "loss": 2.7597, "step": 137800 }, { "epoch": 0.46, "learning_rate": 2.7171519856970217e-05, "loss": 2.7624, "step": 137900 }, { "epoch": 0.46, "learning_rate": 2.7154965484132632e-05, "loss": 2.7487, "step": 138000 }, { "epoch": 0.46, "learning_rate": 2.7138411111295048e-05, "loss": 2.7608, "step": 138100 }, { "epoch": 0.46, "learning_rate": 2.7121856738457464e-05, "loss": 2.7635, "step": 138200 }, { "epoch": 0.46, "learning_rate": 2.710530236561988e-05, "loss": 2.7572, "step": 138300 }, { "epoch": 0.46, "learning_rate": 2.708874799278229e-05, "loss": 2.7688, "step": 138400 }, { "epoch": 0.46, "learning_rate": 2.7072193619944707e-05, "loss": 2.7577, "step": 138500 }, { "epoch": 0.46, "learning_rate": 2.7055639247107123e-05, "loss": 2.7655, "step": 138600 }, { "epoch": 0.46, "learning_rate": 2.703908487426954e-05, "loss": 2.765, "step": 138700 }, { "epoch": 0.46, "learning_rate": 2.7022530501431954e-05, "loss": 2.7587, "step": 138800 }, { "epoch": 0.46, "learning_rate": 2.7005976128594366e-05, "loss": 2.753, "step": 138900 }, { "epoch": 0.46, "learning_rate": 2.6989421755756782e-05, "loss": 2.7649, "step": 139000 }, { "epoch": 0.46, "learning_rate": 2.6972867382919198e-05, "loss": 2.7736, "step": 139100 }, { "epoch": 0.46, "learning_rate": 2.6956313010081613e-05, "loss": 2.7573, "step": 139200 }, { "epoch": 0.46, "learning_rate": 2.693975863724403e-05, "loss": 2.7588, "step": 139300 }, { "epoch": 0.46, "learning_rate": 2.6923204264406445e-05, "loss": 2.765, "step": 139400 }, { "epoch": 0.46, "learning_rate": 2.6906649891568857e-05, "loss": 2.7553, "step": 139500 }, { "epoch": 0.46, "learning_rate": 2.6890095518731273e-05, "loss": 2.7661, "step": 139600 }, { "epoch": 0.46, "learning_rate": 2.6873541145893688e-05, "loss": 2.7562, "step": 139700 }, { "epoch": 0.46, "learning_rate": 2.6856986773056104e-05, "loss": 2.7764, "step": 139800 }, { "epoch": 0.46, "learning_rate": 2.684043240021852e-05, "loss": 2.7704, "step": 139900 }, { "epoch": 0.46, "learning_rate": 2.6823878027380932e-05, "loss": 2.7584, "step": 140000 }, { "epoch": 0.46, "learning_rate": 2.6807323654543348e-05, "loss": 2.7533, "step": 140100 }, { "epoch": 0.46, "learning_rate": 2.6790769281705763e-05, "loss": 2.7695, "step": 140200 }, { "epoch": 0.46, "learning_rate": 2.677421490886818e-05, "loss": 2.772, "step": 140300 }, { "epoch": 0.46, "learning_rate": 2.6757660536030594e-05, "loss": 2.7739, "step": 140400 }, { "epoch": 0.47, "learning_rate": 2.6741106163193007e-05, "loss": 2.769, "step": 140500 }, { "epoch": 0.47, "learning_rate": 2.6724551790355422e-05, "loss": 2.7648, "step": 140600 }, { "epoch": 0.47, "learning_rate": 2.6707997417517838e-05, "loss": 2.7579, "step": 140700 }, { "epoch": 0.47, "learning_rate": 2.6691443044680254e-05, "loss": 2.76, "step": 140800 }, { "epoch": 0.47, "learning_rate": 2.667488867184267e-05, "loss": 2.7589, "step": 140900 }, { "epoch": 0.47, "learning_rate": 2.6658334299005085e-05, "loss": 2.759, "step": 141000 }, { "epoch": 0.47, "learning_rate": 2.6641779926167497e-05, "loss": 2.7644, "step": 141100 }, { "epoch": 0.47, "learning_rate": 2.6625225553329913e-05, "loss": 2.7615, "step": 141200 }, { "epoch": 0.47, "learning_rate": 2.660867118049233e-05, "loss": 2.7783, "step": 141300 }, { "epoch": 0.47, "learning_rate": 2.6592116807654744e-05, "loss": 2.7634, "step": 141400 }, { "epoch": 0.47, "learning_rate": 2.657556243481716e-05, "loss": 2.7676, "step": 141500 }, { "epoch": 0.47, "learning_rate": 2.6559008061979572e-05, "loss": 2.7678, "step": 141600 }, { "epoch": 0.47, "learning_rate": 2.6542453689141988e-05, "loss": 2.7621, "step": 141700 }, { "epoch": 0.47, "learning_rate": 2.6525899316304403e-05, "loss": 2.7751, "step": 141800 }, { "epoch": 0.47, "learning_rate": 2.650934494346682e-05, "loss": 2.7547, "step": 141900 }, { "epoch": 0.47, "learning_rate": 2.6492790570629235e-05, "loss": 2.7659, "step": 142000 }, { "epoch": 0.47, "learning_rate": 2.6476236197791647e-05, "loss": 2.7744, "step": 142100 }, { "epoch": 0.47, "learning_rate": 2.6459681824954063e-05, "loss": 2.7691, "step": 142200 }, { "epoch": 0.47, "learning_rate": 2.6443127452116478e-05, "loss": 2.751, "step": 142300 }, { "epoch": 0.47, "learning_rate": 2.6426573079278894e-05, "loss": 2.7613, "step": 142400 }, { "epoch": 0.47, "learning_rate": 2.641001870644131e-05, "loss": 2.761, "step": 142500 }, { "epoch": 0.47, "learning_rate": 2.6393464333603725e-05, "loss": 2.7635, "step": 142600 }, { "epoch": 0.47, "learning_rate": 2.6376909960766137e-05, "loss": 2.7661, "step": 142700 }, { "epoch": 0.47, "learning_rate": 2.6360355587928553e-05, "loss": 2.7689, "step": 142800 }, { "epoch": 0.47, "learning_rate": 2.634380121509097e-05, "loss": 2.7646, "step": 142900 }, { "epoch": 0.47, "learning_rate": 2.6327246842253384e-05, "loss": 2.7581, "step": 143000 }, { "epoch": 0.47, "learning_rate": 2.63106924694158e-05, "loss": 2.7611, "step": 143100 }, { "epoch": 0.47, "learning_rate": 2.6294138096578212e-05, "loss": 2.7563, "step": 143200 }, { "epoch": 0.47, "learning_rate": 2.6277583723740628e-05, "loss": 2.7549, "step": 143300 }, { "epoch": 0.47, "learning_rate": 2.6261029350903044e-05, "loss": 2.7579, "step": 143400 }, { "epoch": 0.48, "learning_rate": 2.624447497806546e-05, "loss": 2.7595, "step": 143500 }, { "epoch": 0.48, "learning_rate": 2.6227920605227875e-05, "loss": 2.7621, "step": 143600 }, { "epoch": 0.48, "learning_rate": 2.6211366232390287e-05, "loss": 2.7746, "step": 143700 }, { "epoch": 0.48, "learning_rate": 2.6194811859552703e-05, "loss": 2.7604, "step": 143800 }, { "epoch": 0.48, "learning_rate": 2.617825748671512e-05, "loss": 2.7711, "step": 143900 }, { "epoch": 0.48, "learning_rate": 2.6161703113877534e-05, "loss": 2.7651, "step": 144000 }, { "epoch": 0.48, "learning_rate": 2.614514874103995e-05, "loss": 2.7601, "step": 144100 }, { "epoch": 0.48, "learning_rate": 2.6128594368202365e-05, "loss": 2.7602, "step": 144200 }, { "epoch": 0.48, "learning_rate": 2.6112039995364778e-05, "loss": 2.7756, "step": 144300 }, { "epoch": 0.48, "learning_rate": 2.6095485622527193e-05, "loss": 2.7764, "step": 144400 }, { "epoch": 0.48, "learning_rate": 2.607893124968961e-05, "loss": 2.7707, "step": 144500 }, { "epoch": 0.48, "learning_rate": 2.6062376876852025e-05, "loss": 2.761, "step": 144600 }, { "epoch": 0.48, "learning_rate": 2.604582250401444e-05, "loss": 2.775, "step": 144700 }, { "epoch": 0.48, "learning_rate": 2.6029268131176853e-05, "loss": 2.7612, "step": 144800 }, { "epoch": 0.48, "learning_rate": 2.6012713758339268e-05, "loss": 2.7627, "step": 144900 }, { "epoch": 0.48, "learning_rate": 2.5996159385501684e-05, "loss": 2.7715, "step": 145000 }, { "epoch": 0.48, "learning_rate": 2.59796050126641e-05, "loss": 2.7681, "step": 145100 }, { "epoch": 0.48, "learning_rate": 2.5963050639826515e-05, "loss": 2.7625, "step": 145200 }, { "epoch": 0.48, "learning_rate": 2.5946496266988927e-05, "loss": 2.765, "step": 145300 }, { "epoch": 0.48, "learning_rate": 2.5929941894151343e-05, "loss": 2.7741, "step": 145400 }, { "epoch": 0.48, "learning_rate": 2.591338752131376e-05, "loss": 2.7688, "step": 145500 }, { "epoch": 0.48, "learning_rate": 2.5896833148476174e-05, "loss": 2.7621, "step": 145600 }, { "epoch": 0.48, "learning_rate": 2.588027877563859e-05, "loss": 2.7558, "step": 145700 }, { "epoch": 0.48, "learning_rate": 2.5863724402801e-05, "loss": 2.7556, "step": 145800 }, { "epoch": 0.48, "learning_rate": 2.5847170029963414e-05, "loss": 2.749, "step": 145900 }, { "epoch": 0.48, "learning_rate": 2.5830615657125827e-05, "loss": 2.7642, "step": 146000 }, { "epoch": 0.48, "learning_rate": 2.5814061284288242e-05, "loss": 2.7533, "step": 146100 }, { "epoch": 0.48, "learning_rate": 2.5797506911450658e-05, "loss": 2.7431, "step": 146200 }, { "epoch": 0.48, "learning_rate": 2.5780952538613074e-05, "loss": 2.7578, "step": 146300 }, { "epoch": 0.48, "learning_rate": 2.576439816577549e-05, "loss": 2.767, "step": 146400 }, { "epoch": 0.49, "learning_rate": 2.5747843792937905e-05, "loss": 2.7686, "step": 146500 }, { "epoch": 0.49, "learning_rate": 2.5731289420100317e-05, "loss": 2.7527, "step": 146600 }, { "epoch": 0.49, "learning_rate": 2.5714735047262733e-05, "loss": 2.758, "step": 146700 }, { "epoch": 0.49, "learning_rate": 2.569818067442515e-05, "loss": 2.779, "step": 146800 }, { "epoch": 0.49, "learning_rate": 2.5681626301587564e-05, "loss": 2.7566, "step": 146900 }, { "epoch": 0.49, "learning_rate": 2.566507192874998e-05, "loss": 2.7569, "step": 147000 }, { "epoch": 0.49, "learning_rate": 2.5648517555912392e-05, "loss": 2.7633, "step": 147100 }, { "epoch": 0.49, "learning_rate": 2.5631963183074808e-05, "loss": 2.7659, "step": 147200 }, { "epoch": 0.49, "learning_rate": 2.5615408810237223e-05, "loss": 2.7575, "step": 147300 }, { "epoch": 0.49, "learning_rate": 2.559885443739964e-05, "loss": 2.7632, "step": 147400 }, { "epoch": 0.49, "learning_rate": 2.5582300064562055e-05, "loss": 2.7531, "step": 147500 }, { "epoch": 0.49, "learning_rate": 2.5565745691724467e-05, "loss": 2.7492, "step": 147600 }, { "epoch": 0.49, "learning_rate": 2.5549191318886883e-05, "loss": 2.7779, "step": 147700 }, { "epoch": 0.49, "learning_rate": 2.5532636946049298e-05, "loss": 2.7601, "step": 147800 }, { "epoch": 0.49, "learning_rate": 2.5516082573211714e-05, "loss": 2.7685, "step": 147900 }, { "epoch": 0.49, "learning_rate": 2.549952820037413e-05, "loss": 2.7631, "step": 148000 }, { "epoch": 0.49, "learning_rate": 2.5482973827536545e-05, "loss": 2.7714, "step": 148100 }, { "epoch": 0.49, "learning_rate": 2.5466419454698957e-05, "loss": 2.7667, "step": 148200 }, { "epoch": 0.49, "learning_rate": 2.5449865081861373e-05, "loss": 2.7601, "step": 148300 }, { "epoch": 0.49, "learning_rate": 2.543331070902379e-05, "loss": 2.7657, "step": 148400 }, { "epoch": 0.49, "learning_rate": 2.5416756336186204e-05, "loss": 2.7568, "step": 148500 }, { "epoch": 0.49, "learning_rate": 2.540020196334862e-05, "loss": 2.7585, "step": 148600 }, { "epoch": 0.49, "learning_rate": 2.5383647590511032e-05, "loss": 2.7607, "step": 148700 }, { "epoch": 0.49, "learning_rate": 2.5367093217673448e-05, "loss": 2.7547, "step": 148800 }, { "epoch": 0.49, "learning_rate": 2.5350538844835864e-05, "loss": 2.7499, "step": 148900 }, { "epoch": 0.49, "learning_rate": 2.533398447199828e-05, "loss": 2.7555, "step": 149000 }, { "epoch": 0.49, "learning_rate": 2.5317430099160695e-05, "loss": 2.7587, "step": 149100 }, { "epoch": 0.49, "learning_rate": 2.5300875726323107e-05, "loss": 2.7685, "step": 149200 }, { "epoch": 0.49, "learning_rate": 2.5284321353485523e-05, "loss": 2.7481, "step": 149300 }, { "epoch": 0.49, "learning_rate": 2.526776698064794e-05, "loss": 2.767, "step": 149400 }, { "epoch": 0.49, "learning_rate": 2.5251212607810354e-05, "loss": 2.771, "step": 149500 }, { "epoch": 0.5, "learning_rate": 2.523465823497277e-05, "loss": 2.7641, "step": 149600 }, { "epoch": 0.5, "learning_rate": 2.5218103862135185e-05, "loss": 2.7564, "step": 149700 }, { "epoch": 0.5, "learning_rate": 2.5201549489297598e-05, "loss": 2.7497, "step": 149800 }, { "epoch": 0.5, "learning_rate": 2.5184995116460013e-05, "loss": 2.7568, "step": 149900 }, { "epoch": 0.5, "learning_rate": 2.516844074362243e-05, "loss": 2.769, "step": 150000 }, { "epoch": 0.5, "learning_rate": 2.5151886370784845e-05, "loss": 2.7619, "step": 150100 }, { "epoch": 0.5, "learning_rate": 2.513533199794726e-05, "loss": 2.7685, "step": 150200 }, { "epoch": 0.5, "learning_rate": 2.5118777625109673e-05, "loss": 2.7621, "step": 150300 }, { "epoch": 0.5, "learning_rate": 2.5102223252272088e-05, "loss": 2.7759, "step": 150400 }, { "epoch": 0.5, "learning_rate": 2.5085668879434504e-05, "loss": 2.7617, "step": 150500 }, { "epoch": 0.5, "learning_rate": 2.506911450659692e-05, "loss": 2.774, "step": 150600 }, { "epoch": 0.5, "learning_rate": 2.5052560133759335e-05, "loss": 2.7675, "step": 150700 }, { "epoch": 0.5, "learning_rate": 2.5036005760921747e-05, "loss": 2.7586, "step": 150800 }, { "epoch": 0.5, "learning_rate": 2.5019451388084163e-05, "loss": 2.7527, "step": 150900 }, { "epoch": 0.5, "learning_rate": 2.500289701524658e-05, "loss": 2.7774, "step": 151000 }, { "epoch": 0.5, "learning_rate": 2.4986342642408994e-05, "loss": 2.7564, "step": 151100 }, { "epoch": 0.5, "learning_rate": 2.496978826957141e-05, "loss": 2.7618, "step": 151200 }, { "epoch": 0.5, "learning_rate": 2.4953233896733826e-05, "loss": 2.7643, "step": 151300 }, { "epoch": 0.5, "learning_rate": 2.4936679523896238e-05, "loss": 2.7466, "step": 151400 }, { "epoch": 0.5, "learning_rate": 2.4920125151058654e-05, "loss": 2.7499, "step": 151500 }, { "epoch": 0.5, "learning_rate": 2.490357077822107e-05, "loss": 2.7693, "step": 151600 }, { "epoch": 0.5, "learning_rate": 2.4887016405383485e-05, "loss": 2.7698, "step": 151700 }, { "epoch": 0.5, "learning_rate": 2.48704620325459e-05, "loss": 2.7672, "step": 151800 }, { "epoch": 0.5, "learning_rate": 2.4853907659708313e-05, "loss": 2.7604, "step": 151900 }, { "epoch": 0.5, "learning_rate": 2.483735328687073e-05, "loss": 2.764, "step": 152000 }, { "epoch": 0.5, "learning_rate": 2.4820798914033144e-05, "loss": 2.7617, "step": 152100 }, { "epoch": 0.5, "learning_rate": 2.480424454119556e-05, "loss": 2.7731, "step": 152200 }, { "epoch": 0.5, "learning_rate": 2.4787690168357975e-05, "loss": 2.7697, "step": 152300 }, { "epoch": 0.5, "learning_rate": 2.4771135795520388e-05, "loss": 2.7486, "step": 152400 }, { "epoch": 0.5, "learning_rate": 2.47545814226828e-05, "loss": 2.7679, "step": 152500 }, { "epoch": 0.51, "learning_rate": 2.4738027049845216e-05, "loss": 2.7592, "step": 152600 }, { "epoch": 0.51, "learning_rate": 2.472147267700763e-05, "loss": 2.7666, "step": 152700 }, { "epoch": 0.51, "learning_rate": 2.4704918304170047e-05, "loss": 2.7522, "step": 152800 }, { "epoch": 0.51, "learning_rate": 2.4688363931332462e-05, "loss": 2.7522, "step": 152900 }, { "epoch": 0.51, "learning_rate": 2.4671809558494878e-05, "loss": 2.7573, "step": 153000 }, { "epoch": 0.51, "learning_rate": 2.465525518565729e-05, "loss": 2.7598, "step": 153100 }, { "epoch": 0.51, "learning_rate": 2.4638700812819706e-05, "loss": 2.77, "step": 153200 }, { "epoch": 0.51, "learning_rate": 2.4622146439982122e-05, "loss": 2.7641, "step": 153300 }, { "epoch": 0.51, "learning_rate": 2.4605592067144537e-05, "loss": 2.7722, "step": 153400 }, { "epoch": 0.51, "learning_rate": 2.4589037694306953e-05, "loss": 2.7577, "step": 153500 }, { "epoch": 0.51, "learning_rate": 2.4572483321469365e-05, "loss": 2.7658, "step": 153600 }, { "epoch": 0.51, "learning_rate": 2.455592894863178e-05, "loss": 2.7664, "step": 153700 }, { "epoch": 0.51, "learning_rate": 2.4539374575794197e-05, "loss": 2.7616, "step": 153800 }, { "epoch": 0.51, "learning_rate": 2.4522820202956612e-05, "loss": 2.7588, "step": 153900 }, { "epoch": 0.51, "learning_rate": 2.4506265830119028e-05, "loss": 2.766, "step": 154000 }, { "epoch": 0.51, "learning_rate": 2.448971145728144e-05, "loss": 2.7566, "step": 154100 }, { "epoch": 0.51, "learning_rate": 2.4473157084443856e-05, "loss": 2.759, "step": 154200 }, { "epoch": 0.51, "learning_rate": 2.445660271160627e-05, "loss": 2.7569, "step": 154300 }, { "epoch": 0.51, "learning_rate": 2.4440048338768687e-05, "loss": 2.7595, "step": 154400 }, { "epoch": 0.51, "learning_rate": 2.4423493965931103e-05, "loss": 2.7679, "step": 154500 }, { "epoch": 0.51, "learning_rate": 2.440693959309352e-05, "loss": 2.7456, "step": 154600 }, { "epoch": 0.51, "learning_rate": 2.439038522025593e-05, "loss": 2.7602, "step": 154700 }, { "epoch": 0.51, "learning_rate": 2.4373830847418346e-05, "loss": 2.7597, "step": 154800 }, { "epoch": 0.51, "learning_rate": 2.4357276474580762e-05, "loss": 2.7646, "step": 154900 }, { "epoch": 0.51, "learning_rate": 2.4340722101743178e-05, "loss": 2.7501, "step": 155000 }, { "epoch": 0.51, "learning_rate": 2.4324167728905593e-05, "loss": 2.7667, "step": 155100 }, { "epoch": 0.51, "learning_rate": 2.4307613356068005e-05, "loss": 2.7502, "step": 155200 }, { "epoch": 0.51, "learning_rate": 2.429105898323042e-05, "loss": 2.7727, "step": 155300 }, { "epoch": 0.51, "learning_rate": 2.4274504610392837e-05, "loss": 2.7515, "step": 155400 }, { "epoch": 0.51, "learning_rate": 2.4257950237555252e-05, "loss": 2.7618, "step": 155500 }, { "epoch": 0.52, "learning_rate": 2.4241395864717668e-05, "loss": 2.7689, "step": 155600 }, { "epoch": 0.52, "learning_rate": 2.422484149188008e-05, "loss": 2.762, "step": 155700 }, { "epoch": 0.52, "learning_rate": 2.4208287119042496e-05, "loss": 2.7618, "step": 155800 }, { "epoch": 0.52, "learning_rate": 2.419173274620491e-05, "loss": 2.7516, "step": 155900 }, { "epoch": 0.52, "learning_rate": 2.4175178373367327e-05, "loss": 2.7509, "step": 156000 }, { "epoch": 0.52, "learning_rate": 2.4158624000529743e-05, "loss": 2.7711, "step": 156100 }, { "epoch": 0.52, "learning_rate": 2.414206962769216e-05, "loss": 2.743, "step": 156200 }, { "epoch": 0.52, "learning_rate": 2.412551525485457e-05, "loss": 2.7686, "step": 156300 }, { "epoch": 0.52, "learning_rate": 2.4108960882016986e-05, "loss": 2.7728, "step": 156400 }, { "epoch": 0.52, "learning_rate": 2.4092406509179402e-05, "loss": 2.7622, "step": 156500 }, { "epoch": 0.52, "learning_rate": 2.4075852136341818e-05, "loss": 2.7661, "step": 156600 }, { "epoch": 0.52, "learning_rate": 2.4059297763504233e-05, "loss": 2.753, "step": 156700 }, { "epoch": 0.52, "learning_rate": 2.4042743390666646e-05, "loss": 2.7587, "step": 156800 }, { "epoch": 0.52, "learning_rate": 2.4026189017829058e-05, "loss": 2.7358, "step": 156900 }, { "epoch": 0.52, "learning_rate": 2.4009634644991474e-05, "loss": 2.7457, "step": 157000 }, { "epoch": 0.52, "learning_rate": 2.399308027215389e-05, "loss": 2.7649, "step": 157100 }, { "epoch": 0.52, "learning_rate": 2.3976525899316305e-05, "loss": 2.7546, "step": 157200 }, { "epoch": 0.52, "learning_rate": 2.395997152647872e-05, "loss": 2.7689, "step": 157300 }, { "epoch": 0.52, "learning_rate": 2.3943417153641133e-05, "loss": 2.762, "step": 157400 }, { "epoch": 0.52, "learning_rate": 2.392686278080355e-05, "loss": 2.7613, "step": 157500 }, { "epoch": 0.52, "learning_rate": 2.3910308407965964e-05, "loss": 2.7695, "step": 157600 }, { "epoch": 0.52, "learning_rate": 2.389375403512838e-05, "loss": 2.7827, "step": 157700 }, { "epoch": 0.52, "learning_rate": 2.3877199662290795e-05, "loss": 2.7763, "step": 157800 }, { "epoch": 0.52, "learning_rate": 2.386064528945321e-05, "loss": 2.7638, "step": 157900 }, { "epoch": 0.52, "learning_rate": 2.3844090916615623e-05, "loss": 2.7696, "step": 158000 }, { "epoch": 0.52, "learning_rate": 2.382753654377804e-05, "loss": 2.7541, "step": 158100 }, { "epoch": 0.52, "learning_rate": 2.3810982170940455e-05, "loss": 2.7653, "step": 158200 }, { "epoch": 0.52, "learning_rate": 2.379442779810287e-05, "loss": 2.7581, "step": 158300 }, { "epoch": 0.52, "learning_rate": 2.3777873425265286e-05, "loss": 2.7607, "step": 158400 }, { "epoch": 0.52, "learning_rate": 2.3761319052427698e-05, "loss": 2.7608, "step": 158500 }, { "epoch": 0.53, "learning_rate": 2.3744764679590114e-05, "loss": 2.7555, "step": 158600 }, { "epoch": 0.53, "learning_rate": 2.372821030675253e-05, "loss": 2.7535, "step": 158700 }, { "epoch": 0.53, "learning_rate": 2.3711655933914945e-05, "loss": 2.758, "step": 158800 }, { "epoch": 0.53, "learning_rate": 2.369510156107736e-05, "loss": 2.7536, "step": 158900 }, { "epoch": 0.53, "learning_rate": 2.3678547188239773e-05, "loss": 2.7574, "step": 159000 }, { "epoch": 0.53, "learning_rate": 2.366199281540219e-05, "loss": 2.7538, "step": 159100 }, { "epoch": 0.53, "learning_rate": 2.3645438442564604e-05, "loss": 2.7642, "step": 159200 }, { "epoch": 0.53, "learning_rate": 2.362888406972702e-05, "loss": 2.7506, "step": 159300 }, { "epoch": 0.53, "learning_rate": 2.3612329696889436e-05, "loss": 2.7694, "step": 159400 }, { "epoch": 0.53, "learning_rate": 2.359577532405185e-05, "loss": 2.7547, "step": 159500 }, { "epoch": 0.53, "learning_rate": 2.3579220951214264e-05, "loss": 2.7565, "step": 159600 }, { "epoch": 0.53, "learning_rate": 2.356266657837668e-05, "loss": 2.7783, "step": 159700 }, { "epoch": 0.53, "learning_rate": 2.3546112205539095e-05, "loss": 2.7623, "step": 159800 }, { "epoch": 0.53, "learning_rate": 2.352955783270151e-05, "loss": 2.7555, "step": 159900 }, { "epoch": 0.53, "learning_rate": 2.3513003459863926e-05, "loss": 2.7763, "step": 160000 }, { "epoch": 0.53, "learning_rate": 2.349644908702634e-05, "loss": 2.7544, "step": 160100 }, { "epoch": 0.53, "learning_rate": 2.3479894714188754e-05, "loss": 2.7673, "step": 160200 }, { "epoch": 0.53, "learning_rate": 2.346334034135117e-05, "loss": 2.7655, "step": 160300 }, { "epoch": 0.53, "learning_rate": 2.3446785968513585e-05, "loss": 2.7559, "step": 160400 }, { "epoch": 0.53, "learning_rate": 2.3430231595676e-05, "loss": 2.7551, "step": 160500 }, { "epoch": 0.53, "learning_rate": 2.3413677222838413e-05, "loss": 2.7534, "step": 160600 }, { "epoch": 0.53, "learning_rate": 2.339712285000083e-05, "loss": 2.7566, "step": 160700 }, { "epoch": 0.53, "learning_rate": 2.3380568477163245e-05, "loss": 2.766, "step": 160800 }, { "epoch": 0.53, "learning_rate": 2.336401410432566e-05, "loss": 2.7654, "step": 160900 }, { "epoch": 0.53, "learning_rate": 2.3347459731488076e-05, "loss": 2.7703, "step": 161000 }, { "epoch": 0.53, "learning_rate": 2.333090535865049e-05, "loss": 2.7566, "step": 161100 }, { "epoch": 0.53, "learning_rate": 2.3314350985812904e-05, "loss": 2.7669, "step": 161200 }, { "epoch": 0.53, "learning_rate": 2.3297796612975316e-05, "loss": 2.7644, "step": 161300 }, { "epoch": 0.53, "learning_rate": 2.328124224013773e-05, "loss": 2.7468, "step": 161400 }, { "epoch": 0.53, "learning_rate": 2.3264687867300147e-05, "loss": 2.7487, "step": 161500 }, { "epoch": 0.54, "learning_rate": 2.3248133494462563e-05, "loss": 2.7594, "step": 161600 }, { "epoch": 0.54, "learning_rate": 2.323157912162498e-05, "loss": 2.7513, "step": 161700 }, { "epoch": 0.54, "learning_rate": 2.321502474878739e-05, "loss": 2.7479, "step": 161800 }, { "epoch": 0.54, "learning_rate": 2.3198470375949807e-05, "loss": 2.7674, "step": 161900 }, { "epoch": 0.54, "learning_rate": 2.3181916003112222e-05, "loss": 2.7543, "step": 162000 }, { "epoch": 0.54, "learning_rate": 2.3165361630274638e-05, "loss": 2.7558, "step": 162100 }, { "epoch": 0.54, "learning_rate": 2.3148807257437053e-05, "loss": 2.7706, "step": 162200 }, { "epoch": 0.54, "learning_rate": 2.3132252884599466e-05, "loss": 2.7762, "step": 162300 }, { "epoch": 0.54, "learning_rate": 2.311569851176188e-05, "loss": 2.7605, "step": 162400 }, { "epoch": 0.54, "learning_rate": 2.3099144138924297e-05, "loss": 2.7657, "step": 162500 }, { "epoch": 0.54, "learning_rate": 2.3082589766086713e-05, "loss": 2.774, "step": 162600 }, { "epoch": 0.54, "learning_rate": 2.306603539324913e-05, "loss": 2.7623, "step": 162700 }, { "epoch": 0.54, "learning_rate": 2.3049481020411544e-05, "loss": 2.7465, "step": 162800 }, { "epoch": 0.54, "learning_rate": 2.3032926647573956e-05, "loss": 2.7664, "step": 162900 }, { "epoch": 0.54, "learning_rate": 2.3016372274736372e-05, "loss": 2.7569, "step": 163000 }, { "epoch": 0.54, "learning_rate": 2.2999817901898788e-05, "loss": 2.7459, "step": 163100 }, { "epoch": 0.54, "learning_rate": 2.2983263529061203e-05, "loss": 2.7612, "step": 163200 }, { "epoch": 0.54, "learning_rate": 2.296670915622362e-05, "loss": 2.7619, "step": 163300 }, { "epoch": 0.54, "learning_rate": 2.295015478338603e-05, "loss": 2.7569, "step": 163400 }, { "epoch": 0.54, "learning_rate": 2.2933600410548447e-05, "loss": 2.7652, "step": 163500 }, { "epoch": 0.54, "learning_rate": 2.2917046037710862e-05, "loss": 2.7587, "step": 163600 }, { "epoch": 0.54, "learning_rate": 2.2900491664873278e-05, "loss": 2.7538, "step": 163700 }, { "epoch": 0.54, "learning_rate": 2.2883937292035694e-05, "loss": 2.7635, "step": 163800 }, { "epoch": 0.54, "learning_rate": 2.2867382919198106e-05, "loss": 2.7569, "step": 163900 }, { "epoch": 0.54, "learning_rate": 2.285082854636052e-05, "loss": 2.7699, "step": 164000 }, { "epoch": 0.54, "learning_rate": 2.2834274173522937e-05, "loss": 2.747, "step": 164100 }, { "epoch": 0.54, "learning_rate": 2.2817719800685353e-05, "loss": 2.7667, "step": 164200 }, { "epoch": 0.54, "learning_rate": 2.280116542784777e-05, "loss": 2.7533, "step": 164300 }, { "epoch": 0.54, "learning_rate": 2.2784611055010184e-05, "loss": 2.7504, "step": 164400 }, { "epoch": 0.54, "learning_rate": 2.2768056682172596e-05, "loss": 2.7549, "step": 164500 }, { "epoch": 0.54, "learning_rate": 2.2751502309335012e-05, "loss": 2.7496, "step": 164600 }, { "epoch": 0.55, "learning_rate": 2.2734947936497428e-05, "loss": 2.7592, "step": 164700 }, { "epoch": 0.55, "learning_rate": 2.2718393563659843e-05, "loss": 2.7628, "step": 164800 }, { "epoch": 0.55, "learning_rate": 2.270183919082226e-05, "loss": 2.757, "step": 164900 }, { "epoch": 0.55, "learning_rate": 2.268528481798467e-05, "loss": 2.7624, "step": 165000 }, { "epoch": 0.55, "learning_rate": 2.2668730445147087e-05, "loss": 2.7472, "step": 165100 }, { "epoch": 0.55, "learning_rate": 2.2652176072309503e-05, "loss": 2.7468, "step": 165200 }, { "epoch": 0.55, "learning_rate": 2.2635621699471918e-05, "loss": 2.7613, "step": 165300 }, { "epoch": 0.55, "learning_rate": 2.2619067326634334e-05, "loss": 2.7578, "step": 165400 }, { "epoch": 0.55, "learning_rate": 2.2602512953796746e-05, "loss": 2.7635, "step": 165500 }, { "epoch": 0.55, "learning_rate": 2.258595858095916e-05, "loss": 2.7544, "step": 165600 }, { "epoch": 0.55, "learning_rate": 2.2569404208121574e-05, "loss": 2.7684, "step": 165700 }, { "epoch": 0.55, "learning_rate": 2.255284983528399e-05, "loss": 2.7509, "step": 165800 }, { "epoch": 0.55, "learning_rate": 2.2536295462446405e-05, "loss": 2.7467, "step": 165900 }, { "epoch": 0.55, "learning_rate": 2.251974108960882e-05, "loss": 2.7633, "step": 166000 }, { "epoch": 0.55, "learning_rate": 2.2503186716771237e-05, "loss": 2.752, "step": 166100 }, { "epoch": 0.55, "learning_rate": 2.248663234393365e-05, "loss": 2.776, "step": 166200 }, { "epoch": 0.55, "learning_rate": 2.2470077971096065e-05, "loss": 2.758, "step": 166300 }, { "epoch": 0.55, "learning_rate": 2.245352359825848e-05, "loss": 2.7593, "step": 166400 }, { "epoch": 0.55, "learning_rate": 2.2436969225420896e-05, "loss": 2.7446, "step": 166500 }, { "epoch": 0.55, "learning_rate": 2.242041485258331e-05, "loss": 2.7566, "step": 166600 }, { "epoch": 0.55, "learning_rate": 2.2403860479745724e-05, "loss": 2.7541, "step": 166700 }, { "epoch": 0.55, "learning_rate": 2.238730610690814e-05, "loss": 2.7715, "step": 166800 }, { "epoch": 0.55, "learning_rate": 2.2370751734070555e-05, "loss": 2.7541, "step": 166900 }, { "epoch": 0.55, "learning_rate": 2.235419736123297e-05, "loss": 2.7589, "step": 167000 }, { "epoch": 0.55, "learning_rate": 2.2337642988395386e-05, "loss": 2.7481, "step": 167100 }, { "epoch": 0.55, "learning_rate": 2.23210886155578e-05, "loss": 2.7516, "step": 167200 }, { "epoch": 0.55, "learning_rate": 2.2304534242720214e-05, "loss": 2.7497, "step": 167300 }, { "epoch": 0.55, "learning_rate": 2.228797986988263e-05, "loss": 2.7477, "step": 167400 }, { "epoch": 0.55, "learning_rate": 2.2271425497045046e-05, "loss": 2.7661, "step": 167500 }, { "epoch": 0.55, "learning_rate": 2.225487112420746e-05, "loss": 2.758, "step": 167600 }, { "epoch": 0.56, "learning_rate": 2.2238316751369877e-05, "loss": 2.7561, "step": 167700 }, { "epoch": 0.56, "learning_rate": 2.222176237853229e-05, "loss": 2.7541, "step": 167800 }, { "epoch": 0.56, "learning_rate": 2.2205208005694705e-05, "loss": 2.7516, "step": 167900 }, { "epoch": 0.56, "learning_rate": 2.218865363285712e-05, "loss": 2.761, "step": 168000 }, { "epoch": 0.56, "learning_rate": 2.2172099260019536e-05, "loss": 2.7456, "step": 168100 }, { "epoch": 0.56, "learning_rate": 2.2155544887181952e-05, "loss": 2.7571, "step": 168200 }, { "epoch": 0.56, "learning_rate": 2.2138990514344364e-05, "loss": 2.76, "step": 168300 }, { "epoch": 0.56, "learning_rate": 2.212243614150678e-05, "loss": 2.7626, "step": 168400 }, { "epoch": 0.56, "learning_rate": 2.2105881768669195e-05, "loss": 2.7529, "step": 168500 }, { "epoch": 0.56, "learning_rate": 2.208932739583161e-05, "loss": 2.772, "step": 168600 }, { "epoch": 0.56, "learning_rate": 2.2072773022994027e-05, "loss": 2.7667, "step": 168700 }, { "epoch": 0.56, "learning_rate": 2.205621865015644e-05, "loss": 2.7534, "step": 168800 }, { "epoch": 0.56, "learning_rate": 2.2039664277318855e-05, "loss": 2.7559, "step": 168900 }, { "epoch": 0.56, "learning_rate": 2.202310990448127e-05, "loss": 2.743, "step": 169000 }, { "epoch": 0.56, "learning_rate": 2.2006555531643686e-05, "loss": 2.7539, "step": 169100 }, { "epoch": 0.56, "learning_rate": 2.19900011588061e-05, "loss": 2.7676, "step": 169200 }, { "epoch": 0.56, "learning_rate": 2.1973446785968517e-05, "loss": 2.763, "step": 169300 }, { "epoch": 0.56, "learning_rate": 2.195689241313093e-05, "loss": 2.7507, "step": 169400 }, { "epoch": 0.56, "learning_rate": 2.1940338040293345e-05, "loss": 2.7607, "step": 169500 }, { "epoch": 0.56, "learning_rate": 2.192378366745576e-05, "loss": 2.7614, "step": 169600 }, { "epoch": 0.56, "learning_rate": 2.1907229294618176e-05, "loss": 2.7604, "step": 169700 }, { "epoch": 0.56, "learning_rate": 2.1890674921780592e-05, "loss": 2.7641, "step": 169800 }, { "epoch": 0.56, "learning_rate": 2.1874120548943004e-05, "loss": 2.7523, "step": 169900 }, { "epoch": 0.56, "learning_rate": 2.185756617610542e-05, "loss": 2.751, "step": 170000 }, { "epoch": 0.56, "learning_rate": 2.1841011803267832e-05, "loss": 2.7579, "step": 170100 }, { "epoch": 0.56, "learning_rate": 2.1824457430430248e-05, "loss": 2.7561, "step": 170200 }, { "epoch": 0.56, "learning_rate": 2.1807903057592663e-05, "loss": 2.7606, "step": 170300 }, { "epoch": 0.56, "learning_rate": 2.179134868475508e-05, "loss": 2.7531, "step": 170400 }, { "epoch": 0.56, "learning_rate": 2.177479431191749e-05, "loss": 2.752, "step": 170500 }, { "epoch": 0.56, "learning_rate": 2.1758239939079907e-05, "loss": 2.7467, "step": 170600 }, { "epoch": 0.57, "learning_rate": 2.1741685566242323e-05, "loss": 2.7636, "step": 170700 }, { "epoch": 0.57, "learning_rate": 2.172513119340474e-05, "loss": 2.7591, "step": 170800 }, { "epoch": 0.57, "learning_rate": 2.1708576820567154e-05, "loss": 2.7634, "step": 170900 }, { "epoch": 0.57, "learning_rate": 2.169202244772957e-05, "loss": 2.7558, "step": 171000 }, { "epoch": 0.57, "learning_rate": 2.1675468074891982e-05, "loss": 2.757, "step": 171100 }, { "epoch": 0.57, "learning_rate": 2.1658913702054398e-05, "loss": 2.757, "step": 171200 }, { "epoch": 0.57, "learning_rate": 2.1642359329216813e-05, "loss": 2.7554, "step": 171300 }, { "epoch": 0.57, "learning_rate": 2.162580495637923e-05, "loss": 2.7541, "step": 171400 }, { "epoch": 0.57, "learning_rate": 2.1609250583541644e-05, "loss": 2.7573, "step": 171500 }, { "epoch": 0.57, "learning_rate": 2.1592696210704057e-05, "loss": 2.7545, "step": 171600 }, { "epoch": 0.57, "learning_rate": 2.1576141837866472e-05, "loss": 2.757, "step": 171700 }, { "epoch": 0.57, "learning_rate": 2.1559587465028888e-05, "loss": 2.7497, "step": 171800 }, { "epoch": 0.57, "learning_rate": 2.1543033092191304e-05, "loss": 2.7546, "step": 171900 }, { "epoch": 0.57, "learning_rate": 2.152647871935372e-05, "loss": 2.7519, "step": 172000 }, { "epoch": 0.57, "learning_rate": 2.150992434651613e-05, "loss": 2.7586, "step": 172100 }, { "epoch": 0.57, "learning_rate": 2.1493369973678547e-05, "loss": 2.7488, "step": 172200 }, { "epoch": 0.57, "learning_rate": 2.1476815600840963e-05, "loss": 2.7607, "step": 172300 }, { "epoch": 0.57, "learning_rate": 2.146026122800338e-05, "loss": 2.7493, "step": 172400 }, { "epoch": 0.57, "learning_rate": 2.1443706855165794e-05, "loss": 2.7703, "step": 172500 }, { "epoch": 0.57, "learning_rate": 2.142715248232821e-05, "loss": 2.7505, "step": 172600 }, { "epoch": 0.57, "learning_rate": 2.1410598109490622e-05, "loss": 2.7592, "step": 172700 }, { "epoch": 0.57, "learning_rate": 2.1394043736653038e-05, "loss": 2.7536, "step": 172800 }, { "epoch": 0.57, "learning_rate": 2.1377489363815453e-05, "loss": 2.7547, "step": 172900 }, { "epoch": 0.57, "learning_rate": 2.136093499097787e-05, "loss": 2.7695, "step": 173000 }, { "epoch": 0.57, "learning_rate": 2.1344380618140285e-05, "loss": 2.7408, "step": 173100 }, { "epoch": 0.57, "learning_rate": 2.1327826245302697e-05, "loss": 2.7605, "step": 173200 }, { "epoch": 0.57, "learning_rate": 2.1311271872465113e-05, "loss": 2.7397, "step": 173300 }, { "epoch": 0.57, "learning_rate": 2.1294717499627528e-05, "loss": 2.7586, "step": 173400 }, { "epoch": 0.57, "learning_rate": 2.1278163126789944e-05, "loss": 2.7434, "step": 173500 }, { "epoch": 0.57, "learning_rate": 2.126160875395236e-05, "loss": 2.7673, "step": 173600 }, { "epoch": 0.58, "learning_rate": 2.1245054381114772e-05, "loss": 2.7664, "step": 173700 }, { "epoch": 0.58, "learning_rate": 2.1228500008277187e-05, "loss": 2.7581, "step": 173800 }, { "epoch": 0.58, "learning_rate": 2.1211945635439603e-05, "loss": 2.752, "step": 173900 }, { "epoch": 0.58, "learning_rate": 2.119539126260202e-05, "loss": 2.7673, "step": 174000 }, { "epoch": 0.58, "learning_rate": 2.1178836889764434e-05, "loss": 2.7517, "step": 174100 }, { "epoch": 0.58, "learning_rate": 2.116228251692685e-05, "loss": 2.7555, "step": 174200 }, { "epoch": 0.58, "learning_rate": 2.1145728144089262e-05, "loss": 2.752, "step": 174300 }, { "epoch": 0.58, "learning_rate": 2.1129173771251678e-05, "loss": 2.7606, "step": 174400 }, { "epoch": 0.58, "learning_rate": 2.111261939841409e-05, "loss": 2.7613, "step": 174500 }, { "epoch": 0.58, "learning_rate": 2.1096065025576506e-05, "loss": 2.7651, "step": 174600 }, { "epoch": 0.58, "learning_rate": 2.107951065273892e-05, "loss": 2.7278, "step": 174700 }, { "epoch": 0.58, "learning_rate": 2.1062956279901337e-05, "loss": 2.76, "step": 174800 }, { "epoch": 0.58, "learning_rate": 2.104640190706375e-05, "loss": 2.7475, "step": 174900 }, { "epoch": 0.58, "learning_rate": 2.1029847534226165e-05, "loss": 2.7649, "step": 175000 }, { "epoch": 0.58, "learning_rate": 2.101329316138858e-05, "loss": 2.7541, "step": 175100 }, { "epoch": 0.58, "learning_rate": 2.0996738788550996e-05, "loss": 2.7557, "step": 175200 }, { "epoch": 0.58, "learning_rate": 2.0980184415713412e-05, "loss": 2.7733, "step": 175300 }, { "epoch": 0.58, "learning_rate": 2.0963630042875824e-05, "loss": 2.762, "step": 175400 }, { "epoch": 0.58, "learning_rate": 2.094707567003824e-05, "loss": 2.7556, "step": 175500 }, { "epoch": 0.58, "learning_rate": 2.0930521297200656e-05, "loss": 2.7524, "step": 175600 }, { "epoch": 0.58, "learning_rate": 2.091396692436307e-05, "loss": 2.7534, "step": 175700 }, { "epoch": 0.58, "learning_rate": 2.0897412551525487e-05, "loss": 2.7685, "step": 175800 }, { "epoch": 0.58, "learning_rate": 2.0880858178687903e-05, "loss": 2.7488, "step": 175900 }, { "epoch": 0.58, "learning_rate": 2.0864303805850315e-05, "loss": 2.7595, "step": 176000 }, { "epoch": 0.58, "learning_rate": 2.084774943301273e-05, "loss": 2.7443, "step": 176100 }, { "epoch": 0.58, "learning_rate": 2.0831195060175146e-05, "loss": 2.7476, "step": 176200 }, { "epoch": 0.58, "learning_rate": 2.0814640687337562e-05, "loss": 2.7457, "step": 176300 }, { "epoch": 0.58, "learning_rate": 2.0798086314499977e-05, "loss": 2.7609, "step": 176400 }, { "epoch": 0.58, "learning_rate": 2.078153194166239e-05, "loss": 2.7496, "step": 176500 }, { "epoch": 0.58, "learning_rate": 2.0764977568824805e-05, "loss": 2.765, "step": 176600 }, { "epoch": 0.59, "learning_rate": 2.074842319598722e-05, "loss": 2.7555, "step": 176700 }, { "epoch": 0.59, "learning_rate": 2.0731868823149637e-05, "loss": 2.7544, "step": 176800 }, { "epoch": 0.59, "learning_rate": 2.0715314450312052e-05, "loss": 2.7543, "step": 176900 }, { "epoch": 0.59, "learning_rate": 2.0698760077474465e-05, "loss": 2.7476, "step": 177000 }, { "epoch": 0.59, "learning_rate": 2.068220570463688e-05, "loss": 2.7468, "step": 177100 }, { "epoch": 0.59, "learning_rate": 2.0665651331799296e-05, "loss": 2.7524, "step": 177200 }, { "epoch": 0.59, "learning_rate": 2.064909695896171e-05, "loss": 2.7619, "step": 177300 }, { "epoch": 0.59, "learning_rate": 2.0632542586124127e-05, "loss": 2.7501, "step": 177400 }, { "epoch": 0.59, "learning_rate": 2.0615988213286543e-05, "loss": 2.756, "step": 177500 }, { "epoch": 0.59, "learning_rate": 2.0599433840448955e-05, "loss": 2.7525, "step": 177600 }, { "epoch": 0.59, "learning_rate": 2.058287946761137e-05, "loss": 2.762, "step": 177700 }, { "epoch": 0.59, "learning_rate": 2.0566325094773786e-05, "loss": 2.7364, "step": 177800 }, { "epoch": 0.59, "learning_rate": 2.0549770721936202e-05, "loss": 2.7625, "step": 177900 }, { "epoch": 0.59, "learning_rate": 2.0533216349098618e-05, "loss": 2.7474, "step": 178000 }, { "epoch": 0.59, "learning_rate": 2.051666197626103e-05, "loss": 2.7602, "step": 178100 }, { "epoch": 0.59, "learning_rate": 2.0500107603423446e-05, "loss": 2.7514, "step": 178200 }, { "epoch": 0.59, "learning_rate": 2.048355323058586e-05, "loss": 2.7635, "step": 178300 }, { "epoch": 0.59, "learning_rate": 2.0466998857748277e-05, "loss": 2.7356, "step": 178400 }, { "epoch": 0.59, "learning_rate": 2.0450444484910692e-05, "loss": 2.7484, "step": 178500 }, { "epoch": 0.59, "learning_rate": 2.0433890112073105e-05, "loss": 2.7683, "step": 178600 }, { "epoch": 0.59, "learning_rate": 2.041733573923552e-05, "loss": 2.7649, "step": 178700 }, { "epoch": 0.59, "learning_rate": 2.0400781366397936e-05, "loss": 2.7526, "step": 178800 }, { "epoch": 0.59, "learning_rate": 2.0384226993560348e-05, "loss": 2.7415, "step": 178900 }, { "epoch": 0.59, "learning_rate": 2.0367672620722764e-05, "loss": 2.7505, "step": 179000 }, { "epoch": 0.59, "learning_rate": 2.035111824788518e-05, "loss": 2.7513, "step": 179100 }, { "epoch": 0.59, "learning_rate": 2.0334563875047595e-05, "loss": 2.7511, "step": 179200 }, { "epoch": 0.59, "learning_rate": 2.0318009502210007e-05, "loss": 2.7472, "step": 179300 }, { "epoch": 0.59, "learning_rate": 2.0301455129372423e-05, "loss": 2.7635, "step": 179400 }, { "epoch": 0.59, "learning_rate": 2.028490075653484e-05, "loss": 2.7618, "step": 179500 }, { "epoch": 0.59, "learning_rate": 2.0268346383697254e-05, "loss": 2.7403, "step": 179600 }, { "epoch": 0.59, "learning_rate": 2.025179201085967e-05, "loss": 2.7572, "step": 179700 }, { "epoch": 0.6, "learning_rate": 2.0235237638022082e-05, "loss": 2.764, "step": 179800 }, { "epoch": 0.6, "learning_rate": 2.0218683265184498e-05, "loss": 2.7535, "step": 179900 }, { "epoch": 0.6, "learning_rate": 2.0202128892346914e-05, "loss": 2.7471, "step": 180000 }, { "epoch": 0.6, "learning_rate": 2.018557451950933e-05, "loss": 2.7688, "step": 180100 }, { "epoch": 0.6, "learning_rate": 2.0169020146671745e-05, "loss": 2.758, "step": 180200 }, { "epoch": 0.6, "learning_rate": 2.0152465773834157e-05, "loss": 2.7657, "step": 180300 }, { "epoch": 0.6, "learning_rate": 2.0135911400996573e-05, "loss": 2.7615, "step": 180400 }, { "epoch": 0.6, "learning_rate": 2.011935702815899e-05, "loss": 2.7438, "step": 180500 }, { "epoch": 0.6, "learning_rate": 2.0102802655321404e-05, "loss": 2.7518, "step": 180600 }, { "epoch": 0.6, "learning_rate": 2.008624828248382e-05, "loss": 2.7412, "step": 180700 }, { "epoch": 0.6, "learning_rate": 2.0069693909646235e-05, "loss": 2.7506, "step": 180800 }, { "epoch": 0.6, "learning_rate": 2.0053139536808648e-05, "loss": 2.7598, "step": 180900 }, { "epoch": 0.6, "learning_rate": 2.0036585163971063e-05, "loss": 2.7554, "step": 181000 }, { "epoch": 0.6, "learning_rate": 2.002003079113348e-05, "loss": 2.7589, "step": 181100 }, { "epoch": 0.6, "learning_rate": 2.0003476418295895e-05, "loss": 2.7644, "step": 181200 }, { "epoch": 0.6, "learning_rate": 1.998692204545831e-05, "loss": 2.7621, "step": 181300 }, { "epoch": 0.6, "learning_rate": 1.9970367672620723e-05, "loss": 2.7566, "step": 181400 }, { "epoch": 0.6, "learning_rate": 1.9953813299783138e-05, "loss": 2.7602, "step": 181500 }, { "epoch": 0.6, "learning_rate": 1.9937258926945554e-05, "loss": 2.7508, "step": 181600 }, { "epoch": 0.6, "learning_rate": 1.992070455410797e-05, "loss": 2.761, "step": 181700 }, { "epoch": 0.6, "learning_rate": 1.9904150181270385e-05, "loss": 2.7567, "step": 181800 }, { "epoch": 0.6, "learning_rate": 1.9887595808432797e-05, "loss": 2.7436, "step": 181900 }, { "epoch": 0.6, "learning_rate": 1.9871041435595213e-05, "loss": 2.749, "step": 182000 }, { "epoch": 0.6, "learning_rate": 1.985448706275763e-05, "loss": 2.7616, "step": 182100 }, { "epoch": 0.6, "learning_rate": 1.9837932689920044e-05, "loss": 2.7596, "step": 182200 }, { "epoch": 0.6, "learning_rate": 1.982137831708246e-05, "loss": 2.7442, "step": 182300 }, { "epoch": 0.6, "learning_rate": 1.9804823944244876e-05, "loss": 2.7532, "step": 182400 }, { "epoch": 0.6, "learning_rate": 1.9788269571407288e-05, "loss": 2.7475, "step": 182500 }, { "epoch": 0.6, "learning_rate": 1.9771715198569704e-05, "loss": 2.7617, "step": 182600 }, { "epoch": 0.6, "learning_rate": 1.975516082573212e-05, "loss": 2.7583, "step": 182700 }, { "epoch": 0.61, "learning_rate": 1.9738606452894535e-05, "loss": 2.7614, "step": 182800 }, { "epoch": 0.61, "learning_rate": 1.972205208005695e-05, "loss": 2.7578, "step": 182900 }, { "epoch": 0.61, "learning_rate": 1.9705497707219363e-05, "loss": 2.7673, "step": 183000 }, { "epoch": 0.61, "learning_rate": 1.968894333438178e-05, "loss": 2.7617, "step": 183100 }, { "epoch": 0.61, "learning_rate": 1.9672388961544194e-05, "loss": 2.7737, "step": 183200 }, { "epoch": 0.61, "learning_rate": 1.9655834588706606e-05, "loss": 2.7456, "step": 183300 }, { "epoch": 0.61, "learning_rate": 1.9639280215869022e-05, "loss": 2.7428, "step": 183400 }, { "epoch": 0.61, "learning_rate": 1.9622725843031438e-05, "loss": 2.7546, "step": 183500 }, { "epoch": 0.61, "learning_rate": 1.960617147019385e-05, "loss": 2.7498, "step": 183600 }, { "epoch": 0.61, "learning_rate": 1.9589617097356266e-05, "loss": 2.7522, "step": 183700 }, { "epoch": 0.61, "learning_rate": 1.957306272451868e-05, "loss": 2.7429, "step": 183800 }, { "epoch": 0.61, "learning_rate": 1.9556508351681097e-05, "loss": 2.7488, "step": 183900 }, { "epoch": 0.61, "learning_rate": 1.9539953978843512e-05, "loss": 2.7547, "step": 184000 }, { "epoch": 0.61, "learning_rate": 1.9523399606005928e-05, "loss": 2.7622, "step": 184100 }, { "epoch": 0.61, "learning_rate": 1.950684523316834e-05, "loss": 2.7536, "step": 184200 }, { "epoch": 0.61, "learning_rate": 1.9490290860330756e-05, "loss": 2.7404, "step": 184300 }, { "epoch": 0.61, "learning_rate": 1.9473736487493172e-05, "loss": 2.7471, "step": 184400 }, { "epoch": 0.61, "learning_rate": 1.9457182114655587e-05, "loss": 2.7567, "step": 184500 }, { "epoch": 0.61, "learning_rate": 1.9440627741818003e-05, "loss": 2.7693, "step": 184600 }, { "epoch": 0.61, "learning_rate": 1.9424073368980415e-05, "loss": 2.749, "step": 184700 }, { "epoch": 0.61, "learning_rate": 1.940751899614283e-05, "loss": 2.7446, "step": 184800 }, { "epoch": 0.61, "learning_rate": 1.9390964623305247e-05, "loss": 2.7683, "step": 184900 }, { "epoch": 0.61, "learning_rate": 1.9374410250467662e-05, "loss": 2.7564, "step": 185000 }, { "epoch": 0.61, "learning_rate": 1.9357855877630078e-05, "loss": 2.749, "step": 185100 }, { "epoch": 0.61, "learning_rate": 1.934130150479249e-05, "loss": 2.7415, "step": 185200 }, { "epoch": 0.61, "learning_rate": 1.9324747131954906e-05, "loss": 2.7559, "step": 185300 }, { "epoch": 0.61, "learning_rate": 1.930819275911732e-05, "loss": 2.7544, "step": 185400 }, { "epoch": 0.61, "learning_rate": 1.9291638386279737e-05, "loss": 2.7569, "step": 185500 }, { "epoch": 0.61, "learning_rate": 1.9275084013442153e-05, "loss": 2.758, "step": 185600 }, { "epoch": 0.61, "learning_rate": 1.925852964060457e-05, "loss": 2.7706, "step": 185700 }, { "epoch": 0.62, "learning_rate": 1.924197526776698e-05, "loss": 2.7515, "step": 185800 }, { "epoch": 0.62, "learning_rate": 1.9225420894929396e-05, "loss": 2.7634, "step": 185900 }, { "epoch": 0.62, "learning_rate": 1.9208866522091812e-05, "loss": 2.7438, "step": 186000 }, { "epoch": 0.62, "learning_rate": 1.9192312149254228e-05, "loss": 2.7624, "step": 186100 }, { "epoch": 0.62, "learning_rate": 1.9175757776416643e-05, "loss": 2.751, "step": 186200 }, { "epoch": 0.62, "learning_rate": 1.9159203403579055e-05, "loss": 2.7779, "step": 186300 }, { "epoch": 0.62, "learning_rate": 1.914264903074147e-05, "loss": 2.7514, "step": 186400 }, { "epoch": 0.62, "learning_rate": 1.9126094657903887e-05, "loss": 2.7503, "step": 186500 }, { "epoch": 0.62, "learning_rate": 1.9109540285066302e-05, "loss": 2.7481, "step": 186600 }, { "epoch": 0.62, "learning_rate": 1.9092985912228718e-05, "loss": 2.7373, "step": 186700 }, { "epoch": 0.62, "learning_rate": 1.907643153939113e-05, "loss": 2.7518, "step": 186800 }, { "epoch": 0.62, "learning_rate": 1.9059877166553546e-05, "loss": 2.7531, "step": 186900 }, { "epoch": 0.62, "learning_rate": 1.904332279371596e-05, "loss": 2.7682, "step": 187000 }, { "epoch": 0.62, "learning_rate": 1.9026768420878377e-05, "loss": 2.7557, "step": 187100 }, { "epoch": 0.62, "learning_rate": 1.9010214048040793e-05, "loss": 2.7541, "step": 187200 }, { "epoch": 0.62, "learning_rate": 1.899365967520321e-05, "loss": 2.7567, "step": 187300 }, { "epoch": 0.62, "learning_rate": 1.897710530236562e-05, "loss": 2.7499, "step": 187400 }, { "epoch": 0.62, "learning_rate": 1.8960550929528036e-05, "loss": 2.7595, "step": 187500 }, { "epoch": 0.62, "learning_rate": 1.8943996556690452e-05, "loss": 2.7531, "step": 187600 }, { "epoch": 0.62, "learning_rate": 1.8927442183852864e-05, "loss": 2.7507, "step": 187700 }, { "epoch": 0.62, "learning_rate": 1.891088781101528e-05, "loss": 2.7307, "step": 187800 }, { "epoch": 0.62, "learning_rate": 1.8894333438177696e-05, "loss": 2.7465, "step": 187900 }, { "epoch": 0.62, "learning_rate": 1.8877779065340108e-05, "loss": 2.7383, "step": 188000 }, { "epoch": 0.62, "learning_rate": 1.8861224692502524e-05, "loss": 2.7451, "step": 188100 }, { "epoch": 0.62, "learning_rate": 1.884467031966494e-05, "loss": 2.7572, "step": 188200 }, { "epoch": 0.62, "learning_rate": 1.8828115946827355e-05, "loss": 2.7511, "step": 188300 }, { "epoch": 0.62, "learning_rate": 1.881156157398977e-05, "loss": 2.7476, "step": 188400 }, { "epoch": 0.62, "learning_rate": 1.8795007201152183e-05, "loss": 2.7497, "step": 188500 }, { "epoch": 0.62, "learning_rate": 1.87784528283146e-05, "loss": 2.7419, "step": 188600 }, { "epoch": 0.62, "learning_rate": 1.8761898455477014e-05, "loss": 2.7534, "step": 188700 }, { "epoch": 0.63, "learning_rate": 1.874534408263943e-05, "loss": 2.7565, "step": 188800 }, { "epoch": 0.63, "learning_rate": 1.8728789709801845e-05, "loss": 2.7543, "step": 188900 }, { "epoch": 0.63, "learning_rate": 1.871223533696426e-05, "loss": 2.7633, "step": 189000 }, { "epoch": 0.63, "learning_rate": 1.8695680964126673e-05, "loss": 2.7352, "step": 189100 }, { "epoch": 0.63, "learning_rate": 1.867912659128909e-05, "loss": 2.758, "step": 189200 }, { "epoch": 0.63, "learning_rate": 1.8662572218451505e-05, "loss": 2.7523, "step": 189300 }, { "epoch": 0.63, "learning_rate": 1.864601784561392e-05, "loss": 2.7567, "step": 189400 }, { "epoch": 0.63, "learning_rate": 1.8629463472776336e-05, "loss": 2.7603, "step": 189500 }, { "epoch": 0.63, "learning_rate": 1.8612909099938748e-05, "loss": 2.7538, "step": 189600 }, { "epoch": 0.63, "learning_rate": 1.8596354727101164e-05, "loss": 2.7485, "step": 189700 }, { "epoch": 0.63, "learning_rate": 1.857980035426358e-05, "loss": 2.7578, "step": 189800 }, { "epoch": 0.63, "learning_rate": 1.8563245981425995e-05, "loss": 2.7597, "step": 189900 }, { "epoch": 0.63, "learning_rate": 1.854669160858841e-05, "loss": 2.741, "step": 190000 }, { "epoch": 0.63, "learning_rate": 1.8530137235750823e-05, "loss": 2.7628, "step": 190100 }, { "epoch": 0.63, "learning_rate": 1.851358286291324e-05, "loss": 2.7549, "step": 190200 }, { "epoch": 0.63, "learning_rate": 1.8497028490075654e-05, "loss": 2.7525, "step": 190300 }, { "epoch": 0.63, "learning_rate": 1.848047411723807e-05, "loss": 2.7658, "step": 190400 }, { "epoch": 0.63, "learning_rate": 1.8463919744400486e-05, "loss": 2.7457, "step": 190500 }, { "epoch": 0.63, "learning_rate": 1.84473653715629e-05, "loss": 2.7454, "step": 190600 }, { "epoch": 0.63, "learning_rate": 1.8430810998725314e-05, "loss": 2.7433, "step": 190700 }, { "epoch": 0.63, "learning_rate": 1.841425662588773e-05, "loss": 2.7555, "step": 190800 }, { "epoch": 0.63, "learning_rate": 1.8397702253050145e-05, "loss": 2.7609, "step": 190900 }, { "epoch": 0.63, "learning_rate": 1.838114788021256e-05, "loss": 2.7525, "step": 191000 }, { "epoch": 0.63, "learning_rate": 1.8364593507374976e-05, "loss": 2.7539, "step": 191100 }, { "epoch": 0.63, "learning_rate": 1.834803913453739e-05, "loss": 2.7526, "step": 191200 }, { "epoch": 0.63, "learning_rate": 1.8331484761699804e-05, "loss": 2.7467, "step": 191300 }, { "epoch": 0.63, "learning_rate": 1.831493038886222e-05, "loss": 2.759, "step": 191400 }, { "epoch": 0.63, "learning_rate": 1.8298376016024635e-05, "loss": 2.7352, "step": 191500 }, { "epoch": 0.63, "learning_rate": 1.828182164318705e-05, "loss": 2.7548, "step": 191600 }, { "epoch": 0.63, "learning_rate": 1.8265267270349463e-05, "loss": 2.7471, "step": 191700 }, { "epoch": 0.64, "learning_rate": 1.824871289751188e-05, "loss": 2.7459, "step": 191800 }, { "epoch": 0.64, "learning_rate": 1.8232158524674295e-05, "loss": 2.7631, "step": 191900 }, { "epoch": 0.64, "learning_rate": 1.821560415183671e-05, "loss": 2.7637, "step": 192000 }, { "epoch": 0.64, "learning_rate": 1.8199049778999122e-05, "loss": 2.7491, "step": 192100 }, { "epoch": 0.64, "learning_rate": 1.8182495406161538e-05, "loss": 2.7466, "step": 192200 }, { "epoch": 0.64, "learning_rate": 1.8165941033323954e-05, "loss": 2.7564, "step": 192300 }, { "epoch": 0.64, "learning_rate": 1.8149386660486366e-05, "loss": 2.7306, "step": 192400 }, { "epoch": 0.64, "learning_rate": 1.813283228764878e-05, "loss": 2.7337, "step": 192500 }, { "epoch": 0.64, "learning_rate": 1.8116277914811197e-05, "loss": 2.7581, "step": 192600 }, { "epoch": 0.64, "learning_rate": 1.8099723541973613e-05, "loss": 2.7527, "step": 192700 }, { "epoch": 0.64, "learning_rate": 1.808316916913603e-05, "loss": 2.7398, "step": 192800 }, { "epoch": 0.64, "learning_rate": 1.806661479629844e-05, "loss": 2.7567, "step": 192900 }, { "epoch": 0.64, "learning_rate": 1.8050060423460857e-05, "loss": 2.7429, "step": 193000 }, { "epoch": 0.64, "learning_rate": 1.8033506050623272e-05, "loss": 2.7555, "step": 193100 }, { "epoch": 0.64, "learning_rate": 1.8016951677785688e-05, "loss": 2.7505, "step": 193200 }, { "epoch": 0.64, "learning_rate": 1.8000397304948103e-05, "loss": 2.7464, "step": 193300 }, { "epoch": 0.64, "learning_rate": 1.7983842932110516e-05, "loss": 2.7487, "step": 193400 }, { "epoch": 0.64, "learning_rate": 1.796728855927293e-05, "loss": 2.7459, "step": 193500 }, { "epoch": 0.64, "learning_rate": 1.7950734186435347e-05, "loss": 2.7236, "step": 193600 }, { "epoch": 0.64, "learning_rate": 1.7934179813597763e-05, "loss": 2.748, "step": 193700 }, { "epoch": 0.64, "learning_rate": 1.791762544076018e-05, "loss": 2.765, "step": 193800 }, { "epoch": 0.64, "learning_rate": 1.7901071067922594e-05, "loss": 2.7372, "step": 193900 }, { "epoch": 0.64, "learning_rate": 1.7884516695085006e-05, "loss": 2.7575, "step": 194000 }, { "epoch": 0.64, "learning_rate": 1.7867962322247422e-05, "loss": 2.7495, "step": 194100 }, { "epoch": 0.64, "learning_rate": 1.7851407949409838e-05, "loss": 2.7625, "step": 194200 }, { "epoch": 0.64, "learning_rate": 1.7834853576572253e-05, "loss": 2.7678, "step": 194300 }, { "epoch": 0.64, "learning_rate": 1.781829920373467e-05, "loss": 2.7367, "step": 194400 }, { "epoch": 0.64, "learning_rate": 1.780174483089708e-05, "loss": 2.7481, "step": 194500 }, { "epoch": 0.64, "learning_rate": 1.7785190458059497e-05, "loss": 2.7463, "step": 194600 }, { "epoch": 0.64, "learning_rate": 1.7768636085221912e-05, "loss": 2.7601, "step": 194700 }, { "epoch": 0.64, "learning_rate": 1.7752081712384328e-05, "loss": 2.7463, "step": 194800 }, { "epoch": 0.65, "learning_rate": 1.7735527339546744e-05, "loss": 2.7541, "step": 194900 }, { "epoch": 0.65, "learning_rate": 1.7718972966709156e-05, "loss": 2.7587, "step": 195000 }, { "epoch": 0.65, "learning_rate": 1.770241859387157e-05, "loss": 2.7448, "step": 195100 }, { "epoch": 0.65, "learning_rate": 1.7685864221033987e-05, "loss": 2.742, "step": 195200 }, { "epoch": 0.65, "learning_rate": 1.7669309848196403e-05, "loss": 2.7432, "step": 195300 }, { "epoch": 0.65, "learning_rate": 1.765275547535882e-05, "loss": 2.7405, "step": 195400 }, { "epoch": 0.65, "learning_rate": 1.7636201102521234e-05, "loss": 2.7456, "step": 195500 }, { "epoch": 0.65, "learning_rate": 1.7619646729683646e-05, "loss": 2.7538, "step": 195600 }, { "epoch": 0.65, "learning_rate": 1.7603092356846062e-05, "loss": 2.7466, "step": 195700 }, { "epoch": 0.65, "learning_rate": 1.7586537984008478e-05, "loss": 2.7524, "step": 195800 }, { "epoch": 0.65, "learning_rate": 1.7569983611170893e-05, "loss": 2.754, "step": 195900 }, { "epoch": 0.65, "learning_rate": 1.755342923833331e-05, "loss": 2.7473, "step": 196000 }, { "epoch": 0.65, "learning_rate": 1.753687486549572e-05, "loss": 2.7467, "step": 196100 }, { "epoch": 0.65, "learning_rate": 1.7520320492658137e-05, "loss": 2.747, "step": 196200 }, { "epoch": 0.65, "learning_rate": 1.7503766119820553e-05, "loss": 2.7498, "step": 196300 }, { "epoch": 0.65, "learning_rate": 1.7487211746982968e-05, "loss": 2.7498, "step": 196400 }, { "epoch": 0.65, "learning_rate": 1.747065737414538e-05, "loss": 2.7677, "step": 196500 }, { "epoch": 0.65, "learning_rate": 1.7454103001307796e-05, "loss": 2.7308, "step": 196600 }, { "epoch": 0.65, "learning_rate": 1.743754862847021e-05, "loss": 2.7521, "step": 196700 }, { "epoch": 0.65, "learning_rate": 1.7420994255632624e-05, "loss": 2.7413, "step": 196800 }, { "epoch": 0.65, "learning_rate": 1.740443988279504e-05, "loss": 2.7473, "step": 196900 }, { "epoch": 0.65, "learning_rate": 1.7387885509957455e-05, "loss": 2.7511, "step": 197000 }, { "epoch": 0.65, "learning_rate": 1.737133113711987e-05, "loss": 2.7548, "step": 197100 }, { "epoch": 0.65, "learning_rate": 1.7354776764282287e-05, "loss": 2.7597, "step": 197200 }, { "epoch": 0.65, "learning_rate": 1.73382223914447e-05, "loss": 2.7376, "step": 197300 }, { "epoch": 0.65, "learning_rate": 1.7321668018607115e-05, "loss": 2.7557, "step": 197400 }, { "epoch": 0.65, "learning_rate": 1.730511364576953e-05, "loss": 2.7479, "step": 197500 }, { "epoch": 0.65, "learning_rate": 1.7288559272931946e-05, "loss": 2.7596, "step": 197600 }, { "epoch": 0.65, "learning_rate": 1.727200490009436e-05, "loss": 2.7483, "step": 197700 }, { "epoch": 0.65, "learning_rate": 1.7255450527256774e-05, "loss": 2.7583, "step": 197800 }, { "epoch": 0.66, "learning_rate": 1.723889615441919e-05, "loss": 2.7377, "step": 197900 }, { "epoch": 0.66, "learning_rate": 1.7222341781581605e-05, "loss": 2.7514, "step": 198000 }, { "epoch": 0.66, "learning_rate": 1.720578740874402e-05, "loss": 2.7455, "step": 198100 }, { "epoch": 0.66, "learning_rate": 1.7189233035906436e-05, "loss": 2.7546, "step": 198200 }, { "epoch": 0.66, "learning_rate": 1.717267866306885e-05, "loss": 2.7573, "step": 198300 }, { "epoch": 0.66, "learning_rate": 1.7156124290231264e-05, "loss": 2.7514, "step": 198400 }, { "epoch": 0.66, "learning_rate": 1.713956991739368e-05, "loss": 2.7394, "step": 198500 }, { "epoch": 0.66, "learning_rate": 1.7123015544556096e-05, "loss": 2.7632, "step": 198600 }, { "epoch": 0.66, "learning_rate": 1.710646117171851e-05, "loss": 2.7399, "step": 198700 }, { "epoch": 0.66, "learning_rate": 1.7089906798880927e-05, "loss": 2.7694, "step": 198800 }, { "epoch": 0.66, "learning_rate": 1.707335242604334e-05, "loss": 2.7415, "step": 198900 }, { "epoch": 0.66, "learning_rate": 1.7056798053205755e-05, "loss": 2.7398, "step": 199000 }, { "epoch": 0.66, "learning_rate": 1.704024368036817e-05, "loss": 2.7443, "step": 199100 }, { "epoch": 0.66, "learning_rate": 1.7023689307530586e-05, "loss": 2.7468, "step": 199200 }, { "epoch": 0.66, "learning_rate": 1.7007134934693002e-05, "loss": 2.7511, "step": 199300 }, { "epoch": 0.66, "learning_rate": 1.6990580561855414e-05, "loss": 2.7378, "step": 199400 }, { "epoch": 0.66, "learning_rate": 1.697402618901783e-05, "loss": 2.749, "step": 199500 }, { "epoch": 0.66, "learning_rate": 1.6957471816180245e-05, "loss": 2.7451, "step": 199600 }, { "epoch": 0.66, "learning_rate": 1.694091744334266e-05, "loss": 2.7444, "step": 199700 }, { "epoch": 0.66, "learning_rate": 1.6924363070505077e-05, "loss": 2.7568, "step": 199800 }, { "epoch": 0.66, "learning_rate": 1.690780869766749e-05, "loss": 2.7527, "step": 199900 }, { "epoch": 0.66, "learning_rate": 1.6891254324829905e-05, "loss": 2.759, "step": 200000 }, { "epoch": 0.66, "learning_rate": 1.687469995199232e-05, "loss": 2.7563, "step": 200100 }, { "epoch": 0.66, "learning_rate": 1.6858145579154736e-05, "loss": 2.7435, "step": 200200 }, { "epoch": 0.66, "learning_rate": 1.684159120631715e-05, "loss": 2.7459, "step": 200300 }, { "epoch": 0.66, "learning_rate": 1.6825036833479567e-05, "loss": 2.7453, "step": 200400 }, { "epoch": 0.66, "learning_rate": 1.680848246064198e-05, "loss": 2.7501, "step": 200500 }, { "epoch": 0.66, "learning_rate": 1.6791928087804395e-05, "loss": 2.7485, "step": 200600 }, { "epoch": 0.66, "learning_rate": 1.677537371496681e-05, "loss": 2.7446, "step": 200700 }, { "epoch": 0.66, "learning_rate": 1.6758819342129226e-05, "loss": 2.7659, "step": 200800 }, { "epoch": 0.67, "learning_rate": 1.6742264969291642e-05, "loss": 2.7559, "step": 200900 }, { "epoch": 0.67, "learning_rate": 1.6725710596454054e-05, "loss": 2.7515, "step": 201000 }, { "epoch": 0.67, "learning_rate": 1.6709156223616467e-05, "loss": 2.7527, "step": 201100 }, { "epoch": 0.67, "learning_rate": 1.6692601850778882e-05, "loss": 2.7458, "step": 201200 }, { "epoch": 0.67, "learning_rate": 1.6676047477941298e-05, "loss": 2.7527, "step": 201300 }, { "epoch": 0.67, "learning_rate": 1.6659493105103713e-05, "loss": 2.7563, "step": 201400 }, { "epoch": 0.67, "learning_rate": 1.664293873226613e-05, "loss": 2.7574, "step": 201500 }, { "epoch": 0.67, "learning_rate": 1.662638435942854e-05, "loss": 2.7438, "step": 201600 }, { "epoch": 0.67, "learning_rate": 1.6609829986590957e-05, "loss": 2.7436, "step": 201700 }, { "epoch": 0.67, "learning_rate": 1.6593275613753373e-05, "loss": 2.7473, "step": 201800 }, { "epoch": 0.67, "learning_rate": 1.657672124091579e-05, "loss": 2.7459, "step": 201900 }, { "epoch": 0.67, "learning_rate": 1.6560166868078204e-05, "loss": 2.7338, "step": 202000 }, { "epoch": 0.67, "learning_rate": 1.654361249524062e-05, "loss": 2.761, "step": 202100 }, { "epoch": 0.67, "learning_rate": 1.6527058122403032e-05, "loss": 2.7412, "step": 202200 }, { "epoch": 0.67, "learning_rate": 1.6510503749565448e-05, "loss": 2.7504, "step": 202300 }, { "epoch": 0.67, "learning_rate": 1.6493949376727863e-05, "loss": 2.759, "step": 202400 }, { "epoch": 0.67, "learning_rate": 1.647739500389028e-05, "loss": 2.7497, "step": 202500 }, { "epoch": 0.67, "learning_rate": 1.6460840631052694e-05, "loss": 2.7438, "step": 202600 }, { "epoch": 0.67, "learning_rate": 1.6444286258215107e-05, "loss": 2.7664, "step": 202700 }, { "epoch": 0.67, "learning_rate": 1.6427731885377522e-05, "loss": 2.7446, "step": 202800 }, { "epoch": 0.67, "learning_rate": 1.6411177512539938e-05, "loss": 2.7489, "step": 202900 }, { "epoch": 0.67, "learning_rate": 1.6394623139702354e-05, "loss": 2.7471, "step": 203000 }, { "epoch": 0.67, "learning_rate": 1.637806876686477e-05, "loss": 2.7554, "step": 203100 }, { "epoch": 0.67, "learning_rate": 1.636151439402718e-05, "loss": 2.752, "step": 203200 }, { "epoch": 0.67, "learning_rate": 1.6344960021189597e-05, "loss": 2.747, "step": 203300 }, { "epoch": 0.67, "learning_rate": 1.6328405648352013e-05, "loss": 2.7559, "step": 203400 }, { "epoch": 0.67, "learning_rate": 1.631185127551443e-05, "loss": 2.7415, "step": 203500 }, { "epoch": 0.67, "learning_rate": 1.6295296902676844e-05, "loss": 2.7489, "step": 203600 }, { "epoch": 0.67, "learning_rate": 1.627874252983926e-05, "loss": 2.7475, "step": 203700 }, { "epoch": 0.67, "learning_rate": 1.6262188157001672e-05, "loss": 2.7533, "step": 203800 }, { "epoch": 0.68, "learning_rate": 1.6245633784164088e-05, "loss": 2.7485, "step": 203900 }, { "epoch": 0.68, "learning_rate": 1.6229079411326503e-05, "loss": 2.7485, "step": 204000 }, { "epoch": 0.68, "learning_rate": 1.621252503848892e-05, "loss": 2.7488, "step": 204100 }, { "epoch": 0.68, "learning_rate": 1.6195970665651335e-05, "loss": 2.7612, "step": 204200 }, { "epoch": 0.68, "learning_rate": 1.6179416292813747e-05, "loss": 2.7536, "step": 204300 }, { "epoch": 0.68, "learning_rate": 1.6162861919976163e-05, "loss": 2.7531, "step": 204400 }, { "epoch": 0.68, "learning_rate": 1.6146307547138578e-05, "loss": 2.7504, "step": 204500 }, { "epoch": 0.68, "learning_rate": 1.6129753174300994e-05, "loss": 2.7473, "step": 204600 }, { "epoch": 0.68, "learning_rate": 1.611319880146341e-05, "loss": 2.7492, "step": 204700 }, { "epoch": 0.68, "learning_rate": 1.6096644428625822e-05, "loss": 2.7358, "step": 204800 }, { "epoch": 0.68, "learning_rate": 1.6080090055788237e-05, "loss": 2.7365, "step": 204900 }, { "epoch": 0.68, "learning_rate": 1.6063535682950653e-05, "loss": 2.7672, "step": 205000 }, { "epoch": 0.68, "learning_rate": 1.604698131011307e-05, "loss": 2.7342, "step": 205100 }, { "epoch": 0.68, "learning_rate": 1.6030426937275484e-05, "loss": 2.742, "step": 205200 }, { "epoch": 0.68, "learning_rate": 1.60138725644379e-05, "loss": 2.7537, "step": 205300 }, { "epoch": 0.68, "learning_rate": 1.5997318191600312e-05, "loss": 2.7555, "step": 205400 }, { "epoch": 0.68, "learning_rate": 1.5980763818762725e-05, "loss": 2.7458, "step": 205500 }, { "epoch": 0.68, "learning_rate": 1.596420944592514e-05, "loss": 2.7538, "step": 205600 }, { "epoch": 0.68, "learning_rate": 1.5947655073087556e-05, "loss": 2.7593, "step": 205700 }, { "epoch": 0.68, "learning_rate": 1.593110070024997e-05, "loss": 2.7588, "step": 205800 }, { "epoch": 0.68, "learning_rate": 1.5914546327412387e-05, "loss": 2.7378, "step": 205900 }, { "epoch": 0.68, "learning_rate": 1.58979919545748e-05, "loss": 2.7615, "step": 206000 }, { "epoch": 0.68, "learning_rate": 1.5881437581737215e-05, "loss": 2.7508, "step": 206100 }, { "epoch": 0.68, "learning_rate": 1.586488320889963e-05, "loss": 2.7431, "step": 206200 }, { "epoch": 0.68, "learning_rate": 1.5848328836062046e-05, "loss": 2.7467, "step": 206300 }, { "epoch": 0.68, "learning_rate": 1.5831774463224462e-05, "loss": 2.7561, "step": 206400 }, { "epoch": 0.68, "learning_rate": 1.5815220090386874e-05, "loss": 2.7546, "step": 206500 }, { "epoch": 0.68, "learning_rate": 1.579866571754929e-05, "loss": 2.7325, "step": 206600 }, { "epoch": 0.68, "learning_rate": 1.5782111344711706e-05, "loss": 2.7487, "step": 206700 }, { "epoch": 0.68, "learning_rate": 1.576555697187412e-05, "loss": 2.747, "step": 206800 }, { "epoch": 0.69, "learning_rate": 1.5749002599036537e-05, "loss": 2.7418, "step": 206900 }, { "epoch": 0.69, "learning_rate": 1.5732448226198953e-05, "loss": 2.7484, "step": 207000 }, { "epoch": 0.69, "learning_rate": 1.5715893853361365e-05, "loss": 2.7646, "step": 207100 }, { "epoch": 0.69, "learning_rate": 1.569933948052378e-05, "loss": 2.7525, "step": 207200 }, { "epoch": 0.69, "learning_rate": 1.5682785107686196e-05, "loss": 2.7397, "step": 207300 }, { "epoch": 0.69, "learning_rate": 1.5666230734848612e-05, "loss": 2.753, "step": 207400 }, { "epoch": 0.69, "learning_rate": 1.5649676362011027e-05, "loss": 2.7396, "step": 207500 }, { "epoch": 0.69, "learning_rate": 1.563312198917344e-05, "loss": 2.7388, "step": 207600 }, { "epoch": 0.69, "learning_rate": 1.5616567616335855e-05, "loss": 2.7366, "step": 207700 }, { "epoch": 0.69, "learning_rate": 1.560001324349827e-05, "loss": 2.7487, "step": 207800 }, { "epoch": 0.69, "learning_rate": 1.5583458870660687e-05, "loss": 2.7411, "step": 207900 }, { "epoch": 0.69, "learning_rate": 1.5566904497823102e-05, "loss": 2.7539, "step": 208000 }, { "epoch": 0.69, "learning_rate": 1.5550350124985515e-05, "loss": 2.7649, "step": 208100 }, { "epoch": 0.69, "learning_rate": 1.553379575214793e-05, "loss": 2.7524, "step": 208200 }, { "epoch": 0.69, "learning_rate": 1.5517241379310346e-05, "loss": 2.7527, "step": 208300 }, { "epoch": 0.69, "learning_rate": 1.550068700647276e-05, "loss": 2.7479, "step": 208400 }, { "epoch": 0.69, "learning_rate": 1.5484132633635177e-05, "loss": 2.7526, "step": 208500 }, { "epoch": 0.69, "learning_rate": 1.5467578260797593e-05, "loss": 2.7502, "step": 208600 }, { "epoch": 0.69, "learning_rate": 1.5451023887960005e-05, "loss": 2.7517, "step": 208700 }, { "epoch": 0.69, "learning_rate": 1.543446951512242e-05, "loss": 2.7367, "step": 208800 }, { "epoch": 0.69, "learning_rate": 1.5417915142284836e-05, "loss": 2.7555, "step": 208900 }, { "epoch": 0.69, "learning_rate": 1.5401360769447252e-05, "loss": 2.7452, "step": 209000 }, { "epoch": 0.69, "learning_rate": 1.5384806396609668e-05, "loss": 2.7528, "step": 209100 }, { "epoch": 0.69, "learning_rate": 1.536825202377208e-05, "loss": 2.7645, "step": 209200 }, { "epoch": 0.69, "learning_rate": 1.5351697650934496e-05, "loss": 2.7402, "step": 209300 }, { "epoch": 0.69, "learning_rate": 1.533514327809691e-05, "loss": 2.7583, "step": 209400 }, { "epoch": 0.69, "learning_rate": 1.5318588905259327e-05, "loss": 2.7396, "step": 209500 }, { "epoch": 0.69, "learning_rate": 1.5302034532421742e-05, "loss": 2.7366, "step": 209600 }, { "epoch": 0.69, "learning_rate": 1.5285480159584155e-05, "loss": 2.7318, "step": 209700 }, { "epoch": 0.69, "learning_rate": 1.5268925786746567e-05, "loss": 2.7577, "step": 209800 }, { "epoch": 0.69, "learning_rate": 1.5252371413908984e-05, "loss": 2.756, "step": 209900 }, { "epoch": 0.7, "learning_rate": 1.5235817041071398e-05, "loss": 2.7433, "step": 210000 }, { "epoch": 0.7, "learning_rate": 1.5219262668233814e-05, "loss": 2.7425, "step": 210100 }, { "epoch": 0.7, "learning_rate": 1.5202708295396228e-05, "loss": 2.7447, "step": 210200 }, { "epoch": 0.7, "learning_rate": 1.5186153922558644e-05, "loss": 2.7478, "step": 210300 }, { "epoch": 0.7, "learning_rate": 1.516959954972106e-05, "loss": 2.7383, "step": 210400 }, { "epoch": 0.7, "learning_rate": 1.5153045176883473e-05, "loss": 2.7537, "step": 210500 }, { "epoch": 0.7, "learning_rate": 1.5136490804045889e-05, "loss": 2.7604, "step": 210600 }, { "epoch": 0.7, "learning_rate": 1.5119936431208304e-05, "loss": 2.7393, "step": 210700 }, { "epoch": 0.7, "learning_rate": 1.5103382058370718e-05, "loss": 2.7381, "step": 210800 }, { "epoch": 0.7, "learning_rate": 1.5086827685533134e-05, "loss": 2.7523, "step": 210900 }, { "epoch": 0.7, "learning_rate": 1.5070273312695548e-05, "loss": 2.7367, "step": 211000 }, { "epoch": 0.7, "learning_rate": 1.5053718939857964e-05, "loss": 2.7387, "step": 211100 }, { "epoch": 0.7, "learning_rate": 1.503716456702038e-05, "loss": 2.7369, "step": 211200 }, { "epoch": 0.7, "learning_rate": 1.5020610194182793e-05, "loss": 2.7399, "step": 211300 }, { "epoch": 0.7, "learning_rate": 1.5004055821345209e-05, "loss": 2.7459, "step": 211400 }, { "epoch": 0.7, "learning_rate": 1.4987501448507625e-05, "loss": 2.7443, "step": 211500 }, { "epoch": 0.7, "learning_rate": 1.4970947075670039e-05, "loss": 2.7498, "step": 211600 }, { "epoch": 0.7, "learning_rate": 1.4954392702832454e-05, "loss": 2.7362, "step": 211700 }, { "epoch": 0.7, "learning_rate": 1.4937838329994868e-05, "loss": 2.7286, "step": 211800 }, { "epoch": 0.7, "learning_rate": 1.4921283957157284e-05, "loss": 2.7462, "step": 211900 }, { "epoch": 0.7, "learning_rate": 1.49047295843197e-05, "loss": 2.7569, "step": 212000 }, { "epoch": 0.7, "learning_rate": 1.4888175211482113e-05, "loss": 2.7475, "step": 212100 }, { "epoch": 0.7, "learning_rate": 1.4871620838644529e-05, "loss": 2.7441, "step": 212200 }, { "epoch": 0.7, "learning_rate": 1.4855066465806945e-05, "loss": 2.7449, "step": 212300 }, { "epoch": 0.7, "learning_rate": 1.4838512092969359e-05, "loss": 2.7508, "step": 212400 }, { "epoch": 0.7, "learning_rate": 1.4821957720131774e-05, "loss": 2.7462, "step": 212500 }, { "epoch": 0.7, "learning_rate": 1.4805403347294188e-05, "loss": 2.7522, "step": 212600 }, { "epoch": 0.7, "learning_rate": 1.4788848974456604e-05, "loss": 2.7611, "step": 212700 }, { "epoch": 0.7, "learning_rate": 1.477229460161902e-05, "loss": 2.7445, "step": 212800 }, { "epoch": 0.7, "learning_rate": 1.4755740228781433e-05, "loss": 2.7321, "step": 212900 }, { "epoch": 0.71, "learning_rate": 1.4739185855943849e-05, "loss": 2.7474, "step": 213000 }, { "epoch": 0.71, "learning_rate": 1.4722631483106265e-05, "loss": 2.7665, "step": 213100 }, { "epoch": 0.71, "learning_rate": 1.4706077110268679e-05, "loss": 2.7478, "step": 213200 }, { "epoch": 0.71, "learning_rate": 1.4689522737431094e-05, "loss": 2.7396, "step": 213300 }, { "epoch": 0.71, "learning_rate": 1.4672968364593508e-05, "loss": 2.7526, "step": 213400 }, { "epoch": 0.71, "learning_rate": 1.4656413991755924e-05, "loss": 2.7684, "step": 213500 }, { "epoch": 0.71, "learning_rate": 1.463985961891834e-05, "loss": 2.7255, "step": 213600 }, { "epoch": 0.71, "learning_rate": 1.4623305246080754e-05, "loss": 2.7546, "step": 213700 }, { "epoch": 0.71, "learning_rate": 1.460675087324317e-05, "loss": 2.7313, "step": 213800 }, { "epoch": 0.71, "learning_rate": 1.4590196500405585e-05, "loss": 2.7372, "step": 213900 }, { "epoch": 0.71, "learning_rate": 1.4573642127567999e-05, "loss": 2.7446, "step": 214000 }, { "epoch": 0.71, "learning_rate": 1.4557087754730414e-05, "loss": 2.7521, "step": 214100 }, { "epoch": 0.71, "learning_rate": 1.4540533381892827e-05, "loss": 2.7374, "step": 214200 }, { "epoch": 0.71, "learning_rate": 1.452397900905524e-05, "loss": 2.7505, "step": 214300 }, { "epoch": 0.71, "learning_rate": 1.4507424636217656e-05, "loss": 2.7417, "step": 214400 }, { "epoch": 0.71, "learning_rate": 1.4490870263380072e-05, "loss": 2.7523, "step": 214500 }, { "epoch": 0.71, "learning_rate": 1.4474315890542486e-05, "loss": 2.7588, "step": 214600 }, { "epoch": 0.71, "learning_rate": 1.4457761517704902e-05, "loss": 2.7521, "step": 214700 }, { "epoch": 0.71, "learning_rate": 1.4441207144867317e-05, "loss": 2.7417, "step": 214800 }, { "epoch": 0.71, "learning_rate": 1.4424652772029731e-05, "loss": 2.7422, "step": 214900 }, { "epoch": 0.71, "learning_rate": 1.4408098399192147e-05, "loss": 2.7457, "step": 215000 }, { "epoch": 0.71, "learning_rate": 1.439154402635456e-05, "loss": 2.7333, "step": 215100 }, { "epoch": 0.71, "learning_rate": 1.4374989653516976e-05, "loss": 2.7256, "step": 215200 }, { "epoch": 0.71, "learning_rate": 1.4358435280679392e-05, "loss": 2.74, "step": 215300 }, { "epoch": 0.71, "learning_rate": 1.4341880907841806e-05, "loss": 2.7339, "step": 215400 }, { "epoch": 0.71, "learning_rate": 1.4325326535004222e-05, "loss": 2.7476, "step": 215500 }, { "epoch": 0.71, "learning_rate": 1.4308772162166637e-05, "loss": 2.7306, "step": 215600 }, { "epoch": 0.71, "learning_rate": 1.4292217789329051e-05, "loss": 2.7513, "step": 215700 }, { "epoch": 0.71, "learning_rate": 1.4275663416491467e-05, "loss": 2.7415, "step": 215800 }, { "epoch": 0.71, "learning_rate": 1.4259109043653881e-05, "loss": 2.7337, "step": 215900 }, { "epoch": 0.72, "learning_rate": 1.4242554670816297e-05, "loss": 2.7585, "step": 216000 }, { "epoch": 0.72, "learning_rate": 1.4226000297978712e-05, "loss": 2.7559, "step": 216100 }, { "epoch": 0.72, "learning_rate": 1.4209445925141126e-05, "loss": 2.7436, "step": 216200 }, { "epoch": 0.72, "learning_rate": 1.4192891552303542e-05, "loss": 2.752, "step": 216300 }, { "epoch": 0.72, "learning_rate": 1.4176337179465957e-05, "loss": 2.7367, "step": 216400 }, { "epoch": 0.72, "learning_rate": 1.4159782806628371e-05, "loss": 2.7435, "step": 216500 }, { "epoch": 0.72, "learning_rate": 1.4143228433790787e-05, "loss": 2.7464, "step": 216600 }, { "epoch": 0.72, "learning_rate": 1.4126674060953201e-05, "loss": 2.7301, "step": 216700 }, { "epoch": 0.72, "learning_rate": 1.4110119688115617e-05, "loss": 2.7346, "step": 216800 }, { "epoch": 0.72, "learning_rate": 1.4093565315278032e-05, "loss": 2.7455, "step": 216900 }, { "epoch": 0.72, "learning_rate": 1.4077010942440446e-05, "loss": 2.7432, "step": 217000 }, { "epoch": 0.72, "learning_rate": 1.4060456569602862e-05, "loss": 2.7568, "step": 217100 }, { "epoch": 0.72, "learning_rate": 1.4043902196765278e-05, "loss": 2.7589, "step": 217200 }, { "epoch": 0.72, "learning_rate": 1.4027347823927692e-05, "loss": 2.7595, "step": 217300 }, { "epoch": 0.72, "learning_rate": 1.4010793451090107e-05, "loss": 2.7482, "step": 217400 }, { "epoch": 0.72, "learning_rate": 1.3994239078252521e-05, "loss": 2.7512, "step": 217500 }, { "epoch": 0.72, "learning_rate": 1.3977684705414937e-05, "loss": 2.7434, "step": 217600 }, { "epoch": 0.72, "learning_rate": 1.3961130332577352e-05, "loss": 2.7435, "step": 217700 }, { "epoch": 0.72, "learning_rate": 1.3944575959739766e-05, "loss": 2.7442, "step": 217800 }, { "epoch": 0.72, "learning_rate": 1.3928021586902182e-05, "loss": 2.7519, "step": 217900 }, { "epoch": 0.72, "learning_rate": 1.3911467214064598e-05, "loss": 2.7426, "step": 218000 }, { "epoch": 0.72, "learning_rate": 1.3894912841227012e-05, "loss": 2.7457, "step": 218100 }, { "epoch": 0.72, "learning_rate": 1.3878358468389427e-05, "loss": 2.7438, "step": 218200 }, { "epoch": 0.72, "learning_rate": 1.3861804095551841e-05, "loss": 2.746, "step": 218300 }, { "epoch": 0.72, "learning_rate": 1.3845249722714257e-05, "loss": 2.728, "step": 218400 }, { "epoch": 0.72, "learning_rate": 1.3828695349876673e-05, "loss": 2.7295, "step": 218500 }, { "epoch": 0.72, "learning_rate": 1.3812140977039085e-05, "loss": 2.7324, "step": 218600 }, { "epoch": 0.72, "learning_rate": 1.3795586604201499e-05, "loss": 2.7502, "step": 218700 }, { "epoch": 0.72, "learning_rate": 1.3779032231363914e-05, "loss": 2.7518, "step": 218800 }, { "epoch": 0.72, "learning_rate": 1.3762477858526328e-05, "loss": 2.7535, "step": 218900 }, { "epoch": 0.73, "learning_rate": 1.3745923485688744e-05, "loss": 2.7561, "step": 219000 }, { "epoch": 0.73, "learning_rate": 1.372936911285116e-05, "loss": 2.7428, "step": 219100 }, { "epoch": 0.73, "learning_rate": 1.3712814740013574e-05, "loss": 2.7386, "step": 219200 }, { "epoch": 0.73, "learning_rate": 1.369626036717599e-05, "loss": 2.7328, "step": 219300 }, { "epoch": 0.73, "learning_rate": 1.3679705994338405e-05, "loss": 2.7359, "step": 219400 }, { "epoch": 0.73, "learning_rate": 1.3663151621500819e-05, "loss": 2.7402, "step": 219500 }, { "epoch": 0.73, "learning_rate": 1.3646597248663235e-05, "loss": 2.7577, "step": 219600 }, { "epoch": 0.73, "learning_rate": 1.3630042875825648e-05, "loss": 2.7495, "step": 219700 }, { "epoch": 0.73, "learning_rate": 1.3613488502988064e-05, "loss": 2.7579, "step": 219800 }, { "epoch": 0.73, "learning_rate": 1.359693413015048e-05, "loss": 2.7465, "step": 219900 }, { "epoch": 0.73, "learning_rate": 1.3580379757312894e-05, "loss": 2.7407, "step": 220000 }, { "epoch": 0.73, "learning_rate": 1.356382538447531e-05, "loss": 2.7404, "step": 220100 }, { "epoch": 0.73, "learning_rate": 1.3547271011637725e-05, "loss": 2.7524, "step": 220200 }, { "epoch": 0.73, "learning_rate": 1.3530716638800139e-05, "loss": 2.7446, "step": 220300 }, { "epoch": 0.73, "learning_rate": 1.3514162265962555e-05, "loss": 2.7589, "step": 220400 }, { "epoch": 0.73, "learning_rate": 1.3497607893124969e-05, "loss": 2.7576, "step": 220500 }, { "epoch": 0.73, "learning_rate": 1.3481053520287384e-05, "loss": 2.7626, "step": 220600 }, { "epoch": 0.73, "learning_rate": 1.34644991474498e-05, "loss": 2.7431, "step": 220700 }, { "epoch": 0.73, "learning_rate": 1.3447944774612214e-05, "loss": 2.7534, "step": 220800 }, { "epoch": 0.73, "learning_rate": 1.343139040177463e-05, "loss": 2.7467, "step": 220900 }, { "epoch": 0.73, "learning_rate": 1.3414836028937045e-05, "loss": 2.7364, "step": 221000 }, { "epoch": 0.73, "learning_rate": 1.3398281656099459e-05, "loss": 2.7504, "step": 221100 }, { "epoch": 0.73, "learning_rate": 1.3381727283261875e-05, "loss": 2.7532, "step": 221200 }, { "epoch": 0.73, "learning_rate": 1.3365172910424289e-05, "loss": 2.7411, "step": 221300 }, { "epoch": 0.73, "learning_rate": 1.3348618537586704e-05, "loss": 2.7387, "step": 221400 }, { "epoch": 0.73, "learning_rate": 1.333206416474912e-05, "loss": 2.7385, "step": 221500 }, { "epoch": 0.73, "learning_rate": 1.3315509791911534e-05, "loss": 2.7548, "step": 221600 }, { "epoch": 0.73, "learning_rate": 1.329895541907395e-05, "loss": 2.7512, "step": 221700 }, { "epoch": 0.73, "learning_rate": 1.3282401046236365e-05, "loss": 2.7448, "step": 221800 }, { "epoch": 0.73, "learning_rate": 1.326584667339878e-05, "loss": 2.738, "step": 221900 }, { "epoch": 0.74, "learning_rate": 1.3249292300561195e-05, "loss": 2.7564, "step": 222000 }, { "epoch": 0.74, "learning_rate": 1.3232737927723609e-05, "loss": 2.7513, "step": 222100 }, { "epoch": 0.74, "learning_rate": 1.3216183554886024e-05, "loss": 2.7383, "step": 222200 }, { "epoch": 0.74, "learning_rate": 1.319962918204844e-05, "loss": 2.745, "step": 222300 }, { "epoch": 0.74, "learning_rate": 1.3183074809210854e-05, "loss": 2.7495, "step": 222400 }, { "epoch": 0.74, "learning_rate": 1.316652043637327e-05, "loss": 2.7463, "step": 222500 }, { "epoch": 0.74, "learning_rate": 1.3149966063535685e-05, "loss": 2.742, "step": 222600 }, { "epoch": 0.74, "learning_rate": 1.31334116906981e-05, "loss": 2.7392, "step": 222700 }, { "epoch": 0.74, "learning_rate": 1.3116857317860515e-05, "loss": 2.7534, "step": 222800 }, { "epoch": 0.74, "learning_rate": 1.3100302945022929e-05, "loss": 2.7375, "step": 222900 }, { "epoch": 0.74, "learning_rate": 1.3083748572185341e-05, "loss": 2.7512, "step": 223000 }, { "epoch": 0.74, "learning_rate": 1.3067194199347757e-05, "loss": 2.7532, "step": 223100 }, { "epoch": 0.74, "learning_rate": 1.3050639826510172e-05, "loss": 2.7496, "step": 223200 }, { "epoch": 0.74, "learning_rate": 1.3034085453672586e-05, "loss": 2.7531, "step": 223300 }, { "epoch": 0.74, "learning_rate": 1.3017531080835002e-05, "loss": 2.7457, "step": 223400 }, { "epoch": 0.74, "learning_rate": 1.3000976707997418e-05, "loss": 2.7515, "step": 223500 }, { "epoch": 0.74, "learning_rate": 1.2984422335159832e-05, "loss": 2.7406, "step": 223600 }, { "epoch": 0.74, "learning_rate": 1.2967867962322247e-05, "loss": 2.7557, "step": 223700 }, { "epoch": 0.74, "learning_rate": 1.2951313589484661e-05, "loss": 2.7608, "step": 223800 }, { "epoch": 0.74, "learning_rate": 1.2934759216647077e-05, "loss": 2.747, "step": 223900 }, { "epoch": 0.74, "learning_rate": 1.2918204843809493e-05, "loss": 2.7493, "step": 224000 }, { "epoch": 0.74, "learning_rate": 1.2901650470971907e-05, "loss": 2.7576, "step": 224100 }, { "epoch": 0.74, "learning_rate": 1.2885096098134322e-05, "loss": 2.7331, "step": 224200 }, { "epoch": 0.74, "learning_rate": 1.2868541725296738e-05, "loss": 2.7374, "step": 224300 }, { "epoch": 0.74, "learning_rate": 1.2851987352459152e-05, "loss": 2.7432, "step": 224400 }, { "epoch": 0.74, "learning_rate": 1.2835432979621567e-05, "loss": 2.7381, "step": 224500 }, { "epoch": 0.74, "learning_rate": 1.2818878606783981e-05, "loss": 2.7451, "step": 224600 }, { "epoch": 0.74, "learning_rate": 1.2802324233946397e-05, "loss": 2.7353, "step": 224700 }, { "epoch": 0.74, "learning_rate": 1.2785769861108813e-05, "loss": 2.7483, "step": 224800 }, { "epoch": 0.74, "learning_rate": 1.2769215488271227e-05, "loss": 2.7332, "step": 224900 }, { "epoch": 0.74, "learning_rate": 1.2752661115433642e-05, "loss": 2.7394, "step": 225000 }, { "epoch": 0.75, "learning_rate": 1.2736106742596058e-05, "loss": 2.7438, "step": 225100 }, { "epoch": 0.75, "learning_rate": 1.2719552369758472e-05, "loss": 2.7473, "step": 225200 }, { "epoch": 0.75, "learning_rate": 1.2702997996920888e-05, "loss": 2.7387, "step": 225300 }, { "epoch": 0.75, "learning_rate": 1.2686443624083302e-05, "loss": 2.7363, "step": 225400 }, { "epoch": 0.75, "learning_rate": 1.2669889251245717e-05, "loss": 2.7335, "step": 225500 }, { "epoch": 0.75, "learning_rate": 1.2653334878408133e-05, "loss": 2.733, "step": 225600 }, { "epoch": 0.75, "learning_rate": 1.2636780505570547e-05, "loss": 2.7488, "step": 225700 }, { "epoch": 0.75, "learning_rate": 1.2620226132732962e-05, "loss": 2.7237, "step": 225800 }, { "epoch": 0.75, "learning_rate": 1.2603671759895378e-05, "loss": 2.7405, "step": 225900 }, { "epoch": 0.75, "learning_rate": 1.2587117387057792e-05, "loss": 2.7553, "step": 226000 }, { "epoch": 0.75, "learning_rate": 1.2570563014220208e-05, "loss": 2.7565, "step": 226100 }, { "epoch": 0.75, "learning_rate": 1.2554008641382622e-05, "loss": 2.7361, "step": 226200 }, { "epoch": 0.75, "learning_rate": 1.2537454268545037e-05, "loss": 2.7482, "step": 226300 }, { "epoch": 0.75, "learning_rate": 1.2520899895707453e-05, "loss": 2.7389, "step": 226400 }, { "epoch": 0.75, "learning_rate": 1.2504345522869867e-05, "loss": 2.7519, "step": 226500 }, { "epoch": 0.75, "learning_rate": 1.248779115003228e-05, "loss": 2.7584, "step": 226600 }, { "epoch": 0.75, "learning_rate": 1.2471236777194696e-05, "loss": 2.7492, "step": 226700 }, { "epoch": 0.75, "learning_rate": 1.245468240435711e-05, "loss": 2.7427, "step": 226800 }, { "epoch": 0.75, "learning_rate": 1.2438128031519526e-05, "loss": 2.7452, "step": 226900 }, { "epoch": 0.75, "learning_rate": 1.2421573658681942e-05, "loss": 2.7479, "step": 227000 }, { "epoch": 0.75, "learning_rate": 1.2405019285844356e-05, "loss": 2.7481, "step": 227100 }, { "epoch": 0.75, "learning_rate": 1.2388464913006771e-05, "loss": 2.7316, "step": 227200 }, { "epoch": 0.75, "learning_rate": 1.2371910540169187e-05, "loss": 2.7451, "step": 227300 }, { "epoch": 0.75, "learning_rate": 1.2355356167331601e-05, "loss": 2.742, "step": 227400 }, { "epoch": 0.75, "learning_rate": 1.2338801794494017e-05, "loss": 2.749, "step": 227500 }, { "epoch": 0.75, "learning_rate": 1.232224742165643e-05, "loss": 2.7425, "step": 227600 }, { "epoch": 0.75, "learning_rate": 1.2305693048818846e-05, "loss": 2.7363, "step": 227700 }, { "epoch": 0.75, "learning_rate": 1.2289138675981262e-05, "loss": 2.7392, "step": 227800 }, { "epoch": 0.75, "learning_rate": 1.2272584303143676e-05, "loss": 2.7551, "step": 227900 }, { "epoch": 0.75, "learning_rate": 1.2256029930306091e-05, "loss": 2.7496, "step": 228000 }, { "epoch": 0.76, "learning_rate": 1.2239475557468507e-05, "loss": 2.7465, "step": 228100 }, { "epoch": 0.76, "learning_rate": 1.2222921184630921e-05, "loss": 2.7464, "step": 228200 }, { "epoch": 0.76, "learning_rate": 1.2206366811793337e-05, "loss": 2.7352, "step": 228300 }, { "epoch": 0.76, "learning_rate": 1.218981243895575e-05, "loss": 2.7362, "step": 228400 }, { "epoch": 0.76, "learning_rate": 1.2173258066118166e-05, "loss": 2.7531, "step": 228500 }, { "epoch": 0.76, "learning_rate": 1.215670369328058e-05, "loss": 2.7409, "step": 228600 }, { "epoch": 0.76, "learning_rate": 1.2140149320442994e-05, "loss": 2.746, "step": 228700 }, { "epoch": 0.76, "learning_rate": 1.212359494760541e-05, "loss": 2.7396, "step": 228800 }, { "epoch": 0.76, "learning_rate": 1.2107040574767826e-05, "loss": 2.7444, "step": 228900 }, { "epoch": 0.76, "learning_rate": 1.209048620193024e-05, "loss": 2.7319, "step": 229000 }, { "epoch": 0.76, "learning_rate": 1.2073931829092655e-05, "loss": 2.7357, "step": 229100 }, { "epoch": 0.76, "learning_rate": 1.205737745625507e-05, "loss": 2.7458, "step": 229200 }, { "epoch": 0.76, "learning_rate": 1.2040823083417485e-05, "loss": 2.7553, "step": 229300 }, { "epoch": 0.76, "learning_rate": 1.20242687105799e-05, "loss": 2.7516, "step": 229400 }, { "epoch": 0.76, "learning_rate": 1.2007714337742314e-05, "loss": 2.7464, "step": 229500 }, { "epoch": 0.76, "learning_rate": 1.199115996490473e-05, "loss": 2.7407, "step": 229600 }, { "epoch": 0.76, "learning_rate": 1.1974605592067146e-05, "loss": 2.7443, "step": 229700 }, { "epoch": 0.76, "learning_rate": 1.195805121922956e-05, "loss": 2.7384, "step": 229800 }, { "epoch": 0.76, "learning_rate": 1.1941496846391975e-05, "loss": 2.7536, "step": 229900 }, { "epoch": 0.76, "learning_rate": 1.1924942473554391e-05, "loss": 2.7451, "step": 230000 }, { "epoch": 0.76, "learning_rate": 1.1908388100716805e-05, "loss": 2.7501, "step": 230100 }, { "epoch": 0.76, "learning_rate": 1.189183372787922e-05, "loss": 2.7447, "step": 230200 }, { "epoch": 0.76, "learning_rate": 1.1875279355041634e-05, "loss": 2.7596, "step": 230300 }, { "epoch": 0.76, "learning_rate": 1.185872498220405e-05, "loss": 2.7497, "step": 230400 }, { "epoch": 0.76, "learning_rate": 1.1842170609366466e-05, "loss": 2.7317, "step": 230500 }, { "epoch": 0.76, "learning_rate": 1.182561623652888e-05, "loss": 2.7373, "step": 230600 }, { "epoch": 0.76, "learning_rate": 1.1809061863691295e-05, "loss": 2.7329, "step": 230700 }, { "epoch": 0.76, "learning_rate": 1.179250749085371e-05, "loss": 2.7355, "step": 230800 }, { "epoch": 0.76, "learning_rate": 1.1775953118016123e-05, "loss": 2.7476, "step": 230900 }, { "epoch": 0.76, "learning_rate": 1.1759398745178539e-05, "loss": 2.757, "step": 231000 }, { "epoch": 0.77, "learning_rate": 1.1742844372340955e-05, "loss": 2.7496, "step": 231100 }, { "epoch": 0.77, "learning_rate": 1.1726289999503368e-05, "loss": 2.7524, "step": 231200 }, { "epoch": 0.77, "learning_rate": 1.1709735626665784e-05, "loss": 2.7384, "step": 231300 }, { "epoch": 0.77, "learning_rate": 1.16931812538282e-05, "loss": 2.7517, "step": 231400 }, { "epoch": 0.77, "learning_rate": 1.1676626880990614e-05, "loss": 2.7336, "step": 231500 }, { "epoch": 0.77, "learning_rate": 1.166007250815303e-05, "loss": 2.7514, "step": 231600 }, { "epoch": 0.77, "learning_rate": 1.1643518135315443e-05, "loss": 2.7475, "step": 231700 }, { "epoch": 0.77, "learning_rate": 1.1626963762477859e-05, "loss": 2.7613, "step": 231800 }, { "epoch": 0.77, "learning_rate": 1.1610409389640275e-05, "loss": 2.7348, "step": 231900 }, { "epoch": 0.77, "learning_rate": 1.1593855016802689e-05, "loss": 2.7397, "step": 232000 }, { "epoch": 0.77, "learning_rate": 1.1577300643965104e-05, "loss": 2.7425, "step": 232100 }, { "epoch": 0.77, "learning_rate": 1.156074627112752e-05, "loss": 2.7319, "step": 232200 }, { "epoch": 0.77, "learning_rate": 1.1544191898289934e-05, "loss": 2.7528, "step": 232300 }, { "epoch": 0.77, "learning_rate": 1.152763752545235e-05, "loss": 2.7383, "step": 232400 }, { "epoch": 0.77, "learning_rate": 1.1511083152614763e-05, "loss": 2.7334, "step": 232500 }, { "epoch": 0.77, "learning_rate": 1.1494528779777179e-05, "loss": 2.7459, "step": 232600 }, { "epoch": 0.77, "learning_rate": 1.1477974406939595e-05, "loss": 2.7478, "step": 232700 }, { "epoch": 0.77, "learning_rate": 1.1461420034102009e-05, "loss": 2.7426, "step": 232800 }, { "epoch": 0.77, "learning_rate": 1.1444865661264424e-05, "loss": 2.7348, "step": 232900 }, { "epoch": 0.77, "learning_rate": 1.1428311288426838e-05, "loss": 2.7426, "step": 233000 }, { "epoch": 0.77, "learning_rate": 1.1411756915589252e-05, "loss": 2.7587, "step": 233100 }, { "epoch": 0.77, "learning_rate": 1.1395202542751668e-05, "loss": 2.7479, "step": 233200 }, { "epoch": 0.77, "learning_rate": 1.1378648169914084e-05, "loss": 2.7518, "step": 233300 }, { "epoch": 0.77, "learning_rate": 1.1362093797076498e-05, "loss": 2.7426, "step": 233400 }, { "epoch": 0.77, "learning_rate": 1.1345539424238913e-05, "loss": 2.7376, "step": 233500 }, { "epoch": 0.77, "learning_rate": 1.1328985051401327e-05, "loss": 2.728, "step": 233600 }, { "epoch": 0.77, "learning_rate": 1.1312430678563743e-05, "loss": 2.7333, "step": 233700 }, { "epoch": 0.77, "learning_rate": 1.1295876305726158e-05, "loss": 2.7324, "step": 233800 }, { "epoch": 0.77, "learning_rate": 1.1279321932888572e-05, "loss": 2.7316, "step": 233900 }, { "epoch": 0.77, "learning_rate": 1.1262767560050988e-05, "loss": 2.7416, "step": 234000 }, { "epoch": 0.78, "learning_rate": 1.1246213187213404e-05, "loss": 2.729, "step": 234100 }, { "epoch": 0.78, "learning_rate": 1.1229658814375818e-05, "loss": 2.7398, "step": 234200 }, { "epoch": 0.78, "learning_rate": 1.1213104441538233e-05, "loss": 2.7352, "step": 234300 }, { "epoch": 0.78, "learning_rate": 1.1196550068700647e-05, "loss": 2.7359, "step": 234400 }, { "epoch": 0.78, "learning_rate": 1.1179995695863063e-05, "loss": 2.7495, "step": 234500 }, { "epoch": 0.78, "learning_rate": 1.1163441323025479e-05, "loss": 2.7248, "step": 234600 }, { "epoch": 0.78, "learning_rate": 1.1146886950187892e-05, "loss": 2.7605, "step": 234700 }, { "epoch": 0.78, "learning_rate": 1.1130332577350308e-05, "loss": 2.744, "step": 234800 }, { "epoch": 0.78, "learning_rate": 1.1113778204512724e-05, "loss": 2.7448, "step": 234900 }, { "epoch": 0.78, "learning_rate": 1.1097223831675138e-05, "loss": 2.7273, "step": 235000 }, { "epoch": 0.78, "learning_rate": 1.1080669458837553e-05, "loss": 2.7384, "step": 235100 }, { "epoch": 0.78, "learning_rate": 1.1064115085999967e-05, "loss": 2.7269, "step": 235200 }, { "epoch": 0.78, "learning_rate": 1.1047560713162381e-05, "loss": 2.7432, "step": 235300 }, { "epoch": 0.78, "learning_rate": 1.1031006340324797e-05, "loss": 2.7434, "step": 235400 }, { "epoch": 0.78, "learning_rate": 1.1014451967487213e-05, "loss": 2.7228, "step": 235500 }, { "epoch": 0.78, "learning_rate": 1.0997897594649627e-05, "loss": 2.7324, "step": 235600 }, { "epoch": 0.78, "learning_rate": 1.0981343221812042e-05, "loss": 2.7438, "step": 235700 }, { "epoch": 0.78, "learning_rate": 1.0964788848974456e-05, "loss": 2.7417, "step": 235800 }, { "epoch": 0.78, "learning_rate": 1.0948234476136872e-05, "loss": 2.7496, "step": 235900 }, { "epoch": 0.78, "learning_rate": 1.0931680103299287e-05, "loss": 2.7379, "step": 236000 }, { "epoch": 0.78, "learning_rate": 1.0915125730461701e-05, "loss": 2.7366, "step": 236100 }, { "epoch": 0.78, "learning_rate": 1.0898571357624117e-05, "loss": 2.7548, "step": 236200 }, { "epoch": 0.78, "learning_rate": 1.0882016984786533e-05, "loss": 2.7474, "step": 236300 }, { "epoch": 0.78, "learning_rate": 1.0865462611948947e-05, "loss": 2.745, "step": 236400 }, { "epoch": 0.78, "learning_rate": 1.0848908239111362e-05, "loss": 2.7446, "step": 236500 }, { "epoch": 0.78, "learning_rate": 1.0832353866273776e-05, "loss": 2.721, "step": 236600 }, { "epoch": 0.78, "learning_rate": 1.0815799493436192e-05, "loss": 2.7305, "step": 236700 }, { "epoch": 0.78, "learning_rate": 1.0799245120598608e-05, "loss": 2.7319, "step": 236800 }, { "epoch": 0.78, "learning_rate": 1.0782690747761022e-05, "loss": 2.7489, "step": 236900 }, { "epoch": 0.78, "learning_rate": 1.0766136374923437e-05, "loss": 2.741, "step": 237000 }, { "epoch": 0.79, "learning_rate": 1.0749582002085853e-05, "loss": 2.7347, "step": 237100 }, { "epoch": 0.79, "learning_rate": 1.0733027629248267e-05, "loss": 2.7414, "step": 237200 }, { "epoch": 0.79, "learning_rate": 1.0716473256410682e-05, "loss": 2.7237, "step": 237300 }, { "epoch": 0.79, "learning_rate": 1.0699918883573096e-05, "loss": 2.7413, "step": 237400 }, { "epoch": 0.79, "learning_rate": 1.068336451073551e-05, "loss": 2.7393, "step": 237500 }, { "epoch": 0.79, "learning_rate": 1.0666810137897926e-05, "loss": 2.7405, "step": 237600 }, { "epoch": 0.79, "learning_rate": 1.065025576506034e-05, "loss": 2.7351, "step": 237700 }, { "epoch": 0.79, "learning_rate": 1.0633701392222756e-05, "loss": 2.7418, "step": 237800 }, { "epoch": 0.79, "learning_rate": 1.0617147019385171e-05, "loss": 2.7374, "step": 237900 }, { "epoch": 0.79, "learning_rate": 1.0600592646547585e-05, "loss": 2.7515, "step": 238000 }, { "epoch": 0.79, "learning_rate": 1.058403827371e-05, "loss": 2.7371, "step": 238100 }, { "epoch": 0.79, "learning_rate": 1.0567483900872416e-05, "loss": 2.7455, "step": 238200 }, { "epoch": 0.79, "learning_rate": 1.055092952803483e-05, "loss": 2.7351, "step": 238300 }, { "epoch": 0.79, "learning_rate": 1.0534375155197246e-05, "loss": 2.7367, "step": 238400 }, { "epoch": 0.79, "learning_rate": 1.051782078235966e-05, "loss": 2.7361, "step": 238500 }, { "epoch": 0.79, "learning_rate": 1.0501266409522076e-05, "loss": 2.74, "step": 238600 }, { "epoch": 0.79, "learning_rate": 1.0484712036684491e-05, "loss": 2.7639, "step": 238700 }, { "epoch": 0.79, "learning_rate": 1.0468157663846905e-05, "loss": 2.7485, "step": 238800 }, { "epoch": 0.79, "learning_rate": 1.0451603291009321e-05, "loss": 2.7205, "step": 238900 }, { "epoch": 0.79, "learning_rate": 1.0435048918171737e-05, "loss": 2.76, "step": 239000 }, { "epoch": 0.79, "learning_rate": 1.041849454533415e-05, "loss": 2.7435, "step": 239100 }, { "epoch": 0.79, "learning_rate": 1.0401940172496566e-05, "loss": 2.7403, "step": 239200 }, { "epoch": 0.79, "learning_rate": 1.038538579965898e-05, "loss": 2.7429, "step": 239300 }, { "epoch": 0.79, "learning_rate": 1.0368831426821396e-05, "loss": 2.7554, "step": 239400 }, { "epoch": 0.79, "learning_rate": 1.0352277053983811e-05, "loss": 2.7406, "step": 239500 }, { "epoch": 0.79, "learning_rate": 1.0335722681146225e-05, "loss": 2.7466, "step": 239600 }, { "epoch": 0.79, "learning_rate": 1.031916830830864e-05, "loss": 2.7489, "step": 239700 }, { "epoch": 0.79, "learning_rate": 1.0302613935471055e-05, "loss": 2.727, "step": 239800 }, { "epoch": 0.79, "learning_rate": 1.0286059562633469e-05, "loss": 2.7357, "step": 239900 }, { "epoch": 0.79, "learning_rate": 1.0269505189795885e-05, "loss": 2.7406, "step": 240000 }, { "epoch": 0.79, "learning_rate": 1.02529508169583e-05, "loss": 2.7517, "step": 240100 }, { "epoch": 0.8, "learning_rate": 1.0236396444120714e-05, "loss": 2.7306, "step": 240200 }, { "epoch": 0.8, "learning_rate": 1.021984207128313e-05, "loss": 2.7336, "step": 240300 }, { "epoch": 0.8, "learning_rate": 1.0203287698445546e-05, "loss": 2.7443, "step": 240400 }, { "epoch": 0.8, "learning_rate": 1.018673332560796e-05, "loss": 2.7485, "step": 240500 }, { "epoch": 0.8, "learning_rate": 1.0170178952770375e-05, "loss": 2.7336, "step": 240600 }, { "epoch": 0.8, "learning_rate": 1.0153624579932789e-05, "loss": 2.7348, "step": 240700 }, { "epoch": 0.8, "learning_rate": 1.0137070207095205e-05, "loss": 2.7212, "step": 240800 }, { "epoch": 0.8, "learning_rate": 1.012051583425762e-05, "loss": 2.7343, "step": 240900 }, { "epoch": 0.8, "learning_rate": 1.0103961461420034e-05, "loss": 2.7293, "step": 241000 }, { "epoch": 0.8, "learning_rate": 1.008740708858245e-05, "loss": 2.7273, "step": 241100 }, { "epoch": 0.8, "learning_rate": 1.0070852715744866e-05, "loss": 2.7453, "step": 241200 }, { "epoch": 0.8, "learning_rate": 1.005429834290728e-05, "loss": 2.7538, "step": 241300 }, { "epoch": 0.8, "learning_rate": 1.0037743970069695e-05, "loss": 2.7399, "step": 241400 }, { "epoch": 0.8, "learning_rate": 1.002118959723211e-05, "loss": 2.7486, "step": 241500 }, { "epoch": 0.8, "learning_rate": 1.0004635224394525e-05, "loss": 2.7318, "step": 241600 }, { "epoch": 0.8, "learning_rate": 9.98808085155694e-06, "loss": 2.7319, "step": 241700 }, { "epoch": 0.8, "learning_rate": 9.971526478719353e-06, "loss": 2.7505, "step": 241800 }, { "epoch": 0.8, "learning_rate": 9.954972105881768e-06, "loss": 2.7475, "step": 241900 }, { "epoch": 0.8, "learning_rate": 9.938417733044184e-06, "loss": 2.7573, "step": 242000 }, { "epoch": 0.8, "learning_rate": 9.921863360206598e-06, "loss": 2.7482, "step": 242100 }, { "epoch": 0.8, "learning_rate": 9.905308987369014e-06, "loss": 2.7415, "step": 242200 }, { "epoch": 0.8, "learning_rate": 9.88875461453143e-06, "loss": 2.7434, "step": 242300 }, { "epoch": 0.8, "learning_rate": 9.872200241693843e-06, "loss": 2.7401, "step": 242400 }, { "epoch": 0.8, "learning_rate": 9.855645868856259e-06, "loss": 2.7367, "step": 242500 }, { "epoch": 0.8, "learning_rate": 9.839091496018673e-06, "loss": 2.7548, "step": 242600 }, { "epoch": 0.8, "learning_rate": 9.822537123181089e-06, "loss": 2.7341, "step": 242700 }, { "epoch": 0.8, "learning_rate": 9.805982750343504e-06, "loss": 2.738, "step": 242800 }, { "epoch": 0.8, "learning_rate": 9.789428377505918e-06, "loss": 2.7412, "step": 242900 }, { "epoch": 0.8, "learning_rate": 9.772874004668334e-06, "loss": 2.7319, "step": 243000 }, { "epoch": 0.8, "learning_rate": 9.75631963183075e-06, "loss": 2.7267, "step": 243100 }, { "epoch": 0.81, "learning_rate": 9.739765258993163e-06, "loss": 2.7339, "step": 243200 }, { "epoch": 0.81, "learning_rate": 9.723210886155579e-06, "loss": 2.7246, "step": 243300 }, { "epoch": 0.81, "learning_rate": 9.706656513317993e-06, "loss": 2.7443, "step": 243400 }, { "epoch": 0.81, "learning_rate": 9.690102140480409e-06, "loss": 2.7346, "step": 243500 }, { "epoch": 0.81, "learning_rate": 9.673547767642824e-06, "loss": 2.7435, "step": 243600 }, { "epoch": 0.81, "learning_rate": 9.656993394805238e-06, "loss": 2.7533, "step": 243700 }, { "epoch": 0.81, "learning_rate": 9.640439021967654e-06, "loss": 2.7593, "step": 243800 }, { "epoch": 0.81, "learning_rate": 9.62388464913007e-06, "loss": 2.7394, "step": 243900 }, { "epoch": 0.81, "learning_rate": 9.607330276292482e-06, "loss": 2.7142, "step": 244000 }, { "epoch": 0.81, "learning_rate": 9.590775903454897e-06, "loss": 2.7469, "step": 244100 }, { "epoch": 0.81, "learning_rate": 9.574221530617313e-06, "loss": 2.7424, "step": 244200 }, { "epoch": 0.81, "learning_rate": 9.557667157779727e-06, "loss": 2.7337, "step": 244300 }, { "epoch": 0.81, "learning_rate": 9.541112784942143e-06, "loss": 2.7325, "step": 244400 }, { "epoch": 0.81, "learning_rate": 9.524558412104558e-06, "loss": 2.7257, "step": 244500 }, { "epoch": 0.81, "learning_rate": 9.508004039266972e-06, "loss": 2.7487, "step": 244600 }, { "epoch": 0.81, "learning_rate": 9.491449666429388e-06, "loss": 2.7275, "step": 244700 }, { "epoch": 0.81, "learning_rate": 9.474895293591802e-06, "loss": 2.7328, "step": 244800 }, { "epoch": 0.81, "learning_rate": 9.458340920754218e-06, "loss": 2.7504, "step": 244900 }, { "epoch": 0.81, "learning_rate": 9.441786547916633e-06, "loss": 2.7517, "step": 245000 }, { "epoch": 0.81, "learning_rate": 9.425232175079047e-06, "loss": 2.7347, "step": 245100 }, { "epoch": 0.81, "learning_rate": 9.408677802241463e-06, "loss": 2.7313, "step": 245200 }, { "epoch": 0.81, "learning_rate": 9.392123429403878e-06, "loss": 2.7466, "step": 245300 }, { "epoch": 0.81, "learning_rate": 9.375569056566292e-06, "loss": 2.7434, "step": 245400 }, { "epoch": 0.81, "learning_rate": 9.359014683728708e-06, "loss": 2.7474, "step": 245500 }, { "epoch": 0.81, "learning_rate": 9.342460310891122e-06, "loss": 2.7445, "step": 245600 }, { "epoch": 0.81, "learning_rate": 9.325905938053538e-06, "loss": 2.7539, "step": 245700 }, { "epoch": 0.81, "learning_rate": 9.309351565215953e-06, "loss": 2.7381, "step": 245800 }, { "epoch": 0.81, "learning_rate": 9.292797192378367e-06, "loss": 2.7332, "step": 245900 }, { "epoch": 0.81, "learning_rate": 9.276242819540783e-06, "loss": 2.7354, "step": 246000 }, { "epoch": 0.81, "learning_rate": 9.259688446703199e-06, "loss": 2.7312, "step": 246100 }, { "epoch": 0.82, "learning_rate": 9.24313407386561e-06, "loss": 2.7438, "step": 246200 }, { "epoch": 0.82, "learning_rate": 9.226579701028026e-06, "loss": 2.7505, "step": 246300 }, { "epoch": 0.82, "learning_rate": 9.210025328190442e-06, "loss": 2.7491, "step": 246400 }, { "epoch": 0.82, "learning_rate": 9.193470955352856e-06, "loss": 2.7434, "step": 246500 }, { "epoch": 0.82, "learning_rate": 9.176916582515272e-06, "loss": 2.7435, "step": 246600 }, { "epoch": 0.82, "learning_rate": 9.160362209677686e-06, "loss": 2.7374, "step": 246700 }, { "epoch": 0.82, "learning_rate": 9.143807836840101e-06, "loss": 2.725, "step": 246800 }, { "epoch": 0.82, "learning_rate": 9.127253464002517e-06, "loss": 2.756, "step": 246900 }, { "epoch": 0.82, "learning_rate": 9.110699091164931e-06, "loss": 2.7279, "step": 247000 }, { "epoch": 0.82, "learning_rate": 9.094144718327347e-06, "loss": 2.7432, "step": 247100 }, { "epoch": 0.82, "learning_rate": 9.077590345489762e-06, "loss": 2.7376, "step": 247200 }, { "epoch": 0.82, "learning_rate": 9.061035972652176e-06, "loss": 2.7393, "step": 247300 }, { "epoch": 0.82, "learning_rate": 9.044481599814592e-06, "loss": 2.7387, "step": 247400 }, { "epoch": 0.82, "learning_rate": 9.027927226977006e-06, "loss": 2.743, "step": 247500 }, { "epoch": 0.82, "learning_rate": 9.011372854139421e-06, "loss": 2.7325, "step": 247600 }, { "epoch": 0.82, "learning_rate": 8.994818481301837e-06, "loss": 2.7272, "step": 247700 }, { "epoch": 0.82, "learning_rate": 8.978264108464251e-06, "loss": 2.7413, "step": 247800 }, { "epoch": 0.82, "learning_rate": 8.961709735626667e-06, "loss": 2.7346, "step": 247900 }, { "epoch": 0.82, "learning_rate": 8.945155362789082e-06, "loss": 2.7456, "step": 248000 }, { "epoch": 0.82, "learning_rate": 8.928600989951496e-06, "loss": 2.7535, "step": 248100 }, { "epoch": 0.82, "learning_rate": 8.912046617113912e-06, "loss": 2.754, "step": 248200 }, { "epoch": 0.82, "learning_rate": 8.895492244276326e-06, "loss": 2.7347, "step": 248300 }, { "epoch": 0.82, "learning_rate": 8.87893787143874e-06, "loss": 2.7304, "step": 248400 }, { "epoch": 0.82, "learning_rate": 8.862383498601155e-06, "loss": 2.7264, "step": 248500 }, { "epoch": 0.82, "learning_rate": 8.845829125763571e-06, "loss": 2.744, "step": 248600 }, { "epoch": 0.82, "learning_rate": 8.829274752925985e-06, "loss": 2.729, "step": 248700 }, { "epoch": 0.82, "learning_rate": 8.8127203800884e-06, "loss": 2.7393, "step": 248800 }, { "epoch": 0.82, "learning_rate": 8.796166007250815e-06, "loss": 2.7461, "step": 248900 }, { "epoch": 0.82, "learning_rate": 8.77961163441323e-06, "loss": 2.733, "step": 249000 }, { "epoch": 0.82, "learning_rate": 8.763057261575646e-06, "loss": 2.7302, "step": 249100 }, { "epoch": 0.83, "learning_rate": 8.74650288873806e-06, "loss": 2.7356, "step": 249200 }, { "epoch": 0.83, "learning_rate": 8.729948515900476e-06, "loss": 2.7453, "step": 249300 }, { "epoch": 0.83, "learning_rate": 8.713394143062891e-06, "loss": 2.7319, "step": 249400 }, { "epoch": 0.83, "learning_rate": 8.696839770225305e-06, "loss": 2.7295, "step": 249500 }, { "epoch": 0.83, "learning_rate": 8.680285397387721e-06, "loss": 2.7342, "step": 249600 }, { "epoch": 0.83, "learning_rate": 8.663731024550135e-06, "loss": 2.748, "step": 249700 }, { "epoch": 0.83, "learning_rate": 8.64717665171255e-06, "loss": 2.7412, "step": 249800 }, { "epoch": 0.83, "learning_rate": 8.630622278874966e-06, "loss": 2.7463, "step": 249900 }, { "epoch": 0.83, "learning_rate": 8.61406790603738e-06, "loss": 2.733, "step": 250000 }, { "epoch": 0.83, "learning_rate": 8.597513533199796e-06, "loss": 2.7451, "step": 250100 }, { "epoch": 0.83, "learning_rate": 8.580959160362211e-06, "loss": 2.7157, "step": 250200 }, { "epoch": 0.83, "learning_rate": 8.564404787524625e-06, "loss": 2.7447, "step": 250300 }, { "epoch": 0.83, "learning_rate": 8.547850414687041e-06, "loss": 2.7285, "step": 250400 }, { "epoch": 0.83, "learning_rate": 8.531296041849455e-06, "loss": 2.7462, "step": 250500 }, { "epoch": 0.83, "learning_rate": 8.514741669011869e-06, "loss": 2.7406, "step": 250600 }, { "epoch": 0.83, "learning_rate": 8.498187296174285e-06, "loss": 2.7325, "step": 250700 }, { "epoch": 0.83, "learning_rate": 8.481632923336698e-06, "loss": 2.7378, "step": 250800 }, { "epoch": 0.83, "learning_rate": 8.465078550499114e-06, "loss": 2.7419, "step": 250900 }, { "epoch": 0.83, "learning_rate": 8.44852417766153e-06, "loss": 2.7302, "step": 251000 }, { "epoch": 0.83, "learning_rate": 8.431969804823944e-06, "loss": 2.7363, "step": 251100 }, { "epoch": 0.83, "learning_rate": 8.41541543198636e-06, "loss": 2.7451, "step": 251200 }, { "epoch": 0.83, "learning_rate": 8.398861059148775e-06, "loss": 2.728, "step": 251300 }, { "epoch": 0.83, "learning_rate": 8.382306686311189e-06, "loss": 2.7401, "step": 251400 }, { "epoch": 0.83, "learning_rate": 8.365752313473605e-06, "loss": 2.7444, "step": 251500 }, { "epoch": 0.83, "learning_rate": 8.349197940636019e-06, "loss": 2.723, "step": 251600 }, { "epoch": 0.83, "learning_rate": 8.332643567798434e-06, "loss": 2.7401, "step": 251700 }, { "epoch": 0.83, "learning_rate": 8.31608919496085e-06, "loss": 2.7524, "step": 251800 }, { "epoch": 0.83, "learning_rate": 8.299534822123264e-06, "loss": 2.7397, "step": 251900 }, { "epoch": 0.83, "learning_rate": 8.28298044928568e-06, "loss": 2.7336, "step": 252000 }, { "epoch": 0.83, "learning_rate": 8.266426076448095e-06, "loss": 2.7403, "step": 252100 }, { "epoch": 0.84, "learning_rate": 8.249871703610509e-06, "loss": 2.7486, "step": 252200 }, { "epoch": 0.84, "learning_rate": 8.233317330772925e-06, "loss": 2.7298, "step": 252300 }, { "epoch": 0.84, "learning_rate": 8.216762957935339e-06, "loss": 2.7428, "step": 252400 }, { "epoch": 0.84, "learning_rate": 8.200208585097754e-06, "loss": 2.7441, "step": 252500 }, { "epoch": 0.84, "learning_rate": 8.18365421226017e-06, "loss": 2.7311, "step": 252600 }, { "epoch": 0.84, "learning_rate": 8.167099839422584e-06, "loss": 2.7507, "step": 252700 }, { "epoch": 0.84, "learning_rate": 8.150545466584998e-06, "loss": 2.7363, "step": 252800 }, { "epoch": 0.84, "learning_rate": 8.133991093747414e-06, "loss": 2.746, "step": 252900 }, { "epoch": 0.84, "learning_rate": 8.117436720909828e-06, "loss": 2.728, "step": 253000 }, { "epoch": 0.84, "learning_rate": 8.100882348072243e-06, "loss": 2.7463, "step": 253100 }, { "epoch": 0.84, "learning_rate": 8.084327975234659e-06, "loss": 2.7568, "step": 253200 }, { "epoch": 0.84, "learning_rate": 8.067773602397073e-06, "loss": 2.7404, "step": 253300 }, { "epoch": 0.84, "learning_rate": 8.051219229559488e-06, "loss": 2.74, "step": 253400 }, { "epoch": 0.84, "learning_rate": 8.034664856721904e-06, "loss": 2.7384, "step": 253500 }, { "epoch": 0.84, "learning_rate": 8.018110483884318e-06, "loss": 2.7477, "step": 253600 }, { "epoch": 0.84, "learning_rate": 8.001556111046734e-06, "loss": 2.7455, "step": 253700 }, { "epoch": 0.84, "learning_rate": 7.985001738209148e-06, "loss": 2.725, "step": 253800 }, { "epoch": 0.84, "learning_rate": 7.968447365371563e-06, "loss": 2.7562, "step": 253900 }, { "epoch": 0.84, "learning_rate": 7.951892992533979e-06, "loss": 2.7371, "step": 254000 }, { "epoch": 0.84, "learning_rate": 7.935338619696393e-06, "loss": 2.7335, "step": 254100 }, { "epoch": 0.84, "learning_rate": 7.918784246858809e-06, "loss": 2.7311, "step": 254200 }, { "epoch": 0.84, "learning_rate": 7.902229874021224e-06, "loss": 2.741, "step": 254300 }, { "epoch": 0.84, "learning_rate": 7.885675501183638e-06, "loss": 2.7472, "step": 254400 }, { "epoch": 0.84, "learning_rate": 7.869121128346054e-06, "loss": 2.719, "step": 254500 }, { "epoch": 0.84, "learning_rate": 7.852566755508468e-06, "loss": 2.7385, "step": 254600 }, { "epoch": 0.84, "learning_rate": 7.836012382670883e-06, "loss": 2.7426, "step": 254700 }, { "epoch": 0.84, "learning_rate": 7.819458009833299e-06, "loss": 2.7336, "step": 254800 }, { "epoch": 0.84, "learning_rate": 7.802903636995713e-06, "loss": 2.7452, "step": 254900 }, { "epoch": 0.84, "learning_rate": 7.786349264158129e-06, "loss": 2.7408, "step": 255000 }, { "epoch": 0.84, "learning_rate": 7.769794891320543e-06, "loss": 2.7486, "step": 255100 }, { "epoch": 0.84, "learning_rate": 7.753240518482957e-06, "loss": 2.7353, "step": 255200 }, { "epoch": 0.85, "learning_rate": 7.736686145645372e-06, "loss": 2.7298, "step": 255300 }, { "epoch": 0.85, "learning_rate": 7.720131772807788e-06, "loss": 2.7394, "step": 255400 }, { "epoch": 0.85, "learning_rate": 7.703577399970202e-06, "loss": 2.7406, "step": 255500 }, { "epoch": 0.85, "learning_rate": 7.687023027132617e-06, "loss": 2.7442, "step": 255600 }, { "epoch": 0.85, "learning_rate": 7.670468654295031e-06, "loss": 2.7437, "step": 255700 }, { "epoch": 0.85, "learning_rate": 7.653914281457447e-06, "loss": 2.7406, "step": 255800 }, { "epoch": 0.85, "learning_rate": 7.637359908619863e-06, "loss": 2.7434, "step": 255900 }, { "epoch": 0.85, "learning_rate": 7.6208055357822775e-06, "loss": 2.7418, "step": 256000 }, { "epoch": 0.85, "learning_rate": 7.604251162944692e-06, "loss": 2.7385, "step": 256100 }, { "epoch": 0.85, "learning_rate": 7.587696790107107e-06, "loss": 2.7366, "step": 256200 }, { "epoch": 0.85, "learning_rate": 7.571142417269522e-06, "loss": 2.7187, "step": 256300 }, { "epoch": 0.85, "learning_rate": 7.5545880444319376e-06, "loss": 2.7416, "step": 256400 }, { "epoch": 0.85, "learning_rate": 7.538033671594352e-06, "loss": 2.7359, "step": 256500 }, { "epoch": 0.85, "learning_rate": 7.521479298756767e-06, "loss": 2.7326, "step": 256600 }, { "epoch": 0.85, "learning_rate": 7.504924925919182e-06, "loss": 2.7365, "step": 256700 }, { "epoch": 0.85, "learning_rate": 7.488370553081598e-06, "loss": 2.7412, "step": 256800 }, { "epoch": 0.85, "learning_rate": 7.471816180244012e-06, "loss": 2.7424, "step": 256900 }, { "epoch": 0.85, "learning_rate": 7.455261807406427e-06, "loss": 2.7537, "step": 257000 }, { "epoch": 0.85, "learning_rate": 7.438707434568842e-06, "loss": 2.7385, "step": 257100 }, { "epoch": 0.85, "learning_rate": 7.422153061731258e-06, "loss": 2.7418, "step": 257200 }, { "epoch": 0.85, "learning_rate": 7.405598688893671e-06, "loss": 2.7269, "step": 257300 }, { "epoch": 0.85, "learning_rate": 7.389044316056086e-06, "loss": 2.7453, "step": 257400 }, { "epoch": 0.85, "learning_rate": 7.372489943218501e-06, "loss": 2.7346, "step": 257500 }, { "epoch": 0.85, "learning_rate": 7.355935570380916e-06, "loss": 2.7354, "step": 257600 }, { "epoch": 0.85, "learning_rate": 7.339381197543331e-06, "loss": 2.7384, "step": 257700 }, { "epoch": 0.85, "learning_rate": 7.322826824705746e-06, "loss": 2.7382, "step": 257800 }, { "epoch": 0.85, "learning_rate": 7.306272451868161e-06, "loss": 2.7356, "step": 257900 }, { "epoch": 0.85, "learning_rate": 7.289718079030576e-06, "loss": 2.7409, "step": 258000 }, { "epoch": 0.85, "learning_rate": 7.273163706192991e-06, "loss": 2.7443, "step": 258100 }, { "epoch": 0.85, "learning_rate": 7.256609333355406e-06, "loss": 2.7394, "step": 258200 }, { "epoch": 0.86, "learning_rate": 7.240054960517821e-06, "loss": 2.7375, "step": 258300 }, { "epoch": 0.86, "learning_rate": 7.223500587680236e-06, "loss": 2.7428, "step": 258400 }, { "epoch": 0.86, "learning_rate": 7.206946214842651e-06, "loss": 2.7418, "step": 258500 }, { "epoch": 0.86, "learning_rate": 7.190391842005066e-06, "loss": 2.7342, "step": 258600 }, { "epoch": 0.86, "learning_rate": 7.173837469167481e-06, "loss": 2.7337, "step": 258700 }, { "epoch": 0.86, "learning_rate": 7.157283096329896e-06, "loss": 2.7301, "step": 258800 }, { "epoch": 0.86, "learning_rate": 7.140728723492311e-06, "loss": 2.7385, "step": 258900 }, { "epoch": 0.86, "learning_rate": 7.124174350654726e-06, "loss": 2.7435, "step": 259000 }, { "epoch": 0.86, "learning_rate": 7.1076199778171414e-06, "loss": 2.7416, "step": 259100 }, { "epoch": 0.86, "learning_rate": 7.091065604979556e-06, "loss": 2.7284, "step": 259200 }, { "epoch": 0.86, "learning_rate": 7.074511232141971e-06, "loss": 2.7247, "step": 259300 }, { "epoch": 0.86, "learning_rate": 7.057956859304386e-06, "loss": 2.7632, "step": 259400 }, { "epoch": 0.86, "learning_rate": 7.0414024864668e-06, "loss": 2.7417, "step": 259500 }, { "epoch": 0.86, "learning_rate": 7.024848113629215e-06, "loss": 2.7283, "step": 259600 }, { "epoch": 0.86, "learning_rate": 7.00829374079163e-06, "loss": 2.7276, "step": 259700 }, { "epoch": 0.86, "learning_rate": 6.991739367954045e-06, "loss": 2.7358, "step": 259800 }, { "epoch": 0.86, "learning_rate": 6.97518499511646e-06, "loss": 2.7347, "step": 259900 }, { "epoch": 0.86, "learning_rate": 6.958630622278875e-06, "loss": 2.7338, "step": 260000 }, { "epoch": 0.86, "learning_rate": 6.94207624944129e-06, "loss": 2.7307, "step": 260100 }, { "epoch": 0.86, "learning_rate": 6.925521876603705e-06, "loss": 2.7398, "step": 260200 }, { "epoch": 0.86, "learning_rate": 6.90896750376612e-06, "loss": 2.7306, "step": 260300 }, { "epoch": 0.86, "learning_rate": 6.892413130928535e-06, "loss": 2.7517, "step": 260400 }, { "epoch": 0.86, "learning_rate": 6.87585875809095e-06, "loss": 2.7369, "step": 260500 }, { "epoch": 0.86, "learning_rate": 6.859304385253365e-06, "loss": 2.7477, "step": 260600 }, { "epoch": 0.86, "learning_rate": 6.84275001241578e-06, "loss": 2.7353, "step": 260700 }, { "epoch": 0.86, "learning_rate": 6.826195639578195e-06, "loss": 2.7442, "step": 260800 }, { "epoch": 0.86, "learning_rate": 6.8096412667406104e-06, "loss": 2.7413, "step": 260900 }, { "epoch": 0.86, "learning_rate": 6.793086893903025e-06, "loss": 2.7339, "step": 261000 }, { "epoch": 0.86, "learning_rate": 6.77653252106544e-06, "loss": 2.7319, "step": 261100 }, { "epoch": 0.86, "learning_rate": 6.759978148227855e-06, "loss": 2.7341, "step": 261200 }, { "epoch": 0.87, "learning_rate": 6.7434237753902705e-06, "loss": 2.7282, "step": 261300 }, { "epoch": 0.87, "learning_rate": 6.726869402552685e-06, "loss": 2.7514, "step": 261400 }, { "epoch": 0.87, "learning_rate": 6.7103150297151e-06, "loss": 2.7406, "step": 261500 }, { "epoch": 0.87, "learning_rate": 6.693760656877515e-06, "loss": 2.7351, "step": 261600 }, { "epoch": 0.87, "learning_rate": 6.677206284039929e-06, "loss": 2.7404, "step": 261700 }, { "epoch": 0.87, "learning_rate": 6.660651911202344e-06, "loss": 2.7455, "step": 261800 }, { "epoch": 0.87, "learning_rate": 6.6440975383647584e-06, "loss": 2.7393, "step": 261900 }, { "epoch": 0.87, "learning_rate": 6.627543165527174e-06, "loss": 2.7226, "step": 262000 }, { "epoch": 0.87, "learning_rate": 6.610988792689589e-06, "loss": 2.7289, "step": 262100 }, { "epoch": 0.87, "learning_rate": 6.594434419852004e-06, "loss": 2.7254, "step": 262200 }, { "epoch": 0.87, "learning_rate": 6.5778800470144185e-06, "loss": 2.7397, "step": 262300 }, { "epoch": 0.87, "learning_rate": 6.561325674176834e-06, "loss": 2.7507, "step": 262400 }, { "epoch": 0.87, "learning_rate": 6.544771301339249e-06, "loss": 2.7465, "step": 262500 }, { "epoch": 0.87, "learning_rate": 6.528216928501664e-06, "loss": 2.7299, "step": 262600 }, { "epoch": 0.87, "learning_rate": 6.5116625556640786e-06, "loss": 2.7408, "step": 262700 }, { "epoch": 0.87, "learning_rate": 6.495108182826494e-06, "loss": 2.728, "step": 262800 }, { "epoch": 0.87, "learning_rate": 6.478553809988909e-06, "loss": 2.7467, "step": 262900 }, { "epoch": 0.87, "learning_rate": 6.461999437151324e-06, "loss": 2.7354, "step": 263000 }, { "epoch": 0.87, "learning_rate": 6.445445064313739e-06, "loss": 2.7388, "step": 263100 }, { "epoch": 0.87, "learning_rate": 6.428890691476154e-06, "loss": 2.7328, "step": 263200 }, { "epoch": 0.87, "learning_rate": 6.412336318638569e-06, "loss": 2.7449, "step": 263300 }, { "epoch": 0.87, "learning_rate": 6.395781945800984e-06, "loss": 2.7309, "step": 263400 }, { "epoch": 0.87, "learning_rate": 6.379227572963399e-06, "loss": 2.7438, "step": 263500 }, { "epoch": 0.87, "learning_rate": 6.362673200125814e-06, "loss": 2.7372, "step": 263600 }, { "epoch": 0.87, "learning_rate": 6.346118827288229e-06, "loss": 2.7253, "step": 263700 }, { "epoch": 0.87, "learning_rate": 6.329564454450644e-06, "loss": 2.735, "step": 263800 }, { "epoch": 0.87, "learning_rate": 6.313010081613058e-06, "loss": 2.7235, "step": 263900 }, { "epoch": 0.87, "learning_rate": 6.296455708775473e-06, "loss": 2.7292, "step": 264000 }, { "epoch": 0.87, "learning_rate": 6.2799013359378875e-06, "loss": 2.7258, "step": 264100 }, { "epoch": 0.87, "learning_rate": 6.263346963100303e-06, "loss": 2.7417, "step": 264200 }, { "epoch": 0.88, "learning_rate": 6.246792590262719e-06, "loss": 2.7576, "step": 264300 }, { "epoch": 0.88, "learning_rate": 6.230238217425133e-06, "loss": 2.7562, "step": 264400 }, { "epoch": 0.88, "learning_rate": 6.2136838445875475e-06, "loss": 2.7444, "step": 264500 }, { "epoch": 0.88, "learning_rate": 6.197129471749963e-06, "loss": 2.7318, "step": 264600 }, { "epoch": 0.88, "learning_rate": 6.180575098912378e-06, "loss": 2.7317, "step": 264700 }, { "epoch": 0.88, "learning_rate": 6.164020726074793e-06, "loss": 2.7188, "step": 264800 }, { "epoch": 0.88, "learning_rate": 6.147466353237208e-06, "loss": 2.738, "step": 264900 }, { "epoch": 0.88, "learning_rate": 6.130911980399623e-06, "loss": 2.7404, "step": 265000 }, { "epoch": 0.88, "learning_rate": 6.114357607562038e-06, "loss": 2.7448, "step": 265100 }, { "epoch": 0.88, "learning_rate": 6.097803234724453e-06, "loss": 2.7223, "step": 265200 }, { "epoch": 0.88, "learning_rate": 6.081248861886868e-06, "loss": 2.7233, "step": 265300 }, { "epoch": 0.88, "learning_rate": 6.064694489049283e-06, "loss": 2.7413, "step": 265400 }, { "epoch": 0.88, "learning_rate": 6.048140116211697e-06, "loss": 2.7228, "step": 265500 }, { "epoch": 0.88, "learning_rate": 6.031585743374112e-06, "loss": 2.7293, "step": 265600 }, { "epoch": 0.88, "learning_rate": 6.015031370536528e-06, "loss": 2.7478, "step": 265700 }, { "epoch": 0.88, "learning_rate": 5.9984769976989425e-06, "loss": 2.7355, "step": 265800 }, { "epoch": 0.88, "learning_rate": 5.981922624861357e-06, "loss": 2.7382, "step": 265900 }, { "epoch": 0.88, "learning_rate": 5.965368252023772e-06, "loss": 2.7416, "step": 266000 }, { "epoch": 0.88, "learning_rate": 5.948813879186188e-06, "loss": 2.7356, "step": 266100 }, { "epoch": 0.88, "learning_rate": 5.9322595063486026e-06, "loss": 2.7427, "step": 266200 }, { "epoch": 0.88, "learning_rate": 5.915705133511017e-06, "loss": 2.7164, "step": 266300 }, { "epoch": 0.88, "learning_rate": 5.899150760673432e-06, "loss": 2.7473, "step": 266400 }, { "epoch": 0.88, "learning_rate": 5.882596387835848e-06, "loss": 2.7295, "step": 266500 }, { "epoch": 0.88, "learning_rate": 5.866042014998262e-06, "loss": 2.7366, "step": 266600 }, { "epoch": 0.88, "learning_rate": 5.8494876421606766e-06, "loss": 2.7421, "step": 266700 }, { "epoch": 0.88, "learning_rate": 5.832933269323091e-06, "loss": 2.7299, "step": 266800 }, { "epoch": 0.88, "learning_rate": 5.816378896485507e-06, "loss": 2.7329, "step": 266900 }, { "epoch": 0.88, "learning_rate": 5.799824523647922e-06, "loss": 2.7357, "step": 267000 }, { "epoch": 0.88, "learning_rate": 5.783270150810337e-06, "loss": 2.7302, "step": 267100 }, { "epoch": 0.88, "learning_rate": 5.766715777972751e-06, "loss": 2.7354, "step": 267200 }, { "epoch": 0.88, "learning_rate": 5.750161405135167e-06, "loss": 2.7416, "step": 267300 }, { "epoch": 0.89, "learning_rate": 5.733607032297582e-06, "loss": 2.7393, "step": 267400 }, { "epoch": 0.89, "learning_rate": 5.717052659459997e-06, "loss": 2.7399, "step": 267500 }, { "epoch": 0.89, "learning_rate": 5.7004982866224115e-06, "loss": 2.738, "step": 267600 }, { "epoch": 0.89, "learning_rate": 5.683943913784827e-06, "loss": 2.7331, "step": 267700 }, { "epoch": 0.89, "learning_rate": 5.667389540947241e-06, "loss": 2.7401, "step": 267800 }, { "epoch": 0.89, "learning_rate": 5.650835168109656e-06, "loss": 2.7278, "step": 267900 }, { "epoch": 0.89, "learning_rate": 5.6342807952720715e-06, "loss": 2.7334, "step": 268000 }, { "epoch": 0.89, "learning_rate": 5.617726422434486e-06, "loss": 2.7406, "step": 268100 }, { "epoch": 0.89, "learning_rate": 5.601172049596901e-06, "loss": 2.7436, "step": 268200 }, { "epoch": 0.89, "learning_rate": 5.584617676759316e-06, "loss": 2.7337, "step": 268300 }, { "epoch": 0.89, "learning_rate": 5.568063303921732e-06, "loss": 2.7283, "step": 268400 }, { "epoch": 0.89, "learning_rate": 5.551508931084146e-06, "loss": 2.7433, "step": 268500 }, { "epoch": 0.89, "learning_rate": 5.534954558246561e-06, "loss": 2.7405, "step": 268600 }, { "epoch": 0.89, "learning_rate": 5.518400185408976e-06, "loss": 2.7498, "step": 268700 }, { "epoch": 0.89, "learning_rate": 5.501845812571392e-06, "loss": 2.7533, "step": 268800 }, { "epoch": 0.89, "learning_rate": 5.485291439733806e-06, "loss": 2.7439, "step": 268900 }, { "epoch": 0.89, "learning_rate": 5.46873706689622e-06, "loss": 2.7233, "step": 269000 }, { "epoch": 0.89, "learning_rate": 5.452182694058636e-06, "loss": 2.7346, "step": 269100 }, { "epoch": 0.89, "learning_rate": 5.435628321221051e-06, "loss": 2.7388, "step": 269200 }, { "epoch": 0.89, "learning_rate": 5.419073948383466e-06, "loss": 2.7291, "step": 269300 }, { "epoch": 0.89, "learning_rate": 5.4025195755458805e-06, "loss": 2.7388, "step": 269400 }, { "epoch": 0.89, "learning_rate": 5.385965202708296e-06, "loss": 2.7378, "step": 269500 }, { "epoch": 0.89, "learning_rate": 5.369410829870711e-06, "loss": 2.7322, "step": 269600 }, { "epoch": 0.89, "learning_rate": 5.352856457033126e-06, "loss": 2.7328, "step": 269700 }, { "epoch": 0.89, "learning_rate": 5.3363020841955405e-06, "loss": 2.7428, "step": 269800 }, { "epoch": 0.89, "learning_rate": 5.319747711357956e-06, "loss": 2.739, "step": 269900 }, { "epoch": 0.89, "learning_rate": 5.30319333852037e-06, "loss": 2.7348, "step": 270000 }, { "epoch": 0.89, "learning_rate": 5.286638965682785e-06, "loss": 2.7324, "step": 270100 }, { "epoch": 0.89, "learning_rate": 5.2700845928452006e-06, "loss": 2.7443, "step": 270200 }, { "epoch": 0.89, "learning_rate": 5.253530220007615e-06, "loss": 2.7262, "step": 270300 }, { "epoch": 0.9, "learning_rate": 5.23697584717003e-06, "loss": 2.7411, "step": 270400 }, { "epoch": 0.9, "learning_rate": 5.220421474332445e-06, "loss": 2.7476, "step": 270500 }, { "epoch": 0.9, "learning_rate": 5.203867101494861e-06, "loss": 2.7523, "step": 270600 }, { "epoch": 0.9, "learning_rate": 5.187312728657275e-06, "loss": 2.7257, "step": 270700 }, { "epoch": 0.9, "learning_rate": 5.17075835581969e-06, "loss": 2.7373, "step": 270800 }, { "epoch": 0.9, "learning_rate": 5.154203982982105e-06, "loss": 2.7168, "step": 270900 }, { "epoch": 0.9, "learning_rate": 5.137649610144521e-06, "loss": 2.7335, "step": 271000 }, { "epoch": 0.9, "learning_rate": 5.121095237306935e-06, "loss": 2.7365, "step": 271100 }, { "epoch": 0.9, "learning_rate": 5.1045408644693494e-06, "loss": 2.747, "step": 271200 }, { "epoch": 0.9, "learning_rate": 5.087986491631764e-06, "loss": 2.7509, "step": 271300 }, { "epoch": 0.9, "learning_rate": 5.07143211879418e-06, "loss": 2.7305, "step": 271400 }, { "epoch": 0.9, "learning_rate": 5.054877745956595e-06, "loss": 2.7419, "step": 271500 }, { "epoch": 0.9, "learning_rate": 5.0383233731190095e-06, "loss": 2.7257, "step": 271600 }, { "epoch": 0.9, "learning_rate": 5.021769000281424e-06, "loss": 2.7411, "step": 271700 }, { "epoch": 0.9, "learning_rate": 5.00521462744384e-06, "loss": 2.7345, "step": 271800 }, { "epoch": 0.9, "learning_rate": 4.988660254606255e-06, "loss": 2.7409, "step": 271900 }, { "epoch": 0.9, "learning_rate": 4.9721058817686695e-06, "loss": 2.7297, "step": 272000 }, { "epoch": 0.9, "learning_rate": 4.955551508931084e-06, "loss": 2.7472, "step": 272100 }, { "epoch": 0.9, "learning_rate": 4.938997136093499e-06, "loss": 2.7425, "step": 272200 }, { "epoch": 0.9, "learning_rate": 4.922442763255914e-06, "loss": 2.736, "step": 272300 }, { "epoch": 0.9, "learning_rate": 4.905888390418329e-06, "loss": 2.7397, "step": 272400 }, { "epoch": 0.9, "learning_rate": 4.889334017580744e-06, "loss": 2.7353, "step": 272500 }, { "epoch": 0.9, "learning_rate": 4.872779644743159e-06, "loss": 2.7474, "step": 272600 }, { "epoch": 0.9, "learning_rate": 4.856225271905574e-06, "loss": 2.7411, "step": 272700 }, { "epoch": 0.9, "learning_rate": 4.839670899067989e-06, "loss": 2.7318, "step": 272800 }, { "epoch": 0.9, "learning_rate": 4.8231165262304045e-06, "loss": 2.7461, "step": 272900 }, { "epoch": 0.9, "learning_rate": 4.806562153392819e-06, "loss": 2.7363, "step": 273000 }, { "epoch": 0.9, "learning_rate": 4.790007780555234e-06, "loss": 2.7459, "step": 273100 }, { "epoch": 0.9, "learning_rate": 4.773453407717649e-06, "loss": 2.7409, "step": 273200 }, { "epoch": 0.9, "learning_rate": 4.756899034880064e-06, "loss": 2.7221, "step": 273300 }, { "epoch": 0.91, "learning_rate": 4.7403446620424785e-06, "loss": 2.7421, "step": 273400 }, { "epoch": 0.91, "learning_rate": 4.723790289204893e-06, "loss": 2.7324, "step": 273500 }, { "epoch": 0.91, "learning_rate": 4.707235916367309e-06, "loss": 2.7382, "step": 273600 }, { "epoch": 0.91, "learning_rate": 4.690681543529724e-06, "loss": 2.7275, "step": 273700 }, { "epoch": 0.91, "learning_rate": 4.6741271706921385e-06, "loss": 2.7431, "step": 273800 }, { "epoch": 0.91, "learning_rate": 4.657572797854553e-06, "loss": 2.7302, "step": 273900 }, { "epoch": 0.91, "learning_rate": 4.641018425016969e-06, "loss": 2.7286, "step": 274000 }, { "epoch": 0.91, "learning_rate": 4.624464052179384e-06, "loss": 2.7352, "step": 274100 }, { "epoch": 0.91, "learning_rate": 4.6079096793417986e-06, "loss": 2.7317, "step": 274200 }, { "epoch": 0.91, "learning_rate": 4.591355306504213e-06, "loss": 2.7463, "step": 274300 }, { "epoch": 0.91, "learning_rate": 4.574800933666628e-06, "loss": 2.7348, "step": 274400 }, { "epoch": 0.91, "learning_rate": 4.558246560829043e-06, "loss": 2.735, "step": 274500 }, { "epoch": 0.91, "learning_rate": 4.541692187991458e-06, "loss": 2.7315, "step": 274600 }, { "epoch": 0.91, "learning_rate": 4.525137815153873e-06, "loss": 2.7368, "step": 274700 }, { "epoch": 0.91, "learning_rate": 4.508583442316288e-06, "loss": 2.7232, "step": 274800 }, { "epoch": 0.91, "learning_rate": 4.492029069478703e-06, "loss": 2.7443, "step": 274900 }, { "epoch": 0.91, "learning_rate": 4.475474696641118e-06, "loss": 2.7352, "step": 275000 }, { "epoch": 0.91, "learning_rate": 4.458920323803533e-06, "loss": 2.723, "step": 275100 }, { "epoch": 0.91, "learning_rate": 4.442365950965948e-06, "loss": 2.7207, "step": 275200 }, { "epoch": 0.91, "learning_rate": 4.425811578128363e-06, "loss": 2.7361, "step": 275300 }, { "epoch": 0.91, "learning_rate": 4.409257205290778e-06, "loss": 2.7261, "step": 275400 }, { "epoch": 0.91, "learning_rate": 4.392702832453193e-06, "loss": 2.7444, "step": 275500 }, { "epoch": 0.91, "learning_rate": 4.3761484596156075e-06, "loss": 2.7391, "step": 275600 }, { "epoch": 0.91, "learning_rate": 4.359594086778022e-06, "loss": 2.7396, "step": 275700 }, { "epoch": 0.91, "learning_rate": 4.343039713940437e-06, "loss": 2.7344, "step": 275800 }, { "epoch": 0.91, "learning_rate": 4.326485341102853e-06, "loss": 2.728, "step": 275900 }, { "epoch": 0.91, "learning_rate": 4.3099309682652675e-06, "loss": 2.7429, "step": 276000 }, { "epoch": 0.91, "learning_rate": 4.293376595427682e-06, "loss": 2.742, "step": 276100 }, { "epoch": 0.91, "learning_rate": 4.276822222590097e-06, "loss": 2.7351, "step": 276200 }, { "epoch": 0.91, "learning_rate": 4.260267849752513e-06, "loss": 2.7393, "step": 276300 }, { "epoch": 0.92, "learning_rate": 4.243713476914928e-06, "loss": 2.7563, "step": 276400 }, { "epoch": 0.92, "learning_rate": 4.227159104077342e-06, "loss": 2.7337, "step": 276500 }, { "epoch": 0.92, "learning_rate": 4.210604731239757e-06, "loss": 2.726, "step": 276600 }, { "epoch": 0.92, "learning_rate": 4.194050358402172e-06, "loss": 2.7363, "step": 276700 }, { "epoch": 0.92, "learning_rate": 4.177495985564587e-06, "loss": 2.7425, "step": 276800 }, { "epoch": 0.92, "learning_rate": 4.160941612727002e-06, "loss": 2.7301, "step": 276900 }, { "epoch": 0.92, "learning_rate": 4.144387239889417e-06, "loss": 2.7295, "step": 277000 }, { "epoch": 0.92, "learning_rate": 4.127832867051832e-06, "loss": 2.7122, "step": 277100 }, { "epoch": 0.92, "learning_rate": 4.111278494214247e-06, "loss": 2.7384, "step": 277200 }, { "epoch": 0.92, "learning_rate": 4.094724121376662e-06, "loss": 2.7365, "step": 277300 }, { "epoch": 0.92, "learning_rate": 4.078169748539077e-06, "loss": 2.7418, "step": 277400 }, { "epoch": 0.92, "learning_rate": 4.061615375701492e-06, "loss": 2.7417, "step": 277500 }, { "epoch": 0.92, "learning_rate": 4.045061002863907e-06, "loss": 2.7324, "step": 277600 }, { "epoch": 0.92, "learning_rate": 4.028506630026322e-06, "loss": 2.7328, "step": 277700 }, { "epoch": 0.92, "learning_rate": 4.0119522571887365e-06, "loss": 2.7419, "step": 277800 }, { "epoch": 0.92, "learning_rate": 3.995397884351151e-06, "loss": 2.7172, "step": 277900 }, { "epoch": 0.92, "learning_rate": 3.978843511513566e-06, "loss": 2.7298, "step": 278000 }, { "epoch": 0.92, "learning_rate": 3.962289138675982e-06, "loss": 2.7424, "step": 278100 }, { "epoch": 0.92, "learning_rate": 3.945734765838397e-06, "loss": 2.7266, "step": 278200 }, { "epoch": 0.92, "learning_rate": 3.929180393000811e-06, "loss": 2.7334, "step": 278300 }, { "epoch": 0.92, "learning_rate": 3.912626020163226e-06, "loss": 2.7369, "step": 278400 }, { "epoch": 0.92, "learning_rate": 3.896071647325642e-06, "loss": 2.7563, "step": 278500 }, { "epoch": 0.92, "learning_rate": 3.879517274488057e-06, "loss": 2.745, "step": 278600 }, { "epoch": 0.92, "learning_rate": 3.8629629016504714e-06, "loss": 2.7261, "step": 278700 }, { "epoch": 0.92, "learning_rate": 3.846408528812885e-06, "loss": 2.7249, "step": 278800 }, { "epoch": 0.92, "learning_rate": 3.829854155975301e-06, "loss": 2.7397, "step": 278900 }, { "epoch": 0.92, "learning_rate": 3.813299783137716e-06, "loss": 2.7258, "step": 279000 }, { "epoch": 0.92, "learning_rate": 3.7967454103001306e-06, "loss": 2.7307, "step": 279100 }, { "epoch": 0.92, "learning_rate": 3.780191037462546e-06, "loss": 2.7378, "step": 279200 }, { "epoch": 0.92, "learning_rate": 3.7636366646249607e-06, "loss": 2.7424, "step": 279300 }, { "epoch": 0.93, "learning_rate": 3.747082291787376e-06, "loss": 2.7412, "step": 279400 }, { "epoch": 0.93, "learning_rate": 3.7305279189497907e-06, "loss": 2.7267, "step": 279500 }, { "epoch": 0.93, "learning_rate": 3.713973546112206e-06, "loss": 2.7356, "step": 279600 }, { "epoch": 0.93, "learning_rate": 3.6974191732746207e-06, "loss": 2.7257, "step": 279700 }, { "epoch": 0.93, "learning_rate": 3.680864800437036e-06, "loss": 2.737, "step": 279800 }, { "epoch": 0.93, "learning_rate": 3.6643104275994503e-06, "loss": 2.7216, "step": 279900 }, { "epoch": 0.93, "learning_rate": 3.647756054761865e-06, "loss": 2.7354, "step": 280000 }, { "epoch": 0.93, "learning_rate": 3.6312016819242804e-06, "loss": 2.729, "step": 280100 }, { "epoch": 0.93, "learning_rate": 3.614647309086695e-06, "loss": 2.7448, "step": 280200 }, { "epoch": 0.93, "learning_rate": 3.5980929362491104e-06, "loss": 2.7415, "step": 280300 }, { "epoch": 0.93, "learning_rate": 3.581538563411525e-06, "loss": 2.7357, "step": 280400 }, { "epoch": 0.93, "learning_rate": 3.5649841905739404e-06, "loss": 2.7372, "step": 280500 }, { "epoch": 0.93, "learning_rate": 3.5484298177363552e-06, "loss": 2.7216, "step": 280600 }, { "epoch": 0.93, "learning_rate": 3.5318754448987704e-06, "loss": 2.7203, "step": 280700 }, { "epoch": 0.93, "learning_rate": 3.5153210720611852e-06, "loss": 2.7401, "step": 280800 }, { "epoch": 0.93, "learning_rate": 3.4987666992236005e-06, "loss": 2.7221, "step": 280900 }, { "epoch": 0.93, "learning_rate": 3.482212326386015e-06, "loss": 2.736, "step": 281000 }, { "epoch": 0.93, "learning_rate": 3.4656579535484297e-06, "loss": 2.7281, "step": 281100 }, { "epoch": 0.93, "learning_rate": 3.449103580710845e-06, "loss": 2.7203, "step": 281200 }, { "epoch": 0.93, "learning_rate": 3.4325492078732597e-06, "loss": 2.7379, "step": 281300 }, { "epoch": 0.93, "learning_rate": 3.415994835035675e-06, "loss": 2.7164, "step": 281400 }, { "epoch": 0.93, "learning_rate": 3.3994404621980897e-06, "loss": 2.7324, "step": 281500 }, { "epoch": 0.93, "learning_rate": 3.382886089360505e-06, "loss": 2.7403, "step": 281600 }, { "epoch": 0.93, "learning_rate": 3.3663317165229197e-06, "loss": 2.7298, "step": 281700 }, { "epoch": 0.93, "learning_rate": 3.349777343685335e-06, "loss": 2.7217, "step": 281800 }, { "epoch": 0.93, "learning_rate": 3.3332229708477498e-06, "loss": 2.7271, "step": 281900 }, { "epoch": 0.93, "learning_rate": 3.316668598010165e-06, "loss": 2.731, "step": 282000 }, { "epoch": 0.93, "learning_rate": 3.3001142251725794e-06, "loss": 2.7471, "step": 282100 }, { "epoch": 0.93, "learning_rate": 3.283559852334994e-06, "loss": 2.742, "step": 282200 }, { "epoch": 0.93, "learning_rate": 3.2670054794974094e-06, "loss": 2.7289, "step": 282300 }, { "epoch": 0.93, "learning_rate": 3.250451106659824e-06, "loss": 2.7371, "step": 282400 }, { "epoch": 0.94, "learning_rate": 3.2338967338222394e-06, "loss": 2.7206, "step": 282500 }, { "epoch": 0.94, "learning_rate": 3.2173423609846542e-06, "loss": 2.7245, "step": 282600 }, { "epoch": 0.94, "learning_rate": 3.2007879881470694e-06, "loss": 2.7303, "step": 282700 }, { "epoch": 0.94, "learning_rate": 3.1842336153094842e-06, "loss": 2.7485, "step": 282800 }, { "epoch": 0.94, "learning_rate": 3.1676792424718995e-06, "loss": 2.7394, "step": 282900 }, { "epoch": 0.94, "learning_rate": 3.1511248696343143e-06, "loss": 2.7414, "step": 283000 }, { "epoch": 0.94, "learning_rate": 3.1345704967967295e-06, "loss": 2.7328, "step": 283100 }, { "epoch": 0.94, "learning_rate": 3.118016123959144e-06, "loss": 2.7281, "step": 283200 }, { "epoch": 0.94, "learning_rate": 3.101461751121559e-06, "loss": 2.7444, "step": 283300 }, { "epoch": 0.94, "learning_rate": 3.084907378283974e-06, "loss": 2.7386, "step": 283400 }, { "epoch": 0.94, "learning_rate": 3.0683530054463887e-06, "loss": 2.743, "step": 283500 }, { "epoch": 0.94, "learning_rate": 3.0517986326088035e-06, "loss": 2.7329, "step": 283600 }, { "epoch": 0.94, "learning_rate": 3.0352442597712187e-06, "loss": 2.7217, "step": 283700 }, { "epoch": 0.94, "learning_rate": 3.0186898869336335e-06, "loss": 2.7233, "step": 283800 }, { "epoch": 0.94, "learning_rate": 3.0021355140960488e-06, "loss": 2.7362, "step": 283900 }, { "epoch": 0.94, "learning_rate": 2.9855811412584636e-06, "loss": 2.7451, "step": 284000 }, { "epoch": 0.94, "learning_rate": 2.9690267684208784e-06, "loss": 2.7458, "step": 284100 }, { "epoch": 0.94, "learning_rate": 2.9524723955832936e-06, "loss": 2.727, "step": 284200 }, { "epoch": 0.94, "learning_rate": 2.9359180227457084e-06, "loss": 2.7372, "step": 284300 }, { "epoch": 0.94, "learning_rate": 2.9193636499081236e-06, "loss": 2.7383, "step": 284400 }, { "epoch": 0.94, "learning_rate": 2.9028092770705384e-06, "loss": 2.7412, "step": 284500 }, { "epoch": 0.94, "learning_rate": 2.8862549042329532e-06, "loss": 2.7317, "step": 284600 }, { "epoch": 0.94, "learning_rate": 2.869700531395368e-06, "loss": 2.7281, "step": 284700 }, { "epoch": 0.94, "learning_rate": 2.8531461585577833e-06, "loss": 2.7397, "step": 284800 }, { "epoch": 0.94, "learning_rate": 2.836591785720198e-06, "loss": 2.7326, "step": 284900 }, { "epoch": 0.94, "learning_rate": 2.8200374128826133e-06, "loss": 2.7373, "step": 285000 }, { "epoch": 0.94, "learning_rate": 2.8034830400450277e-06, "loss": 2.7374, "step": 285100 }, { "epoch": 0.94, "learning_rate": 2.786928667207443e-06, "loss": 2.7386, "step": 285200 }, { "epoch": 0.94, "learning_rate": 2.7703742943698577e-06, "loss": 2.7408, "step": 285300 }, { "epoch": 0.94, "learning_rate": 2.753819921532273e-06, "loss": 2.7174, "step": 285400 }, { "epoch": 0.95, "learning_rate": 2.7372655486946877e-06, "loss": 2.7216, "step": 285500 }, { "epoch": 0.95, "learning_rate": 2.720711175857103e-06, "loss": 2.7304, "step": 285600 }, { "epoch": 0.95, "learning_rate": 2.7041568030195177e-06, "loss": 2.7313, "step": 285700 }, { "epoch": 0.95, "learning_rate": 2.6876024301819325e-06, "loss": 2.7299, "step": 285800 }, { "epoch": 0.95, "learning_rate": 2.6710480573443478e-06, "loss": 2.739, "step": 285900 }, { "epoch": 0.95, "learning_rate": 2.6544936845067626e-06, "loss": 2.7279, "step": 286000 }, { "epoch": 0.95, "learning_rate": 2.637939311669178e-06, "loss": 2.7419, "step": 286100 }, { "epoch": 0.95, "learning_rate": 2.621384938831592e-06, "loss": 2.7352, "step": 286200 }, { "epoch": 0.95, "learning_rate": 2.6048305659940074e-06, "loss": 2.7313, "step": 286300 }, { "epoch": 0.95, "learning_rate": 2.588276193156422e-06, "loss": 2.7266, "step": 286400 }, { "epoch": 0.95, "learning_rate": 2.5717218203188374e-06, "loss": 2.7352, "step": 286500 }, { "epoch": 0.95, "learning_rate": 2.5551674474812522e-06, "loss": 2.7312, "step": 286600 }, { "epoch": 0.95, "learning_rate": 2.5386130746436675e-06, "loss": 2.7432, "step": 286700 }, { "epoch": 0.95, "learning_rate": 2.5220587018060823e-06, "loss": 2.7407, "step": 286800 }, { "epoch": 0.95, "learning_rate": 2.505504328968497e-06, "loss": 2.7404, "step": 286900 }, { "epoch": 0.95, "learning_rate": 2.4889499561309123e-06, "loss": 2.7352, "step": 287000 }, { "epoch": 0.95, "learning_rate": 2.472395583293327e-06, "loss": 2.7441, "step": 287100 }, { "epoch": 0.95, "learning_rate": 2.4558412104557423e-06, "loss": 2.7291, "step": 287200 }, { "epoch": 0.95, "learning_rate": 2.4392868376181567e-06, "loss": 2.7308, "step": 287300 }, { "epoch": 0.95, "learning_rate": 2.422732464780572e-06, "loss": 2.7368, "step": 287400 }, { "epoch": 0.95, "learning_rate": 2.4061780919429867e-06, "loss": 2.7406, "step": 287500 }, { "epoch": 0.95, "learning_rate": 2.389623719105402e-06, "loss": 2.7285, "step": 287600 }, { "epoch": 0.95, "learning_rate": 2.3730693462678167e-06, "loss": 2.7284, "step": 287700 }, { "epoch": 0.95, "learning_rate": 2.356514973430232e-06, "loss": 2.7394, "step": 287800 }, { "epoch": 0.95, "learning_rate": 2.3399606005926464e-06, "loss": 2.731, "step": 287900 }, { "epoch": 0.95, "learning_rate": 2.3234062277550616e-06, "loss": 2.7311, "step": 288000 }, { "epoch": 0.95, "learning_rate": 2.3068518549174764e-06, "loss": 2.7339, "step": 288100 }, { "epoch": 0.95, "learning_rate": 2.2902974820798916e-06, "loss": 2.7303, "step": 288200 }, { "epoch": 0.95, "learning_rate": 2.2737431092423064e-06, "loss": 2.7417, "step": 288300 }, { "epoch": 0.95, "learning_rate": 2.2571887364047216e-06, "loss": 2.7297, "step": 288400 }, { "epoch": 0.96, "learning_rate": 2.2406343635671364e-06, "loss": 2.7391, "step": 288500 }, { "epoch": 0.96, "learning_rate": 2.2240799907295512e-06, "loss": 2.7378, "step": 288600 }, { "epoch": 0.96, "learning_rate": 2.2075256178919665e-06, "loss": 2.7321, "step": 288700 }, { "epoch": 0.96, "learning_rate": 2.1909712450543813e-06, "loss": 2.7319, "step": 288800 }, { "epoch": 0.96, "learning_rate": 2.1744168722167965e-06, "loss": 2.7372, "step": 288900 }, { "epoch": 0.96, "learning_rate": 2.157862499379211e-06, "loss": 2.7209, "step": 289000 }, { "epoch": 0.96, "learning_rate": 2.141308126541626e-06, "loss": 2.7321, "step": 289100 }, { "epoch": 0.96, "learning_rate": 2.124753753704041e-06, "loss": 2.7389, "step": 289200 }, { "epoch": 0.96, "learning_rate": 2.108199380866456e-06, "loss": 2.7366, "step": 289300 }, { "epoch": 0.96, "learning_rate": 2.091645008028871e-06, "loss": 2.7324, "step": 289400 }, { "epoch": 0.96, "learning_rate": 2.075090635191286e-06, "loss": 2.7404, "step": 289500 }, { "epoch": 0.96, "learning_rate": 2.0585362623537005e-06, "loss": 2.7326, "step": 289600 }, { "epoch": 0.96, "learning_rate": 2.0419818895161158e-06, "loss": 2.7395, "step": 289700 }, { "epoch": 0.96, "learning_rate": 2.0254275166785306e-06, "loss": 2.7369, "step": 289800 }, { "epoch": 0.96, "learning_rate": 2.0088731438409458e-06, "loss": 2.7126, "step": 289900 }, { "epoch": 0.96, "learning_rate": 1.9923187710033606e-06, "loss": 2.7309, "step": 290000 }, { "epoch": 0.96, "learning_rate": 1.9757643981657754e-06, "loss": 2.7405, "step": 290100 }, { "epoch": 0.96, "learning_rate": 1.9592100253281906e-06, "loss": 2.7304, "step": 290200 }, { "epoch": 0.96, "learning_rate": 1.9426556524906054e-06, "loss": 2.7162, "step": 290300 }, { "epoch": 0.96, "learning_rate": 1.9261012796530206e-06, "loss": 2.7207, "step": 290400 }, { "epoch": 0.96, "learning_rate": 1.9095469068154354e-06, "loss": 2.7285, "step": 290500 }, { "epoch": 0.96, "learning_rate": 1.8929925339778505e-06, "loss": 2.7339, "step": 290600 }, { "epoch": 0.96, "learning_rate": 1.8764381611402653e-06, "loss": 2.7157, "step": 290700 }, { "epoch": 0.96, "learning_rate": 1.8598837883026803e-06, "loss": 2.7484, "step": 290800 }, { "epoch": 0.96, "learning_rate": 1.8433294154650953e-06, "loss": 2.7351, "step": 290900 }, { "epoch": 0.96, "learning_rate": 1.8267750426275103e-06, "loss": 2.7233, "step": 291000 }, { "epoch": 0.96, "learning_rate": 1.8102206697899253e-06, "loss": 2.7171, "step": 291100 }, { "epoch": 0.96, "learning_rate": 1.79366629695234e-06, "loss": 2.7234, "step": 291200 }, { "epoch": 0.96, "learning_rate": 1.777111924114755e-06, "loss": 2.7388, "step": 291300 }, { "epoch": 0.96, "learning_rate": 1.76055755127717e-06, "loss": 2.7344, "step": 291400 }, { "epoch": 0.97, "learning_rate": 1.744003178439585e-06, "loss": 2.7277, "step": 291500 }, { "epoch": 0.97, "learning_rate": 1.727448805602e-06, "loss": 2.7194, "step": 291600 }, { "epoch": 0.97, "learning_rate": 1.710894432764415e-06, "loss": 2.7352, "step": 291700 }, { "epoch": 0.97, "learning_rate": 1.6943400599268296e-06, "loss": 2.7301, "step": 291800 }, { "epoch": 0.97, "learning_rate": 1.6777856870892446e-06, "loss": 2.7346, "step": 291900 }, { "epoch": 0.97, "learning_rate": 1.6612313142516596e-06, "loss": 2.7297, "step": 292000 }, { "epoch": 0.97, "learning_rate": 1.6446769414140746e-06, "loss": 2.7534, "step": 292100 }, { "epoch": 0.97, "learning_rate": 1.6281225685764896e-06, "loss": 2.7288, "step": 292200 }, { "epoch": 0.97, "learning_rate": 1.6115681957389044e-06, "loss": 2.73, "step": 292300 }, { "epoch": 0.97, "learning_rate": 1.5950138229013194e-06, "loss": 2.7348, "step": 292400 }, { "epoch": 0.97, "learning_rate": 1.5784594500637344e-06, "loss": 2.7241, "step": 292500 }, { "epoch": 0.97, "learning_rate": 1.5619050772261495e-06, "loss": 2.7265, "step": 292600 }, { "epoch": 0.97, "learning_rate": 1.5453507043885643e-06, "loss": 2.7312, "step": 292700 }, { "epoch": 0.97, "learning_rate": 1.5287963315509793e-06, "loss": 2.7316, "step": 292800 }, { "epoch": 0.97, "learning_rate": 1.5122419587133943e-06, "loss": 2.7209, "step": 292900 }, { "epoch": 0.97, "learning_rate": 1.495687585875809e-06, "loss": 2.7243, "step": 293000 }, { "epoch": 0.97, "learning_rate": 1.479133213038224e-06, "loss": 2.7239, "step": 293100 }, { "epoch": 0.97, "learning_rate": 1.4625788402006391e-06, "loss": 2.7234, "step": 293200 }, { "epoch": 0.97, "learning_rate": 1.446024467363054e-06, "loss": 2.7389, "step": 293300 }, { "epoch": 0.97, "learning_rate": 1.429470094525469e-06, "loss": 2.7244, "step": 293400 }, { "epoch": 0.97, "learning_rate": 1.412915721687884e-06, "loss": 2.734, "step": 293500 }, { "epoch": 0.97, "learning_rate": 1.3963613488502987e-06, "loss": 2.715, "step": 293600 }, { "epoch": 0.97, "learning_rate": 1.3798069760127138e-06, "loss": 2.7331, "step": 293700 }, { "epoch": 0.97, "learning_rate": 1.3632526031751288e-06, "loss": 2.7285, "step": 293800 }, { "epoch": 0.97, "learning_rate": 1.3466982303375438e-06, "loss": 2.7248, "step": 293900 }, { "epoch": 0.97, "learning_rate": 1.3301438574999588e-06, "loss": 2.7133, "step": 294000 }, { "epoch": 0.97, "learning_rate": 1.3135894846623736e-06, "loss": 2.7509, "step": 294100 }, { "epoch": 0.97, "learning_rate": 1.2970351118247886e-06, "loss": 2.7434, "step": 294200 }, { "epoch": 0.97, "learning_rate": 1.2804807389872036e-06, "loss": 2.7196, "step": 294300 }, { "epoch": 0.97, "learning_rate": 1.2639263661496184e-06, "loss": 2.7294, "step": 294400 }, { "epoch": 0.98, "learning_rate": 1.2473719933120334e-06, "loss": 2.73, "step": 294500 }, { "epoch": 0.98, "learning_rate": 1.2308176204744485e-06, "loss": 2.7257, "step": 294600 }, { "epoch": 0.98, "learning_rate": 1.2142632476368633e-06, "loss": 2.7491, "step": 294700 }, { "epoch": 0.98, "learning_rate": 1.1977088747992783e-06, "loss": 2.7451, "step": 294800 }, { "epoch": 0.98, "learning_rate": 1.181154501961693e-06, "loss": 2.743, "step": 294900 }, { "epoch": 0.98, "learning_rate": 1.164600129124108e-06, "loss": 2.736, "step": 295000 }, { "epoch": 0.98, "learning_rate": 1.148045756286523e-06, "loss": 2.7331, "step": 295100 }, { "epoch": 0.98, "learning_rate": 1.1314913834489381e-06, "loss": 2.7366, "step": 295200 }, { "epoch": 0.98, "learning_rate": 1.1149370106113531e-06, "loss": 2.7489, "step": 295300 }, { "epoch": 0.98, "learning_rate": 1.0983826377737681e-06, "loss": 2.7302, "step": 295400 }, { "epoch": 0.98, "learning_rate": 1.081828264936183e-06, "loss": 2.7292, "step": 295500 }, { "epoch": 0.98, "learning_rate": 1.065273892098598e-06, "loss": 2.7351, "step": 295600 }, { "epoch": 0.98, "learning_rate": 1.048719519261013e-06, "loss": 2.7286, "step": 295700 }, { "epoch": 0.98, "learning_rate": 1.0321651464234278e-06, "loss": 2.7297, "step": 295800 }, { "epoch": 0.98, "learning_rate": 1.0156107735858428e-06, "loss": 2.7197, "step": 295900 }, { "epoch": 0.98, "learning_rate": 9.990564007482576e-07, "loss": 2.7266, "step": 296000 }, { "epoch": 0.98, "learning_rate": 9.825020279106726e-07, "loss": 2.7281, "step": 296100 }, { "epoch": 0.98, "learning_rate": 9.659476550730876e-07, "loss": 2.7275, "step": 296200 }, { "epoch": 0.98, "learning_rate": 9.493932822355025e-07, "loss": 2.7329, "step": 296300 }, { "epoch": 0.98, "learning_rate": 9.328389093979175e-07, "loss": 2.7183, "step": 296400 }, { "epoch": 0.98, "learning_rate": 9.162845365603326e-07, "loss": 2.7387, "step": 296500 }, { "epoch": 0.98, "learning_rate": 8.997301637227474e-07, "loss": 2.7331, "step": 296600 }, { "epoch": 0.98, "learning_rate": 8.831757908851624e-07, "loss": 2.7203, "step": 296700 }, { "epoch": 0.98, "learning_rate": 8.666214180475774e-07, "loss": 2.7411, "step": 296800 }, { "epoch": 0.98, "learning_rate": 8.500670452099922e-07, "loss": 2.7353, "step": 296900 }, { "epoch": 0.98, "learning_rate": 8.335126723724072e-07, "loss": 2.7369, "step": 297000 }, { "epoch": 0.98, "learning_rate": 8.169582995348221e-07, "loss": 2.7273, "step": 297100 }, { "epoch": 0.98, "learning_rate": 8.004039266972371e-07, "loss": 2.7433, "step": 297200 }, { "epoch": 0.98, "learning_rate": 7.838495538596521e-07, "loss": 2.7402, "step": 297300 }, { "epoch": 0.98, "learning_rate": 7.67295181022067e-07, "loss": 2.719, "step": 297400 }, { "epoch": 0.98, "learning_rate": 7.50740808184482e-07, "loss": 2.7309, "step": 297500 }, { "epoch": 0.99, "learning_rate": 7.341864353468969e-07, "loss": 2.7343, "step": 297600 }, { "epoch": 0.99, "learning_rate": 7.176320625093119e-07, "loss": 2.7083, "step": 297700 }, { "epoch": 0.99, "learning_rate": 7.010776896717268e-07, "loss": 2.7453, "step": 297800 }, { "epoch": 0.99, "learning_rate": 6.845233168341418e-07, "loss": 2.7358, "step": 297900 }, { "epoch": 0.99, "learning_rate": 6.679689439965567e-07, "loss": 2.7356, "step": 298000 }, { "epoch": 0.99, "learning_rate": 6.514145711589717e-07, "loss": 2.7361, "step": 298100 }, { "epoch": 0.99, "learning_rate": 6.348601983213866e-07, "loss": 2.7162, "step": 298200 }, { "epoch": 0.99, "learning_rate": 6.183058254838015e-07, "loss": 2.7418, "step": 298300 }, { "epoch": 0.99, "learning_rate": 6.017514526462165e-07, "loss": 2.7424, "step": 298400 }, { "epoch": 0.99, "learning_rate": 5.851970798086315e-07, "loss": 2.732, "step": 298500 }, { "epoch": 0.99, "learning_rate": 5.686427069710465e-07, "loss": 2.7347, "step": 298600 }, { "epoch": 0.99, "learning_rate": 5.520883341334614e-07, "loss": 2.7288, "step": 298700 }, { "epoch": 0.99, "learning_rate": 5.355339612958764e-07, "loss": 2.7386, "step": 298800 }, { "epoch": 0.99, "learning_rate": 5.189795884582913e-07, "loss": 2.7244, "step": 298900 }, { "epoch": 0.99, "learning_rate": 5.024252156207062e-07, "loss": 2.7334, "step": 299000 }, { "epoch": 0.99, "learning_rate": 4.858708427831211e-07, "loss": 2.7422, "step": 299100 }, { "epoch": 0.99, "learning_rate": 4.693164699455362e-07, "loss": 2.7343, "step": 299200 }, { "epoch": 0.99, "learning_rate": 4.527620971079511e-07, "loss": 2.7413, "step": 299300 }, { "epoch": 0.99, "learning_rate": 4.36207724270366e-07, "loss": 2.7256, "step": 299400 }, { "epoch": 0.99, "learning_rate": 4.19653351432781e-07, "loss": 2.721, "step": 299500 }, { "epoch": 0.99, "learning_rate": 4.0309897859519597e-07, "loss": 2.7338, "step": 299600 }, { "epoch": 0.99, "learning_rate": 3.865446057576109e-07, "loss": 2.7303, "step": 299700 }, { "epoch": 0.99, "learning_rate": 3.6999023292002584e-07, "loss": 2.7354, "step": 299800 }, { "epoch": 0.99, "learning_rate": 3.534358600824408e-07, "loss": 2.7318, "step": 299900 }, { "epoch": 0.99, "learning_rate": 3.3688148724485576e-07, "loss": 2.7319, "step": 300000 }, { "epoch": 0.99, "learning_rate": 3.2032711440727067e-07, "loss": 2.7354, "step": 300100 }, { "epoch": 0.99, "learning_rate": 3.0377274156968563e-07, "loss": 2.7311, "step": 300200 }, { "epoch": 0.99, "learning_rate": 2.872183687321006e-07, "loss": 2.7428, "step": 300300 }, { "epoch": 0.99, "learning_rate": 2.7066399589451555e-07, "loss": 2.7232, "step": 300400 }, { "epoch": 0.99, "learning_rate": 2.541096230569305e-07, "loss": 2.7255, "step": 300500 }, { "epoch": 1.0, "learning_rate": 2.3755525021934545e-07, "loss": 2.7256, "step": 300600 }, { "epoch": 1.0, "learning_rate": 2.210008773817604e-07, "loss": 2.7225, "step": 300700 }, { "epoch": 1.0, "learning_rate": 2.0444650454417534e-07, "loss": 2.7233, "step": 300800 }, { "epoch": 1.0, "learning_rate": 1.878921317065903e-07, "loss": 2.735, "step": 300900 }, { "epoch": 1.0, "learning_rate": 1.7133775886900526e-07, "loss": 2.7371, "step": 301000 }, { "epoch": 1.0, "learning_rate": 1.547833860314202e-07, "loss": 2.7366, "step": 301100 }, { "epoch": 1.0, "learning_rate": 1.3822901319383516e-07, "loss": 2.7291, "step": 301200 }, { "epoch": 1.0, "learning_rate": 1.2167464035625012e-07, "loss": 2.7364, "step": 301300 }, { "epoch": 1.0, "learning_rate": 1.0512026751866505e-07, "loss": 2.7318, "step": 301400 }, { "epoch": 1.0, "learning_rate": 8.856589468108002e-08, "loss": 2.7394, "step": 301500 }, { "epoch": 1.0, "learning_rate": 7.201152184349496e-08, "loss": 2.7233, "step": 301600 }, { "epoch": 1.0, "learning_rate": 5.545714900590991e-08, "loss": 2.7397, "step": 301700 }, { "epoch": 1.0, "learning_rate": 3.8902776168324865e-08, "loss": 2.7302, "step": 301800 }, { "epoch": 1.0, "learning_rate": 2.2348403330739815e-08, "loss": 2.7429, "step": 301900 }, { "epoch": 1.0, "learning_rate": 5.794030493154767e-09, "loss": 2.7452, "step": 302000 } ], "max_steps": 302035, "num_train_epochs": 1, "total_flos": 8.184418510307328e+18, "trial_name": null, "trial_params": null }