{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.4040931527969463, "global_step": 65000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 5e-05, "loss": 2.2985, "step": 100 }, { "epoch": 0.0, "learning_rate": 0.0001, "loss": 2.0231, "step": 200 }, { "epoch": 0.0, "learning_rate": 0.00015, "loss": 1.877, "step": 300 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 1.8394, "step": 400 }, { "epoch": 0.0, "learning_rate": 0.00025, "loss": 1.7965, "step": 500 }, { "epoch": 0.0, "learning_rate": 0.0003, "loss": 1.7915, "step": 600 }, { "epoch": 0.0, "learning_rate": 0.00035, "loss": 1.7553, "step": 700 }, { "epoch": 0.0, "learning_rate": 0.0004, "loss": 1.7212, "step": 800 }, { "epoch": 0.01, "learning_rate": 0.00045000000000000004, "loss": 1.7203, "step": 900 }, { "epoch": 0.01, "learning_rate": 0.0005, "loss": 1.6887, "step": 1000 }, { "epoch": 0.01, "learning_rate": 0.00055, "loss": 1.6692, "step": 1100 }, { "epoch": 0.01, "learning_rate": 0.0006, "loss": 1.6935, "step": 1200 }, { "epoch": 0.01, "learning_rate": 0.0006500000000000001, "loss": 1.6906, "step": 1300 }, { "epoch": 0.01, "learning_rate": 0.0007, "loss": 1.6797, "step": 1400 }, { "epoch": 0.01, "learning_rate": 0.00075, "loss": 1.6568, "step": 1500 }, { "epoch": 0.01, "learning_rate": 0.0008, "loss": 1.6602, "step": 1600 }, { "epoch": 0.01, "learning_rate": 0.00085, "loss": 1.6474, "step": 1700 }, { "epoch": 0.01, "learning_rate": 0.0009000000000000001, "loss": 1.6446, "step": 1800 }, { "epoch": 0.01, "learning_rate": 0.00095, "loss": 1.6337, "step": 1900 }, { "epoch": 0.01, "learning_rate": 0.001, "loss": 1.6399, "step": 2000 }, { "epoch": 0.01, "learning_rate": 0.0009993704911428105, "loss": 1.6011, "step": 2100 }, { "epoch": 0.01, "learning_rate": 0.0009987409822856208, "loss": 1.6253, "step": 2200 }, { "epoch": 0.01, "learning_rate": 0.000998111473428431, "loss": 1.6252, "step": 2300 }, { "epoch": 0.01, "learning_rate": 0.0009974819645712416, "loss": 1.603, "step": 2400 }, { "epoch": 0.02, "learning_rate": 0.000996852455714052, "loss": 1.5798, "step": 2500 }, { "epoch": 0.02, "learning_rate": 0.0009962229468568624, "loss": 1.5986, "step": 2600 }, { "epoch": 0.02, "learning_rate": 0.0009955934379996727, "loss": 1.6352, "step": 2700 }, { "epoch": 0.02, "learning_rate": 0.000994963929142483, "loss": 1.5591, "step": 2800 }, { "epoch": 0.02, "learning_rate": 0.0009943344202852935, "loss": 1.5352, "step": 2900 }, { "epoch": 0.02, "learning_rate": 0.0009937049114281038, "loss": 1.5257, "step": 3000 }, { "epoch": 0.02, "learning_rate": 0.0009930754025709143, "loss": 1.5289, "step": 3100 }, { "epoch": 0.02, "learning_rate": 0.0009924458937137246, "loss": 1.5246, "step": 3200 }, { "epoch": 0.02, "learning_rate": 0.0009918163848565348, "loss": 1.5221, "step": 3300 }, { "epoch": 0.02, "learning_rate": 0.0009911868759993453, "loss": 1.5214, "step": 3400 }, { "epoch": 0.02, "learning_rate": 0.0009905573671421556, "loss": 1.4942, "step": 3500 }, { "epoch": 0.02, "learning_rate": 0.0009899278582849661, "loss": 1.4914, "step": 3600 }, { "epoch": 0.02, "learning_rate": 0.0009892983494277764, "loss": 1.459, "step": 3700 }, { "epoch": 0.02, "learning_rate": 0.0009886688405705867, "loss": 1.4696, "step": 3800 }, { "epoch": 0.02, "learning_rate": 0.0009880393317133972, "loss": 1.4493, "step": 3900 }, { "epoch": 0.02, "learning_rate": 0.0009874098228562077, "loss": 1.4612, "step": 4000 }, { "epoch": 0.03, "learning_rate": 0.000986780313999018, "loss": 1.4506, "step": 4100 }, { "epoch": 0.03, "learning_rate": 0.0009861508051418283, "loss": 1.4585, "step": 4200 }, { "epoch": 0.03, "learning_rate": 0.0009855212962846388, "loss": 1.4414, "step": 4300 }, { "epoch": 0.03, "learning_rate": 0.000984891787427449, "loss": 1.4436, "step": 4400 }, { "epoch": 0.03, "learning_rate": 0.0009842622785702596, "loss": 1.4547, "step": 4500 }, { "epoch": 0.03, "learning_rate": 0.0009836327697130699, "loss": 1.4049, "step": 4600 }, { "epoch": 0.03, "learning_rate": 0.0009830032608558802, "loss": 1.4293, "step": 4700 }, { "epoch": 0.03, "learning_rate": 0.0009823737519986907, "loss": 1.4558, "step": 4800 }, { "epoch": 0.03, "learning_rate": 0.000981744243141501, "loss": 1.4235, "step": 4900 }, { "epoch": 0.03, "learning_rate": 0.0009811147342843115, "loss": 1.4318, "step": 5000 }, { "epoch": 0.03, "learning_rate": 0.0009804852254271218, "loss": 1.4459, "step": 5100 }, { "epoch": 0.03, "learning_rate": 0.000979855716569932, "loss": 1.4174, "step": 5200 }, { "epoch": 0.03, "learning_rate": 0.0009792262077127425, "loss": 1.3947, "step": 5300 }, { "epoch": 0.03, "learning_rate": 0.0009785966988555528, "loss": 1.4177, "step": 5400 }, { "epoch": 0.03, "learning_rate": 0.0009779671899983633, "loss": 1.4225, "step": 5500 }, { "epoch": 0.03, "learning_rate": 0.0009773376811411736, "loss": 1.4049, "step": 5600 }, { "epoch": 0.04, "learning_rate": 0.000976708172283984, "loss": 1.3942, "step": 5700 }, { "epoch": 0.04, "learning_rate": 0.0009760786634267944, "loss": 1.3987, "step": 5800 }, { "epoch": 0.04, "learning_rate": 0.0009754491545696048, "loss": 1.3907, "step": 5900 }, { "epoch": 0.04, "learning_rate": 0.0009748196457124152, "loss": 1.3817, "step": 6000 }, { "epoch": 0.04, "learning_rate": 0.0009741901368552256, "loss": 1.3905, "step": 6100 }, { "epoch": 0.04, "learning_rate": 0.0009735606279980359, "loss": 1.4099, "step": 6200 }, { "epoch": 0.04, "learning_rate": 0.0009729311191408463, "loss": 1.3696, "step": 6300 }, { "epoch": 0.04, "learning_rate": 0.0009723016102836567, "loss": 1.3858, "step": 6400 }, { "epoch": 0.04, "learning_rate": 0.0009716721014264671, "loss": 1.3832, "step": 6500 }, { "epoch": 0.04, "learning_rate": 0.0009710425925692775, "loss": 1.3781, "step": 6600 }, { "epoch": 0.04, "learning_rate": 0.0009704130837120878, "loss": 1.3789, "step": 6700 }, { "epoch": 0.04, "learning_rate": 0.0009697835748548982, "loss": 1.3709, "step": 6800 }, { "epoch": 0.04, "learning_rate": 0.0009691540659977086, "loss": 1.3751, "step": 6900 }, { "epoch": 0.04, "learning_rate": 0.0009685245571405191, "loss": 1.3728, "step": 7000 }, { "epoch": 0.04, "learning_rate": 0.0009678950482833295, "loss": 1.3693, "step": 7100 }, { "epoch": 0.04, "learning_rate": 0.0009672655394261397, "loss": 1.3585, "step": 7200 }, { "epoch": 0.05, "learning_rate": 0.0009666360305689501, "loss": 1.3613, "step": 7300 }, { "epoch": 0.05, "learning_rate": 0.0009660065217117605, "loss": 1.3782, "step": 7400 }, { "epoch": 0.05, "learning_rate": 0.0009653770128545709, "loss": 1.3551, "step": 7500 }, { "epoch": 0.05, "learning_rate": 0.0009647475039973813, "loss": 1.3473, "step": 7600 }, { "epoch": 0.05, "learning_rate": 0.0009641179951401916, "loss": 1.338, "step": 7700 }, { "epoch": 0.05, "learning_rate": 0.000963488486283002, "loss": 1.3795, "step": 7800 }, { "epoch": 0.05, "learning_rate": 0.0009628589774258124, "loss": 1.3429, "step": 7900 }, { "epoch": 0.05, "learning_rate": 0.0009622294685686228, "loss": 1.3476, "step": 8000 }, { "epoch": 0.05, "learning_rate": 0.0009615999597114332, "loss": 1.3826, "step": 8100 }, { "epoch": 0.05, "learning_rate": 0.0009609704508542435, "loss": 1.3683, "step": 8200 }, { "epoch": 0.05, "learning_rate": 0.0009603409419970539, "loss": 1.3378, "step": 8300 }, { "epoch": 0.05, "learning_rate": 0.0009597114331398643, "loss": 1.3613, "step": 8400 }, { "epoch": 0.05, "learning_rate": 0.0009590819242826747, "loss": 1.366, "step": 8500 }, { "epoch": 0.05, "learning_rate": 0.000958452415425485, "loss": 1.3375, "step": 8600 }, { "epoch": 0.05, "learning_rate": 0.0009578229065682954, "loss": 1.3516, "step": 8700 }, { "epoch": 0.05, "learning_rate": 0.0009571933977111058, "loss": 1.3671, "step": 8800 }, { "epoch": 0.06, "learning_rate": 0.0009565638888539163, "loss": 1.3305, "step": 8900 }, { "epoch": 0.06, "learning_rate": 0.0009559343799967267, "loss": 1.3439, "step": 9000 }, { "epoch": 0.06, "learning_rate": 0.0009553048711395369, "loss": 1.3475, "step": 9100 }, { "epoch": 0.06, "learning_rate": 0.0009546753622823473, "loss": 1.3374, "step": 9200 }, { "epoch": 0.06, "learning_rate": 0.0009540458534251577, "loss": 1.3304, "step": 9300 }, { "epoch": 0.06, "learning_rate": 0.0009534163445679681, "loss": 1.3704, "step": 9400 }, { "epoch": 0.06, "learning_rate": 0.0009527868357107785, "loss": 1.3521, "step": 9500 }, { "epoch": 0.06, "learning_rate": 0.0009521573268535888, "loss": 1.3508, "step": 9600 }, { "epoch": 0.06, "learning_rate": 0.0009515278179963992, "loss": 1.3308, "step": 9700 }, { "epoch": 0.06, "learning_rate": 0.0009508983091392096, "loss": 1.3347, "step": 9800 }, { "epoch": 0.06, "learning_rate": 0.00095026880028202, "loss": 1.3334, "step": 9900 }, { "epoch": 0.06, "learning_rate": 0.0009496392914248304, "loss": 1.3593, "step": 10000 }, { "epoch": 0.06, "learning_rate": 0.0009490097825676407, "loss": 1.3447, "step": 10100 }, { "epoch": 0.06, "learning_rate": 0.0009483802737104511, "loss": 1.3335, "step": 10200 }, { "epoch": 0.06, "learning_rate": 0.0009477507648532615, "loss": 1.3131, "step": 10300 }, { "epoch": 0.06, "learning_rate": 0.0009471212559960719, "loss": 1.3358, "step": 10400 }, { "epoch": 0.07, "learning_rate": 0.0009464917471388823, "loss": 1.3449, "step": 10500 }, { "epoch": 0.07, "learning_rate": 0.0009458622382816926, "loss": 1.3342, "step": 10600 }, { "epoch": 0.07, "learning_rate": 0.000945232729424503, "loss": 1.3349, "step": 10700 }, { "epoch": 0.07, "learning_rate": 0.0009446032205673134, "loss": 1.3444, "step": 10800 }, { "epoch": 0.07, "learning_rate": 0.0009439737117101239, "loss": 1.3218, "step": 10900 }, { "epoch": 0.07, "learning_rate": 0.0009433442028529343, "loss": 1.3339, "step": 11000 }, { "epoch": 0.07, "learning_rate": 0.0009427146939957445, "loss": 1.3336, "step": 11100 }, { "epoch": 0.07, "learning_rate": 0.0009420851851385549, "loss": 1.339, "step": 11200 }, { "epoch": 0.07, "learning_rate": 0.0009414556762813653, "loss": 1.3128, "step": 11300 }, { "epoch": 0.07, "learning_rate": 0.0009408261674241757, "loss": 1.3503, "step": 11400 }, { "epoch": 0.07, "learning_rate": 0.0009401966585669861, "loss": 1.3324, "step": 11500 }, { "epoch": 0.07, "learning_rate": 0.0009395671497097964, "loss": 1.3368, "step": 11600 }, { "epoch": 0.07, "learning_rate": 0.0009389376408526068, "loss": 1.3123, "step": 11700 }, { "epoch": 0.07, "learning_rate": 0.0009383081319954172, "loss": 1.3316, "step": 11800 }, { "epoch": 0.07, "learning_rate": 0.0009376786231382276, "loss": 1.3282, "step": 11900 }, { "epoch": 0.07, "learning_rate": 0.000937049114281038, "loss": 1.3181, "step": 12000 }, { "epoch": 0.08, "learning_rate": 0.0009364196054238483, "loss": 1.3278, "step": 12100 }, { "epoch": 0.08, "learning_rate": 0.0009357900965666587, "loss": 1.3077, "step": 12200 }, { "epoch": 0.08, "learning_rate": 0.0009351605877094691, "loss": 1.3242, "step": 12300 }, { "epoch": 0.08, "learning_rate": 0.0009345310788522795, "loss": 1.3162, "step": 12400 }, { "epoch": 0.08, "learning_rate": 0.0009339015699950899, "loss": 1.3182, "step": 12500 }, { "epoch": 0.08, "learning_rate": 0.0009332720611379002, "loss": 1.322, "step": 12600 }, { "epoch": 0.08, "learning_rate": 0.0009326425522807105, "loss": 1.3309, "step": 12700 }, { "epoch": 0.08, "learning_rate": 0.0009320130434235211, "loss": 1.332, "step": 12800 }, { "epoch": 0.08, "learning_rate": 0.0009313835345663315, "loss": 1.3111, "step": 12900 }, { "epoch": 0.08, "learning_rate": 0.0009307540257091417, "loss": 1.3084, "step": 13000 }, { "epoch": 0.08, "learning_rate": 0.0009301245168519521, "loss": 1.3183, "step": 13100 }, { "epoch": 0.08, "learning_rate": 0.0009294950079947625, "loss": 1.3336, "step": 13200 }, { "epoch": 0.08, "learning_rate": 0.0009288654991375729, "loss": 1.3221, "step": 13300 }, { "epoch": 0.08, "learning_rate": 0.0009282359902803833, "loss": 1.3266, "step": 13400 }, { "epoch": 0.08, "learning_rate": 0.0009276064814231936, "loss": 1.3219, "step": 13500 }, { "epoch": 0.08, "learning_rate": 0.000926976972566004, "loss": 1.3055, "step": 13600 }, { "epoch": 0.09, "learning_rate": 0.0009263474637088144, "loss": 1.3091, "step": 13700 }, { "epoch": 0.09, "learning_rate": 0.0009257179548516248, "loss": 1.3057, "step": 13800 }, { "epoch": 0.09, "learning_rate": 0.0009250884459944352, "loss": 1.3128, "step": 13900 }, { "epoch": 0.09, "learning_rate": 0.0009244589371372455, "loss": 1.2988, "step": 14000 }, { "epoch": 0.09, "learning_rate": 0.0009238294282800559, "loss": 1.3197, "step": 14100 }, { "epoch": 0.09, "learning_rate": 0.0009231999194228663, "loss": 1.3043, "step": 14200 }, { "epoch": 0.09, "learning_rate": 0.0009225704105656767, "loss": 1.3013, "step": 14300 }, { "epoch": 0.09, "learning_rate": 0.0009219409017084871, "loss": 1.2999, "step": 14400 }, { "epoch": 0.09, "learning_rate": 0.0009213113928512974, "loss": 1.3059, "step": 14500 }, { "epoch": 0.09, "learning_rate": 0.0009206818839941077, "loss": 1.3004, "step": 14600 }, { "epoch": 0.09, "learning_rate": 0.0009200523751369181, "loss": 1.3012, "step": 14700 }, { "epoch": 0.09, "learning_rate": 0.0009194228662797286, "loss": 1.2966, "step": 14800 }, { "epoch": 0.09, "learning_rate": 0.000918793357422539, "loss": 1.3052, "step": 14900 }, { "epoch": 0.09, "learning_rate": 0.0009181638485653493, "loss": 1.3077, "step": 15000 }, { "epoch": 0.09, "learning_rate": 0.0009175343397081597, "loss": 1.308, "step": 15100 }, { "epoch": 0.09, "learning_rate": 0.0009169048308509701, "loss": 1.3002, "step": 15200 }, { "epoch": 0.1, "learning_rate": 0.0009162753219937805, "loss": 1.3095, "step": 15300 }, { "epoch": 0.1, "learning_rate": 0.0009156458131365909, "loss": 1.3154, "step": 15400 }, { "epoch": 0.1, "learning_rate": 0.0009150163042794012, "loss": 1.2998, "step": 15500 }, { "epoch": 0.1, "learning_rate": 0.0009143867954222116, "loss": 1.3179, "step": 15600 }, { "epoch": 0.1, "learning_rate": 0.000913757286565022, "loss": 1.2986, "step": 15700 }, { "epoch": 0.1, "learning_rate": 0.0009131277777078324, "loss": 1.3143, "step": 15800 }, { "epoch": 0.1, "learning_rate": 0.0009124982688506428, "loss": 1.2853, "step": 15900 }, { "epoch": 0.1, "learning_rate": 0.0009118687599934531, "loss": 1.2879, "step": 16000 }, { "epoch": 0.1, "learning_rate": 0.0009112392511362635, "loss": 1.2931, "step": 16100 }, { "epoch": 0.1, "learning_rate": 0.0009106097422790739, "loss": 1.2906, "step": 16200 }, { "epoch": 0.1, "learning_rate": 0.0009099802334218843, "loss": 1.3251, "step": 16300 }, { "epoch": 0.1, "learning_rate": 0.0009093507245646947, "loss": 1.3187, "step": 16400 }, { "epoch": 0.1, "learning_rate": 0.000908721215707505, "loss": 1.2984, "step": 16500 }, { "epoch": 0.1, "learning_rate": 0.0009080917068503153, "loss": 1.3023, "step": 16600 }, { "epoch": 0.1, "learning_rate": 0.0009074621979931257, "loss": 1.3056, "step": 16700 }, { "epoch": 0.1, "learning_rate": 0.0009068326891359362, "loss": 1.3159, "step": 16800 }, { "epoch": 0.11, "learning_rate": 0.0009062031802787465, "loss": 1.3058, "step": 16900 }, { "epoch": 0.11, "learning_rate": 0.0009055736714215569, "loss": 1.3023, "step": 17000 }, { "epoch": 0.11, "learning_rate": 0.0009049441625643673, "loss": 1.3214, "step": 17100 }, { "epoch": 0.11, "learning_rate": 0.0009043146537071777, "loss": 1.2811, "step": 17200 }, { "epoch": 0.11, "learning_rate": 0.0009036851448499881, "loss": 1.295, "step": 17300 }, { "epoch": 0.11, "learning_rate": 0.0009030556359927984, "loss": 1.3167, "step": 17400 }, { "epoch": 0.11, "learning_rate": 0.0009024261271356088, "loss": 1.3003, "step": 17500 }, { "epoch": 0.11, "learning_rate": 0.0009017966182784192, "loss": 1.3076, "step": 17600 }, { "epoch": 0.11, "learning_rate": 0.0009011671094212296, "loss": 1.2644, "step": 17700 }, { "epoch": 0.11, "learning_rate": 0.00090053760056404, "loss": 1.3073, "step": 17800 }, { "epoch": 0.11, "learning_rate": 0.0008999080917068503, "loss": 1.2843, "step": 17900 }, { "epoch": 0.11, "learning_rate": 0.0008992785828496607, "loss": 1.2831, "step": 18000 }, { "epoch": 0.11, "learning_rate": 0.0008986490739924711, "loss": 1.2854, "step": 18100 }, { "epoch": 0.11, "learning_rate": 0.0008980195651352815, "loss": 1.3237, "step": 18200 }, { "epoch": 0.11, "learning_rate": 0.0008973900562780919, "loss": 1.2951, "step": 18300 }, { "epoch": 0.11, "learning_rate": 0.0008967605474209021, "loss": 1.297, "step": 18400 }, { "epoch": 0.12, "learning_rate": 0.0008961310385637125, "loss": 1.2937, "step": 18500 }, { "epoch": 0.12, "learning_rate": 0.0008955015297065229, "loss": 1.2877, "step": 18600 }, { "epoch": 0.12, "learning_rate": 0.0008948720208493334, "loss": 1.3139, "step": 18700 }, { "epoch": 0.12, "learning_rate": 0.0008942425119921438, "loss": 1.2998, "step": 18800 }, { "epoch": 0.12, "learning_rate": 0.0008936130031349541, "loss": 1.2798, "step": 18900 }, { "epoch": 0.12, "learning_rate": 0.0008929834942777645, "loss": 1.2865, "step": 19000 }, { "epoch": 0.12, "learning_rate": 0.0008923539854205749, "loss": 1.2954, "step": 19100 }, { "epoch": 0.12, "learning_rate": 0.0008917244765633853, "loss": 1.2915, "step": 19200 }, { "epoch": 0.12, "learning_rate": 0.0008910949677061957, "loss": 1.2948, "step": 19300 }, { "epoch": 0.12, "learning_rate": 0.000890465458849006, "loss": 1.2829, "step": 19400 }, { "epoch": 0.12, "learning_rate": 0.0008898359499918164, "loss": 1.2898, "step": 19500 }, { "epoch": 0.12, "learning_rate": 0.0008892064411346268, "loss": 1.2964, "step": 19600 }, { "epoch": 0.12, "learning_rate": 0.0008885769322774372, "loss": 1.2765, "step": 19700 }, { "epoch": 0.12, "learning_rate": 0.0008879474234202476, "loss": 1.3033, "step": 19800 }, { "epoch": 0.12, "learning_rate": 0.0008873179145630579, "loss": 1.2904, "step": 19900 }, { "epoch": 0.12, "learning_rate": 0.0008866884057058683, "loss": 1.2792, "step": 20000 }, { "epoch": 0.12, "learning_rate": 0.0008860588968486787, "loss": 1.2662, "step": 20100 }, { "epoch": 0.13, "learning_rate": 0.0008854293879914891, "loss": 1.2728, "step": 20200 }, { "epoch": 0.13, "learning_rate": 0.0008847998791342995, "loss": 1.2877, "step": 20300 }, { "epoch": 0.13, "learning_rate": 0.0008841703702771097, "loss": 1.2896, "step": 20400 }, { "epoch": 0.13, "learning_rate": 0.0008835408614199201, "loss": 1.3013, "step": 20500 }, { "epoch": 0.13, "learning_rate": 0.0008829113525627305, "loss": 1.3009, "step": 20600 }, { "epoch": 0.13, "learning_rate": 0.000882281843705541, "loss": 1.2754, "step": 20700 }, { "epoch": 0.13, "learning_rate": 0.0008816523348483514, "loss": 1.2888, "step": 20800 }, { "epoch": 0.13, "learning_rate": 0.0008810228259911617, "loss": 1.2733, "step": 20900 }, { "epoch": 0.13, "learning_rate": 0.0008803933171339721, "loss": 1.2743, "step": 21000 }, { "epoch": 0.13, "learning_rate": 0.0008797638082767825, "loss": 1.2764, "step": 21100 }, { "epoch": 0.13, "learning_rate": 0.0008791342994195929, "loss": 1.2808, "step": 21200 }, { "epoch": 0.13, "learning_rate": 0.0008785047905624032, "loss": 1.2991, "step": 21300 }, { "epoch": 0.13, "learning_rate": 0.0008778752817052136, "loss": 1.3172, "step": 21400 }, { "epoch": 0.13, "learning_rate": 0.000877245772848024, "loss": 1.2788, "step": 21500 }, { "epoch": 0.13, "learning_rate": 0.0008766162639908344, "loss": 1.2986, "step": 21600 }, { "epoch": 0.13, "learning_rate": 0.0008759867551336448, "loss": 1.2815, "step": 21700 }, { "epoch": 0.14, "learning_rate": 0.0008753572462764551, "loss": 1.2779, "step": 21800 }, { "epoch": 0.14, "learning_rate": 0.0008747277374192655, "loss": 1.2972, "step": 21900 }, { "epoch": 0.14, "learning_rate": 0.0008740982285620759, "loss": 1.2974, "step": 22000 }, { "epoch": 0.14, "learning_rate": 0.0008734687197048863, "loss": 1.2851, "step": 22100 }, { "epoch": 0.14, "learning_rate": 0.0008728392108476967, "loss": 1.2687, "step": 22200 }, { "epoch": 0.14, "learning_rate": 0.0008722097019905069, "loss": 1.2728, "step": 22300 }, { "epoch": 0.14, "learning_rate": 0.0008715801931333173, "loss": 1.2831, "step": 22400 }, { "epoch": 0.14, "learning_rate": 0.0008709506842761277, "loss": 1.2744, "step": 22500 }, { "epoch": 0.14, "learning_rate": 0.0008703211754189382, "loss": 1.2873, "step": 22600 }, { "epoch": 0.14, "learning_rate": 0.0008696916665617486, "loss": 1.2987, "step": 22700 }, { "epoch": 0.14, "learning_rate": 0.0008690621577045589, "loss": 1.2887, "step": 22800 }, { "epoch": 0.14, "learning_rate": 0.0008684326488473693, "loss": 1.2787, "step": 22900 }, { "epoch": 0.14, "learning_rate": 0.0008678031399901797, "loss": 1.2715, "step": 23000 }, { "epoch": 0.14, "learning_rate": 0.0008671736311329901, "loss": 1.2721, "step": 23100 }, { "epoch": 0.14, "learning_rate": 0.0008665441222758005, "loss": 1.3105, "step": 23200 }, { "epoch": 0.14, "learning_rate": 0.0008659146134186108, "loss": 1.2679, "step": 23300 }, { "epoch": 0.15, "learning_rate": 0.0008652851045614212, "loss": 1.2852, "step": 23400 }, { "epoch": 0.15, "learning_rate": 0.0008646555957042316, "loss": 1.302, "step": 23500 }, { "epoch": 0.15, "learning_rate": 0.000864026086847042, "loss": 1.2765, "step": 23600 }, { "epoch": 0.15, "learning_rate": 0.0008633965779898524, "loss": 1.2782, "step": 23700 }, { "epoch": 0.15, "learning_rate": 0.0008627670691326627, "loss": 1.2708, "step": 23800 }, { "epoch": 0.15, "learning_rate": 0.0008621375602754731, "loss": 1.2676, "step": 23900 }, { "epoch": 0.15, "learning_rate": 0.0008615080514182835, "loss": 1.2763, "step": 24000 }, { "epoch": 0.15, "learning_rate": 0.0008608785425610939, "loss": 1.2728, "step": 24100 }, { "epoch": 0.15, "learning_rate": 0.0008602490337039042, "loss": 1.2731, "step": 24200 }, { "epoch": 0.15, "learning_rate": 0.0008596195248467145, "loss": 1.2719, "step": 24300 }, { "epoch": 0.15, "learning_rate": 0.0008589900159895249, "loss": 1.2658, "step": 24400 }, { "epoch": 0.15, "learning_rate": 0.0008583605071323353, "loss": 1.2811, "step": 24500 }, { "epoch": 0.15, "learning_rate": 0.0008577309982751458, "loss": 1.2736, "step": 24600 }, { "epoch": 0.15, "learning_rate": 0.0008571014894179562, "loss": 1.2812, "step": 24700 }, { "epoch": 0.15, "learning_rate": 0.0008564719805607665, "loss": 1.2397, "step": 24800 }, { "epoch": 0.15, "learning_rate": 0.0008558424717035769, "loss": 1.2607, "step": 24900 }, { "epoch": 0.16, "learning_rate": 0.0008552129628463873, "loss": 1.2669, "step": 25000 }, { "epoch": 0.16, "learning_rate": 0.0008545834539891977, "loss": 1.2871, "step": 25100 }, { "epoch": 0.16, "learning_rate": 0.000853953945132008, "loss": 1.2696, "step": 25200 }, { "epoch": 0.16, "learning_rate": 0.0008533244362748184, "loss": 1.2778, "step": 25300 }, { "epoch": 0.16, "learning_rate": 0.0008526949274176288, "loss": 1.2824, "step": 25400 }, { "epoch": 0.16, "learning_rate": 0.0008520654185604392, "loss": 1.2723, "step": 25500 }, { "epoch": 0.16, "learning_rate": 0.0008514359097032496, "loss": 1.2603, "step": 25600 }, { "epoch": 0.16, "learning_rate": 0.0008508064008460599, "loss": 1.2776, "step": 25700 }, { "epoch": 0.16, "learning_rate": 0.0008501768919888703, "loss": 1.2779, "step": 25800 }, { "epoch": 0.16, "learning_rate": 0.0008495473831316807, "loss": 1.2758, "step": 25900 }, { "epoch": 0.16, "learning_rate": 0.000848917874274491, "loss": 1.2858, "step": 26000 }, { "epoch": 0.16, "learning_rate": 0.0008482883654173014, "loss": 1.2738, "step": 26100 }, { "epoch": 0.16, "learning_rate": 0.0008476588565601117, "loss": 1.2877, "step": 26200 }, { "epoch": 0.16, "learning_rate": 0.0008470293477029221, "loss": 1.2843, "step": 26300 }, { "epoch": 0.16, "learning_rate": 0.0008463998388457325, "loss": 1.2805, "step": 26400 }, { "epoch": 0.16, "learning_rate": 0.0008457703299885429, "loss": 1.2742, "step": 26500 }, { "epoch": 0.17, "learning_rate": 0.0008451408211313534, "loss": 1.2627, "step": 26600 }, { "epoch": 0.17, "learning_rate": 0.0008445113122741637, "loss": 1.2718, "step": 26700 }, { "epoch": 0.17, "learning_rate": 0.0008438818034169741, "loss": 1.2755, "step": 26800 }, { "epoch": 0.17, "learning_rate": 0.0008432522945597845, "loss": 1.2828, "step": 26900 }, { "epoch": 0.17, "learning_rate": 0.0008426227857025949, "loss": 1.2591, "step": 27000 }, { "epoch": 0.17, "learning_rate": 0.0008419932768454053, "loss": 1.2731, "step": 27100 }, { "epoch": 0.17, "learning_rate": 0.0008413637679882156, "loss": 1.2594, "step": 27200 }, { "epoch": 0.17, "learning_rate": 0.000840734259131026, "loss": 1.2709, "step": 27300 }, { "epoch": 0.17, "learning_rate": 0.0008401047502738364, "loss": 1.2677, "step": 27400 }, { "epoch": 0.17, "learning_rate": 0.0008394752414166468, "loss": 1.2509, "step": 27500 }, { "epoch": 0.17, "learning_rate": 0.0008388457325594572, "loss": 1.2835, "step": 27600 }, { "epoch": 0.17, "learning_rate": 0.0008382162237022675, "loss": 1.266, "step": 27700 }, { "epoch": 0.17, "learning_rate": 0.0008375867148450779, "loss": 1.2609, "step": 27800 }, { "epoch": 0.17, "learning_rate": 0.0008369572059878882, "loss": 1.2824, "step": 27900 }, { "epoch": 0.17, "learning_rate": 0.0008363276971306986, "loss": 1.2568, "step": 28000 }, { "epoch": 0.17, "learning_rate": 0.000835698188273509, "loss": 1.2771, "step": 28100 }, { "epoch": 0.18, "learning_rate": 0.0008350686794163193, "loss": 1.2487, "step": 28200 }, { "epoch": 0.18, "learning_rate": 0.0008344391705591297, "loss": 1.259, "step": 28300 }, { "epoch": 0.18, "learning_rate": 0.0008338096617019401, "loss": 1.277, "step": 28400 }, { "epoch": 0.18, "learning_rate": 0.0008331801528447506, "loss": 1.2671, "step": 28500 }, { "epoch": 0.18, "learning_rate": 0.000832550643987561, "loss": 1.266, "step": 28600 }, { "epoch": 0.18, "learning_rate": 0.0008319211351303713, "loss": 1.2604, "step": 28700 }, { "epoch": 0.18, "learning_rate": 0.0008312916262731817, "loss": 1.2685, "step": 28800 }, { "epoch": 0.18, "learning_rate": 0.0008306621174159921, "loss": 1.27, "step": 28900 }, { "epoch": 0.18, "learning_rate": 0.0008300326085588025, "loss": 1.2575, "step": 29000 }, { "epoch": 0.18, "learning_rate": 0.0008294030997016129, "loss": 1.29, "step": 29100 }, { "epoch": 0.18, "learning_rate": 0.0008287735908444232, "loss": 1.2682, "step": 29200 }, { "epoch": 0.18, "learning_rate": 0.0008281440819872336, "loss": 1.2658, "step": 29300 }, { "epoch": 0.18, "learning_rate": 0.000827514573130044, "loss": 1.2612, "step": 29400 }, { "epoch": 0.18, "learning_rate": 0.0008268850642728544, "loss": 1.2509, "step": 29500 }, { "epoch": 0.18, "learning_rate": 0.0008262555554156647, "loss": 1.2814, "step": 29600 }, { "epoch": 0.18, "learning_rate": 0.000825626046558475, "loss": 1.2538, "step": 29700 }, { "epoch": 0.19, "learning_rate": 0.0008249965377012854, "loss": 1.2484, "step": 29800 }, { "epoch": 0.19, "learning_rate": 0.0008243670288440958, "loss": 1.2516, "step": 29900 }, { "epoch": 0.19, "learning_rate": 0.0008237375199869062, "loss": 1.2584, "step": 30000 }, { "epoch": 0.19, "learning_rate": 0.0008231080111297165, "loss": 1.2561, "step": 30100 }, { "epoch": 0.19, "learning_rate": 0.0008224785022725269, "loss": 1.2665, "step": 30200 }, { "epoch": 0.19, "learning_rate": 0.0008218489934153373, "loss": 1.2721, "step": 30300 }, { "epoch": 0.19, "learning_rate": 0.0008212194845581477, "loss": 1.253, "step": 30400 }, { "epoch": 0.19, "learning_rate": 0.0008205899757009582, "loss": 1.2641, "step": 30500 }, { "epoch": 0.19, "learning_rate": 0.0008199604668437685, "loss": 1.2631, "step": 30600 }, { "epoch": 0.19, "learning_rate": 0.0008193309579865789, "loss": 1.245, "step": 30700 }, { "epoch": 0.19, "learning_rate": 0.0008187014491293893, "loss": 1.2572, "step": 30800 }, { "epoch": 0.19, "learning_rate": 0.0008180719402721997, "loss": 1.2853, "step": 30900 }, { "epoch": 0.19, "learning_rate": 0.0008174424314150101, "loss": 1.2601, "step": 31000 }, { "epoch": 0.19, "learning_rate": 0.0008168129225578204, "loss": 1.2586, "step": 31100 }, { "epoch": 0.19, "learning_rate": 0.0008161834137006308, "loss": 1.2695, "step": 31200 }, { "epoch": 0.19, "learning_rate": 0.0008155539048434412, "loss": 1.2587, "step": 31300 }, { "epoch": 0.2, "learning_rate": 0.0008149243959862516, "loss": 1.2489, "step": 31400 }, { "epoch": 0.2, "learning_rate": 0.000814294887129062, "loss": 1.2709, "step": 31500 }, { "epoch": 0.2, "learning_rate": 0.0008136653782718723, "loss": 1.2513, "step": 31600 }, { "epoch": 0.2, "learning_rate": 0.0008130358694146826, "loss": 1.2458, "step": 31700 }, { "epoch": 0.2, "learning_rate": 0.000812406360557493, "loss": 1.2802, "step": 31800 }, { "epoch": 0.2, "learning_rate": 0.0008117768517003034, "loss": 1.2685, "step": 31900 }, { "epoch": 0.2, "learning_rate": 0.0008111473428431138, "loss": 1.2485, "step": 32000 }, { "epoch": 0.2, "learning_rate": 0.0008105178339859241, "loss": 1.2749, "step": 32100 }, { "epoch": 0.2, "learning_rate": 0.0008098883251287345, "loss": 1.2382, "step": 32200 }, { "epoch": 0.2, "learning_rate": 0.0008092588162715449, "loss": 1.2547, "step": 32300 }, { "epoch": 0.2, "learning_rate": 0.0008086293074143553, "loss": 1.2621, "step": 32400 }, { "epoch": 0.2, "learning_rate": 0.0008079997985571658, "loss": 1.2545, "step": 32500 }, { "epoch": 0.2, "learning_rate": 0.0008073702896999761, "loss": 1.2586, "step": 32600 }, { "epoch": 0.2, "learning_rate": 0.0008067407808427865, "loss": 1.2533, "step": 32700 }, { "epoch": 0.2, "learning_rate": 0.0008061112719855969, "loss": 1.2611, "step": 32800 }, { "epoch": 0.2, "learning_rate": 0.0008054817631284073, "loss": 1.2714, "step": 32900 }, { "epoch": 0.21, "learning_rate": 0.0008048522542712177, "loss": 1.2583, "step": 33000 }, { "epoch": 0.21, "learning_rate": 0.000804222745414028, "loss": 1.2684, "step": 33100 }, { "epoch": 0.21, "learning_rate": 0.0008035932365568384, "loss": 1.2703, "step": 33200 }, { "epoch": 0.21, "learning_rate": 0.0008029637276996488, "loss": 1.241, "step": 33300 }, { "epoch": 0.21, "learning_rate": 0.0008023342188424592, "loss": 1.2473, "step": 33400 }, { "epoch": 0.21, "learning_rate": 0.0008017047099852696, "loss": 1.2567, "step": 33500 }, { "epoch": 0.21, "learning_rate": 0.0008010752011280798, "loss": 1.242, "step": 33600 }, { "epoch": 0.21, "learning_rate": 0.0008004456922708902, "loss": 1.2488, "step": 33700 }, { "epoch": 0.21, "learning_rate": 0.0007998161834137006, "loss": 1.2747, "step": 33800 }, { "epoch": 0.21, "learning_rate": 0.000799186674556511, "loss": 1.2697, "step": 33900 }, { "epoch": 0.21, "learning_rate": 0.0007985571656993213, "loss": 1.2691, "step": 34000 }, { "epoch": 0.21, "learning_rate": 0.0007979276568421317, "loss": 1.2479, "step": 34100 }, { "epoch": 0.21, "learning_rate": 0.0007972981479849421, "loss": 1.2619, "step": 34200 }, { "epoch": 0.21, "learning_rate": 0.0007966686391277525, "loss": 1.2439, "step": 34300 }, { "epoch": 0.21, "learning_rate": 0.000796039130270563, "loss": 1.2404, "step": 34400 }, { "epoch": 0.21, "learning_rate": 0.0007954096214133733, "loss": 1.254, "step": 34500 }, { "epoch": 0.22, "learning_rate": 0.0007947801125561837, "loss": 1.233, "step": 34600 }, { "epoch": 0.22, "learning_rate": 0.0007941506036989941, "loss": 1.2532, "step": 34700 }, { "epoch": 0.22, "learning_rate": 0.0007935210948418045, "loss": 1.2445, "step": 34800 }, { "epoch": 0.22, "learning_rate": 0.0007928915859846149, "loss": 1.259, "step": 34900 }, { "epoch": 0.22, "learning_rate": 0.0007922620771274252, "loss": 1.2816, "step": 35000 }, { "epoch": 0.22, "learning_rate": 0.0007916325682702356, "loss": 1.2529, "step": 35100 }, { "epoch": 0.22, "learning_rate": 0.000791003059413046, "loss": 1.2629, "step": 35200 }, { "epoch": 0.22, "learning_rate": 0.0007903735505558564, "loss": 1.26, "step": 35300 }, { "epoch": 0.22, "learning_rate": 0.0007897440416986668, "loss": 1.2612, "step": 35400 }, { "epoch": 0.22, "learning_rate": 0.000789114532841477, "loss": 1.2512, "step": 35500 }, { "epoch": 0.22, "learning_rate": 0.0007884850239842874, "loss": 1.2738, "step": 35600 }, { "epoch": 0.22, "learning_rate": 0.0007878555151270978, "loss": 1.2674, "step": 35700 }, { "epoch": 0.22, "learning_rate": 0.0007872260062699082, "loss": 1.2577, "step": 35800 }, { "epoch": 0.22, "learning_rate": 0.0007865964974127186, "loss": 1.2641, "step": 35900 }, { "epoch": 0.22, "learning_rate": 0.0007859669885555289, "loss": 1.2634, "step": 36000 }, { "epoch": 0.22, "learning_rate": 0.0007853374796983393, "loss": 1.2424, "step": 36100 }, { "epoch": 0.23, "learning_rate": 0.0007847079708411497, "loss": 1.249, "step": 36200 }, { "epoch": 0.23, "learning_rate": 0.0007840784619839601, "loss": 1.2518, "step": 36300 }, { "epoch": 0.23, "learning_rate": 0.0007834489531267706, "loss": 1.2436, "step": 36400 }, { "epoch": 0.23, "learning_rate": 0.0007828194442695809, "loss": 1.232, "step": 36500 }, { "epoch": 0.23, "learning_rate": 0.0007821899354123913, "loss": 1.2574, "step": 36600 }, { "epoch": 0.23, "learning_rate": 0.0007815604265552017, "loss": 1.2481, "step": 36700 }, { "epoch": 0.23, "learning_rate": 0.0007809309176980121, "loss": 1.2587, "step": 36800 }, { "epoch": 0.23, "learning_rate": 0.0007803014088408225, "loss": 1.2593, "step": 36900 }, { "epoch": 0.23, "learning_rate": 0.0007796718999836328, "loss": 1.2428, "step": 37000 }, { "epoch": 0.23, "learning_rate": 0.0007790423911264432, "loss": 1.2624, "step": 37100 }, { "epoch": 0.23, "learning_rate": 0.0007784128822692536, "loss": 1.2676, "step": 37200 }, { "epoch": 0.23, "learning_rate": 0.000777783373412064, "loss": 1.2319, "step": 37300 }, { "epoch": 0.23, "learning_rate": 0.0007771538645548744, "loss": 1.2609, "step": 37400 }, { "epoch": 0.23, "learning_rate": 0.0007765243556976846, "loss": 1.2563, "step": 37500 }, { "epoch": 0.23, "learning_rate": 0.000775894846840495, "loss": 1.2736, "step": 37600 }, { "epoch": 0.23, "learning_rate": 0.0007752653379833054, "loss": 1.2551, "step": 37700 }, { "epoch": 0.23, "learning_rate": 0.0007746358291261158, "loss": 1.2493, "step": 37800 }, { "epoch": 0.24, "learning_rate": 0.0007740063202689261, "loss": 1.2744, "step": 37900 }, { "epoch": 0.24, "learning_rate": 0.0007733768114117365, "loss": 1.2656, "step": 38000 }, { "epoch": 0.24, "learning_rate": 0.0007727473025545469, "loss": 1.2557, "step": 38100 }, { "epoch": 0.24, "learning_rate": 0.0007721177936973573, "loss": 1.2424, "step": 38200 }, { "epoch": 0.24, "learning_rate": 0.0007714882848401678, "loss": 1.2561, "step": 38300 }, { "epoch": 0.24, "learning_rate": 0.0007708587759829781, "loss": 1.2468, "step": 38400 }, { "epoch": 0.24, "learning_rate": 0.0007702292671257885, "loss": 1.2476, "step": 38500 }, { "epoch": 0.24, "learning_rate": 0.0007695997582685989, "loss": 1.2495, "step": 38600 }, { "epoch": 0.24, "learning_rate": 0.0007689702494114093, "loss": 1.2759, "step": 38700 }, { "epoch": 0.24, "learning_rate": 0.0007683407405542197, "loss": 1.2446, "step": 38800 }, { "epoch": 0.24, "learning_rate": 0.00076771123169703, "loss": 1.2586, "step": 38900 }, { "epoch": 0.24, "learning_rate": 0.0007670817228398404, "loss": 1.2452, "step": 39000 }, { "epoch": 0.24, "learning_rate": 0.0007664522139826508, "loss": 1.2421, "step": 39100 }, { "epoch": 0.24, "learning_rate": 0.0007658227051254612, "loss": 1.2487, "step": 39200 }, { "epoch": 0.24, "learning_rate": 0.0007651931962682715, "loss": 1.2342, "step": 39300 }, { "epoch": 0.24, "learning_rate": 0.0007645636874110818, "loss": 1.2376, "step": 39400 }, { "epoch": 0.25, "learning_rate": 0.0007639341785538922, "loss": 1.2399, "step": 39500 }, { "epoch": 0.25, "learning_rate": 0.0007633046696967026, "loss": 1.2582, "step": 39600 }, { "epoch": 0.25, "learning_rate": 0.000762675160839513, "loss": 1.2306, "step": 39700 }, { "epoch": 0.25, "learning_rate": 0.0007620456519823234, "loss": 1.2343, "step": 39800 }, { "epoch": 0.25, "learning_rate": 0.0007614161431251337, "loss": 1.2595, "step": 39900 }, { "epoch": 0.25, "learning_rate": 0.0007607866342679441, "loss": 1.2399, "step": 40000 }, { "epoch": 0.25, "learning_rate": 0.0007601571254107545, "loss": 1.2428, "step": 40100 }, { "epoch": 0.25, "learning_rate": 0.0007595276165535649, "loss": 1.2367, "step": 40200 }, { "epoch": 0.25, "learning_rate": 0.0007588981076963754, "loss": 1.2658, "step": 40300 }, { "epoch": 0.25, "learning_rate": 0.0007582685988391857, "loss": 1.2362, "step": 40400 }, { "epoch": 0.25, "learning_rate": 0.0007576390899819961, "loss": 1.2493, "step": 40500 }, { "epoch": 0.25, "learning_rate": 0.0007570095811248065, "loss": 1.2663, "step": 40600 }, { "epoch": 0.25, "learning_rate": 0.0007563800722676169, "loss": 1.2488, "step": 40700 }, { "epoch": 0.25, "learning_rate": 0.0007557505634104273, "loss": 1.2423, "step": 40800 }, { "epoch": 0.25, "learning_rate": 0.0007551210545532376, "loss": 1.2312, "step": 40900 }, { "epoch": 0.25, "learning_rate": 0.000754491545696048, "loss": 1.2499, "step": 41000 }, { "epoch": 0.26, "learning_rate": 0.0007538620368388584, "loss": 1.2335, "step": 41100 }, { "epoch": 0.26, "learning_rate": 0.0007532325279816687, "loss": 1.2267, "step": 41200 }, { "epoch": 0.26, "learning_rate": 0.0007526030191244791, "loss": 1.2377, "step": 41300 }, { "epoch": 0.26, "learning_rate": 0.0007519735102672894, "loss": 1.2266, "step": 41400 }, { "epoch": 0.26, "learning_rate": 0.0007513440014100998, "loss": 1.2445, "step": 41500 }, { "epoch": 0.26, "learning_rate": 0.0007507144925529102, "loss": 1.2316, "step": 41600 }, { "epoch": 0.26, "learning_rate": 0.0007500849836957206, "loss": 1.2431, "step": 41700 }, { "epoch": 0.26, "learning_rate": 0.000749455474838531, "loss": 1.2499, "step": 41800 }, { "epoch": 0.26, "learning_rate": 0.0007488259659813413, "loss": 1.255, "step": 41900 }, { "epoch": 0.26, "learning_rate": 0.0007481964571241517, "loss": 1.2342, "step": 42000 }, { "epoch": 0.26, "learning_rate": 0.0007475669482669621, "loss": 1.2233, "step": 42100 }, { "epoch": 0.26, "learning_rate": 0.0007469374394097725, "loss": 1.2464, "step": 42200 }, { "epoch": 0.26, "learning_rate": 0.0007463079305525829, "loss": 1.2444, "step": 42300 }, { "epoch": 0.26, "learning_rate": 0.0007456784216953933, "loss": 1.2452, "step": 42400 }, { "epoch": 0.26, "learning_rate": 0.0007450489128382037, "loss": 1.2278, "step": 42500 }, { "epoch": 0.26, "learning_rate": 0.0007444194039810141, "loss": 1.2509, "step": 42600 }, { "epoch": 0.27, "learning_rate": 0.0007437898951238245, "loss": 1.248, "step": 42700 }, { "epoch": 0.27, "learning_rate": 0.0007431603862666348, "loss": 1.2386, "step": 42800 }, { "epoch": 0.27, "learning_rate": 0.0007425308774094452, "loss": 1.2385, "step": 42900 }, { "epoch": 0.27, "learning_rate": 0.0007419013685522556, "loss": 1.2365, "step": 43000 }, { "epoch": 0.27, "learning_rate": 0.000741271859695066, "loss": 1.2554, "step": 43100 }, { "epoch": 0.27, "learning_rate": 0.0007406423508378763, "loss": 1.2586, "step": 43200 }, { "epoch": 0.27, "learning_rate": 0.0007400128419806866, "loss": 1.2397, "step": 43300 }, { "epoch": 0.27, "learning_rate": 0.000739383333123497, "loss": 1.2447, "step": 43400 }, { "epoch": 0.27, "learning_rate": 0.0007387538242663074, "loss": 1.2455, "step": 43500 }, { "epoch": 0.27, "learning_rate": 0.0007381243154091178, "loss": 1.2415, "step": 43600 }, { "epoch": 0.27, "learning_rate": 0.0007374948065519282, "loss": 1.2504, "step": 43700 }, { "epoch": 0.27, "learning_rate": 0.0007368652976947385, "loss": 1.2417, "step": 43800 }, { "epoch": 0.27, "learning_rate": 0.0007362357888375489, "loss": 1.2382, "step": 43900 }, { "epoch": 0.27, "learning_rate": 0.0007356062799803593, "loss": 1.2399, "step": 44000 }, { "epoch": 0.27, "learning_rate": 0.0007349767711231697, "loss": 1.2654, "step": 44100 }, { "epoch": 0.27, "learning_rate": 0.0007343472622659802, "loss": 1.2425, "step": 44200 }, { "epoch": 0.28, "learning_rate": 0.0007337177534087905, "loss": 1.255, "step": 44300 }, { "epoch": 0.28, "learning_rate": 0.0007330882445516009, "loss": 1.2418, "step": 44400 }, { "epoch": 0.28, "learning_rate": 0.0007324587356944113, "loss": 1.2313, "step": 44500 }, { "epoch": 0.28, "learning_rate": 0.0007318292268372217, "loss": 1.2429, "step": 44600 }, { "epoch": 0.28, "learning_rate": 0.0007311997179800321, "loss": 1.2677, "step": 44700 }, { "epoch": 0.28, "learning_rate": 0.0007305702091228424, "loss": 1.2235, "step": 44800 }, { "epoch": 0.28, "learning_rate": 0.0007299407002656528, "loss": 1.2359, "step": 44900 }, { "epoch": 0.28, "learning_rate": 0.0007293111914084631, "loss": 1.2512, "step": 45000 }, { "epoch": 0.28, "learning_rate": 0.0007286816825512735, "loss": 1.2216, "step": 45100 }, { "epoch": 0.28, "learning_rate": 0.0007280521736940839, "loss": 1.2559, "step": 45200 }, { "epoch": 0.28, "learning_rate": 0.0007274226648368942, "loss": 1.2301, "step": 45300 }, { "epoch": 0.28, "learning_rate": 0.0007267931559797046, "loss": 1.2275, "step": 45400 }, { "epoch": 0.28, "learning_rate": 0.000726163647122515, "loss": 1.24, "step": 45500 }, { "epoch": 0.28, "learning_rate": 0.0007255341382653254, "loss": 1.2484, "step": 45600 }, { "epoch": 0.28, "learning_rate": 0.0007249046294081358, "loss": 1.2383, "step": 45700 }, { "epoch": 0.28, "learning_rate": 0.0007242751205509461, "loss": 1.2442, "step": 45800 }, { "epoch": 0.29, "learning_rate": 0.0007236456116937565, "loss": 1.2214, "step": 45900 }, { "epoch": 0.29, "learning_rate": 0.0007230161028365669, "loss": 1.2405, "step": 46000 }, { "epoch": 0.29, "learning_rate": 0.0007223865939793773, "loss": 1.2372, "step": 46100 }, { "epoch": 0.29, "learning_rate": 0.0007217570851221877, "loss": 1.2423, "step": 46200 }, { "epoch": 0.29, "learning_rate": 0.0007211275762649981, "loss": 1.2399, "step": 46300 }, { "epoch": 0.29, "learning_rate": 0.0007204980674078085, "loss": 1.235, "step": 46400 }, { "epoch": 0.29, "learning_rate": 0.0007198685585506189, "loss": 1.2395, "step": 46500 }, { "epoch": 0.29, "learning_rate": 0.0007192390496934293, "loss": 1.2473, "step": 46600 }, { "epoch": 0.29, "learning_rate": 0.0007186095408362396, "loss": 1.2339, "step": 46700 }, { "epoch": 0.29, "learning_rate": 0.00071798003197905, "loss": 1.2356, "step": 46800 }, { "epoch": 0.29, "learning_rate": 0.0007173505231218603, "loss": 1.227, "step": 46900 }, { "epoch": 0.29, "learning_rate": 0.0007167210142646707, "loss": 1.2495, "step": 47000 }, { "epoch": 0.29, "learning_rate": 0.0007160915054074811, "loss": 1.2372, "step": 47100 }, { "epoch": 0.29, "learning_rate": 0.0007154619965502914, "loss": 1.2758, "step": 47200 }, { "epoch": 0.29, "learning_rate": 0.0007148324876931018, "loss": 1.2317, "step": 47300 }, { "epoch": 0.29, "learning_rate": 0.0007142029788359122, "loss": 1.2337, "step": 47400 }, { "epoch": 0.3, "learning_rate": 0.0007135734699787226, "loss": 1.2283, "step": 47500 }, { "epoch": 0.3, "learning_rate": 0.000712943961121533, "loss": 1.2466, "step": 47600 }, { "epoch": 0.3, "learning_rate": 0.0007123144522643433, "loss": 1.2351, "step": 47700 }, { "epoch": 0.3, "learning_rate": 0.0007116849434071537, "loss": 1.2418, "step": 47800 }, { "epoch": 0.3, "learning_rate": 0.0007110554345499641, "loss": 1.241, "step": 47900 }, { "epoch": 0.3, "learning_rate": 0.0007104259256927745, "loss": 1.2293, "step": 48000 }, { "epoch": 0.3, "learning_rate": 0.000709796416835585, "loss": 1.2399, "step": 48100 }, { "epoch": 0.3, "learning_rate": 0.0007091669079783953, "loss": 1.2309, "step": 48200 }, { "epoch": 0.3, "learning_rate": 0.0007085373991212057, "loss": 1.2266, "step": 48300 }, { "epoch": 0.3, "learning_rate": 0.0007079078902640161, "loss": 1.2332, "step": 48400 }, { "epoch": 0.3, "learning_rate": 0.0007072783814068265, "loss": 1.2427, "step": 48500 }, { "epoch": 0.3, "learning_rate": 0.0007066488725496369, "loss": 1.2463, "step": 48600 }, { "epoch": 0.3, "learning_rate": 0.0007060193636924471, "loss": 1.2421, "step": 48700 }, { "epoch": 0.3, "learning_rate": 0.0007053898548352575, "loss": 1.2318, "step": 48800 }, { "epoch": 0.3, "learning_rate": 0.0007047603459780679, "loss": 1.223, "step": 48900 }, { "epoch": 0.3, "learning_rate": 0.0007041308371208783, "loss": 1.2298, "step": 49000 }, { "epoch": 0.31, "learning_rate": 0.0007035013282636887, "loss": 1.2403, "step": 49100 }, { "epoch": 0.31, "learning_rate": 0.000702871819406499, "loss": 1.2324, "step": 49200 }, { "epoch": 0.31, "learning_rate": 0.0007022423105493094, "loss": 1.2465, "step": 49300 }, { "epoch": 0.31, "learning_rate": 0.0007016128016921198, "loss": 1.2361, "step": 49400 }, { "epoch": 0.31, "learning_rate": 0.0007009832928349302, "loss": 1.2313, "step": 49500 }, { "epoch": 0.31, "learning_rate": 0.0007003537839777406, "loss": 1.2439, "step": 49600 }, { "epoch": 0.31, "learning_rate": 0.0006997242751205509, "loss": 1.2409, "step": 49700 }, { "epoch": 0.31, "learning_rate": 0.0006990947662633613, "loss": 1.2406, "step": 49800 }, { "epoch": 0.31, "learning_rate": 0.0006984652574061717, "loss": 1.2146, "step": 49900 }, { "epoch": 0.31, "learning_rate": 0.0006978357485489821, "loss": 1.2345, "step": 50000 }, { "epoch": 0.31, "learning_rate": 0.0006972062396917926, "loss": 1.243, "step": 50100 }, { "epoch": 0.31, "learning_rate": 0.0006965767308346029, "loss": 1.2378, "step": 50200 }, { "epoch": 0.31, "learning_rate": 0.0006959472219774133, "loss": 1.2309, "step": 50300 }, { "epoch": 0.31, "learning_rate": 0.0006953177131202237, "loss": 1.2337, "step": 50400 }, { "epoch": 0.31, "learning_rate": 0.0006946882042630341, "loss": 1.2406, "step": 50500 }, { "epoch": 0.31, "learning_rate": 0.0006940586954058443, "loss": 1.2389, "step": 50600 }, { "epoch": 0.32, "learning_rate": 0.0006934291865486547, "loss": 1.2323, "step": 50700 }, { "epoch": 0.32, "learning_rate": 0.0006927996776914651, "loss": 1.2504, "step": 50800 }, { "epoch": 0.32, "learning_rate": 0.0006921701688342755, "loss": 1.2323, "step": 50900 }, { "epoch": 0.32, "learning_rate": 0.0006915406599770859, "loss": 1.2321, "step": 51000 }, { "epoch": 0.32, "learning_rate": 0.0006909111511198962, "loss": 1.2458, "step": 51100 }, { "epoch": 0.32, "learning_rate": 0.0006902816422627066, "loss": 1.2371, "step": 51200 }, { "epoch": 0.32, "learning_rate": 0.000689652133405517, "loss": 1.2227, "step": 51300 }, { "epoch": 0.32, "learning_rate": 0.0006890226245483274, "loss": 1.2007, "step": 51400 }, { "epoch": 0.32, "learning_rate": 0.0006883931156911378, "loss": 1.2522, "step": 51500 }, { "epoch": 0.32, "learning_rate": 0.0006877636068339481, "loss": 1.2535, "step": 51600 }, { "epoch": 0.32, "learning_rate": 0.0006871340979767585, "loss": 1.2256, "step": 51700 }, { "epoch": 0.32, "learning_rate": 0.0006865045891195689, "loss": 1.2258, "step": 51800 }, { "epoch": 0.32, "learning_rate": 0.0006858750802623793, "loss": 1.2065, "step": 51900 }, { "epoch": 0.32, "learning_rate": 0.0006852455714051897, "loss": 1.2471, "step": 52000 }, { "epoch": 0.32, "learning_rate": 0.0006846160625480001, "loss": 1.2313, "step": 52100 }, { "epoch": 0.32, "learning_rate": 0.0006839865536908105, "loss": 1.2136, "step": 52200 }, { "epoch": 0.33, "learning_rate": 0.0006833570448336209, "loss": 1.2374, "step": 52300 }, { "epoch": 0.33, "learning_rate": 0.0006827275359764313, "loss": 1.2368, "step": 52400 }, { "epoch": 0.33, "learning_rate": 0.0006820980271192417, "loss": 1.2295, "step": 52500 }, { "epoch": 0.33, "learning_rate": 0.0006814685182620519, "loss": 1.2498, "step": 52600 }, { "epoch": 0.33, "learning_rate": 0.0006808390094048623, "loss": 1.2475, "step": 52700 }, { "epoch": 0.33, "learning_rate": 0.0006802095005476727, "loss": 1.2169, "step": 52800 }, { "epoch": 0.33, "learning_rate": 0.0006795799916904831, "loss": 1.2418, "step": 52900 }, { "epoch": 0.33, "learning_rate": 0.0006789504828332935, "loss": 1.2283, "step": 53000 }, { "epoch": 0.33, "learning_rate": 0.0006783209739761038, "loss": 1.2274, "step": 53100 }, { "epoch": 0.33, "learning_rate": 0.0006776914651189142, "loss": 1.2336, "step": 53200 }, { "epoch": 0.33, "learning_rate": 0.0006770619562617246, "loss": 1.2277, "step": 53300 }, { "epoch": 0.33, "learning_rate": 0.000676432447404535, "loss": 1.2338, "step": 53400 }, { "epoch": 0.33, "learning_rate": 0.0006758029385473454, "loss": 1.2453, "step": 53500 }, { "epoch": 0.33, "learning_rate": 0.0006751734296901557, "loss": 1.2277, "step": 53600 }, { "epoch": 0.33, "learning_rate": 0.0006745439208329661, "loss": 1.2136, "step": 53700 }, { "epoch": 0.33, "learning_rate": 0.0006739144119757765, "loss": 1.2345, "step": 53800 }, { "epoch": 0.34, "learning_rate": 0.0006732849031185869, "loss": 1.2291, "step": 53900 }, { "epoch": 0.34, "learning_rate": 0.0006726553942613974, "loss": 1.233, "step": 54000 }, { "epoch": 0.34, "learning_rate": 0.0006720258854042077, "loss": 1.2422, "step": 54100 }, { "epoch": 0.34, "learning_rate": 0.0006713963765470181, "loss": 1.2385, "step": 54200 }, { "epoch": 0.34, "learning_rate": 0.0006707668676898285, "loss": 1.2402, "step": 54300 }, { "epoch": 0.34, "learning_rate": 0.0006701373588326389, "loss": 1.2081, "step": 54400 }, { "epoch": 0.34, "learning_rate": 0.0006695078499754492, "loss": 1.2211, "step": 54500 }, { "epoch": 0.34, "learning_rate": 0.0006688783411182595, "loss": 1.2149, "step": 54600 }, { "epoch": 0.34, "learning_rate": 0.0006682488322610699, "loss": 1.2328, "step": 54700 }, { "epoch": 0.34, "learning_rate": 0.0006676193234038803, "loss": 1.231, "step": 54800 }, { "epoch": 0.34, "learning_rate": 0.0006669898145466907, "loss": 1.2277, "step": 54900 }, { "epoch": 0.34, "learning_rate": 0.000666360305689501, "loss": 1.2414, "step": 55000 }, { "epoch": 0.34, "learning_rate": 0.0006657307968323114, "loss": 1.2395, "step": 55100 }, { "epoch": 0.34, "learning_rate": 0.0006651012879751218, "loss": 1.2471, "step": 55200 }, { "epoch": 0.34, "learning_rate": 0.0006644717791179322, "loss": 1.2363, "step": 55300 }, { "epoch": 0.34, "learning_rate": 0.0006638422702607426, "loss": 1.2248, "step": 55400 }, { "epoch": 0.35, "learning_rate": 0.0006632127614035529, "loss": 1.2244, "step": 55500 }, { "epoch": 0.35, "learning_rate": 0.0006625832525463633, "loss": 1.2177, "step": 55600 }, { "epoch": 0.35, "learning_rate": 0.0006619537436891737, "loss": 1.233, "step": 55700 }, { "epoch": 0.35, "learning_rate": 0.0006613242348319841, "loss": 1.2349, "step": 55800 }, { "epoch": 0.35, "learning_rate": 0.0006606947259747945, "loss": 1.2389, "step": 55900 }, { "epoch": 0.35, "learning_rate": 0.0006600652171176049, "loss": 1.2368, "step": 56000 }, { "epoch": 0.35, "learning_rate": 0.0006594357082604153, "loss": 1.2198, "step": 56100 }, { "epoch": 0.35, "learning_rate": 0.0006588061994032257, "loss": 1.2178, "step": 56200 }, { "epoch": 0.35, "learning_rate": 0.000658176690546036, "loss": 1.2177, "step": 56300 }, { "epoch": 0.35, "learning_rate": 0.0006575471816888464, "loss": 1.2249, "step": 56400 }, { "epoch": 0.35, "learning_rate": 0.0006569176728316567, "loss": 1.2234, "step": 56500 }, { "epoch": 0.35, "learning_rate": 0.0006562881639744671, "loss": 1.2255, "step": 56600 }, { "epoch": 0.35, "learning_rate": 0.0006556586551172775, "loss": 1.2133, "step": 56700 }, { "epoch": 0.35, "learning_rate": 0.0006550291462600879, "loss": 1.2065, "step": 56800 }, { "epoch": 0.35, "learning_rate": 0.0006543996374028983, "loss": 1.2112, "step": 56900 }, { "epoch": 0.35, "learning_rate": 0.0006537701285457086, "loss": 1.2118, "step": 57000 }, { "epoch": 0.35, "learning_rate": 0.000653140619688519, "loss": 1.224, "step": 57100 }, { "epoch": 0.36, "learning_rate": 0.0006525111108313294, "loss": 1.2395, "step": 57200 }, { "epoch": 0.36, "learning_rate": 0.0006518816019741398, "loss": 1.2382, "step": 57300 }, { "epoch": 0.36, "learning_rate": 0.0006512520931169502, "loss": 1.2086, "step": 57400 }, { "epoch": 0.36, "learning_rate": 0.0006506225842597605, "loss": 1.2059, "step": 57500 }, { "epoch": 0.36, "learning_rate": 0.0006499930754025709, "loss": 1.2496, "step": 57600 }, { "epoch": 0.36, "learning_rate": 0.0006493635665453813, "loss": 1.2223, "step": 57700 }, { "epoch": 0.36, "learning_rate": 0.0006487340576881917, "loss": 1.2291, "step": 57800 }, { "epoch": 0.36, "learning_rate": 0.0006481045488310022, "loss": 1.2347, "step": 57900 }, { "epoch": 0.36, "learning_rate": 0.0006474750399738125, "loss": 1.241, "step": 58000 }, { "epoch": 0.36, "learning_rate": 0.0006468455311166229, "loss": 1.2339, "step": 58100 }, { "epoch": 0.36, "learning_rate": 0.0006462160222594333, "loss": 1.2201, "step": 58200 }, { "epoch": 0.36, "learning_rate": 0.0006455865134022436, "loss": 1.2282, "step": 58300 }, { "epoch": 0.36, "learning_rate": 0.000644957004545054, "loss": 1.2187, "step": 58400 }, { "epoch": 0.36, "learning_rate": 0.0006443274956878643, "loss": 1.2174, "step": 58500 }, { "epoch": 0.36, "learning_rate": 0.0006436979868306747, "loss": 1.2245, "step": 58600 }, { "epoch": 0.36, "learning_rate": 0.0006430684779734851, "loss": 1.227, "step": 58700 }, { "epoch": 0.37, "learning_rate": 0.0006424389691162955, "loss": 1.2228, "step": 58800 }, { "epoch": 0.37, "learning_rate": 0.0006418094602591058, "loss": 1.2219, "step": 58900 }, { "epoch": 0.37, "learning_rate": 0.0006411799514019162, "loss": 1.2243, "step": 59000 }, { "epoch": 0.37, "learning_rate": 0.0006405504425447266, "loss": 1.2228, "step": 59100 }, { "epoch": 0.37, "learning_rate": 0.000639920933687537, "loss": 1.214, "step": 59200 }, { "epoch": 0.37, "learning_rate": 0.0006392914248303474, "loss": 1.224, "step": 59300 }, { "epoch": 0.37, "learning_rate": 0.0006386619159731577, "loss": 1.2232, "step": 59400 }, { "epoch": 0.37, "learning_rate": 0.0006380324071159681, "loss": 1.2311, "step": 59500 }, { "epoch": 0.37, "learning_rate": 0.0006374028982587785, "loss": 1.2368, "step": 59600 }, { "epoch": 0.37, "learning_rate": 0.0006367733894015889, "loss": 1.2137, "step": 59700 }, { "epoch": 0.37, "learning_rate": 0.0006361438805443993, "loss": 1.2279, "step": 59800 }, { "epoch": 0.37, "learning_rate": 0.0006355143716872095, "loss": 1.2236, "step": 59900 }, { "epoch": 0.37, "learning_rate": 0.00063488486283002, "loss": 1.216, "step": 60000 }, { "epoch": 0.37, "learning_rate": 0.0006342553539728304, "loss": 1.242, "step": 60100 }, { "epoch": 0.37, "learning_rate": 0.0006336258451156408, "loss": 1.2124, "step": 60200 }, { "epoch": 0.37, "learning_rate": 0.0006329963362584512, "loss": 1.2166, "step": 60300 }, { "epoch": 0.38, "learning_rate": 0.0006323668274012615, "loss": 1.2089, "step": 60400 }, { "epoch": 0.38, "learning_rate": 0.0006317373185440719, "loss": 1.1967, "step": 60500 }, { "epoch": 0.38, "learning_rate": 0.0006311078096868823, "loss": 1.2151, "step": 60600 }, { "epoch": 0.38, "learning_rate": 0.0006304783008296927, "loss": 1.2359, "step": 60700 }, { "epoch": 0.38, "learning_rate": 0.0006298487919725031, "loss": 1.2107, "step": 60800 }, { "epoch": 0.38, "learning_rate": 0.0006292192831153134, "loss": 1.2142, "step": 60900 }, { "epoch": 0.38, "learning_rate": 0.0006285897742581238, "loss": 1.223, "step": 61000 }, { "epoch": 0.38, "learning_rate": 0.0006279602654009342, "loss": 1.2185, "step": 61100 }, { "epoch": 0.38, "learning_rate": 0.0006273307565437446, "loss": 1.2203, "step": 61200 }, { "epoch": 0.38, "learning_rate": 0.000626701247686555, "loss": 1.2122, "step": 61300 }, { "epoch": 0.38, "learning_rate": 0.0006260717388293653, "loss": 1.2382, "step": 61400 }, { "epoch": 0.38, "learning_rate": 0.0006254422299721757, "loss": 1.2017, "step": 61500 }, { "epoch": 0.38, "learning_rate": 0.0006248127211149861, "loss": 1.2364, "step": 61600 }, { "epoch": 0.38, "learning_rate": 0.0006241832122577965, "loss": 1.2179, "step": 61700 }, { "epoch": 0.38, "learning_rate": 0.0006235537034006069, "loss": 1.2138, "step": 61800 }, { "epoch": 0.38, "learning_rate": 0.0006229241945434173, "loss": 1.2292, "step": 61900 }, { "epoch": 0.39, "learning_rate": 0.0006222946856862276, "loss": 1.2185, "step": 62000 }, { "epoch": 0.39, "learning_rate": 0.000621665176829038, "loss": 1.2146, "step": 62100 }, { "epoch": 0.39, "learning_rate": 0.0006210356679718484, "loss": 1.2449, "step": 62200 }, { "epoch": 0.39, "learning_rate": 0.0006204061591146588, "loss": 1.2316, "step": 62300 }, { "epoch": 0.39, "learning_rate": 0.0006197766502574691, "loss": 1.2121, "step": 62400 }, { "epoch": 0.39, "learning_rate": 0.0006191471414002795, "loss": 1.2084, "step": 62500 }, { "epoch": 0.39, "learning_rate": 0.0006185176325430899, "loss": 1.2175, "step": 62600 }, { "epoch": 0.39, "learning_rate": 0.0006178881236859003, "loss": 1.2189, "step": 62700 }, { "epoch": 0.39, "learning_rate": 0.0006172586148287107, "loss": 1.2202, "step": 62800 }, { "epoch": 0.39, "learning_rate": 0.000616629105971521, "loss": 1.2174, "step": 62900 }, { "epoch": 0.39, "learning_rate": 0.0006159995971143314, "loss": 1.2174, "step": 63000 }, { "epoch": 0.39, "learning_rate": 0.0006153700882571418, "loss": 1.2101, "step": 63100 }, { "epoch": 0.39, "learning_rate": 0.0006147405793999522, "loss": 1.2305, "step": 63200 }, { "epoch": 0.39, "learning_rate": 0.0006141110705427625, "loss": 1.2192, "step": 63300 }, { "epoch": 0.39, "learning_rate": 0.0006134815616855729, "loss": 1.2098, "step": 63400 }, { "epoch": 0.39, "learning_rate": 0.0006128520528283833, "loss": 1.2172, "step": 63500 }, { "epoch": 0.4, "learning_rate": 0.0006122225439711937, "loss": 1.2192, "step": 63600 }, { "epoch": 0.4, "learning_rate": 0.0006115930351140041, "loss": 1.2223, "step": 63700 }, { "epoch": 0.4, "learning_rate": 0.0006109635262568143, "loss": 1.1929, "step": 63800 }, { "epoch": 0.4, "learning_rate": 0.0006103340173996248, "loss": 1.2011, "step": 63900 }, { "epoch": 0.4, "learning_rate": 0.0006097045085424352, "loss": 1.2243, "step": 64000 }, { "epoch": 0.4, "learning_rate": 0.0006090749996852456, "loss": 1.2271, "step": 64100 }, { "epoch": 0.4, "learning_rate": 0.000608445490828056, "loss": 1.2312, "step": 64200 }, { "epoch": 0.4, "learning_rate": 0.0006078159819708663, "loss": 1.2267, "step": 64300 }, { "epoch": 0.4, "learning_rate": 0.0006071864731136767, "loss": 1.218, "step": 64400 }, { "epoch": 0.4, "learning_rate": 0.0006065569642564871, "loss": 1.2317, "step": 64500 }, { "epoch": 0.4, "learning_rate": 0.0006059274553992975, "loss": 1.2166, "step": 64600 }, { "epoch": 0.4, "learning_rate": 0.0006052979465421079, "loss": 1.2096, "step": 64700 }, { "epoch": 0.4, "learning_rate": 0.0006046684376849182, "loss": 1.2183, "step": 64800 }, { "epoch": 0.4, "learning_rate": 0.0006040389288277286, "loss": 1.208, "step": 64900 }, { "epoch": 0.4, "learning_rate": 0.000603409419970539, "loss": 1.217, "step": 65000 } ], "max_steps": 160854, "num_train_epochs": 1, "total_flos": 6.729383411712e+16, "trial_name": null, "trial_params": null }