lora-flan-t5-base / trainer_state.json
chunpingvi's picture
Upload folder using huggingface_hub
7d0e5a9
raw
history blame contribute delete
No virus
79.1 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.4040931527969463,
"global_step": 65000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 5e-05,
"loss": 2.2985,
"step": 100
},
{
"epoch": 0.0,
"learning_rate": 0.0001,
"loss": 2.0231,
"step": 200
},
{
"epoch": 0.0,
"learning_rate": 0.00015,
"loss": 1.877,
"step": 300
},
{
"epoch": 0.0,
"learning_rate": 0.0002,
"loss": 1.8394,
"step": 400
},
{
"epoch": 0.0,
"learning_rate": 0.00025,
"loss": 1.7965,
"step": 500
},
{
"epoch": 0.0,
"learning_rate": 0.0003,
"loss": 1.7915,
"step": 600
},
{
"epoch": 0.0,
"learning_rate": 0.00035,
"loss": 1.7553,
"step": 700
},
{
"epoch": 0.0,
"learning_rate": 0.0004,
"loss": 1.7212,
"step": 800
},
{
"epoch": 0.01,
"learning_rate": 0.00045000000000000004,
"loss": 1.7203,
"step": 900
},
{
"epoch": 0.01,
"learning_rate": 0.0005,
"loss": 1.6887,
"step": 1000
},
{
"epoch": 0.01,
"learning_rate": 0.00055,
"loss": 1.6692,
"step": 1100
},
{
"epoch": 0.01,
"learning_rate": 0.0006,
"loss": 1.6935,
"step": 1200
},
{
"epoch": 0.01,
"learning_rate": 0.0006500000000000001,
"loss": 1.6906,
"step": 1300
},
{
"epoch": 0.01,
"learning_rate": 0.0007,
"loss": 1.6797,
"step": 1400
},
{
"epoch": 0.01,
"learning_rate": 0.00075,
"loss": 1.6568,
"step": 1500
},
{
"epoch": 0.01,
"learning_rate": 0.0008,
"loss": 1.6602,
"step": 1600
},
{
"epoch": 0.01,
"learning_rate": 0.00085,
"loss": 1.6474,
"step": 1700
},
{
"epoch": 0.01,
"learning_rate": 0.0009000000000000001,
"loss": 1.6446,
"step": 1800
},
{
"epoch": 0.01,
"learning_rate": 0.00095,
"loss": 1.6337,
"step": 1900
},
{
"epoch": 0.01,
"learning_rate": 0.001,
"loss": 1.6399,
"step": 2000
},
{
"epoch": 0.01,
"learning_rate": 0.0009993704911428105,
"loss": 1.6011,
"step": 2100
},
{
"epoch": 0.01,
"learning_rate": 0.0009987409822856208,
"loss": 1.6253,
"step": 2200
},
{
"epoch": 0.01,
"learning_rate": 0.000998111473428431,
"loss": 1.6252,
"step": 2300
},
{
"epoch": 0.01,
"learning_rate": 0.0009974819645712416,
"loss": 1.603,
"step": 2400
},
{
"epoch": 0.02,
"learning_rate": 0.000996852455714052,
"loss": 1.5798,
"step": 2500
},
{
"epoch": 0.02,
"learning_rate": 0.0009962229468568624,
"loss": 1.5986,
"step": 2600
},
{
"epoch": 0.02,
"learning_rate": 0.0009955934379996727,
"loss": 1.6352,
"step": 2700
},
{
"epoch": 0.02,
"learning_rate": 0.000994963929142483,
"loss": 1.5591,
"step": 2800
},
{
"epoch": 0.02,
"learning_rate": 0.0009943344202852935,
"loss": 1.5352,
"step": 2900
},
{
"epoch": 0.02,
"learning_rate": 0.0009937049114281038,
"loss": 1.5257,
"step": 3000
},
{
"epoch": 0.02,
"learning_rate": 0.0009930754025709143,
"loss": 1.5289,
"step": 3100
},
{
"epoch": 0.02,
"learning_rate": 0.0009924458937137246,
"loss": 1.5246,
"step": 3200
},
{
"epoch": 0.02,
"learning_rate": 0.0009918163848565348,
"loss": 1.5221,
"step": 3300
},
{
"epoch": 0.02,
"learning_rate": 0.0009911868759993453,
"loss": 1.5214,
"step": 3400
},
{
"epoch": 0.02,
"learning_rate": 0.0009905573671421556,
"loss": 1.4942,
"step": 3500
},
{
"epoch": 0.02,
"learning_rate": 0.0009899278582849661,
"loss": 1.4914,
"step": 3600
},
{
"epoch": 0.02,
"learning_rate": 0.0009892983494277764,
"loss": 1.459,
"step": 3700
},
{
"epoch": 0.02,
"learning_rate": 0.0009886688405705867,
"loss": 1.4696,
"step": 3800
},
{
"epoch": 0.02,
"learning_rate": 0.0009880393317133972,
"loss": 1.4493,
"step": 3900
},
{
"epoch": 0.02,
"learning_rate": 0.0009874098228562077,
"loss": 1.4612,
"step": 4000
},
{
"epoch": 0.03,
"learning_rate": 0.000986780313999018,
"loss": 1.4506,
"step": 4100
},
{
"epoch": 0.03,
"learning_rate": 0.0009861508051418283,
"loss": 1.4585,
"step": 4200
},
{
"epoch": 0.03,
"learning_rate": 0.0009855212962846388,
"loss": 1.4414,
"step": 4300
},
{
"epoch": 0.03,
"learning_rate": 0.000984891787427449,
"loss": 1.4436,
"step": 4400
},
{
"epoch": 0.03,
"learning_rate": 0.0009842622785702596,
"loss": 1.4547,
"step": 4500
},
{
"epoch": 0.03,
"learning_rate": 0.0009836327697130699,
"loss": 1.4049,
"step": 4600
},
{
"epoch": 0.03,
"learning_rate": 0.0009830032608558802,
"loss": 1.4293,
"step": 4700
},
{
"epoch": 0.03,
"learning_rate": 0.0009823737519986907,
"loss": 1.4558,
"step": 4800
},
{
"epoch": 0.03,
"learning_rate": 0.000981744243141501,
"loss": 1.4235,
"step": 4900
},
{
"epoch": 0.03,
"learning_rate": 0.0009811147342843115,
"loss": 1.4318,
"step": 5000
},
{
"epoch": 0.03,
"learning_rate": 0.0009804852254271218,
"loss": 1.4459,
"step": 5100
},
{
"epoch": 0.03,
"learning_rate": 0.000979855716569932,
"loss": 1.4174,
"step": 5200
},
{
"epoch": 0.03,
"learning_rate": 0.0009792262077127425,
"loss": 1.3947,
"step": 5300
},
{
"epoch": 0.03,
"learning_rate": 0.0009785966988555528,
"loss": 1.4177,
"step": 5400
},
{
"epoch": 0.03,
"learning_rate": 0.0009779671899983633,
"loss": 1.4225,
"step": 5500
},
{
"epoch": 0.03,
"learning_rate": 0.0009773376811411736,
"loss": 1.4049,
"step": 5600
},
{
"epoch": 0.04,
"learning_rate": 0.000976708172283984,
"loss": 1.3942,
"step": 5700
},
{
"epoch": 0.04,
"learning_rate": 0.0009760786634267944,
"loss": 1.3987,
"step": 5800
},
{
"epoch": 0.04,
"learning_rate": 0.0009754491545696048,
"loss": 1.3907,
"step": 5900
},
{
"epoch": 0.04,
"learning_rate": 0.0009748196457124152,
"loss": 1.3817,
"step": 6000
},
{
"epoch": 0.04,
"learning_rate": 0.0009741901368552256,
"loss": 1.3905,
"step": 6100
},
{
"epoch": 0.04,
"learning_rate": 0.0009735606279980359,
"loss": 1.4099,
"step": 6200
},
{
"epoch": 0.04,
"learning_rate": 0.0009729311191408463,
"loss": 1.3696,
"step": 6300
},
{
"epoch": 0.04,
"learning_rate": 0.0009723016102836567,
"loss": 1.3858,
"step": 6400
},
{
"epoch": 0.04,
"learning_rate": 0.0009716721014264671,
"loss": 1.3832,
"step": 6500
},
{
"epoch": 0.04,
"learning_rate": 0.0009710425925692775,
"loss": 1.3781,
"step": 6600
},
{
"epoch": 0.04,
"learning_rate": 0.0009704130837120878,
"loss": 1.3789,
"step": 6700
},
{
"epoch": 0.04,
"learning_rate": 0.0009697835748548982,
"loss": 1.3709,
"step": 6800
},
{
"epoch": 0.04,
"learning_rate": 0.0009691540659977086,
"loss": 1.3751,
"step": 6900
},
{
"epoch": 0.04,
"learning_rate": 0.0009685245571405191,
"loss": 1.3728,
"step": 7000
},
{
"epoch": 0.04,
"learning_rate": 0.0009678950482833295,
"loss": 1.3693,
"step": 7100
},
{
"epoch": 0.04,
"learning_rate": 0.0009672655394261397,
"loss": 1.3585,
"step": 7200
},
{
"epoch": 0.05,
"learning_rate": 0.0009666360305689501,
"loss": 1.3613,
"step": 7300
},
{
"epoch": 0.05,
"learning_rate": 0.0009660065217117605,
"loss": 1.3782,
"step": 7400
},
{
"epoch": 0.05,
"learning_rate": 0.0009653770128545709,
"loss": 1.3551,
"step": 7500
},
{
"epoch": 0.05,
"learning_rate": 0.0009647475039973813,
"loss": 1.3473,
"step": 7600
},
{
"epoch": 0.05,
"learning_rate": 0.0009641179951401916,
"loss": 1.338,
"step": 7700
},
{
"epoch": 0.05,
"learning_rate": 0.000963488486283002,
"loss": 1.3795,
"step": 7800
},
{
"epoch": 0.05,
"learning_rate": 0.0009628589774258124,
"loss": 1.3429,
"step": 7900
},
{
"epoch": 0.05,
"learning_rate": 0.0009622294685686228,
"loss": 1.3476,
"step": 8000
},
{
"epoch": 0.05,
"learning_rate": 0.0009615999597114332,
"loss": 1.3826,
"step": 8100
},
{
"epoch": 0.05,
"learning_rate": 0.0009609704508542435,
"loss": 1.3683,
"step": 8200
},
{
"epoch": 0.05,
"learning_rate": 0.0009603409419970539,
"loss": 1.3378,
"step": 8300
},
{
"epoch": 0.05,
"learning_rate": 0.0009597114331398643,
"loss": 1.3613,
"step": 8400
},
{
"epoch": 0.05,
"learning_rate": 0.0009590819242826747,
"loss": 1.366,
"step": 8500
},
{
"epoch": 0.05,
"learning_rate": 0.000958452415425485,
"loss": 1.3375,
"step": 8600
},
{
"epoch": 0.05,
"learning_rate": 0.0009578229065682954,
"loss": 1.3516,
"step": 8700
},
{
"epoch": 0.05,
"learning_rate": 0.0009571933977111058,
"loss": 1.3671,
"step": 8800
},
{
"epoch": 0.06,
"learning_rate": 0.0009565638888539163,
"loss": 1.3305,
"step": 8900
},
{
"epoch": 0.06,
"learning_rate": 0.0009559343799967267,
"loss": 1.3439,
"step": 9000
},
{
"epoch": 0.06,
"learning_rate": 0.0009553048711395369,
"loss": 1.3475,
"step": 9100
},
{
"epoch": 0.06,
"learning_rate": 0.0009546753622823473,
"loss": 1.3374,
"step": 9200
},
{
"epoch": 0.06,
"learning_rate": 0.0009540458534251577,
"loss": 1.3304,
"step": 9300
},
{
"epoch": 0.06,
"learning_rate": 0.0009534163445679681,
"loss": 1.3704,
"step": 9400
},
{
"epoch": 0.06,
"learning_rate": 0.0009527868357107785,
"loss": 1.3521,
"step": 9500
},
{
"epoch": 0.06,
"learning_rate": 0.0009521573268535888,
"loss": 1.3508,
"step": 9600
},
{
"epoch": 0.06,
"learning_rate": 0.0009515278179963992,
"loss": 1.3308,
"step": 9700
},
{
"epoch": 0.06,
"learning_rate": 0.0009508983091392096,
"loss": 1.3347,
"step": 9800
},
{
"epoch": 0.06,
"learning_rate": 0.00095026880028202,
"loss": 1.3334,
"step": 9900
},
{
"epoch": 0.06,
"learning_rate": 0.0009496392914248304,
"loss": 1.3593,
"step": 10000
},
{
"epoch": 0.06,
"learning_rate": 0.0009490097825676407,
"loss": 1.3447,
"step": 10100
},
{
"epoch": 0.06,
"learning_rate": 0.0009483802737104511,
"loss": 1.3335,
"step": 10200
},
{
"epoch": 0.06,
"learning_rate": 0.0009477507648532615,
"loss": 1.3131,
"step": 10300
},
{
"epoch": 0.06,
"learning_rate": 0.0009471212559960719,
"loss": 1.3358,
"step": 10400
},
{
"epoch": 0.07,
"learning_rate": 0.0009464917471388823,
"loss": 1.3449,
"step": 10500
},
{
"epoch": 0.07,
"learning_rate": 0.0009458622382816926,
"loss": 1.3342,
"step": 10600
},
{
"epoch": 0.07,
"learning_rate": 0.000945232729424503,
"loss": 1.3349,
"step": 10700
},
{
"epoch": 0.07,
"learning_rate": 0.0009446032205673134,
"loss": 1.3444,
"step": 10800
},
{
"epoch": 0.07,
"learning_rate": 0.0009439737117101239,
"loss": 1.3218,
"step": 10900
},
{
"epoch": 0.07,
"learning_rate": 0.0009433442028529343,
"loss": 1.3339,
"step": 11000
},
{
"epoch": 0.07,
"learning_rate": 0.0009427146939957445,
"loss": 1.3336,
"step": 11100
},
{
"epoch": 0.07,
"learning_rate": 0.0009420851851385549,
"loss": 1.339,
"step": 11200
},
{
"epoch": 0.07,
"learning_rate": 0.0009414556762813653,
"loss": 1.3128,
"step": 11300
},
{
"epoch": 0.07,
"learning_rate": 0.0009408261674241757,
"loss": 1.3503,
"step": 11400
},
{
"epoch": 0.07,
"learning_rate": 0.0009401966585669861,
"loss": 1.3324,
"step": 11500
},
{
"epoch": 0.07,
"learning_rate": 0.0009395671497097964,
"loss": 1.3368,
"step": 11600
},
{
"epoch": 0.07,
"learning_rate": 0.0009389376408526068,
"loss": 1.3123,
"step": 11700
},
{
"epoch": 0.07,
"learning_rate": 0.0009383081319954172,
"loss": 1.3316,
"step": 11800
},
{
"epoch": 0.07,
"learning_rate": 0.0009376786231382276,
"loss": 1.3282,
"step": 11900
},
{
"epoch": 0.07,
"learning_rate": 0.000937049114281038,
"loss": 1.3181,
"step": 12000
},
{
"epoch": 0.08,
"learning_rate": 0.0009364196054238483,
"loss": 1.3278,
"step": 12100
},
{
"epoch": 0.08,
"learning_rate": 0.0009357900965666587,
"loss": 1.3077,
"step": 12200
},
{
"epoch": 0.08,
"learning_rate": 0.0009351605877094691,
"loss": 1.3242,
"step": 12300
},
{
"epoch": 0.08,
"learning_rate": 0.0009345310788522795,
"loss": 1.3162,
"step": 12400
},
{
"epoch": 0.08,
"learning_rate": 0.0009339015699950899,
"loss": 1.3182,
"step": 12500
},
{
"epoch": 0.08,
"learning_rate": 0.0009332720611379002,
"loss": 1.322,
"step": 12600
},
{
"epoch": 0.08,
"learning_rate": 0.0009326425522807105,
"loss": 1.3309,
"step": 12700
},
{
"epoch": 0.08,
"learning_rate": 0.0009320130434235211,
"loss": 1.332,
"step": 12800
},
{
"epoch": 0.08,
"learning_rate": 0.0009313835345663315,
"loss": 1.3111,
"step": 12900
},
{
"epoch": 0.08,
"learning_rate": 0.0009307540257091417,
"loss": 1.3084,
"step": 13000
},
{
"epoch": 0.08,
"learning_rate": 0.0009301245168519521,
"loss": 1.3183,
"step": 13100
},
{
"epoch": 0.08,
"learning_rate": 0.0009294950079947625,
"loss": 1.3336,
"step": 13200
},
{
"epoch": 0.08,
"learning_rate": 0.0009288654991375729,
"loss": 1.3221,
"step": 13300
},
{
"epoch": 0.08,
"learning_rate": 0.0009282359902803833,
"loss": 1.3266,
"step": 13400
},
{
"epoch": 0.08,
"learning_rate": 0.0009276064814231936,
"loss": 1.3219,
"step": 13500
},
{
"epoch": 0.08,
"learning_rate": 0.000926976972566004,
"loss": 1.3055,
"step": 13600
},
{
"epoch": 0.09,
"learning_rate": 0.0009263474637088144,
"loss": 1.3091,
"step": 13700
},
{
"epoch": 0.09,
"learning_rate": 0.0009257179548516248,
"loss": 1.3057,
"step": 13800
},
{
"epoch": 0.09,
"learning_rate": 0.0009250884459944352,
"loss": 1.3128,
"step": 13900
},
{
"epoch": 0.09,
"learning_rate": 0.0009244589371372455,
"loss": 1.2988,
"step": 14000
},
{
"epoch": 0.09,
"learning_rate": 0.0009238294282800559,
"loss": 1.3197,
"step": 14100
},
{
"epoch": 0.09,
"learning_rate": 0.0009231999194228663,
"loss": 1.3043,
"step": 14200
},
{
"epoch": 0.09,
"learning_rate": 0.0009225704105656767,
"loss": 1.3013,
"step": 14300
},
{
"epoch": 0.09,
"learning_rate": 0.0009219409017084871,
"loss": 1.2999,
"step": 14400
},
{
"epoch": 0.09,
"learning_rate": 0.0009213113928512974,
"loss": 1.3059,
"step": 14500
},
{
"epoch": 0.09,
"learning_rate": 0.0009206818839941077,
"loss": 1.3004,
"step": 14600
},
{
"epoch": 0.09,
"learning_rate": 0.0009200523751369181,
"loss": 1.3012,
"step": 14700
},
{
"epoch": 0.09,
"learning_rate": 0.0009194228662797286,
"loss": 1.2966,
"step": 14800
},
{
"epoch": 0.09,
"learning_rate": 0.000918793357422539,
"loss": 1.3052,
"step": 14900
},
{
"epoch": 0.09,
"learning_rate": 0.0009181638485653493,
"loss": 1.3077,
"step": 15000
},
{
"epoch": 0.09,
"learning_rate": 0.0009175343397081597,
"loss": 1.308,
"step": 15100
},
{
"epoch": 0.09,
"learning_rate": 0.0009169048308509701,
"loss": 1.3002,
"step": 15200
},
{
"epoch": 0.1,
"learning_rate": 0.0009162753219937805,
"loss": 1.3095,
"step": 15300
},
{
"epoch": 0.1,
"learning_rate": 0.0009156458131365909,
"loss": 1.3154,
"step": 15400
},
{
"epoch": 0.1,
"learning_rate": 0.0009150163042794012,
"loss": 1.2998,
"step": 15500
},
{
"epoch": 0.1,
"learning_rate": 0.0009143867954222116,
"loss": 1.3179,
"step": 15600
},
{
"epoch": 0.1,
"learning_rate": 0.000913757286565022,
"loss": 1.2986,
"step": 15700
},
{
"epoch": 0.1,
"learning_rate": 0.0009131277777078324,
"loss": 1.3143,
"step": 15800
},
{
"epoch": 0.1,
"learning_rate": 0.0009124982688506428,
"loss": 1.2853,
"step": 15900
},
{
"epoch": 0.1,
"learning_rate": 0.0009118687599934531,
"loss": 1.2879,
"step": 16000
},
{
"epoch": 0.1,
"learning_rate": 0.0009112392511362635,
"loss": 1.2931,
"step": 16100
},
{
"epoch": 0.1,
"learning_rate": 0.0009106097422790739,
"loss": 1.2906,
"step": 16200
},
{
"epoch": 0.1,
"learning_rate": 0.0009099802334218843,
"loss": 1.3251,
"step": 16300
},
{
"epoch": 0.1,
"learning_rate": 0.0009093507245646947,
"loss": 1.3187,
"step": 16400
},
{
"epoch": 0.1,
"learning_rate": 0.000908721215707505,
"loss": 1.2984,
"step": 16500
},
{
"epoch": 0.1,
"learning_rate": 0.0009080917068503153,
"loss": 1.3023,
"step": 16600
},
{
"epoch": 0.1,
"learning_rate": 0.0009074621979931257,
"loss": 1.3056,
"step": 16700
},
{
"epoch": 0.1,
"learning_rate": 0.0009068326891359362,
"loss": 1.3159,
"step": 16800
},
{
"epoch": 0.11,
"learning_rate": 0.0009062031802787465,
"loss": 1.3058,
"step": 16900
},
{
"epoch": 0.11,
"learning_rate": 0.0009055736714215569,
"loss": 1.3023,
"step": 17000
},
{
"epoch": 0.11,
"learning_rate": 0.0009049441625643673,
"loss": 1.3214,
"step": 17100
},
{
"epoch": 0.11,
"learning_rate": 0.0009043146537071777,
"loss": 1.2811,
"step": 17200
},
{
"epoch": 0.11,
"learning_rate": 0.0009036851448499881,
"loss": 1.295,
"step": 17300
},
{
"epoch": 0.11,
"learning_rate": 0.0009030556359927984,
"loss": 1.3167,
"step": 17400
},
{
"epoch": 0.11,
"learning_rate": 0.0009024261271356088,
"loss": 1.3003,
"step": 17500
},
{
"epoch": 0.11,
"learning_rate": 0.0009017966182784192,
"loss": 1.3076,
"step": 17600
},
{
"epoch": 0.11,
"learning_rate": 0.0009011671094212296,
"loss": 1.2644,
"step": 17700
},
{
"epoch": 0.11,
"learning_rate": 0.00090053760056404,
"loss": 1.3073,
"step": 17800
},
{
"epoch": 0.11,
"learning_rate": 0.0008999080917068503,
"loss": 1.2843,
"step": 17900
},
{
"epoch": 0.11,
"learning_rate": 0.0008992785828496607,
"loss": 1.2831,
"step": 18000
},
{
"epoch": 0.11,
"learning_rate": 0.0008986490739924711,
"loss": 1.2854,
"step": 18100
},
{
"epoch": 0.11,
"learning_rate": 0.0008980195651352815,
"loss": 1.3237,
"step": 18200
},
{
"epoch": 0.11,
"learning_rate": 0.0008973900562780919,
"loss": 1.2951,
"step": 18300
},
{
"epoch": 0.11,
"learning_rate": 0.0008967605474209021,
"loss": 1.297,
"step": 18400
},
{
"epoch": 0.12,
"learning_rate": 0.0008961310385637125,
"loss": 1.2937,
"step": 18500
},
{
"epoch": 0.12,
"learning_rate": 0.0008955015297065229,
"loss": 1.2877,
"step": 18600
},
{
"epoch": 0.12,
"learning_rate": 0.0008948720208493334,
"loss": 1.3139,
"step": 18700
},
{
"epoch": 0.12,
"learning_rate": 0.0008942425119921438,
"loss": 1.2998,
"step": 18800
},
{
"epoch": 0.12,
"learning_rate": 0.0008936130031349541,
"loss": 1.2798,
"step": 18900
},
{
"epoch": 0.12,
"learning_rate": 0.0008929834942777645,
"loss": 1.2865,
"step": 19000
},
{
"epoch": 0.12,
"learning_rate": 0.0008923539854205749,
"loss": 1.2954,
"step": 19100
},
{
"epoch": 0.12,
"learning_rate": 0.0008917244765633853,
"loss": 1.2915,
"step": 19200
},
{
"epoch": 0.12,
"learning_rate": 0.0008910949677061957,
"loss": 1.2948,
"step": 19300
},
{
"epoch": 0.12,
"learning_rate": 0.000890465458849006,
"loss": 1.2829,
"step": 19400
},
{
"epoch": 0.12,
"learning_rate": 0.0008898359499918164,
"loss": 1.2898,
"step": 19500
},
{
"epoch": 0.12,
"learning_rate": 0.0008892064411346268,
"loss": 1.2964,
"step": 19600
},
{
"epoch": 0.12,
"learning_rate": 0.0008885769322774372,
"loss": 1.2765,
"step": 19700
},
{
"epoch": 0.12,
"learning_rate": 0.0008879474234202476,
"loss": 1.3033,
"step": 19800
},
{
"epoch": 0.12,
"learning_rate": 0.0008873179145630579,
"loss": 1.2904,
"step": 19900
},
{
"epoch": 0.12,
"learning_rate": 0.0008866884057058683,
"loss": 1.2792,
"step": 20000
},
{
"epoch": 0.12,
"learning_rate": 0.0008860588968486787,
"loss": 1.2662,
"step": 20100
},
{
"epoch": 0.13,
"learning_rate": 0.0008854293879914891,
"loss": 1.2728,
"step": 20200
},
{
"epoch": 0.13,
"learning_rate": 0.0008847998791342995,
"loss": 1.2877,
"step": 20300
},
{
"epoch": 0.13,
"learning_rate": 0.0008841703702771097,
"loss": 1.2896,
"step": 20400
},
{
"epoch": 0.13,
"learning_rate": 0.0008835408614199201,
"loss": 1.3013,
"step": 20500
},
{
"epoch": 0.13,
"learning_rate": 0.0008829113525627305,
"loss": 1.3009,
"step": 20600
},
{
"epoch": 0.13,
"learning_rate": 0.000882281843705541,
"loss": 1.2754,
"step": 20700
},
{
"epoch": 0.13,
"learning_rate": 0.0008816523348483514,
"loss": 1.2888,
"step": 20800
},
{
"epoch": 0.13,
"learning_rate": 0.0008810228259911617,
"loss": 1.2733,
"step": 20900
},
{
"epoch": 0.13,
"learning_rate": 0.0008803933171339721,
"loss": 1.2743,
"step": 21000
},
{
"epoch": 0.13,
"learning_rate": 0.0008797638082767825,
"loss": 1.2764,
"step": 21100
},
{
"epoch": 0.13,
"learning_rate": 0.0008791342994195929,
"loss": 1.2808,
"step": 21200
},
{
"epoch": 0.13,
"learning_rate": 0.0008785047905624032,
"loss": 1.2991,
"step": 21300
},
{
"epoch": 0.13,
"learning_rate": 0.0008778752817052136,
"loss": 1.3172,
"step": 21400
},
{
"epoch": 0.13,
"learning_rate": 0.000877245772848024,
"loss": 1.2788,
"step": 21500
},
{
"epoch": 0.13,
"learning_rate": 0.0008766162639908344,
"loss": 1.2986,
"step": 21600
},
{
"epoch": 0.13,
"learning_rate": 0.0008759867551336448,
"loss": 1.2815,
"step": 21700
},
{
"epoch": 0.14,
"learning_rate": 0.0008753572462764551,
"loss": 1.2779,
"step": 21800
},
{
"epoch": 0.14,
"learning_rate": 0.0008747277374192655,
"loss": 1.2972,
"step": 21900
},
{
"epoch": 0.14,
"learning_rate": 0.0008740982285620759,
"loss": 1.2974,
"step": 22000
},
{
"epoch": 0.14,
"learning_rate": 0.0008734687197048863,
"loss": 1.2851,
"step": 22100
},
{
"epoch": 0.14,
"learning_rate": 0.0008728392108476967,
"loss": 1.2687,
"step": 22200
},
{
"epoch": 0.14,
"learning_rate": 0.0008722097019905069,
"loss": 1.2728,
"step": 22300
},
{
"epoch": 0.14,
"learning_rate": 0.0008715801931333173,
"loss": 1.2831,
"step": 22400
},
{
"epoch": 0.14,
"learning_rate": 0.0008709506842761277,
"loss": 1.2744,
"step": 22500
},
{
"epoch": 0.14,
"learning_rate": 0.0008703211754189382,
"loss": 1.2873,
"step": 22600
},
{
"epoch": 0.14,
"learning_rate": 0.0008696916665617486,
"loss": 1.2987,
"step": 22700
},
{
"epoch": 0.14,
"learning_rate": 0.0008690621577045589,
"loss": 1.2887,
"step": 22800
},
{
"epoch": 0.14,
"learning_rate": 0.0008684326488473693,
"loss": 1.2787,
"step": 22900
},
{
"epoch": 0.14,
"learning_rate": 0.0008678031399901797,
"loss": 1.2715,
"step": 23000
},
{
"epoch": 0.14,
"learning_rate": 0.0008671736311329901,
"loss": 1.2721,
"step": 23100
},
{
"epoch": 0.14,
"learning_rate": 0.0008665441222758005,
"loss": 1.3105,
"step": 23200
},
{
"epoch": 0.14,
"learning_rate": 0.0008659146134186108,
"loss": 1.2679,
"step": 23300
},
{
"epoch": 0.15,
"learning_rate": 0.0008652851045614212,
"loss": 1.2852,
"step": 23400
},
{
"epoch": 0.15,
"learning_rate": 0.0008646555957042316,
"loss": 1.302,
"step": 23500
},
{
"epoch": 0.15,
"learning_rate": 0.000864026086847042,
"loss": 1.2765,
"step": 23600
},
{
"epoch": 0.15,
"learning_rate": 0.0008633965779898524,
"loss": 1.2782,
"step": 23700
},
{
"epoch": 0.15,
"learning_rate": 0.0008627670691326627,
"loss": 1.2708,
"step": 23800
},
{
"epoch": 0.15,
"learning_rate": 0.0008621375602754731,
"loss": 1.2676,
"step": 23900
},
{
"epoch": 0.15,
"learning_rate": 0.0008615080514182835,
"loss": 1.2763,
"step": 24000
},
{
"epoch": 0.15,
"learning_rate": 0.0008608785425610939,
"loss": 1.2728,
"step": 24100
},
{
"epoch": 0.15,
"learning_rate": 0.0008602490337039042,
"loss": 1.2731,
"step": 24200
},
{
"epoch": 0.15,
"learning_rate": 0.0008596195248467145,
"loss": 1.2719,
"step": 24300
},
{
"epoch": 0.15,
"learning_rate": 0.0008589900159895249,
"loss": 1.2658,
"step": 24400
},
{
"epoch": 0.15,
"learning_rate": 0.0008583605071323353,
"loss": 1.2811,
"step": 24500
},
{
"epoch": 0.15,
"learning_rate": 0.0008577309982751458,
"loss": 1.2736,
"step": 24600
},
{
"epoch": 0.15,
"learning_rate": 0.0008571014894179562,
"loss": 1.2812,
"step": 24700
},
{
"epoch": 0.15,
"learning_rate": 0.0008564719805607665,
"loss": 1.2397,
"step": 24800
},
{
"epoch": 0.15,
"learning_rate": 0.0008558424717035769,
"loss": 1.2607,
"step": 24900
},
{
"epoch": 0.16,
"learning_rate": 0.0008552129628463873,
"loss": 1.2669,
"step": 25000
},
{
"epoch": 0.16,
"learning_rate": 0.0008545834539891977,
"loss": 1.2871,
"step": 25100
},
{
"epoch": 0.16,
"learning_rate": 0.000853953945132008,
"loss": 1.2696,
"step": 25200
},
{
"epoch": 0.16,
"learning_rate": 0.0008533244362748184,
"loss": 1.2778,
"step": 25300
},
{
"epoch": 0.16,
"learning_rate": 0.0008526949274176288,
"loss": 1.2824,
"step": 25400
},
{
"epoch": 0.16,
"learning_rate": 0.0008520654185604392,
"loss": 1.2723,
"step": 25500
},
{
"epoch": 0.16,
"learning_rate": 0.0008514359097032496,
"loss": 1.2603,
"step": 25600
},
{
"epoch": 0.16,
"learning_rate": 0.0008508064008460599,
"loss": 1.2776,
"step": 25700
},
{
"epoch": 0.16,
"learning_rate": 0.0008501768919888703,
"loss": 1.2779,
"step": 25800
},
{
"epoch": 0.16,
"learning_rate": 0.0008495473831316807,
"loss": 1.2758,
"step": 25900
},
{
"epoch": 0.16,
"learning_rate": 0.000848917874274491,
"loss": 1.2858,
"step": 26000
},
{
"epoch": 0.16,
"learning_rate": 0.0008482883654173014,
"loss": 1.2738,
"step": 26100
},
{
"epoch": 0.16,
"learning_rate": 0.0008476588565601117,
"loss": 1.2877,
"step": 26200
},
{
"epoch": 0.16,
"learning_rate": 0.0008470293477029221,
"loss": 1.2843,
"step": 26300
},
{
"epoch": 0.16,
"learning_rate": 0.0008463998388457325,
"loss": 1.2805,
"step": 26400
},
{
"epoch": 0.16,
"learning_rate": 0.0008457703299885429,
"loss": 1.2742,
"step": 26500
},
{
"epoch": 0.17,
"learning_rate": 0.0008451408211313534,
"loss": 1.2627,
"step": 26600
},
{
"epoch": 0.17,
"learning_rate": 0.0008445113122741637,
"loss": 1.2718,
"step": 26700
},
{
"epoch": 0.17,
"learning_rate": 0.0008438818034169741,
"loss": 1.2755,
"step": 26800
},
{
"epoch": 0.17,
"learning_rate": 0.0008432522945597845,
"loss": 1.2828,
"step": 26900
},
{
"epoch": 0.17,
"learning_rate": 0.0008426227857025949,
"loss": 1.2591,
"step": 27000
},
{
"epoch": 0.17,
"learning_rate": 0.0008419932768454053,
"loss": 1.2731,
"step": 27100
},
{
"epoch": 0.17,
"learning_rate": 0.0008413637679882156,
"loss": 1.2594,
"step": 27200
},
{
"epoch": 0.17,
"learning_rate": 0.000840734259131026,
"loss": 1.2709,
"step": 27300
},
{
"epoch": 0.17,
"learning_rate": 0.0008401047502738364,
"loss": 1.2677,
"step": 27400
},
{
"epoch": 0.17,
"learning_rate": 0.0008394752414166468,
"loss": 1.2509,
"step": 27500
},
{
"epoch": 0.17,
"learning_rate": 0.0008388457325594572,
"loss": 1.2835,
"step": 27600
},
{
"epoch": 0.17,
"learning_rate": 0.0008382162237022675,
"loss": 1.266,
"step": 27700
},
{
"epoch": 0.17,
"learning_rate": 0.0008375867148450779,
"loss": 1.2609,
"step": 27800
},
{
"epoch": 0.17,
"learning_rate": 0.0008369572059878882,
"loss": 1.2824,
"step": 27900
},
{
"epoch": 0.17,
"learning_rate": 0.0008363276971306986,
"loss": 1.2568,
"step": 28000
},
{
"epoch": 0.17,
"learning_rate": 0.000835698188273509,
"loss": 1.2771,
"step": 28100
},
{
"epoch": 0.18,
"learning_rate": 0.0008350686794163193,
"loss": 1.2487,
"step": 28200
},
{
"epoch": 0.18,
"learning_rate": 0.0008344391705591297,
"loss": 1.259,
"step": 28300
},
{
"epoch": 0.18,
"learning_rate": 0.0008338096617019401,
"loss": 1.277,
"step": 28400
},
{
"epoch": 0.18,
"learning_rate": 0.0008331801528447506,
"loss": 1.2671,
"step": 28500
},
{
"epoch": 0.18,
"learning_rate": 0.000832550643987561,
"loss": 1.266,
"step": 28600
},
{
"epoch": 0.18,
"learning_rate": 0.0008319211351303713,
"loss": 1.2604,
"step": 28700
},
{
"epoch": 0.18,
"learning_rate": 0.0008312916262731817,
"loss": 1.2685,
"step": 28800
},
{
"epoch": 0.18,
"learning_rate": 0.0008306621174159921,
"loss": 1.27,
"step": 28900
},
{
"epoch": 0.18,
"learning_rate": 0.0008300326085588025,
"loss": 1.2575,
"step": 29000
},
{
"epoch": 0.18,
"learning_rate": 0.0008294030997016129,
"loss": 1.29,
"step": 29100
},
{
"epoch": 0.18,
"learning_rate": 0.0008287735908444232,
"loss": 1.2682,
"step": 29200
},
{
"epoch": 0.18,
"learning_rate": 0.0008281440819872336,
"loss": 1.2658,
"step": 29300
},
{
"epoch": 0.18,
"learning_rate": 0.000827514573130044,
"loss": 1.2612,
"step": 29400
},
{
"epoch": 0.18,
"learning_rate": 0.0008268850642728544,
"loss": 1.2509,
"step": 29500
},
{
"epoch": 0.18,
"learning_rate": 0.0008262555554156647,
"loss": 1.2814,
"step": 29600
},
{
"epoch": 0.18,
"learning_rate": 0.000825626046558475,
"loss": 1.2538,
"step": 29700
},
{
"epoch": 0.19,
"learning_rate": 0.0008249965377012854,
"loss": 1.2484,
"step": 29800
},
{
"epoch": 0.19,
"learning_rate": 0.0008243670288440958,
"loss": 1.2516,
"step": 29900
},
{
"epoch": 0.19,
"learning_rate": 0.0008237375199869062,
"loss": 1.2584,
"step": 30000
},
{
"epoch": 0.19,
"learning_rate": 0.0008231080111297165,
"loss": 1.2561,
"step": 30100
},
{
"epoch": 0.19,
"learning_rate": 0.0008224785022725269,
"loss": 1.2665,
"step": 30200
},
{
"epoch": 0.19,
"learning_rate": 0.0008218489934153373,
"loss": 1.2721,
"step": 30300
},
{
"epoch": 0.19,
"learning_rate": 0.0008212194845581477,
"loss": 1.253,
"step": 30400
},
{
"epoch": 0.19,
"learning_rate": 0.0008205899757009582,
"loss": 1.2641,
"step": 30500
},
{
"epoch": 0.19,
"learning_rate": 0.0008199604668437685,
"loss": 1.2631,
"step": 30600
},
{
"epoch": 0.19,
"learning_rate": 0.0008193309579865789,
"loss": 1.245,
"step": 30700
},
{
"epoch": 0.19,
"learning_rate": 0.0008187014491293893,
"loss": 1.2572,
"step": 30800
},
{
"epoch": 0.19,
"learning_rate": 0.0008180719402721997,
"loss": 1.2853,
"step": 30900
},
{
"epoch": 0.19,
"learning_rate": 0.0008174424314150101,
"loss": 1.2601,
"step": 31000
},
{
"epoch": 0.19,
"learning_rate": 0.0008168129225578204,
"loss": 1.2586,
"step": 31100
},
{
"epoch": 0.19,
"learning_rate": 0.0008161834137006308,
"loss": 1.2695,
"step": 31200
},
{
"epoch": 0.19,
"learning_rate": 0.0008155539048434412,
"loss": 1.2587,
"step": 31300
},
{
"epoch": 0.2,
"learning_rate": 0.0008149243959862516,
"loss": 1.2489,
"step": 31400
},
{
"epoch": 0.2,
"learning_rate": 0.000814294887129062,
"loss": 1.2709,
"step": 31500
},
{
"epoch": 0.2,
"learning_rate": 0.0008136653782718723,
"loss": 1.2513,
"step": 31600
},
{
"epoch": 0.2,
"learning_rate": 0.0008130358694146826,
"loss": 1.2458,
"step": 31700
},
{
"epoch": 0.2,
"learning_rate": 0.000812406360557493,
"loss": 1.2802,
"step": 31800
},
{
"epoch": 0.2,
"learning_rate": 0.0008117768517003034,
"loss": 1.2685,
"step": 31900
},
{
"epoch": 0.2,
"learning_rate": 0.0008111473428431138,
"loss": 1.2485,
"step": 32000
},
{
"epoch": 0.2,
"learning_rate": 0.0008105178339859241,
"loss": 1.2749,
"step": 32100
},
{
"epoch": 0.2,
"learning_rate": 0.0008098883251287345,
"loss": 1.2382,
"step": 32200
},
{
"epoch": 0.2,
"learning_rate": 0.0008092588162715449,
"loss": 1.2547,
"step": 32300
},
{
"epoch": 0.2,
"learning_rate": 0.0008086293074143553,
"loss": 1.2621,
"step": 32400
},
{
"epoch": 0.2,
"learning_rate": 0.0008079997985571658,
"loss": 1.2545,
"step": 32500
},
{
"epoch": 0.2,
"learning_rate": 0.0008073702896999761,
"loss": 1.2586,
"step": 32600
},
{
"epoch": 0.2,
"learning_rate": 0.0008067407808427865,
"loss": 1.2533,
"step": 32700
},
{
"epoch": 0.2,
"learning_rate": 0.0008061112719855969,
"loss": 1.2611,
"step": 32800
},
{
"epoch": 0.2,
"learning_rate": 0.0008054817631284073,
"loss": 1.2714,
"step": 32900
},
{
"epoch": 0.21,
"learning_rate": 0.0008048522542712177,
"loss": 1.2583,
"step": 33000
},
{
"epoch": 0.21,
"learning_rate": 0.000804222745414028,
"loss": 1.2684,
"step": 33100
},
{
"epoch": 0.21,
"learning_rate": 0.0008035932365568384,
"loss": 1.2703,
"step": 33200
},
{
"epoch": 0.21,
"learning_rate": 0.0008029637276996488,
"loss": 1.241,
"step": 33300
},
{
"epoch": 0.21,
"learning_rate": 0.0008023342188424592,
"loss": 1.2473,
"step": 33400
},
{
"epoch": 0.21,
"learning_rate": 0.0008017047099852696,
"loss": 1.2567,
"step": 33500
},
{
"epoch": 0.21,
"learning_rate": 0.0008010752011280798,
"loss": 1.242,
"step": 33600
},
{
"epoch": 0.21,
"learning_rate": 0.0008004456922708902,
"loss": 1.2488,
"step": 33700
},
{
"epoch": 0.21,
"learning_rate": 0.0007998161834137006,
"loss": 1.2747,
"step": 33800
},
{
"epoch": 0.21,
"learning_rate": 0.000799186674556511,
"loss": 1.2697,
"step": 33900
},
{
"epoch": 0.21,
"learning_rate": 0.0007985571656993213,
"loss": 1.2691,
"step": 34000
},
{
"epoch": 0.21,
"learning_rate": 0.0007979276568421317,
"loss": 1.2479,
"step": 34100
},
{
"epoch": 0.21,
"learning_rate": 0.0007972981479849421,
"loss": 1.2619,
"step": 34200
},
{
"epoch": 0.21,
"learning_rate": 0.0007966686391277525,
"loss": 1.2439,
"step": 34300
},
{
"epoch": 0.21,
"learning_rate": 0.000796039130270563,
"loss": 1.2404,
"step": 34400
},
{
"epoch": 0.21,
"learning_rate": 0.0007954096214133733,
"loss": 1.254,
"step": 34500
},
{
"epoch": 0.22,
"learning_rate": 0.0007947801125561837,
"loss": 1.233,
"step": 34600
},
{
"epoch": 0.22,
"learning_rate": 0.0007941506036989941,
"loss": 1.2532,
"step": 34700
},
{
"epoch": 0.22,
"learning_rate": 0.0007935210948418045,
"loss": 1.2445,
"step": 34800
},
{
"epoch": 0.22,
"learning_rate": 0.0007928915859846149,
"loss": 1.259,
"step": 34900
},
{
"epoch": 0.22,
"learning_rate": 0.0007922620771274252,
"loss": 1.2816,
"step": 35000
},
{
"epoch": 0.22,
"learning_rate": 0.0007916325682702356,
"loss": 1.2529,
"step": 35100
},
{
"epoch": 0.22,
"learning_rate": 0.000791003059413046,
"loss": 1.2629,
"step": 35200
},
{
"epoch": 0.22,
"learning_rate": 0.0007903735505558564,
"loss": 1.26,
"step": 35300
},
{
"epoch": 0.22,
"learning_rate": 0.0007897440416986668,
"loss": 1.2612,
"step": 35400
},
{
"epoch": 0.22,
"learning_rate": 0.000789114532841477,
"loss": 1.2512,
"step": 35500
},
{
"epoch": 0.22,
"learning_rate": 0.0007884850239842874,
"loss": 1.2738,
"step": 35600
},
{
"epoch": 0.22,
"learning_rate": 0.0007878555151270978,
"loss": 1.2674,
"step": 35700
},
{
"epoch": 0.22,
"learning_rate": 0.0007872260062699082,
"loss": 1.2577,
"step": 35800
},
{
"epoch": 0.22,
"learning_rate": 0.0007865964974127186,
"loss": 1.2641,
"step": 35900
},
{
"epoch": 0.22,
"learning_rate": 0.0007859669885555289,
"loss": 1.2634,
"step": 36000
},
{
"epoch": 0.22,
"learning_rate": 0.0007853374796983393,
"loss": 1.2424,
"step": 36100
},
{
"epoch": 0.23,
"learning_rate": 0.0007847079708411497,
"loss": 1.249,
"step": 36200
},
{
"epoch": 0.23,
"learning_rate": 0.0007840784619839601,
"loss": 1.2518,
"step": 36300
},
{
"epoch": 0.23,
"learning_rate": 0.0007834489531267706,
"loss": 1.2436,
"step": 36400
},
{
"epoch": 0.23,
"learning_rate": 0.0007828194442695809,
"loss": 1.232,
"step": 36500
},
{
"epoch": 0.23,
"learning_rate": 0.0007821899354123913,
"loss": 1.2574,
"step": 36600
},
{
"epoch": 0.23,
"learning_rate": 0.0007815604265552017,
"loss": 1.2481,
"step": 36700
},
{
"epoch": 0.23,
"learning_rate": 0.0007809309176980121,
"loss": 1.2587,
"step": 36800
},
{
"epoch": 0.23,
"learning_rate": 0.0007803014088408225,
"loss": 1.2593,
"step": 36900
},
{
"epoch": 0.23,
"learning_rate": 0.0007796718999836328,
"loss": 1.2428,
"step": 37000
},
{
"epoch": 0.23,
"learning_rate": 0.0007790423911264432,
"loss": 1.2624,
"step": 37100
},
{
"epoch": 0.23,
"learning_rate": 0.0007784128822692536,
"loss": 1.2676,
"step": 37200
},
{
"epoch": 0.23,
"learning_rate": 0.000777783373412064,
"loss": 1.2319,
"step": 37300
},
{
"epoch": 0.23,
"learning_rate": 0.0007771538645548744,
"loss": 1.2609,
"step": 37400
},
{
"epoch": 0.23,
"learning_rate": 0.0007765243556976846,
"loss": 1.2563,
"step": 37500
},
{
"epoch": 0.23,
"learning_rate": 0.000775894846840495,
"loss": 1.2736,
"step": 37600
},
{
"epoch": 0.23,
"learning_rate": 0.0007752653379833054,
"loss": 1.2551,
"step": 37700
},
{
"epoch": 0.23,
"learning_rate": 0.0007746358291261158,
"loss": 1.2493,
"step": 37800
},
{
"epoch": 0.24,
"learning_rate": 0.0007740063202689261,
"loss": 1.2744,
"step": 37900
},
{
"epoch": 0.24,
"learning_rate": 0.0007733768114117365,
"loss": 1.2656,
"step": 38000
},
{
"epoch": 0.24,
"learning_rate": 0.0007727473025545469,
"loss": 1.2557,
"step": 38100
},
{
"epoch": 0.24,
"learning_rate": 0.0007721177936973573,
"loss": 1.2424,
"step": 38200
},
{
"epoch": 0.24,
"learning_rate": 0.0007714882848401678,
"loss": 1.2561,
"step": 38300
},
{
"epoch": 0.24,
"learning_rate": 0.0007708587759829781,
"loss": 1.2468,
"step": 38400
},
{
"epoch": 0.24,
"learning_rate": 0.0007702292671257885,
"loss": 1.2476,
"step": 38500
},
{
"epoch": 0.24,
"learning_rate": 0.0007695997582685989,
"loss": 1.2495,
"step": 38600
},
{
"epoch": 0.24,
"learning_rate": 0.0007689702494114093,
"loss": 1.2759,
"step": 38700
},
{
"epoch": 0.24,
"learning_rate": 0.0007683407405542197,
"loss": 1.2446,
"step": 38800
},
{
"epoch": 0.24,
"learning_rate": 0.00076771123169703,
"loss": 1.2586,
"step": 38900
},
{
"epoch": 0.24,
"learning_rate": 0.0007670817228398404,
"loss": 1.2452,
"step": 39000
},
{
"epoch": 0.24,
"learning_rate": 0.0007664522139826508,
"loss": 1.2421,
"step": 39100
},
{
"epoch": 0.24,
"learning_rate": 0.0007658227051254612,
"loss": 1.2487,
"step": 39200
},
{
"epoch": 0.24,
"learning_rate": 0.0007651931962682715,
"loss": 1.2342,
"step": 39300
},
{
"epoch": 0.24,
"learning_rate": 0.0007645636874110818,
"loss": 1.2376,
"step": 39400
},
{
"epoch": 0.25,
"learning_rate": 0.0007639341785538922,
"loss": 1.2399,
"step": 39500
},
{
"epoch": 0.25,
"learning_rate": 0.0007633046696967026,
"loss": 1.2582,
"step": 39600
},
{
"epoch": 0.25,
"learning_rate": 0.000762675160839513,
"loss": 1.2306,
"step": 39700
},
{
"epoch": 0.25,
"learning_rate": 0.0007620456519823234,
"loss": 1.2343,
"step": 39800
},
{
"epoch": 0.25,
"learning_rate": 0.0007614161431251337,
"loss": 1.2595,
"step": 39900
},
{
"epoch": 0.25,
"learning_rate": 0.0007607866342679441,
"loss": 1.2399,
"step": 40000
},
{
"epoch": 0.25,
"learning_rate": 0.0007601571254107545,
"loss": 1.2428,
"step": 40100
},
{
"epoch": 0.25,
"learning_rate": 0.0007595276165535649,
"loss": 1.2367,
"step": 40200
},
{
"epoch": 0.25,
"learning_rate": 0.0007588981076963754,
"loss": 1.2658,
"step": 40300
},
{
"epoch": 0.25,
"learning_rate": 0.0007582685988391857,
"loss": 1.2362,
"step": 40400
},
{
"epoch": 0.25,
"learning_rate": 0.0007576390899819961,
"loss": 1.2493,
"step": 40500
},
{
"epoch": 0.25,
"learning_rate": 0.0007570095811248065,
"loss": 1.2663,
"step": 40600
},
{
"epoch": 0.25,
"learning_rate": 0.0007563800722676169,
"loss": 1.2488,
"step": 40700
},
{
"epoch": 0.25,
"learning_rate": 0.0007557505634104273,
"loss": 1.2423,
"step": 40800
},
{
"epoch": 0.25,
"learning_rate": 0.0007551210545532376,
"loss": 1.2312,
"step": 40900
},
{
"epoch": 0.25,
"learning_rate": 0.000754491545696048,
"loss": 1.2499,
"step": 41000
},
{
"epoch": 0.26,
"learning_rate": 0.0007538620368388584,
"loss": 1.2335,
"step": 41100
},
{
"epoch": 0.26,
"learning_rate": 0.0007532325279816687,
"loss": 1.2267,
"step": 41200
},
{
"epoch": 0.26,
"learning_rate": 0.0007526030191244791,
"loss": 1.2377,
"step": 41300
},
{
"epoch": 0.26,
"learning_rate": 0.0007519735102672894,
"loss": 1.2266,
"step": 41400
},
{
"epoch": 0.26,
"learning_rate": 0.0007513440014100998,
"loss": 1.2445,
"step": 41500
},
{
"epoch": 0.26,
"learning_rate": 0.0007507144925529102,
"loss": 1.2316,
"step": 41600
},
{
"epoch": 0.26,
"learning_rate": 0.0007500849836957206,
"loss": 1.2431,
"step": 41700
},
{
"epoch": 0.26,
"learning_rate": 0.000749455474838531,
"loss": 1.2499,
"step": 41800
},
{
"epoch": 0.26,
"learning_rate": 0.0007488259659813413,
"loss": 1.255,
"step": 41900
},
{
"epoch": 0.26,
"learning_rate": 0.0007481964571241517,
"loss": 1.2342,
"step": 42000
},
{
"epoch": 0.26,
"learning_rate": 0.0007475669482669621,
"loss": 1.2233,
"step": 42100
},
{
"epoch": 0.26,
"learning_rate": 0.0007469374394097725,
"loss": 1.2464,
"step": 42200
},
{
"epoch": 0.26,
"learning_rate": 0.0007463079305525829,
"loss": 1.2444,
"step": 42300
},
{
"epoch": 0.26,
"learning_rate": 0.0007456784216953933,
"loss": 1.2452,
"step": 42400
},
{
"epoch": 0.26,
"learning_rate": 0.0007450489128382037,
"loss": 1.2278,
"step": 42500
},
{
"epoch": 0.26,
"learning_rate": 0.0007444194039810141,
"loss": 1.2509,
"step": 42600
},
{
"epoch": 0.27,
"learning_rate": 0.0007437898951238245,
"loss": 1.248,
"step": 42700
},
{
"epoch": 0.27,
"learning_rate": 0.0007431603862666348,
"loss": 1.2386,
"step": 42800
},
{
"epoch": 0.27,
"learning_rate": 0.0007425308774094452,
"loss": 1.2385,
"step": 42900
},
{
"epoch": 0.27,
"learning_rate": 0.0007419013685522556,
"loss": 1.2365,
"step": 43000
},
{
"epoch": 0.27,
"learning_rate": 0.000741271859695066,
"loss": 1.2554,
"step": 43100
},
{
"epoch": 0.27,
"learning_rate": 0.0007406423508378763,
"loss": 1.2586,
"step": 43200
},
{
"epoch": 0.27,
"learning_rate": 0.0007400128419806866,
"loss": 1.2397,
"step": 43300
},
{
"epoch": 0.27,
"learning_rate": 0.000739383333123497,
"loss": 1.2447,
"step": 43400
},
{
"epoch": 0.27,
"learning_rate": 0.0007387538242663074,
"loss": 1.2455,
"step": 43500
},
{
"epoch": 0.27,
"learning_rate": 0.0007381243154091178,
"loss": 1.2415,
"step": 43600
},
{
"epoch": 0.27,
"learning_rate": 0.0007374948065519282,
"loss": 1.2504,
"step": 43700
},
{
"epoch": 0.27,
"learning_rate": 0.0007368652976947385,
"loss": 1.2417,
"step": 43800
},
{
"epoch": 0.27,
"learning_rate": 0.0007362357888375489,
"loss": 1.2382,
"step": 43900
},
{
"epoch": 0.27,
"learning_rate": 0.0007356062799803593,
"loss": 1.2399,
"step": 44000
},
{
"epoch": 0.27,
"learning_rate": 0.0007349767711231697,
"loss": 1.2654,
"step": 44100
},
{
"epoch": 0.27,
"learning_rate": 0.0007343472622659802,
"loss": 1.2425,
"step": 44200
},
{
"epoch": 0.28,
"learning_rate": 0.0007337177534087905,
"loss": 1.255,
"step": 44300
},
{
"epoch": 0.28,
"learning_rate": 0.0007330882445516009,
"loss": 1.2418,
"step": 44400
},
{
"epoch": 0.28,
"learning_rate": 0.0007324587356944113,
"loss": 1.2313,
"step": 44500
},
{
"epoch": 0.28,
"learning_rate": 0.0007318292268372217,
"loss": 1.2429,
"step": 44600
},
{
"epoch": 0.28,
"learning_rate": 0.0007311997179800321,
"loss": 1.2677,
"step": 44700
},
{
"epoch": 0.28,
"learning_rate": 0.0007305702091228424,
"loss": 1.2235,
"step": 44800
},
{
"epoch": 0.28,
"learning_rate": 0.0007299407002656528,
"loss": 1.2359,
"step": 44900
},
{
"epoch": 0.28,
"learning_rate": 0.0007293111914084631,
"loss": 1.2512,
"step": 45000
},
{
"epoch": 0.28,
"learning_rate": 0.0007286816825512735,
"loss": 1.2216,
"step": 45100
},
{
"epoch": 0.28,
"learning_rate": 0.0007280521736940839,
"loss": 1.2559,
"step": 45200
},
{
"epoch": 0.28,
"learning_rate": 0.0007274226648368942,
"loss": 1.2301,
"step": 45300
},
{
"epoch": 0.28,
"learning_rate": 0.0007267931559797046,
"loss": 1.2275,
"step": 45400
},
{
"epoch": 0.28,
"learning_rate": 0.000726163647122515,
"loss": 1.24,
"step": 45500
},
{
"epoch": 0.28,
"learning_rate": 0.0007255341382653254,
"loss": 1.2484,
"step": 45600
},
{
"epoch": 0.28,
"learning_rate": 0.0007249046294081358,
"loss": 1.2383,
"step": 45700
},
{
"epoch": 0.28,
"learning_rate": 0.0007242751205509461,
"loss": 1.2442,
"step": 45800
},
{
"epoch": 0.29,
"learning_rate": 0.0007236456116937565,
"loss": 1.2214,
"step": 45900
},
{
"epoch": 0.29,
"learning_rate": 0.0007230161028365669,
"loss": 1.2405,
"step": 46000
},
{
"epoch": 0.29,
"learning_rate": 0.0007223865939793773,
"loss": 1.2372,
"step": 46100
},
{
"epoch": 0.29,
"learning_rate": 0.0007217570851221877,
"loss": 1.2423,
"step": 46200
},
{
"epoch": 0.29,
"learning_rate": 0.0007211275762649981,
"loss": 1.2399,
"step": 46300
},
{
"epoch": 0.29,
"learning_rate": 0.0007204980674078085,
"loss": 1.235,
"step": 46400
},
{
"epoch": 0.29,
"learning_rate": 0.0007198685585506189,
"loss": 1.2395,
"step": 46500
},
{
"epoch": 0.29,
"learning_rate": 0.0007192390496934293,
"loss": 1.2473,
"step": 46600
},
{
"epoch": 0.29,
"learning_rate": 0.0007186095408362396,
"loss": 1.2339,
"step": 46700
},
{
"epoch": 0.29,
"learning_rate": 0.00071798003197905,
"loss": 1.2356,
"step": 46800
},
{
"epoch": 0.29,
"learning_rate": 0.0007173505231218603,
"loss": 1.227,
"step": 46900
},
{
"epoch": 0.29,
"learning_rate": 0.0007167210142646707,
"loss": 1.2495,
"step": 47000
},
{
"epoch": 0.29,
"learning_rate": 0.0007160915054074811,
"loss": 1.2372,
"step": 47100
},
{
"epoch": 0.29,
"learning_rate": 0.0007154619965502914,
"loss": 1.2758,
"step": 47200
},
{
"epoch": 0.29,
"learning_rate": 0.0007148324876931018,
"loss": 1.2317,
"step": 47300
},
{
"epoch": 0.29,
"learning_rate": 0.0007142029788359122,
"loss": 1.2337,
"step": 47400
},
{
"epoch": 0.3,
"learning_rate": 0.0007135734699787226,
"loss": 1.2283,
"step": 47500
},
{
"epoch": 0.3,
"learning_rate": 0.000712943961121533,
"loss": 1.2466,
"step": 47600
},
{
"epoch": 0.3,
"learning_rate": 0.0007123144522643433,
"loss": 1.2351,
"step": 47700
},
{
"epoch": 0.3,
"learning_rate": 0.0007116849434071537,
"loss": 1.2418,
"step": 47800
},
{
"epoch": 0.3,
"learning_rate": 0.0007110554345499641,
"loss": 1.241,
"step": 47900
},
{
"epoch": 0.3,
"learning_rate": 0.0007104259256927745,
"loss": 1.2293,
"step": 48000
},
{
"epoch": 0.3,
"learning_rate": 0.000709796416835585,
"loss": 1.2399,
"step": 48100
},
{
"epoch": 0.3,
"learning_rate": 0.0007091669079783953,
"loss": 1.2309,
"step": 48200
},
{
"epoch": 0.3,
"learning_rate": 0.0007085373991212057,
"loss": 1.2266,
"step": 48300
},
{
"epoch": 0.3,
"learning_rate": 0.0007079078902640161,
"loss": 1.2332,
"step": 48400
},
{
"epoch": 0.3,
"learning_rate": 0.0007072783814068265,
"loss": 1.2427,
"step": 48500
},
{
"epoch": 0.3,
"learning_rate": 0.0007066488725496369,
"loss": 1.2463,
"step": 48600
},
{
"epoch": 0.3,
"learning_rate": 0.0007060193636924471,
"loss": 1.2421,
"step": 48700
},
{
"epoch": 0.3,
"learning_rate": 0.0007053898548352575,
"loss": 1.2318,
"step": 48800
},
{
"epoch": 0.3,
"learning_rate": 0.0007047603459780679,
"loss": 1.223,
"step": 48900
},
{
"epoch": 0.3,
"learning_rate": 0.0007041308371208783,
"loss": 1.2298,
"step": 49000
},
{
"epoch": 0.31,
"learning_rate": 0.0007035013282636887,
"loss": 1.2403,
"step": 49100
},
{
"epoch": 0.31,
"learning_rate": 0.000702871819406499,
"loss": 1.2324,
"step": 49200
},
{
"epoch": 0.31,
"learning_rate": 0.0007022423105493094,
"loss": 1.2465,
"step": 49300
},
{
"epoch": 0.31,
"learning_rate": 0.0007016128016921198,
"loss": 1.2361,
"step": 49400
},
{
"epoch": 0.31,
"learning_rate": 0.0007009832928349302,
"loss": 1.2313,
"step": 49500
},
{
"epoch": 0.31,
"learning_rate": 0.0007003537839777406,
"loss": 1.2439,
"step": 49600
},
{
"epoch": 0.31,
"learning_rate": 0.0006997242751205509,
"loss": 1.2409,
"step": 49700
},
{
"epoch": 0.31,
"learning_rate": 0.0006990947662633613,
"loss": 1.2406,
"step": 49800
},
{
"epoch": 0.31,
"learning_rate": 0.0006984652574061717,
"loss": 1.2146,
"step": 49900
},
{
"epoch": 0.31,
"learning_rate": 0.0006978357485489821,
"loss": 1.2345,
"step": 50000
},
{
"epoch": 0.31,
"learning_rate": 0.0006972062396917926,
"loss": 1.243,
"step": 50100
},
{
"epoch": 0.31,
"learning_rate": 0.0006965767308346029,
"loss": 1.2378,
"step": 50200
},
{
"epoch": 0.31,
"learning_rate": 0.0006959472219774133,
"loss": 1.2309,
"step": 50300
},
{
"epoch": 0.31,
"learning_rate": 0.0006953177131202237,
"loss": 1.2337,
"step": 50400
},
{
"epoch": 0.31,
"learning_rate": 0.0006946882042630341,
"loss": 1.2406,
"step": 50500
},
{
"epoch": 0.31,
"learning_rate": 0.0006940586954058443,
"loss": 1.2389,
"step": 50600
},
{
"epoch": 0.32,
"learning_rate": 0.0006934291865486547,
"loss": 1.2323,
"step": 50700
},
{
"epoch": 0.32,
"learning_rate": 0.0006927996776914651,
"loss": 1.2504,
"step": 50800
},
{
"epoch": 0.32,
"learning_rate": 0.0006921701688342755,
"loss": 1.2323,
"step": 50900
},
{
"epoch": 0.32,
"learning_rate": 0.0006915406599770859,
"loss": 1.2321,
"step": 51000
},
{
"epoch": 0.32,
"learning_rate": 0.0006909111511198962,
"loss": 1.2458,
"step": 51100
},
{
"epoch": 0.32,
"learning_rate": 0.0006902816422627066,
"loss": 1.2371,
"step": 51200
},
{
"epoch": 0.32,
"learning_rate": 0.000689652133405517,
"loss": 1.2227,
"step": 51300
},
{
"epoch": 0.32,
"learning_rate": 0.0006890226245483274,
"loss": 1.2007,
"step": 51400
},
{
"epoch": 0.32,
"learning_rate": 0.0006883931156911378,
"loss": 1.2522,
"step": 51500
},
{
"epoch": 0.32,
"learning_rate": 0.0006877636068339481,
"loss": 1.2535,
"step": 51600
},
{
"epoch": 0.32,
"learning_rate": 0.0006871340979767585,
"loss": 1.2256,
"step": 51700
},
{
"epoch": 0.32,
"learning_rate": 0.0006865045891195689,
"loss": 1.2258,
"step": 51800
},
{
"epoch": 0.32,
"learning_rate": 0.0006858750802623793,
"loss": 1.2065,
"step": 51900
},
{
"epoch": 0.32,
"learning_rate": 0.0006852455714051897,
"loss": 1.2471,
"step": 52000
},
{
"epoch": 0.32,
"learning_rate": 0.0006846160625480001,
"loss": 1.2313,
"step": 52100
},
{
"epoch": 0.32,
"learning_rate": 0.0006839865536908105,
"loss": 1.2136,
"step": 52200
},
{
"epoch": 0.33,
"learning_rate": 0.0006833570448336209,
"loss": 1.2374,
"step": 52300
},
{
"epoch": 0.33,
"learning_rate": 0.0006827275359764313,
"loss": 1.2368,
"step": 52400
},
{
"epoch": 0.33,
"learning_rate": 0.0006820980271192417,
"loss": 1.2295,
"step": 52500
},
{
"epoch": 0.33,
"learning_rate": 0.0006814685182620519,
"loss": 1.2498,
"step": 52600
},
{
"epoch": 0.33,
"learning_rate": 0.0006808390094048623,
"loss": 1.2475,
"step": 52700
},
{
"epoch": 0.33,
"learning_rate": 0.0006802095005476727,
"loss": 1.2169,
"step": 52800
},
{
"epoch": 0.33,
"learning_rate": 0.0006795799916904831,
"loss": 1.2418,
"step": 52900
},
{
"epoch": 0.33,
"learning_rate": 0.0006789504828332935,
"loss": 1.2283,
"step": 53000
},
{
"epoch": 0.33,
"learning_rate": 0.0006783209739761038,
"loss": 1.2274,
"step": 53100
},
{
"epoch": 0.33,
"learning_rate": 0.0006776914651189142,
"loss": 1.2336,
"step": 53200
},
{
"epoch": 0.33,
"learning_rate": 0.0006770619562617246,
"loss": 1.2277,
"step": 53300
},
{
"epoch": 0.33,
"learning_rate": 0.000676432447404535,
"loss": 1.2338,
"step": 53400
},
{
"epoch": 0.33,
"learning_rate": 0.0006758029385473454,
"loss": 1.2453,
"step": 53500
},
{
"epoch": 0.33,
"learning_rate": 0.0006751734296901557,
"loss": 1.2277,
"step": 53600
},
{
"epoch": 0.33,
"learning_rate": 0.0006745439208329661,
"loss": 1.2136,
"step": 53700
},
{
"epoch": 0.33,
"learning_rate": 0.0006739144119757765,
"loss": 1.2345,
"step": 53800
},
{
"epoch": 0.34,
"learning_rate": 0.0006732849031185869,
"loss": 1.2291,
"step": 53900
},
{
"epoch": 0.34,
"learning_rate": 0.0006726553942613974,
"loss": 1.233,
"step": 54000
},
{
"epoch": 0.34,
"learning_rate": 0.0006720258854042077,
"loss": 1.2422,
"step": 54100
},
{
"epoch": 0.34,
"learning_rate": 0.0006713963765470181,
"loss": 1.2385,
"step": 54200
},
{
"epoch": 0.34,
"learning_rate": 0.0006707668676898285,
"loss": 1.2402,
"step": 54300
},
{
"epoch": 0.34,
"learning_rate": 0.0006701373588326389,
"loss": 1.2081,
"step": 54400
},
{
"epoch": 0.34,
"learning_rate": 0.0006695078499754492,
"loss": 1.2211,
"step": 54500
},
{
"epoch": 0.34,
"learning_rate": 0.0006688783411182595,
"loss": 1.2149,
"step": 54600
},
{
"epoch": 0.34,
"learning_rate": 0.0006682488322610699,
"loss": 1.2328,
"step": 54700
},
{
"epoch": 0.34,
"learning_rate": 0.0006676193234038803,
"loss": 1.231,
"step": 54800
},
{
"epoch": 0.34,
"learning_rate": 0.0006669898145466907,
"loss": 1.2277,
"step": 54900
},
{
"epoch": 0.34,
"learning_rate": 0.000666360305689501,
"loss": 1.2414,
"step": 55000
},
{
"epoch": 0.34,
"learning_rate": 0.0006657307968323114,
"loss": 1.2395,
"step": 55100
},
{
"epoch": 0.34,
"learning_rate": 0.0006651012879751218,
"loss": 1.2471,
"step": 55200
},
{
"epoch": 0.34,
"learning_rate": 0.0006644717791179322,
"loss": 1.2363,
"step": 55300
},
{
"epoch": 0.34,
"learning_rate": 0.0006638422702607426,
"loss": 1.2248,
"step": 55400
},
{
"epoch": 0.35,
"learning_rate": 0.0006632127614035529,
"loss": 1.2244,
"step": 55500
},
{
"epoch": 0.35,
"learning_rate": 0.0006625832525463633,
"loss": 1.2177,
"step": 55600
},
{
"epoch": 0.35,
"learning_rate": 0.0006619537436891737,
"loss": 1.233,
"step": 55700
},
{
"epoch": 0.35,
"learning_rate": 0.0006613242348319841,
"loss": 1.2349,
"step": 55800
},
{
"epoch": 0.35,
"learning_rate": 0.0006606947259747945,
"loss": 1.2389,
"step": 55900
},
{
"epoch": 0.35,
"learning_rate": 0.0006600652171176049,
"loss": 1.2368,
"step": 56000
},
{
"epoch": 0.35,
"learning_rate": 0.0006594357082604153,
"loss": 1.2198,
"step": 56100
},
{
"epoch": 0.35,
"learning_rate": 0.0006588061994032257,
"loss": 1.2178,
"step": 56200
},
{
"epoch": 0.35,
"learning_rate": 0.000658176690546036,
"loss": 1.2177,
"step": 56300
},
{
"epoch": 0.35,
"learning_rate": 0.0006575471816888464,
"loss": 1.2249,
"step": 56400
},
{
"epoch": 0.35,
"learning_rate": 0.0006569176728316567,
"loss": 1.2234,
"step": 56500
},
{
"epoch": 0.35,
"learning_rate": 0.0006562881639744671,
"loss": 1.2255,
"step": 56600
},
{
"epoch": 0.35,
"learning_rate": 0.0006556586551172775,
"loss": 1.2133,
"step": 56700
},
{
"epoch": 0.35,
"learning_rate": 0.0006550291462600879,
"loss": 1.2065,
"step": 56800
},
{
"epoch": 0.35,
"learning_rate": 0.0006543996374028983,
"loss": 1.2112,
"step": 56900
},
{
"epoch": 0.35,
"learning_rate": 0.0006537701285457086,
"loss": 1.2118,
"step": 57000
},
{
"epoch": 0.35,
"learning_rate": 0.000653140619688519,
"loss": 1.224,
"step": 57100
},
{
"epoch": 0.36,
"learning_rate": 0.0006525111108313294,
"loss": 1.2395,
"step": 57200
},
{
"epoch": 0.36,
"learning_rate": 0.0006518816019741398,
"loss": 1.2382,
"step": 57300
},
{
"epoch": 0.36,
"learning_rate": 0.0006512520931169502,
"loss": 1.2086,
"step": 57400
},
{
"epoch": 0.36,
"learning_rate": 0.0006506225842597605,
"loss": 1.2059,
"step": 57500
},
{
"epoch": 0.36,
"learning_rate": 0.0006499930754025709,
"loss": 1.2496,
"step": 57600
},
{
"epoch": 0.36,
"learning_rate": 0.0006493635665453813,
"loss": 1.2223,
"step": 57700
},
{
"epoch": 0.36,
"learning_rate": 0.0006487340576881917,
"loss": 1.2291,
"step": 57800
},
{
"epoch": 0.36,
"learning_rate": 0.0006481045488310022,
"loss": 1.2347,
"step": 57900
},
{
"epoch": 0.36,
"learning_rate": 0.0006474750399738125,
"loss": 1.241,
"step": 58000
},
{
"epoch": 0.36,
"learning_rate": 0.0006468455311166229,
"loss": 1.2339,
"step": 58100
},
{
"epoch": 0.36,
"learning_rate": 0.0006462160222594333,
"loss": 1.2201,
"step": 58200
},
{
"epoch": 0.36,
"learning_rate": 0.0006455865134022436,
"loss": 1.2282,
"step": 58300
},
{
"epoch": 0.36,
"learning_rate": 0.000644957004545054,
"loss": 1.2187,
"step": 58400
},
{
"epoch": 0.36,
"learning_rate": 0.0006443274956878643,
"loss": 1.2174,
"step": 58500
},
{
"epoch": 0.36,
"learning_rate": 0.0006436979868306747,
"loss": 1.2245,
"step": 58600
},
{
"epoch": 0.36,
"learning_rate": 0.0006430684779734851,
"loss": 1.227,
"step": 58700
},
{
"epoch": 0.37,
"learning_rate": 0.0006424389691162955,
"loss": 1.2228,
"step": 58800
},
{
"epoch": 0.37,
"learning_rate": 0.0006418094602591058,
"loss": 1.2219,
"step": 58900
},
{
"epoch": 0.37,
"learning_rate": 0.0006411799514019162,
"loss": 1.2243,
"step": 59000
},
{
"epoch": 0.37,
"learning_rate": 0.0006405504425447266,
"loss": 1.2228,
"step": 59100
},
{
"epoch": 0.37,
"learning_rate": 0.000639920933687537,
"loss": 1.214,
"step": 59200
},
{
"epoch": 0.37,
"learning_rate": 0.0006392914248303474,
"loss": 1.224,
"step": 59300
},
{
"epoch": 0.37,
"learning_rate": 0.0006386619159731577,
"loss": 1.2232,
"step": 59400
},
{
"epoch": 0.37,
"learning_rate": 0.0006380324071159681,
"loss": 1.2311,
"step": 59500
},
{
"epoch": 0.37,
"learning_rate": 0.0006374028982587785,
"loss": 1.2368,
"step": 59600
},
{
"epoch": 0.37,
"learning_rate": 0.0006367733894015889,
"loss": 1.2137,
"step": 59700
},
{
"epoch": 0.37,
"learning_rate": 0.0006361438805443993,
"loss": 1.2279,
"step": 59800
},
{
"epoch": 0.37,
"learning_rate": 0.0006355143716872095,
"loss": 1.2236,
"step": 59900
},
{
"epoch": 0.37,
"learning_rate": 0.00063488486283002,
"loss": 1.216,
"step": 60000
},
{
"epoch": 0.37,
"learning_rate": 0.0006342553539728304,
"loss": 1.242,
"step": 60100
},
{
"epoch": 0.37,
"learning_rate": 0.0006336258451156408,
"loss": 1.2124,
"step": 60200
},
{
"epoch": 0.37,
"learning_rate": 0.0006329963362584512,
"loss": 1.2166,
"step": 60300
},
{
"epoch": 0.38,
"learning_rate": 0.0006323668274012615,
"loss": 1.2089,
"step": 60400
},
{
"epoch": 0.38,
"learning_rate": 0.0006317373185440719,
"loss": 1.1967,
"step": 60500
},
{
"epoch": 0.38,
"learning_rate": 0.0006311078096868823,
"loss": 1.2151,
"step": 60600
},
{
"epoch": 0.38,
"learning_rate": 0.0006304783008296927,
"loss": 1.2359,
"step": 60700
},
{
"epoch": 0.38,
"learning_rate": 0.0006298487919725031,
"loss": 1.2107,
"step": 60800
},
{
"epoch": 0.38,
"learning_rate": 0.0006292192831153134,
"loss": 1.2142,
"step": 60900
},
{
"epoch": 0.38,
"learning_rate": 0.0006285897742581238,
"loss": 1.223,
"step": 61000
},
{
"epoch": 0.38,
"learning_rate": 0.0006279602654009342,
"loss": 1.2185,
"step": 61100
},
{
"epoch": 0.38,
"learning_rate": 0.0006273307565437446,
"loss": 1.2203,
"step": 61200
},
{
"epoch": 0.38,
"learning_rate": 0.000626701247686555,
"loss": 1.2122,
"step": 61300
},
{
"epoch": 0.38,
"learning_rate": 0.0006260717388293653,
"loss": 1.2382,
"step": 61400
},
{
"epoch": 0.38,
"learning_rate": 0.0006254422299721757,
"loss": 1.2017,
"step": 61500
},
{
"epoch": 0.38,
"learning_rate": 0.0006248127211149861,
"loss": 1.2364,
"step": 61600
},
{
"epoch": 0.38,
"learning_rate": 0.0006241832122577965,
"loss": 1.2179,
"step": 61700
},
{
"epoch": 0.38,
"learning_rate": 0.0006235537034006069,
"loss": 1.2138,
"step": 61800
},
{
"epoch": 0.38,
"learning_rate": 0.0006229241945434173,
"loss": 1.2292,
"step": 61900
},
{
"epoch": 0.39,
"learning_rate": 0.0006222946856862276,
"loss": 1.2185,
"step": 62000
},
{
"epoch": 0.39,
"learning_rate": 0.000621665176829038,
"loss": 1.2146,
"step": 62100
},
{
"epoch": 0.39,
"learning_rate": 0.0006210356679718484,
"loss": 1.2449,
"step": 62200
},
{
"epoch": 0.39,
"learning_rate": 0.0006204061591146588,
"loss": 1.2316,
"step": 62300
},
{
"epoch": 0.39,
"learning_rate": 0.0006197766502574691,
"loss": 1.2121,
"step": 62400
},
{
"epoch": 0.39,
"learning_rate": 0.0006191471414002795,
"loss": 1.2084,
"step": 62500
},
{
"epoch": 0.39,
"learning_rate": 0.0006185176325430899,
"loss": 1.2175,
"step": 62600
},
{
"epoch": 0.39,
"learning_rate": 0.0006178881236859003,
"loss": 1.2189,
"step": 62700
},
{
"epoch": 0.39,
"learning_rate": 0.0006172586148287107,
"loss": 1.2202,
"step": 62800
},
{
"epoch": 0.39,
"learning_rate": 0.000616629105971521,
"loss": 1.2174,
"step": 62900
},
{
"epoch": 0.39,
"learning_rate": 0.0006159995971143314,
"loss": 1.2174,
"step": 63000
},
{
"epoch": 0.39,
"learning_rate": 0.0006153700882571418,
"loss": 1.2101,
"step": 63100
},
{
"epoch": 0.39,
"learning_rate": 0.0006147405793999522,
"loss": 1.2305,
"step": 63200
},
{
"epoch": 0.39,
"learning_rate": 0.0006141110705427625,
"loss": 1.2192,
"step": 63300
},
{
"epoch": 0.39,
"learning_rate": 0.0006134815616855729,
"loss": 1.2098,
"step": 63400
},
{
"epoch": 0.39,
"learning_rate": 0.0006128520528283833,
"loss": 1.2172,
"step": 63500
},
{
"epoch": 0.4,
"learning_rate": 0.0006122225439711937,
"loss": 1.2192,
"step": 63600
},
{
"epoch": 0.4,
"learning_rate": 0.0006115930351140041,
"loss": 1.2223,
"step": 63700
},
{
"epoch": 0.4,
"learning_rate": 0.0006109635262568143,
"loss": 1.1929,
"step": 63800
},
{
"epoch": 0.4,
"learning_rate": 0.0006103340173996248,
"loss": 1.2011,
"step": 63900
},
{
"epoch": 0.4,
"learning_rate": 0.0006097045085424352,
"loss": 1.2243,
"step": 64000
},
{
"epoch": 0.4,
"learning_rate": 0.0006090749996852456,
"loss": 1.2271,
"step": 64100
},
{
"epoch": 0.4,
"learning_rate": 0.000608445490828056,
"loss": 1.2312,
"step": 64200
},
{
"epoch": 0.4,
"learning_rate": 0.0006078159819708663,
"loss": 1.2267,
"step": 64300
},
{
"epoch": 0.4,
"learning_rate": 0.0006071864731136767,
"loss": 1.218,
"step": 64400
},
{
"epoch": 0.4,
"learning_rate": 0.0006065569642564871,
"loss": 1.2317,
"step": 64500
},
{
"epoch": 0.4,
"learning_rate": 0.0006059274553992975,
"loss": 1.2166,
"step": 64600
},
{
"epoch": 0.4,
"learning_rate": 0.0006052979465421079,
"loss": 1.2096,
"step": 64700
},
{
"epoch": 0.4,
"learning_rate": 0.0006046684376849182,
"loss": 1.2183,
"step": 64800
},
{
"epoch": 0.4,
"learning_rate": 0.0006040389288277286,
"loss": 1.208,
"step": 64900
},
{
"epoch": 0.4,
"learning_rate": 0.000603409419970539,
"loss": 1.217,
"step": 65000
}
],
"max_steps": 160854,
"num_train_epochs": 1,
"total_flos": 6.729383411712e+16,
"trial_name": null,
"trial_params": null
}