byt5lephone_g2p_v1-1024-NMSQA / trainer_state.json
Splend1dchan's picture
Upload 10 files
d4ba3f6
raw
history blame
155 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 9.99996056937818,
"global_step": 63400,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 3e-06,
"loss": 6.9272,
"step": 50
},
{
"epoch": 0.02,
"learning_rate": 6e-06,
"loss": 6.9063,
"step": 100
},
{
"epoch": 0.02,
"learning_rate": 9e-06,
"loss": 6.8492,
"step": 150
},
{
"epoch": 0.03,
"learning_rate": 1.2e-05,
"loss": 6.724,
"step": 200
},
{
"epoch": 0.04,
"learning_rate": 1.5e-05,
"loss": 6.534,
"step": 250
},
{
"epoch": 0.05,
"learning_rate": 1.8e-05,
"loss": 6.3145,
"step": 300
},
{
"epoch": 0.06,
"learning_rate": 2.1e-05,
"loss": 6.1061,
"step": 350
},
{
"epoch": 0.06,
"learning_rate": 2.4e-05,
"loss": 5.8852,
"step": 400
},
{
"epoch": 0.07,
"learning_rate": 2.7000000000000002e-05,
"loss": 5.6446,
"step": 450
},
{
"epoch": 0.08,
"learning_rate": 3e-05,
"loss": 5.4451,
"step": 500
},
{
"epoch": 0.09,
"learning_rate": 2.9976152623211448e-05,
"loss": 5.2905,
"step": 550
},
{
"epoch": 0.09,
"learning_rate": 2.9952305246422896e-05,
"loss": 5.0606,
"step": 600
},
{
"epoch": 0.1,
"learning_rate": 2.9928457869634343e-05,
"loss": 4.8638,
"step": 650
},
{
"epoch": 0.11,
"learning_rate": 2.9904610492845787e-05,
"loss": 4.713,
"step": 700
},
{
"epoch": 0.12,
"learning_rate": 2.9880763116057235e-05,
"loss": 4.4828,
"step": 750
},
{
"epoch": 0.13,
"learning_rate": 2.9856915739268682e-05,
"loss": 4.5133,
"step": 800
},
{
"epoch": 0.13,
"learning_rate": 2.983306836248013e-05,
"loss": 4.2172,
"step": 850
},
{
"epoch": 0.14,
"learning_rate": 2.9809220985691574e-05,
"loss": 4.1656,
"step": 900
},
{
"epoch": 0.15,
"learning_rate": 2.978537360890302e-05,
"loss": 4.1704,
"step": 950
},
{
"epoch": 0.16,
"learning_rate": 2.976152623211447e-05,
"loss": 4.0979,
"step": 1000
},
{
"epoch": 0.17,
"learning_rate": 2.9737678855325916e-05,
"loss": 3.9012,
"step": 1050
},
{
"epoch": 0.17,
"learning_rate": 2.971383147853736e-05,
"loss": 3.9303,
"step": 1100
},
{
"epoch": 0.18,
"learning_rate": 2.9689984101748808e-05,
"loss": 3.946,
"step": 1150
},
{
"epoch": 0.19,
"learning_rate": 2.9666136724960255e-05,
"loss": 3.9126,
"step": 1200
},
{
"epoch": 0.2,
"learning_rate": 2.9642289348171703e-05,
"loss": 3.8422,
"step": 1250
},
{
"epoch": 0.21,
"learning_rate": 2.9618441971383147e-05,
"loss": 3.6961,
"step": 1300
},
{
"epoch": 0.21,
"learning_rate": 2.9594594594594598e-05,
"loss": 3.7898,
"step": 1350
},
{
"epoch": 0.22,
"learning_rate": 2.9570747217806042e-05,
"loss": 3.721,
"step": 1400
},
{
"epoch": 0.23,
"learning_rate": 2.954689984101749e-05,
"loss": 3.8564,
"step": 1450
},
{
"epoch": 0.24,
"learning_rate": 2.9523052464228933e-05,
"loss": 3.7442,
"step": 1500
},
{
"epoch": 0.24,
"learning_rate": 2.9499205087440384e-05,
"loss": 3.84,
"step": 1550
},
{
"epoch": 0.25,
"learning_rate": 2.9475357710651828e-05,
"loss": 3.7246,
"step": 1600
},
{
"epoch": 0.26,
"learning_rate": 2.9451510333863276e-05,
"loss": 3.7482,
"step": 1650
},
{
"epoch": 0.27,
"learning_rate": 2.942766295707472e-05,
"loss": 3.5869,
"step": 1700
},
{
"epoch": 0.28,
"learning_rate": 2.940381558028617e-05,
"loss": 3.5348,
"step": 1750
},
{
"epoch": 0.28,
"learning_rate": 2.9379968203497615e-05,
"loss": 3.6747,
"step": 1800
},
{
"epoch": 0.29,
"learning_rate": 2.9356120826709062e-05,
"loss": 3.5978,
"step": 1850
},
{
"epoch": 0.3,
"learning_rate": 2.933227344992051e-05,
"loss": 3.6665,
"step": 1900
},
{
"epoch": 0.31,
"learning_rate": 2.9308426073131957e-05,
"loss": 3.7422,
"step": 1950
},
{
"epoch": 0.32,
"learning_rate": 2.92845786963434e-05,
"loss": 3.4649,
"step": 2000
},
{
"epoch": 0.32,
"learning_rate": 2.926073131955485e-05,
"loss": 3.5158,
"step": 2050
},
{
"epoch": 0.33,
"learning_rate": 2.9236883942766296e-05,
"loss": 3.4809,
"step": 2100
},
{
"epoch": 0.34,
"learning_rate": 2.9213036565977744e-05,
"loss": 3.59,
"step": 2150
},
{
"epoch": 0.35,
"learning_rate": 2.918918918918919e-05,
"loss": 3.444,
"step": 2200
},
{
"epoch": 0.35,
"learning_rate": 2.9165341812400635e-05,
"loss": 3.5945,
"step": 2250
},
{
"epoch": 0.36,
"learning_rate": 2.9141494435612086e-05,
"loss": 3.4512,
"step": 2300
},
{
"epoch": 0.37,
"learning_rate": 2.911764705882353e-05,
"loss": 3.5605,
"step": 2350
},
{
"epoch": 0.38,
"learning_rate": 2.9093799682034978e-05,
"loss": 3.6303,
"step": 2400
},
{
"epoch": 0.39,
"learning_rate": 2.9069952305246422e-05,
"loss": 3.5535,
"step": 2450
},
{
"epoch": 0.39,
"learning_rate": 2.9046104928457873e-05,
"loss": 3.4535,
"step": 2500
},
{
"epoch": 0.4,
"learning_rate": 2.9022257551669317e-05,
"loss": 3.5228,
"step": 2550
},
{
"epoch": 0.41,
"learning_rate": 2.8998410174880764e-05,
"loss": 3.4336,
"step": 2600
},
{
"epoch": 0.42,
"learning_rate": 2.8974562798092212e-05,
"loss": 3.5632,
"step": 2650
},
{
"epoch": 0.43,
"learning_rate": 2.895071542130366e-05,
"loss": 3.5295,
"step": 2700
},
{
"epoch": 0.43,
"learning_rate": 2.8926868044515103e-05,
"loss": 3.5078,
"step": 2750
},
{
"epoch": 0.44,
"learning_rate": 2.890302066772655e-05,
"loss": 3.4095,
"step": 2800
},
{
"epoch": 0.45,
"learning_rate": 2.8879173290937998e-05,
"loss": 3.4464,
"step": 2850
},
{
"epoch": 0.46,
"learning_rate": 2.8855325914149446e-05,
"loss": 3.5273,
"step": 2900
},
{
"epoch": 0.47,
"learning_rate": 2.883147853736089e-05,
"loss": 3.5008,
"step": 2950
},
{
"epoch": 0.47,
"learning_rate": 2.8807631160572337e-05,
"loss": 3.5109,
"step": 3000
},
{
"epoch": 0.48,
"learning_rate": 2.8783783783783785e-05,
"loss": 3.4497,
"step": 3050
},
{
"epoch": 0.49,
"learning_rate": 2.8759936406995232e-05,
"loss": 3.5214,
"step": 3100
},
{
"epoch": 0.5,
"learning_rate": 2.8736089030206676e-05,
"loss": 3.3884,
"step": 3150
},
{
"epoch": 0.5,
"learning_rate": 2.8712241653418124e-05,
"loss": 3.4611,
"step": 3200
},
{
"epoch": 0.51,
"learning_rate": 2.868839427662957e-05,
"loss": 3.4954,
"step": 3250
},
{
"epoch": 0.52,
"learning_rate": 2.866454689984102e-05,
"loss": 3.3634,
"step": 3300
},
{
"epoch": 0.53,
"learning_rate": 2.8640699523052463e-05,
"loss": 3.3091,
"step": 3350
},
{
"epoch": 0.54,
"learning_rate": 2.8616852146263914e-05,
"loss": 3.2928,
"step": 3400
},
{
"epoch": 0.54,
"learning_rate": 2.8593004769475358e-05,
"loss": 3.522,
"step": 3450
},
{
"epoch": 0.55,
"learning_rate": 2.8569157392686805e-05,
"loss": 3.4484,
"step": 3500
},
{
"epoch": 0.56,
"learning_rate": 2.854531001589825e-05,
"loss": 3.4451,
"step": 3550
},
{
"epoch": 0.57,
"learning_rate": 2.85214626391097e-05,
"loss": 3.4004,
"step": 3600
},
{
"epoch": 0.58,
"learning_rate": 2.8497615262321144e-05,
"loss": 3.3544,
"step": 3650
},
{
"epoch": 0.58,
"learning_rate": 2.8473767885532592e-05,
"loss": 3.3632,
"step": 3700
},
{
"epoch": 0.59,
"learning_rate": 2.8449920508744036e-05,
"loss": 3.4666,
"step": 3750
},
{
"epoch": 0.6,
"learning_rate": 2.8426073131955487e-05,
"loss": 3.3472,
"step": 3800
},
{
"epoch": 0.61,
"learning_rate": 2.8402225755166934e-05,
"loss": 3.3627,
"step": 3850
},
{
"epoch": 0.62,
"learning_rate": 2.8378378378378378e-05,
"loss": 3.3888,
"step": 3900
},
{
"epoch": 0.62,
"learning_rate": 2.8354531001589826e-05,
"loss": 3.3849,
"step": 3950
},
{
"epoch": 0.63,
"learning_rate": 2.8330683624801273e-05,
"loss": 3.4325,
"step": 4000
},
{
"epoch": 0.64,
"learning_rate": 2.830683624801272e-05,
"loss": 3.4523,
"step": 4050
},
{
"epoch": 0.65,
"learning_rate": 2.8282988871224165e-05,
"loss": 3.4083,
"step": 4100
},
{
"epoch": 0.65,
"learning_rate": 2.8259141494435616e-05,
"loss": 3.3159,
"step": 4150
},
{
"epoch": 0.66,
"learning_rate": 2.823529411764706e-05,
"loss": 3.3498,
"step": 4200
},
{
"epoch": 0.67,
"learning_rate": 2.8211446740858507e-05,
"loss": 3.4931,
"step": 4250
},
{
"epoch": 0.68,
"learning_rate": 2.818759936406995e-05,
"loss": 3.4344,
"step": 4300
},
{
"epoch": 0.69,
"learning_rate": 2.8163751987281402e-05,
"loss": 3.4875,
"step": 4350
},
{
"epoch": 0.69,
"learning_rate": 2.8139904610492846e-05,
"loss": 3.2618,
"step": 4400
},
{
"epoch": 0.7,
"learning_rate": 2.8116057233704294e-05,
"loss": 3.3632,
"step": 4450
},
{
"epoch": 0.71,
"learning_rate": 2.8092209856915738e-05,
"loss": 3.332,
"step": 4500
},
{
"epoch": 0.72,
"learning_rate": 2.806836248012719e-05,
"loss": 3.4005,
"step": 4550
},
{
"epoch": 0.73,
"learning_rate": 2.8044515103338633e-05,
"loss": 3.4338,
"step": 4600
},
{
"epoch": 0.73,
"learning_rate": 2.802066772655008e-05,
"loss": 3.4034,
"step": 4650
},
{
"epoch": 0.74,
"learning_rate": 2.7996820349761528e-05,
"loss": 3.2532,
"step": 4700
},
{
"epoch": 0.75,
"learning_rate": 2.7972972972972975e-05,
"loss": 3.2847,
"step": 4750
},
{
"epoch": 0.76,
"learning_rate": 2.794912559618442e-05,
"loss": 3.5397,
"step": 4800
},
{
"epoch": 0.76,
"learning_rate": 2.7925278219395867e-05,
"loss": 3.3293,
"step": 4850
},
{
"epoch": 0.77,
"learning_rate": 2.7901430842607314e-05,
"loss": 3.2783,
"step": 4900
},
{
"epoch": 0.78,
"learning_rate": 2.7877583465818762e-05,
"loss": 3.376,
"step": 4950
},
{
"epoch": 0.79,
"learning_rate": 2.7853736089030206e-05,
"loss": 3.3117,
"step": 5000
},
{
"epoch": 0.8,
"learning_rate": 2.7829888712241653e-05,
"loss": 3.3216,
"step": 5050
},
{
"epoch": 0.8,
"learning_rate": 2.78060413354531e-05,
"loss": 3.4992,
"step": 5100
},
{
"epoch": 0.81,
"learning_rate": 2.7782193958664548e-05,
"loss": 3.4038,
"step": 5150
},
{
"epoch": 0.82,
"learning_rate": 2.7758346581875992e-05,
"loss": 3.2956,
"step": 5200
},
{
"epoch": 0.83,
"learning_rate": 2.773449920508744e-05,
"loss": 3.3612,
"step": 5250
},
{
"epoch": 0.84,
"learning_rate": 2.7710651828298887e-05,
"loss": 3.3029,
"step": 5300
},
{
"epoch": 0.84,
"learning_rate": 2.7686804451510335e-05,
"loss": 3.4093,
"step": 5350
},
{
"epoch": 0.85,
"learning_rate": 2.766295707472178e-05,
"loss": 3.2937,
"step": 5400
},
{
"epoch": 0.86,
"learning_rate": 2.763910969793323e-05,
"loss": 3.2035,
"step": 5450
},
{
"epoch": 0.87,
"learning_rate": 2.7615262321144677e-05,
"loss": 3.3679,
"step": 5500
},
{
"epoch": 0.88,
"learning_rate": 2.759141494435612e-05,
"loss": 3.3374,
"step": 5550
},
{
"epoch": 0.88,
"learning_rate": 2.756756756756757e-05,
"loss": 3.2496,
"step": 5600
},
{
"epoch": 0.89,
"learning_rate": 2.7543720190779016e-05,
"loss": 3.2862,
"step": 5650
},
{
"epoch": 0.9,
"learning_rate": 2.7519872813990464e-05,
"loss": 3.2177,
"step": 5700
},
{
"epoch": 0.91,
"learning_rate": 2.7496025437201908e-05,
"loss": 3.229,
"step": 5750
},
{
"epoch": 0.91,
"learning_rate": 2.7472178060413355e-05,
"loss": 3.3426,
"step": 5800
},
{
"epoch": 0.92,
"learning_rate": 2.7448330683624803e-05,
"loss": 3.2579,
"step": 5850
},
{
"epoch": 0.93,
"learning_rate": 2.742448330683625e-05,
"loss": 3.2497,
"step": 5900
},
{
"epoch": 0.94,
"learning_rate": 2.7400635930047694e-05,
"loss": 3.3506,
"step": 5950
},
{
"epoch": 0.95,
"learning_rate": 2.7376788553259142e-05,
"loss": 3.2882,
"step": 6000
},
{
"epoch": 0.95,
"learning_rate": 2.735294117647059e-05,
"loss": 3.3079,
"step": 6050
},
{
"epoch": 0.96,
"learning_rate": 2.7329093799682037e-05,
"loss": 3.3407,
"step": 6100
},
{
"epoch": 0.97,
"learning_rate": 2.730524642289348e-05,
"loss": 3.2765,
"step": 6150
},
{
"epoch": 0.98,
"learning_rate": 2.728139904610493e-05,
"loss": 3.3539,
"step": 6200
},
{
"epoch": 0.99,
"learning_rate": 2.7257551669316376e-05,
"loss": 3.3298,
"step": 6250
},
{
"epoch": 0.99,
"learning_rate": 2.7233704292527823e-05,
"loss": 3.3286,
"step": 6300
},
{
"epoch": 1.0,
"learning_rate": 2.7209856915739267e-05,
"loss": 3.3775,
"step": 6350
},
{
"epoch": 1.01,
"learning_rate": 2.7186009538950718e-05,
"loss": 3.2558,
"step": 6400
},
{
"epoch": 1.02,
"learning_rate": 2.7162162162162162e-05,
"loss": 3.1356,
"step": 6450
},
{
"epoch": 1.03,
"learning_rate": 2.713831478537361e-05,
"loss": 3.2696,
"step": 6500
},
{
"epoch": 1.03,
"learning_rate": 2.7114467408585054e-05,
"loss": 3.4071,
"step": 6550
},
{
"epoch": 1.04,
"learning_rate": 2.7090620031796505e-05,
"loss": 3.3242,
"step": 6600
},
{
"epoch": 1.05,
"learning_rate": 2.706677265500795e-05,
"loss": 3.2378,
"step": 6650
},
{
"epoch": 1.06,
"learning_rate": 2.7042925278219396e-05,
"loss": 3.41,
"step": 6700
},
{
"epoch": 1.06,
"learning_rate": 2.701907790143084e-05,
"loss": 3.2521,
"step": 6750
},
{
"epoch": 1.07,
"learning_rate": 2.699523052464229e-05,
"loss": 3.276,
"step": 6800
},
{
"epoch": 1.08,
"learning_rate": 2.6971383147853735e-05,
"loss": 3.3381,
"step": 6850
},
{
"epoch": 1.09,
"learning_rate": 2.6947535771065183e-05,
"loss": 3.2429,
"step": 6900
},
{
"epoch": 1.1,
"learning_rate": 2.692368839427663e-05,
"loss": 3.247,
"step": 6950
},
{
"epoch": 1.1,
"learning_rate": 2.6899841017488078e-05,
"loss": 3.2207,
"step": 7000
},
{
"epoch": 1.11,
"learning_rate": 2.6875993640699522e-05,
"loss": 3.2676,
"step": 7050
},
{
"epoch": 1.12,
"learning_rate": 2.685214626391097e-05,
"loss": 3.2761,
"step": 7100
},
{
"epoch": 1.13,
"learning_rate": 2.682829888712242e-05,
"loss": 3.2047,
"step": 7150
},
{
"epoch": 1.14,
"learning_rate": 2.6804451510333864e-05,
"loss": 3.2238,
"step": 7200
},
{
"epoch": 1.14,
"learning_rate": 2.6780604133545312e-05,
"loss": 3.2134,
"step": 7250
},
{
"epoch": 1.15,
"learning_rate": 2.6756756756756756e-05,
"loss": 3.1721,
"step": 7300
},
{
"epoch": 1.16,
"learning_rate": 2.6732909379968207e-05,
"loss": 3.2021,
"step": 7350
},
{
"epoch": 1.17,
"learning_rate": 2.670906200317965e-05,
"loss": 3.2336,
"step": 7400
},
{
"epoch": 1.18,
"learning_rate": 2.6685214626391098e-05,
"loss": 3.2834,
"step": 7450
},
{
"epoch": 1.18,
"learning_rate": 2.6661367249602546e-05,
"loss": 3.1427,
"step": 7500
},
{
"epoch": 1.19,
"learning_rate": 2.6637519872813993e-05,
"loss": 3.2783,
"step": 7550
},
{
"epoch": 1.2,
"learning_rate": 2.6613672496025437e-05,
"loss": 3.1762,
"step": 7600
},
{
"epoch": 1.21,
"learning_rate": 2.6589825119236885e-05,
"loss": 3.1464,
"step": 7650
},
{
"epoch": 1.21,
"learning_rate": 2.6565977742448332e-05,
"loss": 3.2136,
"step": 7700
},
{
"epoch": 1.22,
"learning_rate": 2.654213036565978e-05,
"loss": 3.318,
"step": 7750
},
{
"epoch": 1.23,
"learning_rate": 2.6518282988871224e-05,
"loss": 3.2813,
"step": 7800
},
{
"epoch": 1.24,
"learning_rate": 2.649443561208267e-05,
"loss": 3.2116,
"step": 7850
},
{
"epoch": 1.25,
"learning_rate": 2.647058823529412e-05,
"loss": 3.3032,
"step": 7900
},
{
"epoch": 1.25,
"learning_rate": 2.6446740858505566e-05,
"loss": 3.1659,
"step": 7950
},
{
"epoch": 1.26,
"learning_rate": 2.642289348171701e-05,
"loss": 3.1673,
"step": 8000
},
{
"epoch": 1.27,
"learning_rate": 2.6399046104928458e-05,
"loss": 3.1739,
"step": 8050
},
{
"epoch": 1.28,
"learning_rate": 2.6375198728139905e-05,
"loss": 3.2324,
"step": 8100
},
{
"epoch": 1.29,
"learning_rate": 2.6351351351351353e-05,
"loss": 3.2337,
"step": 8150
},
{
"epoch": 1.29,
"learning_rate": 2.6327503974562797e-05,
"loss": 3.3776,
"step": 8200
},
{
"epoch": 1.3,
"learning_rate": 2.6303656597774248e-05,
"loss": 3.1979,
"step": 8250
},
{
"epoch": 1.31,
"learning_rate": 2.6279809220985692e-05,
"loss": 3.2378,
"step": 8300
},
{
"epoch": 1.32,
"learning_rate": 2.625596184419714e-05,
"loss": 3.1871,
"step": 8350
},
{
"epoch": 1.32,
"learning_rate": 2.6232114467408583e-05,
"loss": 3.2388,
"step": 8400
},
{
"epoch": 1.33,
"learning_rate": 2.6208267090620034e-05,
"loss": 3.1731,
"step": 8450
},
{
"epoch": 1.34,
"learning_rate": 2.6184419713831478e-05,
"loss": 3.111,
"step": 8500
},
{
"epoch": 1.35,
"learning_rate": 2.6160572337042926e-05,
"loss": 3.0985,
"step": 8550
},
{
"epoch": 1.36,
"learning_rate": 2.613672496025437e-05,
"loss": 3.175,
"step": 8600
},
{
"epoch": 1.36,
"learning_rate": 2.611287758346582e-05,
"loss": 3.0125,
"step": 8650
},
{
"epoch": 1.37,
"learning_rate": 2.6089030206677265e-05,
"loss": 3.0734,
"step": 8700
},
{
"epoch": 1.38,
"learning_rate": 2.6065182829888712e-05,
"loss": 3.181,
"step": 8750
},
{
"epoch": 1.39,
"learning_rate": 2.604133545310016e-05,
"loss": 3.1494,
"step": 8800
},
{
"epoch": 1.4,
"learning_rate": 2.6017488076311607e-05,
"loss": 3.1585,
"step": 8850
},
{
"epoch": 1.4,
"learning_rate": 2.5993640699523055e-05,
"loss": 3.1531,
"step": 8900
},
{
"epoch": 1.41,
"learning_rate": 2.59697933227345e-05,
"loss": 3.0955,
"step": 8950
},
{
"epoch": 1.42,
"learning_rate": 2.594594594594595e-05,
"loss": 3.2459,
"step": 9000
},
{
"epoch": 1.43,
"learning_rate": 2.5922098569157394e-05,
"loss": 3.0333,
"step": 9050
},
{
"epoch": 1.44,
"learning_rate": 2.589825119236884e-05,
"loss": 3.1748,
"step": 9100
},
{
"epoch": 1.44,
"learning_rate": 2.5874403815580285e-05,
"loss": 3.2446,
"step": 9150
},
{
"epoch": 1.45,
"learning_rate": 2.5850556438791736e-05,
"loss": 3.1582,
"step": 9200
},
{
"epoch": 1.46,
"learning_rate": 2.582670906200318e-05,
"loss": 3.0357,
"step": 9250
},
{
"epoch": 1.47,
"learning_rate": 2.5802861685214628e-05,
"loss": 3.1443,
"step": 9300
},
{
"epoch": 1.47,
"learning_rate": 2.5779014308426072e-05,
"loss": 3.2152,
"step": 9350
},
{
"epoch": 1.48,
"learning_rate": 2.5755166931637523e-05,
"loss": 3.0451,
"step": 9400
},
{
"epoch": 1.49,
"learning_rate": 2.5731319554848967e-05,
"loss": 3.1334,
"step": 9450
},
{
"epoch": 1.5,
"learning_rate": 2.5707472178060414e-05,
"loss": 3.0432,
"step": 9500
},
{
"epoch": 1.51,
"learning_rate": 2.5683624801271858e-05,
"loss": 3.0226,
"step": 9550
},
{
"epoch": 1.51,
"learning_rate": 2.565977742448331e-05,
"loss": 3.0225,
"step": 9600
},
{
"epoch": 1.52,
"learning_rate": 2.5635930047694753e-05,
"loss": 3.0502,
"step": 9650
},
{
"epoch": 1.53,
"learning_rate": 2.56120826709062e-05,
"loss": 3.0431,
"step": 9700
},
{
"epoch": 1.54,
"learning_rate": 2.5588235294117648e-05,
"loss": 3.0514,
"step": 9750
},
{
"epoch": 1.55,
"learning_rate": 2.5564387917329096e-05,
"loss": 2.9757,
"step": 9800
},
{
"epoch": 1.55,
"learning_rate": 2.554054054054054e-05,
"loss": 2.9868,
"step": 9850
},
{
"epoch": 1.56,
"learning_rate": 2.5516693163751987e-05,
"loss": 3.0799,
"step": 9900
},
{
"epoch": 1.57,
"learning_rate": 2.5492845786963435e-05,
"loss": 3.0556,
"step": 9950
},
{
"epoch": 1.58,
"learning_rate": 2.5468998410174882e-05,
"loss": 3.0771,
"step": 10000
},
{
"epoch": 1.59,
"learning_rate": 2.5445151033386326e-05,
"loss": 2.9782,
"step": 10050
},
{
"epoch": 1.59,
"learning_rate": 2.5421303656597774e-05,
"loss": 2.9809,
"step": 10100
},
{
"epoch": 1.6,
"learning_rate": 2.539745627980922e-05,
"loss": 3.019,
"step": 10150
},
{
"epoch": 1.61,
"learning_rate": 2.537360890302067e-05,
"loss": 3.0336,
"step": 10200
},
{
"epoch": 1.62,
"learning_rate": 2.5349761526232113e-05,
"loss": 2.9084,
"step": 10250
},
{
"epoch": 1.62,
"learning_rate": 2.5325914149443564e-05,
"loss": 2.9777,
"step": 10300
},
{
"epoch": 1.63,
"learning_rate": 2.5302066772655008e-05,
"loss": 2.9971,
"step": 10350
},
{
"epoch": 1.64,
"learning_rate": 2.5278219395866455e-05,
"loss": 3.0934,
"step": 10400
},
{
"epoch": 1.65,
"learning_rate": 2.5254372019077903e-05,
"loss": 2.9911,
"step": 10450
},
{
"epoch": 1.66,
"learning_rate": 2.523052464228935e-05,
"loss": 2.8821,
"step": 10500
},
{
"epoch": 1.66,
"learning_rate": 2.5206677265500798e-05,
"loss": 3.0607,
"step": 10550
},
{
"epoch": 1.67,
"learning_rate": 2.5182829888712242e-05,
"loss": 2.9407,
"step": 10600
},
{
"epoch": 1.68,
"learning_rate": 2.515898251192369e-05,
"loss": 2.9614,
"step": 10650
},
{
"epoch": 1.69,
"learning_rate": 2.5135135135135137e-05,
"loss": 2.9679,
"step": 10700
},
{
"epoch": 1.7,
"learning_rate": 2.5111287758346584e-05,
"loss": 2.9488,
"step": 10750
},
{
"epoch": 1.7,
"learning_rate": 2.5087440381558028e-05,
"loss": 3.0865,
"step": 10800
},
{
"epoch": 1.71,
"learning_rate": 2.5063593004769476e-05,
"loss": 3.0614,
"step": 10850
},
{
"epoch": 1.72,
"learning_rate": 2.5039745627980923e-05,
"loss": 2.9559,
"step": 10900
},
{
"epoch": 1.73,
"learning_rate": 2.501589825119237e-05,
"loss": 2.9049,
"step": 10950
},
{
"epoch": 1.73,
"learning_rate": 2.4992050874403815e-05,
"loss": 2.9864,
"step": 11000
},
{
"epoch": 1.74,
"learning_rate": 2.4968203497615266e-05,
"loss": 2.9728,
"step": 11050
},
{
"epoch": 1.75,
"learning_rate": 2.494435612082671e-05,
"loss": 2.8598,
"step": 11100
},
{
"epoch": 1.76,
"learning_rate": 2.4920508744038157e-05,
"loss": 2.9705,
"step": 11150
},
{
"epoch": 1.77,
"learning_rate": 2.48966613672496e-05,
"loss": 2.872,
"step": 11200
},
{
"epoch": 1.77,
"learning_rate": 2.4872813990461052e-05,
"loss": 2.9734,
"step": 11250
},
{
"epoch": 1.78,
"learning_rate": 2.4848966613672496e-05,
"loss": 2.9332,
"step": 11300
},
{
"epoch": 1.79,
"learning_rate": 2.4825119236883944e-05,
"loss": 2.9188,
"step": 11350
},
{
"epoch": 1.8,
"learning_rate": 2.4801271860095388e-05,
"loss": 2.8664,
"step": 11400
},
{
"epoch": 1.81,
"learning_rate": 2.477742448330684e-05,
"loss": 2.8799,
"step": 11450
},
{
"epoch": 1.81,
"learning_rate": 2.4753577106518283e-05,
"loss": 2.9415,
"step": 11500
},
{
"epoch": 1.82,
"learning_rate": 2.472972972972973e-05,
"loss": 2.9911,
"step": 11550
},
{
"epoch": 1.83,
"learning_rate": 2.4705882352941174e-05,
"loss": 2.9031,
"step": 11600
},
{
"epoch": 1.84,
"learning_rate": 2.4682034976152625e-05,
"loss": 2.9432,
"step": 11650
},
{
"epoch": 1.85,
"learning_rate": 2.465818759936407e-05,
"loss": 2.9868,
"step": 11700
},
{
"epoch": 1.85,
"learning_rate": 2.4634340222575517e-05,
"loss": 2.8311,
"step": 11750
},
{
"epoch": 1.86,
"learning_rate": 2.4610492845786964e-05,
"loss": 2.8431,
"step": 11800
},
{
"epoch": 1.87,
"learning_rate": 2.458664546899841e-05,
"loss": 2.9702,
"step": 11850
},
{
"epoch": 1.88,
"learning_rate": 2.4562798092209856e-05,
"loss": 2.9432,
"step": 11900
},
{
"epoch": 1.88,
"learning_rate": 2.4538950715421303e-05,
"loss": 2.904,
"step": 11950
},
{
"epoch": 1.89,
"learning_rate": 2.451510333863275e-05,
"loss": 2.899,
"step": 12000
},
{
"epoch": 1.9,
"learning_rate": 2.4491255961844198e-05,
"loss": 2.7878,
"step": 12050
},
{
"epoch": 1.91,
"learning_rate": 2.4467408585055646e-05,
"loss": 2.876,
"step": 12100
},
{
"epoch": 1.92,
"learning_rate": 2.444356120826709e-05,
"loss": 2.9902,
"step": 12150
},
{
"epoch": 1.92,
"learning_rate": 2.441971383147854e-05,
"loss": 2.8072,
"step": 12200
},
{
"epoch": 1.93,
"learning_rate": 2.4395866454689985e-05,
"loss": 2.7914,
"step": 12250
},
{
"epoch": 1.94,
"learning_rate": 2.4372019077901432e-05,
"loss": 2.9975,
"step": 12300
},
{
"epoch": 1.95,
"learning_rate": 2.4348171701112876e-05,
"loss": 2.9566,
"step": 12350
},
{
"epoch": 1.96,
"learning_rate": 2.4324324324324327e-05,
"loss": 2.9045,
"step": 12400
},
{
"epoch": 1.96,
"learning_rate": 2.430047694753577e-05,
"loss": 2.9696,
"step": 12450
},
{
"epoch": 1.97,
"learning_rate": 2.427662957074722e-05,
"loss": 3.03,
"step": 12500
},
{
"epoch": 1.98,
"learning_rate": 2.4252782193958666e-05,
"loss": 2.8536,
"step": 12550
},
{
"epoch": 1.99,
"learning_rate": 2.4228934817170114e-05,
"loss": 2.9715,
"step": 12600
},
{
"epoch": 2.0,
"learning_rate": 2.4205087440381558e-05,
"loss": 2.8773,
"step": 12650
},
{
"epoch": 2.0,
"learning_rate": 2.4181240063593005e-05,
"loss": 2.8083,
"step": 12700
},
{
"epoch": 2.01,
"learning_rate": 2.4157392686804453e-05,
"loss": 2.8994,
"step": 12750
},
{
"epoch": 2.02,
"learning_rate": 2.41335453100159e-05,
"loss": 2.8305,
"step": 12800
},
{
"epoch": 2.03,
"learning_rate": 2.4109697933227344e-05,
"loss": 2.9153,
"step": 12850
},
{
"epoch": 2.03,
"learning_rate": 2.4085850556438792e-05,
"loss": 2.8469,
"step": 12900
},
{
"epoch": 2.04,
"learning_rate": 2.406200317965024e-05,
"loss": 2.8301,
"step": 12950
},
{
"epoch": 2.05,
"learning_rate": 2.4038155802861687e-05,
"loss": 2.8205,
"step": 13000
},
{
"epoch": 2.06,
"learning_rate": 2.401430842607313e-05,
"loss": 2.8669,
"step": 13050
},
{
"epoch": 2.07,
"learning_rate": 2.399046104928458e-05,
"loss": 2.7455,
"step": 13100
},
{
"epoch": 2.07,
"learning_rate": 2.3966613672496026e-05,
"loss": 2.8466,
"step": 13150
},
{
"epoch": 2.08,
"learning_rate": 2.3942766295707473e-05,
"loss": 2.7977,
"step": 13200
},
{
"epoch": 2.09,
"learning_rate": 2.3918918918918917e-05,
"loss": 2.7967,
"step": 13250
},
{
"epoch": 2.1,
"learning_rate": 2.3895071542130368e-05,
"loss": 2.8632,
"step": 13300
},
{
"epoch": 2.11,
"learning_rate": 2.3871224165341812e-05,
"loss": 2.8509,
"step": 13350
},
{
"epoch": 2.11,
"learning_rate": 2.384737678855326e-05,
"loss": 2.7735,
"step": 13400
},
{
"epoch": 2.12,
"learning_rate": 2.3823529411764704e-05,
"loss": 2.7421,
"step": 13450
},
{
"epoch": 2.13,
"learning_rate": 2.3799682034976155e-05,
"loss": 2.8944,
"step": 13500
},
{
"epoch": 2.14,
"learning_rate": 2.37758346581876e-05,
"loss": 2.812,
"step": 13550
},
{
"epoch": 2.15,
"learning_rate": 2.3751987281399046e-05,
"loss": 2.7978,
"step": 13600
},
{
"epoch": 2.15,
"learning_rate": 2.372813990461049e-05,
"loss": 2.8839,
"step": 13650
},
{
"epoch": 2.16,
"learning_rate": 2.370429252782194e-05,
"loss": 2.8741,
"step": 13700
},
{
"epoch": 2.17,
"learning_rate": 2.368044515103339e-05,
"loss": 2.812,
"step": 13750
},
{
"epoch": 2.18,
"learning_rate": 2.3656597774244833e-05,
"loss": 2.8138,
"step": 13800
},
{
"epoch": 2.18,
"learning_rate": 2.3632750397456284e-05,
"loss": 2.7536,
"step": 13850
},
{
"epoch": 2.19,
"learning_rate": 2.3608903020667728e-05,
"loss": 2.9393,
"step": 13900
},
{
"epoch": 2.2,
"learning_rate": 2.3585055643879175e-05,
"loss": 2.8055,
"step": 13950
},
{
"epoch": 2.21,
"learning_rate": 2.356120826709062e-05,
"loss": 2.7866,
"step": 14000
},
{
"epoch": 2.22,
"learning_rate": 2.353736089030207e-05,
"loss": 2.7212,
"step": 14050
},
{
"epoch": 2.22,
"learning_rate": 2.3513513513513514e-05,
"loss": 2.901,
"step": 14100
},
{
"epoch": 2.23,
"learning_rate": 2.348966613672496e-05,
"loss": 2.9047,
"step": 14150
},
{
"epoch": 2.24,
"learning_rate": 2.3465818759936406e-05,
"loss": 2.8909,
"step": 14200
},
{
"epoch": 2.25,
"learning_rate": 2.3441971383147857e-05,
"loss": 2.7759,
"step": 14250
},
{
"epoch": 2.26,
"learning_rate": 2.34181240063593e-05,
"loss": 2.8714,
"step": 14300
},
{
"epoch": 2.26,
"learning_rate": 2.3394276629570748e-05,
"loss": 2.7804,
"step": 14350
},
{
"epoch": 2.27,
"learning_rate": 2.3370429252782192e-05,
"loss": 2.8279,
"step": 14400
},
{
"epoch": 2.28,
"learning_rate": 2.3346581875993643e-05,
"loss": 2.7939,
"step": 14450
},
{
"epoch": 2.29,
"learning_rate": 2.3322734499205087e-05,
"loss": 2.7902,
"step": 14500
},
{
"epoch": 2.29,
"learning_rate": 2.3298887122416535e-05,
"loss": 2.7452,
"step": 14550
},
{
"epoch": 2.3,
"learning_rate": 2.3275039745627982e-05,
"loss": 2.7765,
"step": 14600
},
{
"epoch": 2.31,
"learning_rate": 2.325119236883943e-05,
"loss": 2.7643,
"step": 14650
},
{
"epoch": 2.32,
"learning_rate": 2.3227344992050874e-05,
"loss": 2.8241,
"step": 14700
},
{
"epoch": 2.33,
"learning_rate": 2.320349761526232e-05,
"loss": 2.6875,
"step": 14750
},
{
"epoch": 2.33,
"learning_rate": 2.317965023847377e-05,
"loss": 2.7444,
"step": 14800
},
{
"epoch": 2.34,
"learning_rate": 2.3155802861685216e-05,
"loss": 2.8183,
"step": 14850
},
{
"epoch": 2.35,
"learning_rate": 2.313195548489666e-05,
"loss": 2.7885,
"step": 14900
},
{
"epoch": 2.36,
"learning_rate": 2.3108108108108108e-05,
"loss": 2.7358,
"step": 14950
},
{
"epoch": 2.37,
"learning_rate": 2.3084260731319555e-05,
"loss": 2.7983,
"step": 15000
},
{
"epoch": 2.37,
"learning_rate": 2.3060413354531003e-05,
"loss": 2.7354,
"step": 15050
},
{
"epoch": 2.38,
"learning_rate": 2.3036565977742447e-05,
"loss": 2.7596,
"step": 15100
},
{
"epoch": 2.39,
"learning_rate": 2.3012718600953894e-05,
"loss": 2.7021,
"step": 15150
},
{
"epoch": 2.4,
"learning_rate": 2.2988871224165342e-05,
"loss": 2.7264,
"step": 15200
},
{
"epoch": 2.41,
"learning_rate": 2.296502384737679e-05,
"loss": 2.7476,
"step": 15250
},
{
"epoch": 2.41,
"learning_rate": 2.2941176470588233e-05,
"loss": 2.8127,
"step": 15300
},
{
"epoch": 2.42,
"learning_rate": 2.2917329093799684e-05,
"loss": 2.73,
"step": 15350
},
{
"epoch": 2.43,
"learning_rate": 2.289348171701113e-05,
"loss": 2.7201,
"step": 15400
},
{
"epoch": 2.44,
"learning_rate": 2.2869634340222576e-05,
"loss": 2.8273,
"step": 15450
},
{
"epoch": 2.44,
"learning_rate": 2.2845786963434023e-05,
"loss": 2.8538,
"step": 15500
},
{
"epoch": 2.45,
"learning_rate": 2.282193958664547e-05,
"loss": 2.7814,
"step": 15550
},
{
"epoch": 2.46,
"learning_rate": 2.2798092209856918e-05,
"loss": 2.7943,
"step": 15600
},
{
"epoch": 2.47,
"learning_rate": 2.2774244833068362e-05,
"loss": 2.8791,
"step": 15650
},
{
"epoch": 2.48,
"learning_rate": 2.275039745627981e-05,
"loss": 2.6577,
"step": 15700
},
{
"epoch": 2.48,
"learning_rate": 2.2726550079491257e-05,
"loss": 2.7085,
"step": 15750
},
{
"epoch": 2.49,
"learning_rate": 2.2702702702702705e-05,
"loss": 2.8721,
"step": 15800
},
{
"epoch": 2.5,
"learning_rate": 2.267885532591415e-05,
"loss": 2.7809,
"step": 15850
},
{
"epoch": 2.51,
"learning_rate": 2.26550079491256e-05,
"loss": 2.8199,
"step": 15900
},
{
"epoch": 2.52,
"learning_rate": 2.2631160572337044e-05,
"loss": 2.7886,
"step": 15950
},
{
"epoch": 2.52,
"learning_rate": 2.260731319554849e-05,
"loss": 2.6889,
"step": 16000
},
{
"epoch": 2.53,
"learning_rate": 2.2583465818759935e-05,
"loss": 2.8305,
"step": 16050
},
{
"epoch": 2.54,
"learning_rate": 2.2559618441971386e-05,
"loss": 2.6675,
"step": 16100
},
{
"epoch": 2.55,
"learning_rate": 2.253577106518283e-05,
"loss": 2.7808,
"step": 16150
},
{
"epoch": 2.56,
"learning_rate": 2.2511923688394278e-05,
"loss": 2.878,
"step": 16200
},
{
"epoch": 2.56,
"learning_rate": 2.2488076311605722e-05,
"loss": 2.7607,
"step": 16250
},
{
"epoch": 2.57,
"learning_rate": 2.2464228934817173e-05,
"loss": 2.7103,
"step": 16300
},
{
"epoch": 2.58,
"learning_rate": 2.2440381558028617e-05,
"loss": 2.7305,
"step": 16350
},
{
"epoch": 2.59,
"learning_rate": 2.2416534181240064e-05,
"loss": 2.7234,
"step": 16400
},
{
"epoch": 2.59,
"learning_rate": 2.2392686804451508e-05,
"loss": 2.8669,
"step": 16450
},
{
"epoch": 2.6,
"learning_rate": 2.236883942766296e-05,
"loss": 2.7355,
"step": 16500
},
{
"epoch": 2.61,
"learning_rate": 2.2344992050874403e-05,
"loss": 2.8589,
"step": 16550
},
{
"epoch": 2.62,
"learning_rate": 2.232114467408585e-05,
"loss": 2.7384,
"step": 16600
},
{
"epoch": 2.63,
"learning_rate": 2.2297297297297298e-05,
"loss": 2.7677,
"step": 16650
},
{
"epoch": 2.63,
"learning_rate": 2.2273449920508746e-05,
"loss": 2.7464,
"step": 16700
},
{
"epoch": 2.64,
"learning_rate": 2.224960254372019e-05,
"loss": 2.7476,
"step": 16750
},
{
"epoch": 2.65,
"learning_rate": 2.2225755166931637e-05,
"loss": 2.6909,
"step": 16800
},
{
"epoch": 2.66,
"learning_rate": 2.2201907790143085e-05,
"loss": 2.7507,
"step": 16850
},
{
"epoch": 2.67,
"learning_rate": 2.2178060413354532e-05,
"loss": 2.7785,
"step": 16900
},
{
"epoch": 2.67,
"learning_rate": 2.2154213036565976e-05,
"loss": 2.7384,
"step": 16950
},
{
"epoch": 2.68,
"learning_rate": 2.2130365659777424e-05,
"loss": 2.8127,
"step": 17000
},
{
"epoch": 2.69,
"learning_rate": 2.210651828298887e-05,
"loss": 2.6794,
"step": 17050
},
{
"epoch": 2.7,
"learning_rate": 2.208267090620032e-05,
"loss": 2.732,
"step": 17100
},
{
"epoch": 2.71,
"learning_rate": 2.2058823529411766e-05,
"loss": 2.6593,
"step": 17150
},
{
"epoch": 2.71,
"learning_rate": 2.203497615262321e-05,
"loss": 2.8902,
"step": 17200
},
{
"epoch": 2.72,
"learning_rate": 2.201112877583466e-05,
"loss": 2.6455,
"step": 17250
},
{
"epoch": 2.73,
"learning_rate": 2.1987281399046105e-05,
"loss": 2.6208,
"step": 17300
},
{
"epoch": 2.74,
"learning_rate": 2.1963434022257553e-05,
"loss": 2.8468,
"step": 17350
},
{
"epoch": 2.74,
"learning_rate": 2.1939586645469e-05,
"loss": 2.7659,
"step": 17400
},
{
"epoch": 2.75,
"learning_rate": 2.1915739268680448e-05,
"loss": 2.6662,
"step": 17450
},
{
"epoch": 2.76,
"learning_rate": 2.1891891891891892e-05,
"loss": 2.7181,
"step": 17500
},
{
"epoch": 2.77,
"learning_rate": 2.186804451510334e-05,
"loss": 2.8185,
"step": 17550
},
{
"epoch": 2.78,
"learning_rate": 2.1844197138314787e-05,
"loss": 2.7554,
"step": 17600
},
{
"epoch": 2.78,
"learning_rate": 2.1820349761526234e-05,
"loss": 2.7064,
"step": 17650
},
{
"epoch": 2.79,
"learning_rate": 2.1796502384737678e-05,
"loss": 2.7318,
"step": 17700
},
{
"epoch": 2.8,
"learning_rate": 2.1772655007949126e-05,
"loss": 2.7196,
"step": 17750
},
{
"epoch": 2.81,
"learning_rate": 2.1748807631160573e-05,
"loss": 2.5457,
"step": 17800
},
{
"epoch": 2.82,
"learning_rate": 2.172496025437202e-05,
"loss": 2.6836,
"step": 17850
},
{
"epoch": 2.82,
"learning_rate": 2.1701112877583465e-05,
"loss": 2.685,
"step": 17900
},
{
"epoch": 2.83,
"learning_rate": 2.1677265500794912e-05,
"loss": 2.749,
"step": 17950
},
{
"epoch": 2.84,
"learning_rate": 2.165341812400636e-05,
"loss": 2.781,
"step": 18000
},
{
"epoch": 2.85,
"learning_rate": 2.1629570747217807e-05,
"loss": 2.7667,
"step": 18050
},
{
"epoch": 2.85,
"learning_rate": 2.160572337042925e-05,
"loss": 2.7558,
"step": 18100
},
{
"epoch": 2.86,
"learning_rate": 2.1581875993640702e-05,
"loss": 2.6881,
"step": 18150
},
{
"epoch": 2.87,
"learning_rate": 2.1558028616852146e-05,
"loss": 2.7462,
"step": 18200
},
{
"epoch": 2.88,
"learning_rate": 2.1534181240063594e-05,
"loss": 2.6983,
"step": 18250
},
{
"epoch": 2.89,
"learning_rate": 2.1510333863275038e-05,
"loss": 2.6547,
"step": 18300
},
{
"epoch": 2.89,
"learning_rate": 2.148648648648649e-05,
"loss": 2.65,
"step": 18350
},
{
"epoch": 2.9,
"learning_rate": 2.1462639109697933e-05,
"loss": 2.7827,
"step": 18400
},
{
"epoch": 2.91,
"learning_rate": 2.143879173290938e-05,
"loss": 2.6902,
"step": 18450
},
{
"epoch": 2.92,
"learning_rate": 2.1414944356120824e-05,
"loss": 2.6752,
"step": 18500
},
{
"epoch": 2.93,
"learning_rate": 2.1391096979332275e-05,
"loss": 2.6049,
"step": 18550
},
{
"epoch": 2.93,
"learning_rate": 2.136724960254372e-05,
"loss": 2.8715,
"step": 18600
},
{
"epoch": 2.94,
"learning_rate": 2.1343402225755167e-05,
"loss": 2.6845,
"step": 18650
},
{
"epoch": 2.95,
"learning_rate": 2.1319554848966614e-05,
"loss": 2.7181,
"step": 18700
},
{
"epoch": 2.96,
"learning_rate": 2.129570747217806e-05,
"loss": 2.688,
"step": 18750
},
{
"epoch": 2.97,
"learning_rate": 2.127186009538951e-05,
"loss": 2.7634,
"step": 18800
},
{
"epoch": 2.97,
"learning_rate": 2.1248012718600953e-05,
"loss": 2.7003,
"step": 18850
},
{
"epoch": 2.98,
"learning_rate": 2.1224165341812404e-05,
"loss": 2.6903,
"step": 18900
},
{
"epoch": 2.99,
"learning_rate": 2.1200317965023848e-05,
"loss": 2.635,
"step": 18950
},
{
"epoch": 3.0,
"learning_rate": 2.1176470588235296e-05,
"loss": 2.6414,
"step": 19000
},
{
"epoch": 3.0,
"learning_rate": 2.115262321144674e-05,
"loss": 2.7126,
"step": 19050
},
{
"epoch": 3.01,
"learning_rate": 2.112877583465819e-05,
"loss": 2.5964,
"step": 19100
},
{
"epoch": 3.02,
"learning_rate": 2.1104928457869635e-05,
"loss": 2.7598,
"step": 19150
},
{
"epoch": 3.03,
"learning_rate": 2.1081081081081082e-05,
"loss": 2.6722,
"step": 19200
},
{
"epoch": 3.04,
"learning_rate": 2.1057233704292526e-05,
"loss": 2.684,
"step": 19250
},
{
"epoch": 3.04,
"learning_rate": 2.1033386327503977e-05,
"loss": 2.6479,
"step": 19300
},
{
"epoch": 3.05,
"learning_rate": 2.100953895071542e-05,
"loss": 2.7599,
"step": 19350
},
{
"epoch": 3.06,
"learning_rate": 2.098569157392687e-05,
"loss": 2.8154,
"step": 19400
},
{
"epoch": 3.07,
"learning_rate": 2.0961844197138316e-05,
"loss": 2.6285,
"step": 19450
},
{
"epoch": 3.08,
"learning_rate": 2.0937996820349764e-05,
"loss": 2.6614,
"step": 19500
},
{
"epoch": 3.08,
"learning_rate": 2.0914149443561208e-05,
"loss": 2.5899,
"step": 19550
},
{
"epoch": 3.09,
"learning_rate": 2.0890302066772655e-05,
"loss": 2.6255,
"step": 19600
},
{
"epoch": 3.1,
"learning_rate": 2.0866454689984103e-05,
"loss": 2.7297,
"step": 19650
},
{
"epoch": 3.11,
"learning_rate": 2.084260731319555e-05,
"loss": 2.5885,
"step": 19700
},
{
"epoch": 3.12,
"learning_rate": 2.0818759936406994e-05,
"loss": 2.6466,
"step": 19750
},
{
"epoch": 3.12,
"learning_rate": 2.0794912559618442e-05,
"loss": 2.6435,
"step": 19800
},
{
"epoch": 3.13,
"learning_rate": 2.077106518282989e-05,
"loss": 2.6621,
"step": 19850
},
{
"epoch": 3.14,
"learning_rate": 2.0747217806041337e-05,
"loss": 2.6563,
"step": 19900
},
{
"epoch": 3.15,
"learning_rate": 2.072337042925278e-05,
"loss": 2.636,
"step": 19950
},
{
"epoch": 3.15,
"learning_rate": 2.0699523052464228e-05,
"loss": 2.5775,
"step": 20000
},
{
"epoch": 3.16,
"learning_rate": 2.0675675675675676e-05,
"loss": 2.6366,
"step": 20050
},
{
"epoch": 3.17,
"learning_rate": 2.0651828298887123e-05,
"loss": 2.6649,
"step": 20100
},
{
"epoch": 3.18,
"learning_rate": 2.0627980922098567e-05,
"loss": 2.7077,
"step": 20150
},
{
"epoch": 3.19,
"learning_rate": 2.0604133545310018e-05,
"loss": 2.6725,
"step": 20200
},
{
"epoch": 3.19,
"learning_rate": 2.0580286168521462e-05,
"loss": 2.7058,
"step": 20250
},
{
"epoch": 3.2,
"learning_rate": 2.055643879173291e-05,
"loss": 2.6434,
"step": 20300
},
{
"epoch": 3.21,
"learning_rate": 2.0532591414944354e-05,
"loss": 2.6279,
"step": 20350
},
{
"epoch": 3.22,
"learning_rate": 2.0508744038155805e-05,
"loss": 2.7532,
"step": 20400
},
{
"epoch": 3.23,
"learning_rate": 2.0484896661367252e-05,
"loss": 2.6562,
"step": 20450
},
{
"epoch": 3.23,
"learning_rate": 2.0461049284578696e-05,
"loss": 2.679,
"step": 20500
},
{
"epoch": 3.24,
"learning_rate": 2.0437201907790144e-05,
"loss": 2.6441,
"step": 20550
},
{
"epoch": 3.25,
"learning_rate": 2.041335453100159e-05,
"loss": 2.7591,
"step": 20600
},
{
"epoch": 3.26,
"learning_rate": 2.038950715421304e-05,
"loss": 2.73,
"step": 20650
},
{
"epoch": 3.26,
"learning_rate": 2.0365659777424483e-05,
"loss": 2.6668,
"step": 20700
},
{
"epoch": 3.27,
"learning_rate": 2.034181240063593e-05,
"loss": 2.5595,
"step": 20750
},
{
"epoch": 3.28,
"learning_rate": 2.0317965023847378e-05,
"loss": 2.5686,
"step": 20800
},
{
"epoch": 3.29,
"learning_rate": 2.0294117647058825e-05,
"loss": 2.6029,
"step": 20850
},
{
"epoch": 3.3,
"learning_rate": 2.027027027027027e-05,
"loss": 2.5819,
"step": 20900
},
{
"epoch": 3.3,
"learning_rate": 2.024642289348172e-05,
"loss": 2.638,
"step": 20950
},
{
"epoch": 3.31,
"learning_rate": 2.0222575516693164e-05,
"loss": 2.625,
"step": 21000
},
{
"epoch": 3.32,
"learning_rate": 2.019872813990461e-05,
"loss": 2.6363,
"step": 21050
},
{
"epoch": 3.33,
"learning_rate": 2.0174880763116056e-05,
"loss": 2.8446,
"step": 21100
},
{
"epoch": 3.34,
"learning_rate": 2.0151033386327507e-05,
"loss": 2.6304,
"step": 21150
},
{
"epoch": 3.34,
"learning_rate": 2.012718600953895e-05,
"loss": 2.5041,
"step": 21200
},
{
"epoch": 3.35,
"learning_rate": 2.0103338632750398e-05,
"loss": 2.6228,
"step": 21250
},
{
"epoch": 3.36,
"learning_rate": 2.0079491255961842e-05,
"loss": 2.5432,
"step": 21300
},
{
"epoch": 3.37,
"learning_rate": 2.0055643879173293e-05,
"loss": 2.7547,
"step": 21350
},
{
"epoch": 3.38,
"learning_rate": 2.0031796502384737e-05,
"loss": 2.634,
"step": 21400
},
{
"epoch": 3.38,
"learning_rate": 2.0007949125596185e-05,
"loss": 2.5405,
"step": 21450
},
{
"epoch": 3.39,
"learning_rate": 1.9984101748807632e-05,
"loss": 2.6386,
"step": 21500
},
{
"epoch": 3.4,
"learning_rate": 1.996025437201908e-05,
"loss": 2.6188,
"step": 21550
},
{
"epoch": 3.41,
"learning_rate": 1.9936406995230524e-05,
"loss": 2.6959,
"step": 21600
},
{
"epoch": 3.41,
"learning_rate": 1.991255961844197e-05,
"loss": 2.6742,
"step": 21650
},
{
"epoch": 3.42,
"learning_rate": 1.988871224165342e-05,
"loss": 2.5569,
"step": 21700
},
{
"epoch": 3.43,
"learning_rate": 1.9864864864864866e-05,
"loss": 2.5727,
"step": 21750
},
{
"epoch": 3.44,
"learning_rate": 1.984101748807631e-05,
"loss": 2.5024,
"step": 21800
},
{
"epoch": 3.45,
"learning_rate": 1.9817170111287758e-05,
"loss": 2.6301,
"step": 21850
},
{
"epoch": 3.45,
"learning_rate": 1.9793322734499205e-05,
"loss": 2.539,
"step": 21900
},
{
"epoch": 3.46,
"learning_rate": 1.9769475357710653e-05,
"loss": 2.513,
"step": 21950
},
{
"epoch": 3.47,
"learning_rate": 1.9745627980922097e-05,
"loss": 2.6151,
"step": 22000
},
{
"epoch": 3.48,
"learning_rate": 1.9721780604133544e-05,
"loss": 2.5928,
"step": 22050
},
{
"epoch": 3.49,
"learning_rate": 1.9697933227344995e-05,
"loss": 2.69,
"step": 22100
},
{
"epoch": 3.49,
"learning_rate": 1.967408585055644e-05,
"loss": 2.5951,
"step": 22150
},
{
"epoch": 3.5,
"learning_rate": 1.9650238473767887e-05,
"loss": 2.6194,
"step": 22200
},
{
"epoch": 3.51,
"learning_rate": 1.9626391096979334e-05,
"loss": 2.7138,
"step": 22250
},
{
"epoch": 3.52,
"learning_rate": 1.960254372019078e-05,
"loss": 2.6472,
"step": 22300
},
{
"epoch": 3.53,
"learning_rate": 1.9578696343402226e-05,
"loss": 2.6166,
"step": 22350
},
{
"epoch": 3.53,
"learning_rate": 1.9554848966613673e-05,
"loss": 2.6573,
"step": 22400
},
{
"epoch": 3.54,
"learning_rate": 1.953100158982512e-05,
"loss": 2.531,
"step": 22450
},
{
"epoch": 3.55,
"learning_rate": 1.9507154213036568e-05,
"loss": 2.5752,
"step": 22500
},
{
"epoch": 3.56,
"learning_rate": 1.9483306836248012e-05,
"loss": 2.5375,
"step": 22550
},
{
"epoch": 3.56,
"learning_rate": 1.945945945945946e-05,
"loss": 2.5569,
"step": 22600
},
{
"epoch": 3.57,
"learning_rate": 1.9435612082670907e-05,
"loss": 2.6506,
"step": 22650
},
{
"epoch": 3.58,
"learning_rate": 1.9411764705882355e-05,
"loss": 2.5785,
"step": 22700
},
{
"epoch": 3.59,
"learning_rate": 1.93879173290938e-05,
"loss": 2.6245,
"step": 22750
},
{
"epoch": 3.6,
"learning_rate": 1.9364069952305246e-05,
"loss": 2.6386,
"step": 22800
},
{
"epoch": 3.6,
"learning_rate": 1.9340222575516694e-05,
"loss": 2.5858,
"step": 22850
},
{
"epoch": 3.61,
"learning_rate": 1.931637519872814e-05,
"loss": 2.6831,
"step": 22900
},
{
"epoch": 3.62,
"learning_rate": 1.9292527821939585e-05,
"loss": 2.5829,
"step": 22950
},
{
"epoch": 3.63,
"learning_rate": 1.9268680445151036e-05,
"loss": 2.5942,
"step": 23000
},
{
"epoch": 3.64,
"learning_rate": 1.924483306836248e-05,
"loss": 2.4712,
"step": 23050
},
{
"epoch": 3.64,
"learning_rate": 1.9220985691573928e-05,
"loss": 2.663,
"step": 23100
},
{
"epoch": 3.65,
"learning_rate": 1.9197138314785372e-05,
"loss": 2.68,
"step": 23150
},
{
"epoch": 3.66,
"learning_rate": 1.9173290937996823e-05,
"loss": 2.5576,
"step": 23200
},
{
"epoch": 3.67,
"learning_rate": 1.9149443561208267e-05,
"loss": 2.5646,
"step": 23250
},
{
"epoch": 3.68,
"learning_rate": 1.9125596184419714e-05,
"loss": 2.6577,
"step": 23300
},
{
"epoch": 3.68,
"learning_rate": 1.9101748807631158e-05,
"loss": 2.6625,
"step": 23350
},
{
"epoch": 3.69,
"learning_rate": 1.907790143084261e-05,
"loss": 2.5941,
"step": 23400
},
{
"epoch": 3.7,
"learning_rate": 1.9054054054054053e-05,
"loss": 2.4865,
"step": 23450
},
{
"epoch": 3.71,
"learning_rate": 1.90302066772655e-05,
"loss": 2.5481,
"step": 23500
},
{
"epoch": 3.71,
"learning_rate": 1.9006359300476948e-05,
"loss": 2.5746,
"step": 23550
},
{
"epoch": 3.72,
"learning_rate": 1.8982511923688396e-05,
"loss": 2.6428,
"step": 23600
},
{
"epoch": 3.73,
"learning_rate": 1.895866454689984e-05,
"loss": 2.55,
"step": 23650
},
{
"epoch": 3.74,
"learning_rate": 1.8934817170111287e-05,
"loss": 2.5699,
"step": 23700
},
{
"epoch": 3.75,
"learning_rate": 1.8910969793322738e-05,
"loss": 2.6442,
"step": 23750
},
{
"epoch": 3.75,
"learning_rate": 1.8887122416534182e-05,
"loss": 2.5961,
"step": 23800
},
{
"epoch": 3.76,
"learning_rate": 1.886327503974563e-05,
"loss": 2.6291,
"step": 23850
},
{
"epoch": 3.77,
"learning_rate": 1.8839427662957074e-05,
"loss": 2.6604,
"step": 23900
},
{
"epoch": 3.78,
"learning_rate": 1.8815580286168525e-05,
"loss": 2.676,
"step": 23950
},
{
"epoch": 3.79,
"learning_rate": 1.879173290937997e-05,
"loss": 2.6272,
"step": 24000
},
{
"epoch": 3.79,
"learning_rate": 1.8767885532591416e-05,
"loss": 2.6494,
"step": 24050
},
{
"epoch": 3.8,
"learning_rate": 1.874403815580286e-05,
"loss": 2.5214,
"step": 24100
},
{
"epoch": 3.81,
"learning_rate": 1.872019077901431e-05,
"loss": 2.6031,
"step": 24150
},
{
"epoch": 3.82,
"learning_rate": 1.8696343402225755e-05,
"loss": 2.6587,
"step": 24200
},
{
"epoch": 3.82,
"learning_rate": 1.8672496025437203e-05,
"loss": 2.5222,
"step": 24250
},
{
"epoch": 3.83,
"learning_rate": 1.864864864864865e-05,
"loss": 2.6324,
"step": 24300
},
{
"epoch": 3.84,
"learning_rate": 1.8624801271860098e-05,
"loss": 2.5523,
"step": 24350
},
{
"epoch": 3.85,
"learning_rate": 1.860095389507154e-05,
"loss": 2.5159,
"step": 24400
},
{
"epoch": 3.86,
"learning_rate": 1.857710651828299e-05,
"loss": 2.6001,
"step": 24450
},
{
"epoch": 3.86,
"learning_rate": 1.8553259141494437e-05,
"loss": 2.5723,
"step": 24500
},
{
"epoch": 3.87,
"learning_rate": 1.8529411764705884e-05,
"loss": 2.5136,
"step": 24550
},
{
"epoch": 3.88,
"learning_rate": 1.8505564387917328e-05,
"loss": 2.5974,
"step": 24600
},
{
"epoch": 3.89,
"learning_rate": 1.8481717011128776e-05,
"loss": 2.4753,
"step": 24650
},
{
"epoch": 3.9,
"learning_rate": 1.8457869634340223e-05,
"loss": 2.5379,
"step": 24700
},
{
"epoch": 3.9,
"learning_rate": 1.843402225755167e-05,
"loss": 2.5857,
"step": 24750
},
{
"epoch": 3.91,
"learning_rate": 1.8410174880763115e-05,
"loss": 2.5175,
"step": 24800
},
{
"epoch": 3.92,
"learning_rate": 1.8386327503974562e-05,
"loss": 2.617,
"step": 24850
},
{
"epoch": 3.93,
"learning_rate": 1.836248012718601e-05,
"loss": 2.5201,
"step": 24900
},
{
"epoch": 3.94,
"learning_rate": 1.8338632750397457e-05,
"loss": 2.6352,
"step": 24950
},
{
"epoch": 3.94,
"learning_rate": 1.83147853736089e-05,
"loss": 2.5455,
"step": 25000
},
{
"epoch": 3.95,
"learning_rate": 1.8290937996820352e-05,
"loss": 2.5444,
"step": 25050
},
{
"epoch": 3.96,
"learning_rate": 1.8267090620031796e-05,
"loss": 2.6286,
"step": 25100
},
{
"epoch": 3.97,
"learning_rate": 1.8243243243243244e-05,
"loss": 2.6376,
"step": 25150
},
{
"epoch": 3.97,
"learning_rate": 1.8219395866454688e-05,
"loss": 2.4895,
"step": 25200
},
{
"epoch": 3.98,
"learning_rate": 1.819554848966614e-05,
"loss": 2.4588,
"step": 25250
},
{
"epoch": 3.99,
"learning_rate": 1.8171701112877583e-05,
"loss": 2.5429,
"step": 25300
},
{
"epoch": 4.0,
"learning_rate": 1.814785373608903e-05,
"loss": 2.5472,
"step": 25350
},
{
"epoch": 4.01,
"learning_rate": 1.8124006359300478e-05,
"loss": 2.3872,
"step": 25400
},
{
"epoch": 4.01,
"learning_rate": 1.8100158982511925e-05,
"loss": 2.4872,
"step": 25450
},
{
"epoch": 4.02,
"learning_rate": 1.8076311605723373e-05,
"loss": 2.4989,
"step": 25500
},
{
"epoch": 4.03,
"learning_rate": 1.8052464228934817e-05,
"loss": 2.5616,
"step": 25550
},
{
"epoch": 4.04,
"learning_rate": 1.8028616852146264e-05,
"loss": 2.544,
"step": 25600
},
{
"epoch": 4.05,
"learning_rate": 1.800476947535771e-05,
"loss": 2.5039,
"step": 25650
},
{
"epoch": 4.05,
"learning_rate": 1.798092209856916e-05,
"loss": 2.5312,
"step": 25700
},
{
"epoch": 4.06,
"learning_rate": 1.7957074721780603e-05,
"loss": 2.5458,
"step": 25750
},
{
"epoch": 4.07,
"learning_rate": 1.7933227344992054e-05,
"loss": 2.4705,
"step": 25800
},
{
"epoch": 4.08,
"learning_rate": 1.7909379968203498e-05,
"loss": 2.5152,
"step": 25850
},
{
"epoch": 4.09,
"learning_rate": 1.7885532591414946e-05,
"loss": 2.552,
"step": 25900
},
{
"epoch": 4.09,
"learning_rate": 1.786168521462639e-05,
"loss": 2.4723,
"step": 25950
},
{
"epoch": 4.1,
"learning_rate": 1.783783783783784e-05,
"loss": 2.6591,
"step": 26000
},
{
"epoch": 4.11,
"learning_rate": 1.7813990461049285e-05,
"loss": 2.5466,
"step": 26050
},
{
"epoch": 4.12,
"learning_rate": 1.7790143084260732e-05,
"loss": 2.5868,
"step": 26100
},
{
"epoch": 4.12,
"learning_rate": 1.7766295707472176e-05,
"loss": 2.5455,
"step": 26150
},
{
"epoch": 4.13,
"learning_rate": 1.7742448330683627e-05,
"loss": 2.5525,
"step": 26200
},
{
"epoch": 4.14,
"learning_rate": 1.771860095389507e-05,
"loss": 2.6624,
"step": 26250
},
{
"epoch": 4.15,
"learning_rate": 1.769475357710652e-05,
"loss": 2.5108,
"step": 26300
},
{
"epoch": 4.16,
"learning_rate": 1.7670906200317966e-05,
"loss": 2.5807,
"step": 26350
},
{
"epoch": 4.16,
"learning_rate": 1.7647058823529414e-05,
"loss": 2.5022,
"step": 26400
},
{
"epoch": 4.17,
"learning_rate": 1.7623211446740858e-05,
"loss": 2.5007,
"step": 26450
},
{
"epoch": 4.18,
"learning_rate": 1.7599364069952305e-05,
"loss": 2.6456,
"step": 26500
},
{
"epoch": 4.19,
"learning_rate": 1.7575516693163753e-05,
"loss": 2.5869,
"step": 26550
},
{
"epoch": 4.2,
"learning_rate": 1.75516693163752e-05,
"loss": 2.5292,
"step": 26600
},
{
"epoch": 4.2,
"learning_rate": 1.7527821939586644e-05,
"loss": 2.3513,
"step": 26650
},
{
"epoch": 4.21,
"learning_rate": 1.750397456279809e-05,
"loss": 2.5386,
"step": 26700
},
{
"epoch": 4.22,
"learning_rate": 1.748012718600954e-05,
"loss": 2.4587,
"step": 26750
},
{
"epoch": 4.23,
"learning_rate": 1.7456279809220987e-05,
"loss": 2.5152,
"step": 26800
},
{
"epoch": 4.24,
"learning_rate": 1.743243243243243e-05,
"loss": 2.5277,
"step": 26850
},
{
"epoch": 4.24,
"learning_rate": 1.7408585055643878e-05,
"loss": 2.549,
"step": 26900
},
{
"epoch": 4.25,
"learning_rate": 1.7384737678855326e-05,
"loss": 2.4883,
"step": 26950
},
{
"epoch": 4.26,
"learning_rate": 1.7360890302066773e-05,
"loss": 2.5428,
"step": 27000
},
{
"epoch": 4.27,
"learning_rate": 1.733704292527822e-05,
"loss": 2.5253,
"step": 27050
},
{
"epoch": 4.27,
"learning_rate": 1.7313195548489668e-05,
"loss": 2.5487,
"step": 27100
},
{
"epoch": 4.28,
"learning_rate": 1.7289348171701116e-05,
"loss": 2.4613,
"step": 27150
},
{
"epoch": 4.29,
"learning_rate": 1.726550079491256e-05,
"loss": 2.5073,
"step": 27200
},
{
"epoch": 4.3,
"learning_rate": 1.7241653418124007e-05,
"loss": 2.4965,
"step": 27250
},
{
"epoch": 4.31,
"learning_rate": 1.7217806041335455e-05,
"loss": 2.4334,
"step": 27300
},
{
"epoch": 4.31,
"learning_rate": 1.7193958664546902e-05,
"loss": 2.4705,
"step": 27350
},
{
"epoch": 4.32,
"learning_rate": 1.7170111287758346e-05,
"loss": 2.5386,
"step": 27400
},
{
"epoch": 4.33,
"learning_rate": 1.7146263910969794e-05,
"loss": 2.5268,
"step": 27450
},
{
"epoch": 4.34,
"learning_rate": 1.712241653418124e-05,
"loss": 2.4655,
"step": 27500
},
{
"epoch": 4.35,
"learning_rate": 1.709856915739269e-05,
"loss": 2.485,
"step": 27550
},
{
"epoch": 4.35,
"learning_rate": 1.7074721780604133e-05,
"loss": 2.5323,
"step": 27600
},
{
"epoch": 4.36,
"learning_rate": 1.705087440381558e-05,
"loss": 2.3916,
"step": 27650
},
{
"epoch": 4.37,
"learning_rate": 1.7027027027027028e-05,
"loss": 2.4557,
"step": 27700
},
{
"epoch": 4.38,
"learning_rate": 1.7003179650238475e-05,
"loss": 2.2644,
"step": 27750
},
{
"epoch": 4.38,
"learning_rate": 1.697933227344992e-05,
"loss": 2.4271,
"step": 27800
},
{
"epoch": 4.39,
"learning_rate": 1.695548489666137e-05,
"loss": 2.4928,
"step": 27850
},
{
"epoch": 4.4,
"learning_rate": 1.6931637519872814e-05,
"loss": 2.5587,
"step": 27900
},
{
"epoch": 4.41,
"learning_rate": 1.690779014308426e-05,
"loss": 2.4824,
"step": 27950
},
{
"epoch": 4.42,
"learning_rate": 1.6883942766295706e-05,
"loss": 2.5206,
"step": 28000
},
{
"epoch": 4.42,
"learning_rate": 1.6860095389507157e-05,
"loss": 2.4376,
"step": 28050
},
{
"epoch": 4.43,
"learning_rate": 1.68362480127186e-05,
"loss": 2.4982,
"step": 28100
},
{
"epoch": 4.44,
"learning_rate": 1.6812400635930048e-05,
"loss": 2.4095,
"step": 28150
},
{
"epoch": 4.45,
"learning_rate": 1.6788553259141492e-05,
"loss": 2.4715,
"step": 28200
},
{
"epoch": 4.46,
"learning_rate": 1.6764705882352943e-05,
"loss": 2.5444,
"step": 28250
},
{
"epoch": 4.46,
"learning_rate": 1.6740858505564387e-05,
"loss": 2.4082,
"step": 28300
},
{
"epoch": 4.47,
"learning_rate": 1.6717011128775835e-05,
"loss": 2.4436,
"step": 28350
},
{
"epoch": 4.48,
"learning_rate": 1.669316375198728e-05,
"loss": 2.5415,
"step": 28400
},
{
"epoch": 4.49,
"learning_rate": 1.666931637519873e-05,
"loss": 2.5421,
"step": 28450
},
{
"epoch": 4.5,
"learning_rate": 1.6645468998410174e-05,
"loss": 2.5493,
"step": 28500
},
{
"epoch": 4.5,
"learning_rate": 1.662162162162162e-05,
"loss": 2.556,
"step": 28550
},
{
"epoch": 4.51,
"learning_rate": 1.659777424483307e-05,
"loss": 2.4381,
"step": 28600
},
{
"epoch": 4.52,
"learning_rate": 1.6573926868044516e-05,
"loss": 2.6628,
"step": 28650
},
{
"epoch": 4.53,
"learning_rate": 1.6550079491255964e-05,
"loss": 2.503,
"step": 28700
},
{
"epoch": 4.53,
"learning_rate": 1.6526232114467408e-05,
"loss": 2.5354,
"step": 28750
},
{
"epoch": 4.54,
"learning_rate": 1.650238473767886e-05,
"loss": 2.4082,
"step": 28800
},
{
"epoch": 4.55,
"learning_rate": 1.6478537360890303e-05,
"loss": 2.4708,
"step": 28850
},
{
"epoch": 4.56,
"learning_rate": 1.645468998410175e-05,
"loss": 2.441,
"step": 28900
},
{
"epoch": 4.57,
"learning_rate": 1.6430842607313194e-05,
"loss": 2.4419,
"step": 28950
},
{
"epoch": 4.57,
"learning_rate": 1.6406995230524645e-05,
"loss": 2.5285,
"step": 29000
},
{
"epoch": 4.58,
"learning_rate": 1.638314785373609e-05,
"loss": 2.4596,
"step": 29050
},
{
"epoch": 4.59,
"learning_rate": 1.6359300476947537e-05,
"loss": 2.5179,
"step": 29100
},
{
"epoch": 4.6,
"learning_rate": 1.6335453100158984e-05,
"loss": 2.4328,
"step": 29150
},
{
"epoch": 4.61,
"learning_rate": 1.631160572337043e-05,
"loss": 2.4823,
"step": 29200
},
{
"epoch": 4.61,
"learning_rate": 1.6287758346581876e-05,
"loss": 2.4784,
"step": 29250
},
{
"epoch": 4.62,
"learning_rate": 1.6263910969793323e-05,
"loss": 2.4365,
"step": 29300
},
{
"epoch": 4.63,
"learning_rate": 1.624006359300477e-05,
"loss": 2.4199,
"step": 29350
},
{
"epoch": 4.64,
"learning_rate": 1.6216216216216218e-05,
"loss": 2.5584,
"step": 29400
},
{
"epoch": 4.65,
"learning_rate": 1.6192368839427662e-05,
"loss": 2.4681,
"step": 29450
},
{
"epoch": 4.65,
"learning_rate": 1.616852146263911e-05,
"loss": 2.6135,
"step": 29500
},
{
"epoch": 4.66,
"learning_rate": 1.6144674085850557e-05,
"loss": 2.4771,
"step": 29550
},
{
"epoch": 4.67,
"learning_rate": 1.6120826709062005e-05,
"loss": 2.4149,
"step": 29600
},
{
"epoch": 4.68,
"learning_rate": 1.609697933227345e-05,
"loss": 2.5233,
"step": 29650
},
{
"epoch": 4.68,
"learning_rate": 1.6073131955484896e-05,
"loss": 2.5135,
"step": 29700
},
{
"epoch": 4.69,
"learning_rate": 1.6049284578696344e-05,
"loss": 2.458,
"step": 29750
},
{
"epoch": 4.7,
"learning_rate": 1.602543720190779e-05,
"loss": 2.4923,
"step": 29800
},
{
"epoch": 4.71,
"learning_rate": 1.6001589825119235e-05,
"loss": 2.508,
"step": 29850
},
{
"epoch": 4.72,
"learning_rate": 1.5977742448330686e-05,
"loss": 2.4119,
"step": 29900
},
{
"epoch": 4.72,
"learning_rate": 1.595389507154213e-05,
"loss": 2.491,
"step": 29950
},
{
"epoch": 4.73,
"learning_rate": 1.5930047694753578e-05,
"loss": 2.437,
"step": 30000
},
{
"epoch": 4.74,
"learning_rate": 1.5906200317965022e-05,
"loss": 2.4738,
"step": 30050
},
{
"epoch": 4.75,
"learning_rate": 1.5882352941176473e-05,
"loss": 2.3932,
"step": 30100
},
{
"epoch": 4.76,
"learning_rate": 1.5858505564387917e-05,
"loss": 2.4635,
"step": 30150
},
{
"epoch": 4.76,
"learning_rate": 1.5834658187599364e-05,
"loss": 2.4331,
"step": 30200
},
{
"epoch": 4.77,
"learning_rate": 1.5810810810810808e-05,
"loss": 2.4436,
"step": 30250
},
{
"epoch": 4.78,
"learning_rate": 1.578696343402226e-05,
"loss": 2.4471,
"step": 30300
},
{
"epoch": 4.79,
"learning_rate": 1.5763116057233707e-05,
"loss": 2.5101,
"step": 30350
},
{
"epoch": 4.79,
"learning_rate": 1.573926868044515e-05,
"loss": 2.4395,
"step": 30400
},
{
"epoch": 4.8,
"learning_rate": 1.5715421303656598e-05,
"loss": 2.5001,
"step": 30450
},
{
"epoch": 4.81,
"learning_rate": 1.5691573926868046e-05,
"loss": 2.5082,
"step": 30500
},
{
"epoch": 4.82,
"learning_rate": 1.5667726550079493e-05,
"loss": 2.4692,
"step": 30550
},
{
"epoch": 4.83,
"learning_rate": 1.5643879173290937e-05,
"loss": 2.3858,
"step": 30600
},
{
"epoch": 4.83,
"learning_rate": 1.5620031796502388e-05,
"loss": 2.5594,
"step": 30650
},
{
"epoch": 4.84,
"learning_rate": 1.5596184419713832e-05,
"loss": 2.4556,
"step": 30700
},
{
"epoch": 4.85,
"learning_rate": 1.557233704292528e-05,
"loss": 2.5237,
"step": 30750
},
{
"epoch": 4.86,
"learning_rate": 1.5548489666136724e-05,
"loss": 2.582,
"step": 30800
},
{
"epoch": 4.87,
"learning_rate": 1.5524642289348175e-05,
"loss": 2.4763,
"step": 30850
},
{
"epoch": 4.87,
"learning_rate": 1.550079491255962e-05,
"loss": 2.4364,
"step": 30900
},
{
"epoch": 4.88,
"learning_rate": 1.5476947535771066e-05,
"loss": 2.44,
"step": 30950
},
{
"epoch": 4.89,
"learning_rate": 1.545310015898251e-05,
"loss": 2.5285,
"step": 31000
},
{
"epoch": 4.9,
"learning_rate": 1.542925278219396e-05,
"loss": 2.4616,
"step": 31050
},
{
"epoch": 4.91,
"learning_rate": 1.5405405405405405e-05,
"loss": 2.5149,
"step": 31100
},
{
"epoch": 4.91,
"learning_rate": 1.5381558028616853e-05,
"loss": 2.4926,
"step": 31150
},
{
"epoch": 4.92,
"learning_rate": 1.5357710651828297e-05,
"loss": 2.4282,
"step": 31200
},
{
"epoch": 4.93,
"learning_rate": 1.5333863275039748e-05,
"loss": 2.4479,
"step": 31250
},
{
"epoch": 4.94,
"learning_rate": 1.531001589825119e-05,
"loss": 2.517,
"step": 31300
},
{
"epoch": 4.94,
"learning_rate": 1.528616852146264e-05,
"loss": 2.5034,
"step": 31350
},
{
"epoch": 4.95,
"learning_rate": 1.5262321144674087e-05,
"loss": 2.4819,
"step": 31400
},
{
"epoch": 4.96,
"learning_rate": 1.5238473767885534e-05,
"loss": 2.4886,
"step": 31450
},
{
"epoch": 4.97,
"learning_rate": 1.5214626391096978e-05,
"loss": 2.4712,
"step": 31500
},
{
"epoch": 4.98,
"learning_rate": 1.5190779014308427e-05,
"loss": 2.5013,
"step": 31550
},
{
"epoch": 4.98,
"learning_rate": 1.5166931637519871e-05,
"loss": 2.5287,
"step": 31600
},
{
"epoch": 4.99,
"learning_rate": 1.514308426073132e-05,
"loss": 2.4269,
"step": 31650
},
{
"epoch": 5.0,
"learning_rate": 1.5119236883942765e-05,
"loss": 2.4604,
"step": 31700
},
{
"epoch": 5.01,
"learning_rate": 1.5095389507154214e-05,
"loss": 2.3029,
"step": 31750
},
{
"epoch": 5.02,
"learning_rate": 1.5071542130365658e-05,
"loss": 2.4492,
"step": 31800
},
{
"epoch": 5.02,
"learning_rate": 1.5047694753577107e-05,
"loss": 2.4764,
"step": 31850
},
{
"epoch": 5.03,
"learning_rate": 1.5023847376788551e-05,
"loss": 2.5057,
"step": 31900
},
{
"epoch": 5.04,
"learning_rate": 1.5e-05,
"loss": 2.4618,
"step": 31950
},
{
"epoch": 5.05,
"learning_rate": 1.4976152623211448e-05,
"loss": 2.4304,
"step": 32000
},
{
"epoch": 5.06,
"learning_rate": 1.4952305246422894e-05,
"loss": 2.4029,
"step": 32050
},
{
"epoch": 5.06,
"learning_rate": 1.4928457869634341e-05,
"loss": 2.2965,
"step": 32100
},
{
"epoch": 5.07,
"learning_rate": 1.4904610492845787e-05,
"loss": 2.3965,
"step": 32150
},
{
"epoch": 5.08,
"learning_rate": 1.4880763116057234e-05,
"loss": 2.3282,
"step": 32200
},
{
"epoch": 5.09,
"learning_rate": 1.485691573926868e-05,
"loss": 2.4409,
"step": 32250
},
{
"epoch": 5.09,
"learning_rate": 1.4833068362480128e-05,
"loss": 2.4228,
"step": 32300
},
{
"epoch": 5.1,
"learning_rate": 1.4809220985691573e-05,
"loss": 2.5412,
"step": 32350
},
{
"epoch": 5.11,
"learning_rate": 1.4785373608903021e-05,
"loss": 2.374,
"step": 32400
},
{
"epoch": 5.12,
"learning_rate": 1.4761526232114467e-05,
"loss": 2.3942,
"step": 32450
},
{
"epoch": 5.13,
"learning_rate": 1.4737678855325914e-05,
"loss": 2.3872,
"step": 32500
},
{
"epoch": 5.13,
"learning_rate": 1.471383147853736e-05,
"loss": 2.4669,
"step": 32550
},
{
"epoch": 5.14,
"learning_rate": 1.4689984101748807e-05,
"loss": 2.549,
"step": 32600
},
{
"epoch": 5.15,
"learning_rate": 1.4666136724960255e-05,
"loss": 2.453,
"step": 32650
},
{
"epoch": 5.16,
"learning_rate": 1.46422893481717e-05,
"loss": 2.3588,
"step": 32700
},
{
"epoch": 5.17,
"learning_rate": 1.4618441971383148e-05,
"loss": 2.5884,
"step": 32750
},
{
"epoch": 5.17,
"learning_rate": 1.4594594594594596e-05,
"loss": 2.442,
"step": 32800
},
{
"epoch": 5.18,
"learning_rate": 1.4570747217806043e-05,
"loss": 2.3182,
"step": 32850
},
{
"epoch": 5.19,
"learning_rate": 1.4546899841017489e-05,
"loss": 2.4851,
"step": 32900
},
{
"epoch": 5.2,
"learning_rate": 1.4523052464228936e-05,
"loss": 2.3914,
"step": 32950
},
{
"epoch": 5.21,
"learning_rate": 1.4499205087440382e-05,
"loss": 2.4582,
"step": 33000
},
{
"epoch": 5.21,
"learning_rate": 1.447535771065183e-05,
"loss": 2.3891,
"step": 33050
},
{
"epoch": 5.22,
"learning_rate": 1.4451510333863275e-05,
"loss": 2.4047,
"step": 33100
},
{
"epoch": 5.23,
"learning_rate": 1.4427662957074723e-05,
"loss": 2.4761,
"step": 33150
},
{
"epoch": 5.24,
"learning_rate": 1.4403815580286169e-05,
"loss": 2.4103,
"step": 33200
},
{
"epoch": 5.24,
"learning_rate": 1.4379968203497616e-05,
"loss": 2.4628,
"step": 33250
},
{
"epoch": 5.25,
"learning_rate": 1.4356120826709062e-05,
"loss": 2.3495,
"step": 33300
},
{
"epoch": 5.26,
"learning_rate": 1.433227344992051e-05,
"loss": 2.4002,
"step": 33350
},
{
"epoch": 5.27,
"learning_rate": 1.4308426073131957e-05,
"loss": 2.3719,
"step": 33400
},
{
"epoch": 5.28,
"learning_rate": 1.4284578696343403e-05,
"loss": 2.4141,
"step": 33450
},
{
"epoch": 5.28,
"learning_rate": 1.426073131955485e-05,
"loss": 2.4345,
"step": 33500
},
{
"epoch": 5.29,
"learning_rate": 1.4236883942766296e-05,
"loss": 2.5029,
"step": 33550
},
{
"epoch": 5.3,
"learning_rate": 1.4213036565977743e-05,
"loss": 2.4863,
"step": 33600
},
{
"epoch": 5.31,
"learning_rate": 1.4189189189189189e-05,
"loss": 2.4066,
"step": 33650
},
{
"epoch": 5.32,
"learning_rate": 1.4165341812400637e-05,
"loss": 2.4462,
"step": 33700
},
{
"epoch": 5.32,
"learning_rate": 1.4141494435612082e-05,
"loss": 2.3888,
"step": 33750
},
{
"epoch": 5.33,
"learning_rate": 1.411764705882353e-05,
"loss": 2.4463,
"step": 33800
},
{
"epoch": 5.34,
"learning_rate": 1.4093799682034976e-05,
"loss": 2.4239,
"step": 33850
},
{
"epoch": 5.35,
"learning_rate": 1.4069952305246423e-05,
"loss": 2.4442,
"step": 33900
},
{
"epoch": 5.35,
"learning_rate": 1.4046104928457869e-05,
"loss": 2.4302,
"step": 33950
},
{
"epoch": 5.36,
"learning_rate": 1.4022257551669316e-05,
"loss": 2.3692,
"step": 34000
},
{
"epoch": 5.37,
"learning_rate": 1.3998410174880764e-05,
"loss": 2.3628,
"step": 34050
},
{
"epoch": 5.38,
"learning_rate": 1.397456279809221e-05,
"loss": 2.5001,
"step": 34100
},
{
"epoch": 5.39,
"learning_rate": 1.3950715421303657e-05,
"loss": 2.4981,
"step": 34150
},
{
"epoch": 5.39,
"learning_rate": 1.3926868044515103e-05,
"loss": 2.4526,
"step": 34200
},
{
"epoch": 5.4,
"learning_rate": 1.390302066772655e-05,
"loss": 2.3627,
"step": 34250
},
{
"epoch": 5.41,
"learning_rate": 1.3879173290937996e-05,
"loss": 2.3423,
"step": 34300
},
{
"epoch": 5.42,
"learning_rate": 1.3855325914149444e-05,
"loss": 2.3372,
"step": 34350
},
{
"epoch": 5.43,
"learning_rate": 1.383147853736089e-05,
"loss": 2.3553,
"step": 34400
},
{
"epoch": 5.43,
"learning_rate": 1.3807631160572339e-05,
"loss": 2.424,
"step": 34450
},
{
"epoch": 5.44,
"learning_rate": 1.3783783783783784e-05,
"loss": 2.3868,
"step": 34500
},
{
"epoch": 5.45,
"learning_rate": 1.3759936406995232e-05,
"loss": 2.4128,
"step": 34550
},
{
"epoch": 5.46,
"learning_rate": 1.3736089030206678e-05,
"loss": 2.3561,
"step": 34600
},
{
"epoch": 5.47,
"learning_rate": 1.3712241653418125e-05,
"loss": 2.5233,
"step": 34650
},
{
"epoch": 5.47,
"learning_rate": 1.3688394276629571e-05,
"loss": 2.4911,
"step": 34700
},
{
"epoch": 5.48,
"learning_rate": 1.3664546899841018e-05,
"loss": 2.387,
"step": 34750
},
{
"epoch": 5.49,
"learning_rate": 1.3640699523052466e-05,
"loss": 2.4505,
"step": 34800
},
{
"epoch": 5.5,
"learning_rate": 1.3616852146263912e-05,
"loss": 2.4076,
"step": 34850
},
{
"epoch": 5.5,
"learning_rate": 1.3593004769475359e-05,
"loss": 2.3071,
"step": 34900
},
{
"epoch": 5.51,
"learning_rate": 1.3569157392686805e-05,
"loss": 2.531,
"step": 34950
},
{
"epoch": 5.52,
"learning_rate": 1.3545310015898252e-05,
"loss": 2.5236,
"step": 35000
},
{
"epoch": 5.53,
"learning_rate": 1.3521462639109698e-05,
"loss": 2.4052,
"step": 35050
},
{
"epoch": 5.54,
"learning_rate": 1.3497615262321146e-05,
"loss": 2.4237,
"step": 35100
},
{
"epoch": 5.54,
"learning_rate": 1.3473767885532591e-05,
"loss": 2.4589,
"step": 35150
},
{
"epoch": 5.55,
"learning_rate": 1.3449920508744039e-05,
"loss": 2.3325,
"step": 35200
},
{
"epoch": 5.56,
"learning_rate": 1.3426073131955485e-05,
"loss": 2.3614,
"step": 35250
},
{
"epoch": 5.57,
"learning_rate": 1.3402225755166932e-05,
"loss": 2.4497,
"step": 35300
},
{
"epoch": 5.58,
"learning_rate": 1.3378378378378378e-05,
"loss": 2.4434,
"step": 35350
},
{
"epoch": 5.58,
"learning_rate": 1.3354531001589825e-05,
"loss": 2.4159,
"step": 35400
},
{
"epoch": 5.59,
"learning_rate": 1.3330683624801273e-05,
"loss": 2.4629,
"step": 35450
},
{
"epoch": 5.6,
"learning_rate": 1.3306836248012719e-05,
"loss": 2.383,
"step": 35500
},
{
"epoch": 5.61,
"learning_rate": 1.3282988871224166e-05,
"loss": 2.498,
"step": 35550
},
{
"epoch": 5.62,
"learning_rate": 1.3259141494435612e-05,
"loss": 2.3559,
"step": 35600
},
{
"epoch": 5.62,
"learning_rate": 1.323529411764706e-05,
"loss": 2.389,
"step": 35650
},
{
"epoch": 5.63,
"learning_rate": 1.3211446740858505e-05,
"loss": 2.367,
"step": 35700
},
{
"epoch": 5.64,
"learning_rate": 1.3187599364069953e-05,
"loss": 2.2969,
"step": 35750
},
{
"epoch": 5.65,
"learning_rate": 1.3163751987281398e-05,
"loss": 2.395,
"step": 35800
},
{
"epoch": 5.65,
"learning_rate": 1.3139904610492846e-05,
"loss": 2.3354,
"step": 35850
},
{
"epoch": 5.66,
"learning_rate": 1.3116057233704292e-05,
"loss": 2.3289,
"step": 35900
},
{
"epoch": 5.67,
"learning_rate": 1.3092209856915739e-05,
"loss": 2.4246,
"step": 35950
},
{
"epoch": 5.68,
"learning_rate": 1.3068362480127185e-05,
"loss": 2.5001,
"step": 36000
},
{
"epoch": 5.69,
"learning_rate": 1.3044515103338632e-05,
"loss": 2.5762,
"step": 36050
},
{
"epoch": 5.69,
"learning_rate": 1.302066772655008e-05,
"loss": 2.358,
"step": 36100
},
{
"epoch": 5.7,
"learning_rate": 1.2996820349761527e-05,
"loss": 2.422,
"step": 36150
},
{
"epoch": 5.71,
"learning_rate": 1.2972972972972975e-05,
"loss": 2.4942,
"step": 36200
},
{
"epoch": 5.72,
"learning_rate": 1.294912559618442e-05,
"loss": 2.2875,
"step": 36250
},
{
"epoch": 5.73,
"learning_rate": 1.2925278219395868e-05,
"loss": 2.3661,
"step": 36300
},
{
"epoch": 5.73,
"learning_rate": 1.2901430842607314e-05,
"loss": 2.405,
"step": 36350
},
{
"epoch": 5.74,
"learning_rate": 1.2877583465818761e-05,
"loss": 2.407,
"step": 36400
},
{
"epoch": 5.75,
"learning_rate": 1.2853736089030207e-05,
"loss": 2.4231,
"step": 36450
},
{
"epoch": 5.76,
"learning_rate": 1.2829888712241655e-05,
"loss": 2.3957,
"step": 36500
},
{
"epoch": 5.76,
"learning_rate": 1.28060413354531e-05,
"loss": 2.4263,
"step": 36550
},
{
"epoch": 5.77,
"learning_rate": 1.2782193958664548e-05,
"loss": 2.4255,
"step": 36600
},
{
"epoch": 5.78,
"learning_rate": 1.2758346581875994e-05,
"loss": 2.3842,
"step": 36650
},
{
"epoch": 5.79,
"learning_rate": 1.2734499205087441e-05,
"loss": 2.3538,
"step": 36700
},
{
"epoch": 5.8,
"learning_rate": 1.2710651828298887e-05,
"loss": 2.4627,
"step": 36750
},
{
"epoch": 5.8,
"learning_rate": 1.2686804451510334e-05,
"loss": 2.4949,
"step": 36800
},
{
"epoch": 5.81,
"learning_rate": 1.2662957074721782e-05,
"loss": 2.3725,
"step": 36850
},
{
"epoch": 5.82,
"learning_rate": 1.2639109697933228e-05,
"loss": 2.4256,
"step": 36900
},
{
"epoch": 5.83,
"learning_rate": 1.2615262321144675e-05,
"loss": 2.5716,
"step": 36950
},
{
"epoch": 5.84,
"learning_rate": 1.2591414944356121e-05,
"loss": 2.4394,
"step": 37000
},
{
"epoch": 5.84,
"learning_rate": 1.2567567567567568e-05,
"loss": 2.4445,
"step": 37050
},
{
"epoch": 5.85,
"learning_rate": 1.2543720190779014e-05,
"loss": 2.4336,
"step": 37100
},
{
"epoch": 5.86,
"learning_rate": 1.2519872813990462e-05,
"loss": 2.4754,
"step": 37150
},
{
"epoch": 5.87,
"learning_rate": 1.2496025437201907e-05,
"loss": 2.4117,
"step": 37200
},
{
"epoch": 5.88,
"learning_rate": 1.2472178060413355e-05,
"loss": 2.3351,
"step": 37250
},
{
"epoch": 5.88,
"learning_rate": 1.24483306836248e-05,
"loss": 2.2414,
"step": 37300
},
{
"epoch": 5.89,
"learning_rate": 1.2424483306836248e-05,
"loss": 2.4939,
"step": 37350
},
{
"epoch": 5.9,
"learning_rate": 1.2400635930047694e-05,
"loss": 2.5218,
"step": 37400
},
{
"epoch": 5.91,
"learning_rate": 1.2376788553259141e-05,
"loss": 2.4034,
"step": 37450
},
{
"epoch": 5.91,
"learning_rate": 1.2352941176470587e-05,
"loss": 2.3499,
"step": 37500
},
{
"epoch": 5.92,
"learning_rate": 1.2329093799682035e-05,
"loss": 2.3668,
"step": 37550
},
{
"epoch": 5.93,
"learning_rate": 1.2305246422893482e-05,
"loss": 2.3756,
"step": 37600
},
{
"epoch": 5.94,
"learning_rate": 1.2281399046104928e-05,
"loss": 2.3595,
"step": 37650
},
{
"epoch": 5.95,
"learning_rate": 1.2257551669316375e-05,
"loss": 2.5006,
"step": 37700
},
{
"epoch": 5.95,
"learning_rate": 1.2233704292527823e-05,
"loss": 2.3216,
"step": 37750
},
{
"epoch": 5.96,
"learning_rate": 1.220985691573927e-05,
"loss": 2.3741,
"step": 37800
},
{
"epoch": 5.97,
"learning_rate": 1.2186009538950716e-05,
"loss": 2.4229,
"step": 37850
},
{
"epoch": 5.98,
"learning_rate": 1.2162162162162164e-05,
"loss": 2.4669,
"step": 37900
},
{
"epoch": 5.99,
"learning_rate": 1.213831478537361e-05,
"loss": 2.3883,
"step": 37950
},
{
"epoch": 5.99,
"learning_rate": 1.2114467408585057e-05,
"loss": 2.3252,
"step": 38000
},
{
"epoch": 6.0,
"learning_rate": 1.2090620031796503e-05,
"loss": 2.3869,
"step": 38050
},
{
"epoch": 6.01,
"learning_rate": 1.206677265500795e-05,
"loss": 2.3895,
"step": 38100
},
{
"epoch": 6.02,
"learning_rate": 1.2042925278219396e-05,
"loss": 2.3348,
"step": 38150
},
{
"epoch": 6.03,
"learning_rate": 1.2019077901430843e-05,
"loss": 2.3672,
"step": 38200
},
{
"epoch": 6.03,
"learning_rate": 1.199523052464229e-05,
"loss": 2.4028,
"step": 38250
},
{
"epoch": 6.04,
"learning_rate": 1.1971383147853737e-05,
"loss": 2.2395,
"step": 38300
},
{
"epoch": 6.05,
"learning_rate": 1.1947535771065184e-05,
"loss": 2.3821,
"step": 38350
},
{
"epoch": 6.06,
"learning_rate": 1.192368839427663e-05,
"loss": 2.3787,
"step": 38400
},
{
"epoch": 6.06,
"learning_rate": 1.1899841017488077e-05,
"loss": 2.3736,
"step": 38450
},
{
"epoch": 6.07,
"learning_rate": 1.1875993640699523e-05,
"loss": 2.2846,
"step": 38500
},
{
"epoch": 6.08,
"learning_rate": 1.185214626391097e-05,
"loss": 2.3708,
"step": 38550
},
{
"epoch": 6.09,
"learning_rate": 1.1828298887122416e-05,
"loss": 2.2383,
"step": 38600
},
{
"epoch": 6.1,
"learning_rate": 1.1804451510333864e-05,
"loss": 2.4514,
"step": 38650
},
{
"epoch": 6.1,
"learning_rate": 1.178060413354531e-05,
"loss": 2.3704,
"step": 38700
},
{
"epoch": 6.11,
"learning_rate": 1.1756756756756757e-05,
"loss": 2.3119,
"step": 38750
},
{
"epoch": 6.12,
"learning_rate": 1.1732909379968203e-05,
"loss": 2.2737,
"step": 38800
},
{
"epoch": 6.13,
"learning_rate": 1.170906200317965e-05,
"loss": 2.3966,
"step": 38850
},
{
"epoch": 6.14,
"learning_rate": 1.1685214626391096e-05,
"loss": 2.3318,
"step": 38900
},
{
"epoch": 6.14,
"learning_rate": 1.1661367249602544e-05,
"loss": 2.4453,
"step": 38950
},
{
"epoch": 6.15,
"learning_rate": 1.1637519872813991e-05,
"loss": 2.3973,
"step": 39000
},
{
"epoch": 6.16,
"learning_rate": 1.1613672496025437e-05,
"loss": 2.3625,
"step": 39050
},
{
"epoch": 6.17,
"learning_rate": 1.1589825119236884e-05,
"loss": 2.3174,
"step": 39100
},
{
"epoch": 6.18,
"learning_rate": 1.156597774244833e-05,
"loss": 2.4294,
"step": 39150
},
{
"epoch": 6.18,
"learning_rate": 1.1542130365659778e-05,
"loss": 2.3756,
"step": 39200
},
{
"epoch": 6.19,
"learning_rate": 1.1518282988871223e-05,
"loss": 2.4119,
"step": 39250
},
{
"epoch": 6.2,
"learning_rate": 1.1494435612082671e-05,
"loss": 2.442,
"step": 39300
},
{
"epoch": 6.21,
"learning_rate": 1.1470588235294117e-05,
"loss": 2.3622,
"step": 39350
},
{
"epoch": 6.21,
"learning_rate": 1.1446740858505566e-05,
"loss": 2.4294,
"step": 39400
},
{
"epoch": 6.22,
"learning_rate": 1.1422893481717012e-05,
"loss": 2.3,
"step": 39450
},
{
"epoch": 6.23,
"learning_rate": 1.1399046104928459e-05,
"loss": 2.4134,
"step": 39500
},
{
"epoch": 6.24,
"learning_rate": 1.1375198728139905e-05,
"loss": 2.3861,
"step": 39550
},
{
"epoch": 6.25,
"learning_rate": 1.1351351351351352e-05,
"loss": 2.3675,
"step": 39600
},
{
"epoch": 6.25,
"learning_rate": 1.13275039745628e-05,
"loss": 2.3885,
"step": 39650
},
{
"epoch": 6.26,
"learning_rate": 1.1303656597774246e-05,
"loss": 2.36,
"step": 39700
},
{
"epoch": 6.27,
"learning_rate": 1.1279809220985693e-05,
"loss": 2.3284,
"step": 39750
},
{
"epoch": 6.28,
"learning_rate": 1.1255961844197139e-05,
"loss": 2.4368,
"step": 39800
},
{
"epoch": 6.29,
"learning_rate": 1.1232114467408586e-05,
"loss": 2.3698,
"step": 39850
},
{
"epoch": 6.29,
"learning_rate": 1.1208267090620032e-05,
"loss": 2.28,
"step": 39900
},
{
"epoch": 6.3,
"learning_rate": 1.118441971383148e-05,
"loss": 2.4025,
"step": 39950
},
{
"epoch": 6.31,
"learning_rate": 1.1160572337042925e-05,
"loss": 2.4193,
"step": 40000
},
{
"epoch": 6.32,
"learning_rate": 1.1136724960254373e-05,
"loss": 2.268,
"step": 40050
},
{
"epoch": 6.32,
"learning_rate": 1.1112877583465819e-05,
"loss": 2.4122,
"step": 40100
},
{
"epoch": 6.33,
"learning_rate": 1.1089030206677266e-05,
"loss": 2.3692,
"step": 40150
},
{
"epoch": 6.34,
"learning_rate": 1.1065182829888712e-05,
"loss": 2.3528,
"step": 40200
},
{
"epoch": 6.35,
"learning_rate": 1.104133545310016e-05,
"loss": 2.3224,
"step": 40250
},
{
"epoch": 6.36,
"learning_rate": 1.1017488076311605e-05,
"loss": 2.4246,
"step": 40300
},
{
"epoch": 6.36,
"learning_rate": 1.0993640699523053e-05,
"loss": 2.3906,
"step": 40350
},
{
"epoch": 6.37,
"learning_rate": 1.09697933227345e-05,
"loss": 2.4014,
"step": 40400
},
{
"epoch": 6.38,
"learning_rate": 1.0945945945945946e-05,
"loss": 2.3314,
"step": 40450
},
{
"epoch": 6.39,
"learning_rate": 1.0922098569157393e-05,
"loss": 2.366,
"step": 40500
},
{
"epoch": 6.4,
"learning_rate": 1.0898251192368839e-05,
"loss": 2.3566,
"step": 40550
},
{
"epoch": 6.4,
"learning_rate": 1.0874403815580287e-05,
"loss": 2.3871,
"step": 40600
},
{
"epoch": 6.41,
"learning_rate": 1.0850556438791732e-05,
"loss": 2.276,
"step": 40650
},
{
"epoch": 6.42,
"learning_rate": 1.082670906200318e-05,
"loss": 2.3321,
"step": 40700
},
{
"epoch": 6.43,
"learning_rate": 1.0802861685214626e-05,
"loss": 2.3235,
"step": 40750
},
{
"epoch": 6.44,
"learning_rate": 1.0779014308426073e-05,
"loss": 2.4327,
"step": 40800
},
{
"epoch": 6.44,
"learning_rate": 1.0755166931637519e-05,
"loss": 2.2971,
"step": 40850
},
{
"epoch": 6.45,
"learning_rate": 1.0731319554848966e-05,
"loss": 2.4482,
"step": 40900
},
{
"epoch": 6.46,
"learning_rate": 1.0707472178060412e-05,
"loss": 2.3656,
"step": 40950
},
{
"epoch": 6.47,
"learning_rate": 1.068362480127186e-05,
"loss": 2.433,
"step": 41000
},
{
"epoch": 6.47,
"learning_rate": 1.0659777424483307e-05,
"loss": 2.4306,
"step": 41050
},
{
"epoch": 6.48,
"learning_rate": 1.0635930047694755e-05,
"loss": 2.3191,
"step": 41100
},
{
"epoch": 6.49,
"learning_rate": 1.0612082670906202e-05,
"loss": 2.4315,
"step": 41150
},
{
"epoch": 6.5,
"learning_rate": 1.0588235294117648e-05,
"loss": 2.3453,
"step": 41200
},
{
"epoch": 6.51,
"learning_rate": 1.0564387917329095e-05,
"loss": 2.2802,
"step": 41250
},
{
"epoch": 6.51,
"learning_rate": 1.0540540540540541e-05,
"loss": 2.3409,
"step": 41300
},
{
"epoch": 6.52,
"learning_rate": 1.0516693163751989e-05,
"loss": 2.354,
"step": 41350
},
{
"epoch": 6.53,
"learning_rate": 1.0492845786963434e-05,
"loss": 2.3923,
"step": 41400
},
{
"epoch": 6.54,
"learning_rate": 1.0468998410174882e-05,
"loss": 2.3687,
"step": 41450
},
{
"epoch": 6.55,
"learning_rate": 1.0445151033386328e-05,
"loss": 2.375,
"step": 41500
},
{
"epoch": 6.55,
"learning_rate": 1.0421303656597775e-05,
"loss": 2.3809,
"step": 41550
},
{
"epoch": 6.56,
"learning_rate": 1.0397456279809221e-05,
"loss": 2.3784,
"step": 41600
},
{
"epoch": 6.57,
"learning_rate": 1.0373608903020668e-05,
"loss": 2.3258,
"step": 41650
},
{
"epoch": 6.58,
"learning_rate": 1.0349761526232114e-05,
"loss": 2.3209,
"step": 41700
},
{
"epoch": 6.59,
"learning_rate": 1.0325914149443562e-05,
"loss": 2.3521,
"step": 41750
},
{
"epoch": 6.59,
"learning_rate": 1.0302066772655009e-05,
"loss": 2.3647,
"step": 41800
},
{
"epoch": 6.6,
"learning_rate": 1.0278219395866455e-05,
"loss": 2.3021,
"step": 41850
},
{
"epoch": 6.61,
"learning_rate": 1.0254372019077902e-05,
"loss": 2.415,
"step": 41900
},
{
"epoch": 6.62,
"learning_rate": 1.0230524642289348e-05,
"loss": 2.3468,
"step": 41950
},
{
"epoch": 6.62,
"learning_rate": 1.0206677265500796e-05,
"loss": 2.4086,
"step": 42000
},
{
"epoch": 6.63,
"learning_rate": 1.0182829888712241e-05,
"loss": 2.379,
"step": 42050
},
{
"epoch": 6.64,
"learning_rate": 1.0158982511923689e-05,
"loss": 2.453,
"step": 42100
},
{
"epoch": 6.65,
"learning_rate": 1.0135135135135135e-05,
"loss": 2.4528,
"step": 42150
},
{
"epoch": 6.66,
"learning_rate": 1.0111287758346582e-05,
"loss": 2.3831,
"step": 42200
},
{
"epoch": 6.66,
"learning_rate": 1.0087440381558028e-05,
"loss": 2.2717,
"step": 42250
},
{
"epoch": 6.67,
"learning_rate": 1.0063593004769475e-05,
"loss": 2.4052,
"step": 42300
},
{
"epoch": 6.68,
"learning_rate": 1.0039745627980921e-05,
"loss": 2.4027,
"step": 42350
},
{
"epoch": 6.69,
"learning_rate": 1.0015898251192369e-05,
"loss": 2.3474,
"step": 42400
},
{
"epoch": 6.7,
"learning_rate": 9.992050874403816e-06,
"loss": 2.319,
"step": 42450
},
{
"epoch": 6.7,
"learning_rate": 9.968203497615262e-06,
"loss": 2.3748,
"step": 42500
},
{
"epoch": 6.71,
"learning_rate": 9.94435612082671e-06,
"loss": 2.3052,
"step": 42550
},
{
"epoch": 6.72,
"learning_rate": 9.920508744038155e-06,
"loss": 2.3753,
"step": 42600
},
{
"epoch": 6.73,
"learning_rate": 9.896661367249603e-06,
"loss": 2.2557,
"step": 42650
},
{
"epoch": 6.73,
"learning_rate": 9.872813990461048e-06,
"loss": 2.3367,
"step": 42700
},
{
"epoch": 6.74,
"learning_rate": 9.848966613672498e-06,
"loss": 2.2875,
"step": 42750
},
{
"epoch": 6.75,
"learning_rate": 9.825119236883943e-06,
"loss": 2.2972,
"step": 42800
},
{
"epoch": 6.76,
"learning_rate": 9.80127186009539e-06,
"loss": 2.4598,
"step": 42850
},
{
"epoch": 6.77,
"learning_rate": 9.777424483306837e-06,
"loss": 2.2605,
"step": 42900
},
{
"epoch": 6.77,
"learning_rate": 9.753577106518284e-06,
"loss": 2.3722,
"step": 42950
},
{
"epoch": 6.78,
"learning_rate": 9.72972972972973e-06,
"loss": 2.3536,
"step": 43000
},
{
"epoch": 6.79,
"learning_rate": 9.705882352941177e-06,
"loss": 2.3247,
"step": 43050
},
{
"epoch": 6.8,
"learning_rate": 9.682034976152623e-06,
"loss": 2.3724,
"step": 43100
},
{
"epoch": 6.81,
"learning_rate": 9.65818759936407e-06,
"loss": 2.3119,
"step": 43150
},
{
"epoch": 6.81,
"learning_rate": 9.634340222575518e-06,
"loss": 2.3488,
"step": 43200
},
{
"epoch": 6.82,
"learning_rate": 9.610492845786964e-06,
"loss": 2.4406,
"step": 43250
},
{
"epoch": 6.83,
"learning_rate": 9.586645468998411e-06,
"loss": 2.3001,
"step": 43300
},
{
"epoch": 6.84,
"learning_rate": 9.562798092209857e-06,
"loss": 2.3593,
"step": 43350
},
{
"epoch": 6.85,
"learning_rate": 9.538950715421305e-06,
"loss": 2.3887,
"step": 43400
},
{
"epoch": 6.85,
"learning_rate": 9.51510333863275e-06,
"loss": 2.3598,
"step": 43450
},
{
"epoch": 6.86,
"learning_rate": 9.491255961844198e-06,
"loss": 2.3472,
"step": 43500
},
{
"epoch": 6.87,
"learning_rate": 9.467408585055644e-06,
"loss": 2.3077,
"step": 43550
},
{
"epoch": 6.88,
"learning_rate": 9.443561208267091e-06,
"loss": 2.3358,
"step": 43600
},
{
"epoch": 6.88,
"learning_rate": 9.419713831478537e-06,
"loss": 2.3872,
"step": 43650
},
{
"epoch": 6.89,
"learning_rate": 9.395866454689984e-06,
"loss": 2.2983,
"step": 43700
},
{
"epoch": 6.9,
"learning_rate": 9.37201907790143e-06,
"loss": 2.4493,
"step": 43750
},
{
"epoch": 6.91,
"learning_rate": 9.348171701112878e-06,
"loss": 2.3481,
"step": 43800
},
{
"epoch": 6.92,
"learning_rate": 9.324324324324325e-06,
"loss": 2.3392,
"step": 43850
},
{
"epoch": 6.92,
"learning_rate": 9.30047694753577e-06,
"loss": 2.4189,
"step": 43900
},
{
"epoch": 6.93,
"learning_rate": 9.276629570747218e-06,
"loss": 2.3721,
"step": 43950
},
{
"epoch": 6.94,
"learning_rate": 9.252782193958664e-06,
"loss": 2.2599,
"step": 44000
},
{
"epoch": 6.95,
"learning_rate": 9.228934817170112e-06,
"loss": 2.3492,
"step": 44050
},
{
"epoch": 6.96,
"learning_rate": 9.205087440381557e-06,
"loss": 2.3938,
"step": 44100
},
{
"epoch": 6.96,
"learning_rate": 9.181240063593005e-06,
"loss": 2.4319,
"step": 44150
},
{
"epoch": 6.97,
"learning_rate": 9.15739268680445e-06,
"loss": 2.3865,
"step": 44200
},
{
"epoch": 6.98,
"learning_rate": 9.133545310015898e-06,
"loss": 2.3202,
"step": 44250
},
{
"epoch": 6.99,
"learning_rate": 9.109697933227344e-06,
"loss": 2.3791,
"step": 44300
},
{
"epoch": 7.0,
"learning_rate": 9.085850556438791e-06,
"loss": 2.3508,
"step": 44350
},
{
"epoch": 7.0,
"learning_rate": 9.062003179650239e-06,
"loss": 2.3603,
"step": 44400
},
{
"epoch": 7.01,
"learning_rate": 9.038155802861686e-06,
"loss": 2.3675,
"step": 44450
},
{
"epoch": 7.02,
"learning_rate": 9.014308426073132e-06,
"loss": 2.3226,
"step": 44500
},
{
"epoch": 7.03,
"learning_rate": 8.99046104928458e-06,
"loss": 2.2309,
"step": 44550
},
{
"epoch": 7.03,
"learning_rate": 8.966613672496027e-06,
"loss": 2.3336,
"step": 44600
},
{
"epoch": 7.04,
"learning_rate": 8.942766295707473e-06,
"loss": 2.3969,
"step": 44650
},
{
"epoch": 7.05,
"learning_rate": 8.91891891891892e-06,
"loss": 2.3952,
"step": 44700
},
{
"epoch": 7.06,
"learning_rate": 8.895071542130366e-06,
"loss": 2.2318,
"step": 44750
},
{
"epoch": 7.07,
"learning_rate": 8.871224165341814e-06,
"loss": 2.3633,
"step": 44800
},
{
"epoch": 7.07,
"learning_rate": 8.84737678855326e-06,
"loss": 2.3753,
"step": 44850
},
{
"epoch": 7.08,
"learning_rate": 8.823529411764707e-06,
"loss": 2.3284,
"step": 44900
},
{
"epoch": 7.09,
"learning_rate": 8.799682034976153e-06,
"loss": 2.3755,
"step": 44950
},
{
"epoch": 7.1,
"learning_rate": 8.7758346581876e-06,
"loss": 2.2437,
"step": 45000
},
{
"epoch": 7.11,
"learning_rate": 8.751987281399046e-06,
"loss": 2.3282,
"step": 45050
},
{
"epoch": 7.11,
"learning_rate": 8.728139904610493e-06,
"loss": 2.2998,
"step": 45100
},
{
"epoch": 7.12,
"learning_rate": 8.704292527821939e-06,
"loss": 2.2286,
"step": 45150
},
{
"epoch": 7.13,
"learning_rate": 8.680445151033387e-06,
"loss": 2.3136,
"step": 45200
},
{
"epoch": 7.14,
"learning_rate": 8.656597774244834e-06,
"loss": 2.3727,
"step": 45250
},
{
"epoch": 7.15,
"learning_rate": 8.63275039745628e-06,
"loss": 2.4005,
"step": 45300
},
{
"epoch": 7.15,
"learning_rate": 8.608903020667727e-06,
"loss": 2.2586,
"step": 45350
},
{
"epoch": 7.16,
"learning_rate": 8.585055643879173e-06,
"loss": 2.3769,
"step": 45400
},
{
"epoch": 7.17,
"learning_rate": 8.56120826709062e-06,
"loss": 2.2427,
"step": 45450
},
{
"epoch": 7.18,
"learning_rate": 8.537360890302066e-06,
"loss": 2.4139,
"step": 45500
},
{
"epoch": 7.18,
"learning_rate": 8.513513513513514e-06,
"loss": 2.3047,
"step": 45550
},
{
"epoch": 7.19,
"learning_rate": 8.48966613672496e-06,
"loss": 2.4186,
"step": 45600
},
{
"epoch": 7.2,
"learning_rate": 8.465818759936407e-06,
"loss": 2.3452,
"step": 45650
},
{
"epoch": 7.21,
"learning_rate": 8.441971383147853e-06,
"loss": 2.2185,
"step": 45700
},
{
"epoch": 7.22,
"learning_rate": 8.4181240063593e-06,
"loss": 2.3871,
"step": 45750
},
{
"epoch": 7.22,
"learning_rate": 8.394276629570746e-06,
"loss": 2.2726,
"step": 45800
},
{
"epoch": 7.23,
"learning_rate": 8.370429252782194e-06,
"loss": 2.3655,
"step": 45850
},
{
"epoch": 7.24,
"learning_rate": 8.34658187599364e-06,
"loss": 2.2998,
"step": 45900
},
{
"epoch": 7.25,
"learning_rate": 8.322734499205087e-06,
"loss": 2.3761,
"step": 45950
},
{
"epoch": 7.26,
"learning_rate": 8.298887122416534e-06,
"loss": 2.4071,
"step": 46000
},
{
"epoch": 7.26,
"learning_rate": 8.275039745627982e-06,
"loss": 2.2446,
"step": 46050
},
{
"epoch": 7.27,
"learning_rate": 8.25119236883943e-06,
"loss": 2.4368,
"step": 46100
},
{
"epoch": 7.28,
"learning_rate": 8.227344992050875e-06,
"loss": 2.3106,
"step": 46150
},
{
"epoch": 7.29,
"learning_rate": 8.203497615262323e-06,
"loss": 2.2454,
"step": 46200
},
{
"epoch": 7.29,
"learning_rate": 8.179650238473768e-06,
"loss": 2.3197,
"step": 46250
},
{
"epoch": 7.3,
"learning_rate": 8.155802861685216e-06,
"loss": 2.291,
"step": 46300
},
{
"epoch": 7.31,
"learning_rate": 8.131955484896662e-06,
"loss": 2.3262,
"step": 46350
},
{
"epoch": 7.32,
"learning_rate": 8.108108108108109e-06,
"loss": 2.3377,
"step": 46400
},
{
"epoch": 7.33,
"learning_rate": 8.084260731319555e-06,
"loss": 2.3217,
"step": 46450
},
{
"epoch": 7.33,
"learning_rate": 8.060413354531002e-06,
"loss": 2.2479,
"step": 46500
},
{
"epoch": 7.34,
"learning_rate": 8.036565977742448e-06,
"loss": 2.2902,
"step": 46550
},
{
"epoch": 7.35,
"learning_rate": 8.012718600953896e-06,
"loss": 2.2538,
"step": 46600
},
{
"epoch": 7.36,
"learning_rate": 7.988871224165343e-06,
"loss": 2.3246,
"step": 46650
},
{
"epoch": 7.37,
"learning_rate": 7.965023847376789e-06,
"loss": 2.3713,
"step": 46700
},
{
"epoch": 7.37,
"learning_rate": 7.941176470588236e-06,
"loss": 2.3611,
"step": 46750
},
{
"epoch": 7.38,
"learning_rate": 7.917329093799682e-06,
"loss": 2.3627,
"step": 46800
},
{
"epoch": 7.39,
"learning_rate": 7.89348171701113e-06,
"loss": 2.2824,
"step": 46850
},
{
"epoch": 7.4,
"learning_rate": 7.869634340222575e-06,
"loss": 2.3895,
"step": 46900
},
{
"epoch": 7.41,
"learning_rate": 7.845786963434023e-06,
"loss": 2.2528,
"step": 46950
},
{
"epoch": 7.41,
"learning_rate": 7.821939586645469e-06,
"loss": 2.3639,
"step": 47000
},
{
"epoch": 7.42,
"learning_rate": 7.798092209856916e-06,
"loss": 2.3498,
"step": 47050
},
{
"epoch": 7.43,
"learning_rate": 7.774244833068362e-06,
"loss": 2.3634,
"step": 47100
},
{
"epoch": 7.44,
"learning_rate": 7.75039745627981e-06,
"loss": 2.277,
"step": 47150
},
{
"epoch": 7.44,
"learning_rate": 7.726550079491255e-06,
"loss": 2.333,
"step": 47200
},
{
"epoch": 7.45,
"learning_rate": 7.702702702702703e-06,
"loss": 2.3236,
"step": 47250
},
{
"epoch": 7.46,
"learning_rate": 7.678855325914148e-06,
"loss": 2.3351,
"step": 47300
},
{
"epoch": 7.47,
"learning_rate": 7.655007949125596e-06,
"loss": 2.1992,
"step": 47350
},
{
"epoch": 7.48,
"learning_rate": 7.631160572337043e-06,
"loss": 2.3276,
"step": 47400
},
{
"epoch": 7.48,
"learning_rate": 7.607313195548489e-06,
"loss": 2.3495,
"step": 47450
},
{
"epoch": 7.49,
"learning_rate": 7.583465818759936e-06,
"loss": 2.3136,
"step": 47500
},
{
"epoch": 7.5,
"learning_rate": 7.559618441971382e-06,
"loss": 2.3916,
"step": 47550
},
{
"epoch": 7.51,
"learning_rate": 7.535771065182829e-06,
"loss": 2.3319,
"step": 47600
},
{
"epoch": 7.52,
"learning_rate": 7.511923688394276e-06,
"loss": 2.2989,
"step": 47650
},
{
"epoch": 7.52,
"learning_rate": 7.488076311605724e-06,
"loss": 2.2922,
"step": 47700
},
{
"epoch": 7.53,
"learning_rate": 7.4642289348171706e-06,
"loss": 2.2579,
"step": 47750
},
{
"epoch": 7.54,
"learning_rate": 7.440381558028617e-06,
"loss": 2.25,
"step": 47800
},
{
"epoch": 7.55,
"learning_rate": 7.416534181240064e-06,
"loss": 2.3738,
"step": 47850
},
{
"epoch": 7.56,
"learning_rate": 7.3926868044515104e-06,
"loss": 2.2627,
"step": 47900
},
{
"epoch": 7.56,
"learning_rate": 7.368839427662957e-06,
"loss": 2.309,
"step": 47950
},
{
"epoch": 7.57,
"learning_rate": 7.344992050874404e-06,
"loss": 2.3631,
"step": 48000
},
{
"epoch": 7.58,
"learning_rate": 7.32114467408585e-06,
"loss": 2.2956,
"step": 48050
},
{
"epoch": 7.59,
"learning_rate": 7.297297297297298e-06,
"loss": 2.2407,
"step": 48100
},
{
"epoch": 7.59,
"learning_rate": 7.2734499205087444e-06,
"loss": 2.3228,
"step": 48150
},
{
"epoch": 7.6,
"learning_rate": 7.249602543720191e-06,
"loss": 2.231,
"step": 48200
},
{
"epoch": 7.61,
"learning_rate": 7.225755166931638e-06,
"loss": 2.352,
"step": 48250
},
{
"epoch": 7.62,
"learning_rate": 7.201907790143084e-06,
"loss": 2.3881,
"step": 48300
},
{
"epoch": 7.63,
"learning_rate": 7.178060413354531e-06,
"loss": 2.3211,
"step": 48350
},
{
"epoch": 7.63,
"learning_rate": 7.154213036565978e-06,
"loss": 2.3425,
"step": 48400
},
{
"epoch": 7.64,
"learning_rate": 7.130365659777425e-06,
"loss": 2.3098,
"step": 48450
},
{
"epoch": 7.65,
"learning_rate": 7.106518282988872e-06,
"loss": 2.3842,
"step": 48500
},
{
"epoch": 7.66,
"learning_rate": 7.082670906200318e-06,
"loss": 2.3383,
"step": 48550
},
{
"epoch": 7.67,
"learning_rate": 7.058823529411765e-06,
"loss": 2.3951,
"step": 48600
},
{
"epoch": 7.67,
"learning_rate": 7.0349761526232116e-06,
"loss": 2.3078,
"step": 48650
},
{
"epoch": 7.68,
"learning_rate": 7.011128775834658e-06,
"loss": 2.4464,
"step": 48700
},
{
"epoch": 7.69,
"learning_rate": 6.987281399046105e-06,
"loss": 2.3558,
"step": 48750
},
{
"epoch": 7.7,
"learning_rate": 6.9634340222575514e-06,
"loss": 2.2287,
"step": 48800
},
{
"epoch": 7.71,
"learning_rate": 6.939586645468998e-06,
"loss": 2.3043,
"step": 48850
},
{
"epoch": 7.71,
"learning_rate": 6.915739268680445e-06,
"loss": 2.3101,
"step": 48900
},
{
"epoch": 7.72,
"learning_rate": 6.891891891891892e-06,
"loss": 2.3032,
"step": 48950
},
{
"epoch": 7.73,
"learning_rate": 6.868044515103339e-06,
"loss": 2.2677,
"step": 49000
},
{
"epoch": 7.74,
"learning_rate": 6.8441971383147854e-06,
"loss": 2.3642,
"step": 49050
},
{
"epoch": 7.74,
"learning_rate": 6.820349761526233e-06,
"loss": 2.3649,
"step": 49100
},
{
"epoch": 7.75,
"learning_rate": 6.7965023847376795e-06,
"loss": 2.3617,
"step": 49150
},
{
"epoch": 7.76,
"learning_rate": 6.772655007949126e-06,
"loss": 2.3569,
"step": 49200
},
{
"epoch": 7.77,
"learning_rate": 6.748807631160573e-06,
"loss": 2.297,
"step": 49250
},
{
"epoch": 7.78,
"learning_rate": 6.7249602543720194e-06,
"loss": 2.2902,
"step": 49300
},
{
"epoch": 7.78,
"learning_rate": 6.701112877583466e-06,
"loss": 2.3593,
"step": 49350
},
{
"epoch": 7.79,
"learning_rate": 6.677265500794913e-06,
"loss": 2.2884,
"step": 49400
},
{
"epoch": 7.8,
"learning_rate": 6.653418124006359e-06,
"loss": 2.4,
"step": 49450
},
{
"epoch": 7.81,
"learning_rate": 6.629570747217806e-06,
"loss": 2.2576,
"step": 49500
},
{
"epoch": 7.82,
"learning_rate": 6.6057233704292526e-06,
"loss": 2.2758,
"step": 49550
},
{
"epoch": 7.82,
"learning_rate": 6.581875993640699e-06,
"loss": 2.3434,
"step": 49600
},
{
"epoch": 7.83,
"learning_rate": 6.558028616852146e-06,
"loss": 2.2259,
"step": 49650
},
{
"epoch": 7.84,
"learning_rate": 6.5341812400635925e-06,
"loss": 2.3104,
"step": 49700
},
{
"epoch": 7.85,
"learning_rate": 6.51033386327504e-06,
"loss": 2.2976,
"step": 49750
},
{
"epoch": 7.85,
"learning_rate": 6.486486486486487e-06,
"loss": 2.34,
"step": 49800
},
{
"epoch": 7.86,
"learning_rate": 6.462639109697934e-06,
"loss": 2.3814,
"step": 49850
},
{
"epoch": 7.87,
"learning_rate": 6.438791732909381e-06,
"loss": 2.3021,
"step": 49900
},
{
"epoch": 7.88,
"learning_rate": 6.414944356120827e-06,
"loss": 2.418,
"step": 49950
},
{
"epoch": 7.89,
"learning_rate": 6.391096979332274e-06,
"loss": 2.3411,
"step": 50000
},
{
"epoch": 7.89,
"learning_rate": 6.3672496025437205e-06,
"loss": 2.3213,
"step": 50050
},
{
"epoch": 7.9,
"learning_rate": 6.343402225755167e-06,
"loss": 2.3401,
"step": 50100
},
{
"epoch": 7.91,
"learning_rate": 6.319554848966614e-06,
"loss": 2.2525,
"step": 50150
},
{
"epoch": 7.92,
"learning_rate": 6.2957074721780604e-06,
"loss": 2.271,
"step": 50200
},
{
"epoch": 7.93,
"learning_rate": 6.271860095389507e-06,
"loss": 2.425,
"step": 50250
},
{
"epoch": 7.93,
"learning_rate": 6.248012718600954e-06,
"loss": 2.4018,
"step": 50300
},
{
"epoch": 7.94,
"learning_rate": 6.2241653418124e-06,
"loss": 2.3339,
"step": 50350
},
{
"epoch": 7.95,
"learning_rate": 6.200317965023847e-06,
"loss": 2.3432,
"step": 50400
},
{
"epoch": 7.96,
"learning_rate": 6.176470588235294e-06,
"loss": 2.3247,
"step": 50450
},
{
"epoch": 7.97,
"learning_rate": 6.152623211446741e-06,
"loss": 2.3498,
"step": 50500
},
{
"epoch": 7.97,
"learning_rate": 6.128775834658188e-06,
"loss": 2.2981,
"step": 50550
},
{
"epoch": 7.98,
"learning_rate": 6.104928457869635e-06,
"loss": 2.3491,
"step": 50600
},
{
"epoch": 7.99,
"learning_rate": 6.081081081081082e-06,
"loss": 2.3754,
"step": 50650
},
{
"epoch": 8.0,
"learning_rate": 6.057233704292528e-06,
"loss": 2.2337,
"step": 50700
},
{
"epoch": 8.0,
"learning_rate": 6.033386327503975e-06,
"loss": 2.3989,
"step": 50750
},
{
"epoch": 8.01,
"learning_rate": 6.009538950715422e-06,
"loss": 2.2889,
"step": 50800
},
{
"epoch": 8.02,
"learning_rate": 5.985691573926868e-06,
"loss": 2.3615,
"step": 50850
},
{
"epoch": 8.03,
"learning_rate": 5.961844197138315e-06,
"loss": 2.2809,
"step": 50900
},
{
"epoch": 8.04,
"learning_rate": 5.9379968203497616e-06,
"loss": 2.2556,
"step": 50950
},
{
"epoch": 8.04,
"learning_rate": 5.914149443561208e-06,
"loss": 2.3127,
"step": 51000
},
{
"epoch": 8.05,
"learning_rate": 5.890302066772655e-06,
"loss": 2.323,
"step": 51050
},
{
"epoch": 8.06,
"learning_rate": 5.8664546899841014e-06,
"loss": 2.2539,
"step": 51100
},
{
"epoch": 8.07,
"learning_rate": 5.842607313195548e-06,
"loss": 2.3405,
"step": 51150
},
{
"epoch": 8.08,
"learning_rate": 5.8187599364069955e-06,
"loss": 2.3322,
"step": 51200
},
{
"epoch": 8.08,
"learning_rate": 5.794912559618442e-06,
"loss": 2.3013,
"step": 51250
},
{
"epoch": 8.09,
"learning_rate": 5.771065182829889e-06,
"loss": 2.2925,
"step": 51300
},
{
"epoch": 8.1,
"learning_rate": 5.7472178060413354e-06,
"loss": 2.2064,
"step": 51350
},
{
"epoch": 8.11,
"learning_rate": 5.723370429252783e-06,
"loss": 2.3501,
"step": 51400
},
{
"epoch": 8.12,
"learning_rate": 5.6995230524642295e-06,
"loss": 2.2522,
"step": 51450
},
{
"epoch": 8.12,
"learning_rate": 5.675675675675676e-06,
"loss": 2.4012,
"step": 51500
},
{
"epoch": 8.13,
"learning_rate": 5.651828298887123e-06,
"loss": 2.2995,
"step": 51550
},
{
"epoch": 8.14,
"learning_rate": 5.627980922098569e-06,
"loss": 2.3444,
"step": 51600
},
{
"epoch": 8.15,
"learning_rate": 5.604133545310016e-06,
"loss": 2.3466,
"step": 51650
},
{
"epoch": 8.15,
"learning_rate": 5.580286168521463e-06,
"loss": 2.2761,
"step": 51700
},
{
"epoch": 8.16,
"learning_rate": 5.556438791732909e-06,
"loss": 2.2975,
"step": 51750
},
{
"epoch": 8.17,
"learning_rate": 5.532591414944356e-06,
"loss": 2.2724,
"step": 51800
},
{
"epoch": 8.18,
"learning_rate": 5.5087440381558026e-06,
"loss": 2.3443,
"step": 51850
},
{
"epoch": 8.19,
"learning_rate": 5.48489666136725e-06,
"loss": 2.2775,
"step": 51900
},
{
"epoch": 8.19,
"learning_rate": 5.461049284578697e-06,
"loss": 2.2304,
"step": 51950
},
{
"epoch": 8.2,
"learning_rate": 5.437201907790143e-06,
"loss": 2.2836,
"step": 52000
},
{
"epoch": 8.21,
"learning_rate": 5.41335453100159e-06,
"loss": 2.319,
"step": 52050
},
{
"epoch": 8.22,
"learning_rate": 5.3895071542130366e-06,
"loss": 2.3162,
"step": 52100
},
{
"epoch": 8.23,
"learning_rate": 5.365659777424483e-06,
"loss": 2.3399,
"step": 52150
},
{
"epoch": 8.23,
"learning_rate": 5.34181240063593e-06,
"loss": 2.2956,
"step": 52200
},
{
"epoch": 8.24,
"learning_rate": 5.317965023847377e-06,
"loss": 2.2469,
"step": 52250
},
{
"epoch": 8.25,
"learning_rate": 5.294117647058824e-06,
"loss": 2.2996,
"step": 52300
},
{
"epoch": 8.26,
"learning_rate": 5.2702702702702705e-06,
"loss": 2.2981,
"step": 52350
},
{
"epoch": 8.26,
"learning_rate": 5.246422893481717e-06,
"loss": 2.2523,
"step": 52400
},
{
"epoch": 8.27,
"learning_rate": 5.222575516693164e-06,
"loss": 2.3944,
"step": 52450
},
{
"epoch": 8.28,
"learning_rate": 5.1987281399046104e-06,
"loss": 2.2972,
"step": 52500
},
{
"epoch": 8.29,
"learning_rate": 5.174880763116057e-06,
"loss": 2.2167,
"step": 52550
},
{
"epoch": 8.3,
"learning_rate": 5.1510333863275045e-06,
"loss": 2.2483,
"step": 52600
},
{
"epoch": 8.3,
"learning_rate": 5.127186009538951e-06,
"loss": 2.2149,
"step": 52650
},
{
"epoch": 8.31,
"learning_rate": 5.103338632750398e-06,
"loss": 2.3079,
"step": 52700
},
{
"epoch": 8.32,
"learning_rate": 5.079491255961844e-06,
"loss": 2.3098,
"step": 52750
},
{
"epoch": 8.33,
"learning_rate": 5.055643879173291e-06,
"loss": 2.3029,
"step": 52800
},
{
"epoch": 8.34,
"learning_rate": 5.031796502384738e-06,
"loss": 2.2854,
"step": 52850
},
{
"epoch": 8.34,
"learning_rate": 5.007949125596184e-06,
"loss": 2.2957,
"step": 52900
},
{
"epoch": 8.35,
"learning_rate": 4.984101748807631e-06,
"loss": 2.2847,
"step": 52950
},
{
"epoch": 8.36,
"learning_rate": 4.9602543720190776e-06,
"loss": 2.2588,
"step": 53000
},
{
"epoch": 8.37,
"learning_rate": 4.936406995230524e-06,
"loss": 2.4674,
"step": 53050
},
{
"epoch": 8.38,
"learning_rate": 4.912559618441972e-06,
"loss": 2.3095,
"step": 53100
},
{
"epoch": 8.38,
"learning_rate": 4.888712241653418e-06,
"loss": 2.2204,
"step": 53150
},
{
"epoch": 8.39,
"learning_rate": 4.864864864864865e-06,
"loss": 2.3547,
"step": 53200
},
{
"epoch": 8.4,
"learning_rate": 4.8410174880763115e-06,
"loss": 2.3369,
"step": 53250
},
{
"epoch": 8.41,
"learning_rate": 4.817170111287759e-06,
"loss": 2.1745,
"step": 53300
},
{
"epoch": 8.41,
"learning_rate": 4.793322734499206e-06,
"loss": 2.2104,
"step": 53350
},
{
"epoch": 8.42,
"learning_rate": 4.769475357710652e-06,
"loss": 2.2942,
"step": 53400
},
{
"epoch": 8.43,
"learning_rate": 4.745627980922099e-06,
"loss": 2.3286,
"step": 53450
},
{
"epoch": 8.44,
"learning_rate": 4.7217806041335455e-06,
"loss": 2.3778,
"step": 53500
},
{
"epoch": 8.45,
"learning_rate": 4.697933227344992e-06,
"loss": 2.4332,
"step": 53550
},
{
"epoch": 8.45,
"learning_rate": 4.674085850556439e-06,
"loss": 2.3262,
"step": 53600
},
{
"epoch": 8.46,
"learning_rate": 4.650238473767885e-06,
"loss": 2.3026,
"step": 53650
},
{
"epoch": 8.47,
"learning_rate": 4.626391096979332e-06,
"loss": 2.3919,
"step": 53700
},
{
"epoch": 8.48,
"learning_rate": 4.602543720190779e-06,
"loss": 2.2402,
"step": 53750
},
{
"epoch": 8.49,
"learning_rate": 4.578696343402225e-06,
"loss": 2.3166,
"step": 53800
},
{
"epoch": 8.49,
"learning_rate": 4.554848966613672e-06,
"loss": 2.3128,
"step": 53850
},
{
"epoch": 8.5,
"learning_rate": 4.531001589825119e-06,
"loss": 2.3067,
"step": 53900
},
{
"epoch": 8.51,
"learning_rate": 4.507154213036566e-06,
"loss": 2.1824,
"step": 53950
},
{
"epoch": 8.52,
"learning_rate": 4.4833068362480135e-06,
"loss": 2.2984,
"step": 54000
},
{
"epoch": 8.53,
"learning_rate": 4.45945945945946e-06,
"loss": 2.265,
"step": 54050
},
{
"epoch": 8.53,
"learning_rate": 4.435612082670907e-06,
"loss": 2.2982,
"step": 54100
},
{
"epoch": 8.54,
"learning_rate": 4.411764705882353e-06,
"loss": 2.2818,
"step": 54150
},
{
"epoch": 8.55,
"learning_rate": 4.3879173290938e-06,
"loss": 2.3345,
"step": 54200
},
{
"epoch": 8.56,
"learning_rate": 4.364069952305247e-06,
"loss": 2.2896,
"step": 54250
},
{
"epoch": 8.56,
"learning_rate": 4.340222575516693e-06,
"loss": 2.2741,
"step": 54300
},
{
"epoch": 8.57,
"learning_rate": 4.31637519872814e-06,
"loss": 2.3498,
"step": 54350
},
{
"epoch": 8.58,
"learning_rate": 4.2925278219395865e-06,
"loss": 2.2106,
"step": 54400
},
{
"epoch": 8.59,
"learning_rate": 4.268680445151033e-06,
"loss": 2.2493,
"step": 54450
},
{
"epoch": 8.6,
"learning_rate": 4.24483306836248e-06,
"loss": 2.2853,
"step": 54500
},
{
"epoch": 8.6,
"learning_rate": 4.2209856915739264e-06,
"loss": 2.3935,
"step": 54550
},
{
"epoch": 8.61,
"learning_rate": 4.197138314785373e-06,
"loss": 2.3058,
"step": 54600
},
{
"epoch": 8.62,
"learning_rate": 4.17329093799682e-06,
"loss": 2.2131,
"step": 54650
},
{
"epoch": 8.63,
"learning_rate": 4.149443561208267e-06,
"loss": 2.2303,
"step": 54700
},
{
"epoch": 8.64,
"learning_rate": 4.125596184419715e-06,
"loss": 2.3927,
"step": 54750
},
{
"epoch": 8.64,
"learning_rate": 4.101748807631161e-06,
"loss": 2.2371,
"step": 54800
},
{
"epoch": 8.65,
"learning_rate": 4.077901430842608e-06,
"loss": 2.2474,
"step": 54850
},
{
"epoch": 8.66,
"learning_rate": 4.0540540540540545e-06,
"loss": 2.3602,
"step": 54900
},
{
"epoch": 8.67,
"learning_rate": 4.030206677265501e-06,
"loss": 2.2762,
"step": 54950
},
{
"epoch": 8.68,
"learning_rate": 4.006359300476948e-06,
"loss": 2.368,
"step": 55000
},
{
"epoch": 8.68,
"learning_rate": 3.982511923688394e-06,
"loss": 2.2163,
"step": 55050
},
{
"epoch": 8.69,
"learning_rate": 3.958664546899841e-06,
"loss": 2.3402,
"step": 55100
},
{
"epoch": 8.7,
"learning_rate": 3.934817170111288e-06,
"loss": 2.3111,
"step": 55150
},
{
"epoch": 8.71,
"learning_rate": 3.910969793322734e-06,
"loss": 2.3387,
"step": 55200
},
{
"epoch": 8.71,
"learning_rate": 3.887122416534181e-06,
"loss": 2.2679,
"step": 55250
},
{
"epoch": 8.72,
"learning_rate": 3.8632750397456276e-06,
"loss": 2.2446,
"step": 55300
},
{
"epoch": 8.73,
"learning_rate": 3.839427662957074e-06,
"loss": 2.3467,
"step": 55350
},
{
"epoch": 8.74,
"learning_rate": 3.815580286168522e-06,
"loss": 2.2937,
"step": 55400
},
{
"epoch": 8.75,
"learning_rate": 3.791732909379968e-06,
"loss": 2.2471,
"step": 55450
},
{
"epoch": 8.75,
"learning_rate": 3.7678855325914145e-06,
"loss": 2.3274,
"step": 55500
},
{
"epoch": 8.76,
"learning_rate": 3.744038155802862e-06,
"loss": 2.3025,
"step": 55550
},
{
"epoch": 8.77,
"learning_rate": 3.7201907790143086e-06,
"loss": 2.25,
"step": 55600
},
{
"epoch": 8.78,
"learning_rate": 3.6963434022257552e-06,
"loss": 2.2376,
"step": 55650
},
{
"epoch": 8.79,
"learning_rate": 3.672496025437202e-06,
"loss": 2.3342,
"step": 55700
},
{
"epoch": 8.79,
"learning_rate": 3.648648648648649e-06,
"loss": 2.3249,
"step": 55750
},
{
"epoch": 8.8,
"learning_rate": 3.6248012718600955e-06,
"loss": 2.2627,
"step": 55800
},
{
"epoch": 8.81,
"learning_rate": 3.600953895071542e-06,
"loss": 2.2669,
"step": 55850
},
{
"epoch": 8.82,
"learning_rate": 3.577106518282989e-06,
"loss": 2.2734,
"step": 55900
},
{
"epoch": 8.82,
"learning_rate": 3.553259141494436e-06,
"loss": 2.4094,
"step": 55950
},
{
"epoch": 8.83,
"learning_rate": 3.5294117647058825e-06,
"loss": 2.3159,
"step": 56000
},
{
"epoch": 8.84,
"learning_rate": 3.505564387917329e-06,
"loss": 2.2596,
"step": 56050
},
{
"epoch": 8.85,
"learning_rate": 3.4817170111287757e-06,
"loss": 2.2068,
"step": 56100
},
{
"epoch": 8.86,
"learning_rate": 3.4578696343402224e-06,
"loss": 2.3419,
"step": 56150
},
{
"epoch": 8.86,
"learning_rate": 3.4340222575516694e-06,
"loss": 2.2312,
"step": 56200
},
{
"epoch": 8.87,
"learning_rate": 3.4101748807631165e-06,
"loss": 2.2964,
"step": 56250
},
{
"epoch": 8.88,
"learning_rate": 3.386327503974563e-06,
"loss": 2.3362,
"step": 56300
},
{
"epoch": 8.89,
"learning_rate": 3.3624801271860097e-06,
"loss": 2.1478,
"step": 56350
},
{
"epoch": 8.9,
"learning_rate": 3.3386327503974563e-06,
"loss": 2.2711,
"step": 56400
},
{
"epoch": 8.9,
"learning_rate": 3.314785373608903e-06,
"loss": 2.216,
"step": 56450
},
{
"epoch": 8.91,
"learning_rate": 3.2909379968203496e-06,
"loss": 2.2763,
"step": 56500
},
{
"epoch": 8.92,
"learning_rate": 3.2670906200317962e-06,
"loss": 2.2942,
"step": 56550
},
{
"epoch": 8.93,
"learning_rate": 3.2432432432432437e-06,
"loss": 2.3179,
"step": 56600
},
{
"epoch": 8.94,
"learning_rate": 3.2193958664546903e-06,
"loss": 2.3063,
"step": 56650
},
{
"epoch": 8.94,
"learning_rate": 3.195548489666137e-06,
"loss": 2.3186,
"step": 56700
},
{
"epoch": 8.95,
"learning_rate": 3.1717011128775836e-06,
"loss": 2.2831,
"step": 56750
},
{
"epoch": 8.96,
"learning_rate": 3.1478537360890302e-06,
"loss": 2.3537,
"step": 56800
},
{
"epoch": 8.97,
"learning_rate": 3.124006359300477e-06,
"loss": 2.2919,
"step": 56850
},
{
"epoch": 8.97,
"learning_rate": 3.1001589825119235e-06,
"loss": 2.3836,
"step": 56900
},
{
"epoch": 8.98,
"learning_rate": 3.0763116057233705e-06,
"loss": 2.4449,
"step": 56950
},
{
"epoch": 8.99,
"learning_rate": 3.0524642289348176e-06,
"loss": 2.2795,
"step": 57000
},
{
"epoch": 9.0,
"learning_rate": 3.028616852146264e-06,
"loss": 2.2668,
"step": 57050
},
{
"epoch": 9.01,
"learning_rate": 3.004769475357711e-06,
"loss": 2.2844,
"step": 57100
},
{
"epoch": 9.01,
"learning_rate": 2.9809220985691575e-06,
"loss": 2.2807,
"step": 57150
},
{
"epoch": 9.02,
"learning_rate": 2.957074721780604e-06,
"loss": 2.3082,
"step": 57200
},
{
"epoch": 9.03,
"learning_rate": 2.9332273449920507e-06,
"loss": 2.2464,
"step": 57250
},
{
"epoch": 9.04,
"learning_rate": 2.9093799682034978e-06,
"loss": 2.3599,
"step": 57300
},
{
"epoch": 9.05,
"learning_rate": 2.8855325914149444e-06,
"loss": 2.2516,
"step": 57350
},
{
"epoch": 9.05,
"learning_rate": 2.8616852146263915e-06,
"loss": 2.2633,
"step": 57400
},
{
"epoch": 9.06,
"learning_rate": 2.837837837837838e-06,
"loss": 2.364,
"step": 57450
},
{
"epoch": 9.07,
"learning_rate": 2.8139904610492847e-06,
"loss": 2.2732,
"step": 57500
},
{
"epoch": 9.08,
"learning_rate": 2.7901430842607313e-06,
"loss": 2.2697,
"step": 57550
},
{
"epoch": 9.09,
"learning_rate": 2.766295707472178e-06,
"loss": 2.3099,
"step": 57600
},
{
"epoch": 9.09,
"learning_rate": 2.742448330683625e-06,
"loss": 2.2531,
"step": 57650
},
{
"epoch": 9.1,
"learning_rate": 2.7186009538950716e-06,
"loss": 2.3208,
"step": 57700
},
{
"epoch": 9.11,
"learning_rate": 2.6947535771065183e-06,
"loss": 2.2014,
"step": 57750
},
{
"epoch": 9.12,
"learning_rate": 2.670906200317965e-06,
"loss": 2.3493,
"step": 57800
},
{
"epoch": 9.12,
"learning_rate": 2.647058823529412e-06,
"loss": 2.2841,
"step": 57850
},
{
"epoch": 9.13,
"learning_rate": 2.6232114467408586e-06,
"loss": 2.2263,
"step": 57900
},
{
"epoch": 9.14,
"learning_rate": 2.5993640699523052e-06,
"loss": 2.3054,
"step": 57950
},
{
"epoch": 9.15,
"learning_rate": 2.5755166931637523e-06,
"loss": 2.2863,
"step": 58000
},
{
"epoch": 9.16,
"learning_rate": 2.551669316375199e-06,
"loss": 2.2468,
"step": 58050
},
{
"epoch": 9.16,
"learning_rate": 2.5278219395866455e-06,
"loss": 2.3116,
"step": 58100
},
{
"epoch": 9.17,
"learning_rate": 2.503974562798092e-06,
"loss": 2.3623,
"step": 58150
},
{
"epoch": 9.18,
"learning_rate": 2.4801271860095388e-06,
"loss": 2.2285,
"step": 58200
},
{
"epoch": 9.19,
"learning_rate": 2.456279809220986e-06,
"loss": 2.3856,
"step": 58250
},
{
"epoch": 9.2,
"learning_rate": 2.4324324324324325e-06,
"loss": 2.2852,
"step": 58300
},
{
"epoch": 9.2,
"learning_rate": 2.4085850556438795e-06,
"loss": 2.315,
"step": 58350
},
{
"epoch": 9.21,
"learning_rate": 2.384737678855326e-06,
"loss": 2.3652,
"step": 58400
},
{
"epoch": 9.22,
"learning_rate": 2.3608903020667728e-06,
"loss": 2.2811,
"step": 58450
},
{
"epoch": 9.23,
"learning_rate": 2.3370429252782194e-06,
"loss": 2.2823,
"step": 58500
},
{
"epoch": 9.24,
"learning_rate": 2.313195548489666e-06,
"loss": 2.2714,
"step": 58550
},
{
"epoch": 9.24,
"learning_rate": 2.2893481717011127e-06,
"loss": 2.2866,
"step": 58600
},
{
"epoch": 9.25,
"learning_rate": 2.2655007949125597e-06,
"loss": 2.2227,
"step": 58650
},
{
"epoch": 9.26,
"learning_rate": 2.2416534181240068e-06,
"loss": 2.3044,
"step": 58700
},
{
"epoch": 9.27,
"learning_rate": 2.2178060413354534e-06,
"loss": 2.3071,
"step": 58750
},
{
"epoch": 9.27,
"learning_rate": 2.1939586645469e-06,
"loss": 2.1963,
"step": 58800
},
{
"epoch": 9.28,
"learning_rate": 2.1701112877583466e-06,
"loss": 2.281,
"step": 58850
},
{
"epoch": 9.29,
"learning_rate": 2.1462639109697933e-06,
"loss": 2.2891,
"step": 58900
},
{
"epoch": 9.3,
"learning_rate": 2.12241653418124e-06,
"loss": 2.3513,
"step": 58950
},
{
"epoch": 9.31,
"learning_rate": 2.0985691573926865e-06,
"loss": 2.2369,
"step": 59000
},
{
"epoch": 9.31,
"learning_rate": 2.0747217806041336e-06,
"loss": 2.2101,
"step": 59050
},
{
"epoch": 9.32,
"learning_rate": 2.0508744038155806e-06,
"loss": 2.2356,
"step": 59100
},
{
"epoch": 9.33,
"learning_rate": 2.0270270270270273e-06,
"loss": 2.2986,
"step": 59150
},
{
"epoch": 9.34,
"learning_rate": 2.003179650238474e-06,
"loss": 2.3052,
"step": 59200
},
{
"epoch": 9.35,
"learning_rate": 1.9793322734499205e-06,
"loss": 2.2017,
"step": 59250
},
{
"epoch": 9.35,
"learning_rate": 1.955484896661367e-06,
"loss": 2.3372,
"step": 59300
},
{
"epoch": 9.36,
"learning_rate": 1.9316375198728138e-06,
"loss": 2.2929,
"step": 59350
},
{
"epoch": 9.37,
"learning_rate": 1.907790143084261e-06,
"loss": 2.2468,
"step": 59400
},
{
"epoch": 9.38,
"learning_rate": 1.8839427662957072e-06,
"loss": 2.316,
"step": 59450
},
{
"epoch": 9.38,
"learning_rate": 1.8600953895071543e-06,
"loss": 2.3643,
"step": 59500
},
{
"epoch": 9.39,
"learning_rate": 1.836248012718601e-06,
"loss": 2.2971,
"step": 59550
},
{
"epoch": 9.4,
"learning_rate": 1.8124006359300478e-06,
"loss": 2.3568,
"step": 59600
},
{
"epoch": 9.41,
"learning_rate": 1.7885532591414946e-06,
"loss": 2.1878,
"step": 59650
},
{
"epoch": 9.42,
"learning_rate": 1.7647058823529412e-06,
"loss": 2.3828,
"step": 59700
},
{
"epoch": 9.42,
"learning_rate": 1.7408585055643879e-06,
"loss": 2.2528,
"step": 59750
},
{
"epoch": 9.43,
"learning_rate": 1.7170111287758347e-06,
"loss": 2.329,
"step": 59800
},
{
"epoch": 9.44,
"learning_rate": 1.6931637519872815e-06,
"loss": 2.3648,
"step": 59850
},
{
"epoch": 9.45,
"learning_rate": 1.6693163751987282e-06,
"loss": 2.3221,
"step": 59900
},
{
"epoch": 9.46,
"learning_rate": 1.6454689984101748e-06,
"loss": 2.1805,
"step": 59950
},
{
"epoch": 9.46,
"learning_rate": 1.6216216216216219e-06,
"loss": 2.3446,
"step": 60000
},
{
"epoch": 9.47,
"learning_rate": 1.5977742448330685e-06,
"loss": 2.42,
"step": 60050
},
{
"epoch": 9.48,
"learning_rate": 1.5739268680445151e-06,
"loss": 2.1562,
"step": 60100
},
{
"epoch": 9.49,
"learning_rate": 1.5500794912559617e-06,
"loss": 2.1505,
"step": 60150
},
{
"epoch": 9.5,
"learning_rate": 1.5262321144674088e-06,
"loss": 2.2464,
"step": 60200
},
{
"epoch": 9.5,
"learning_rate": 1.5023847376788554e-06,
"loss": 2.3115,
"step": 60250
},
{
"epoch": 9.51,
"learning_rate": 1.478537360890302e-06,
"loss": 2.0721,
"step": 60300
},
{
"epoch": 9.52,
"learning_rate": 1.4546899841017489e-06,
"loss": 2.27,
"step": 60350
},
{
"epoch": 9.53,
"learning_rate": 1.4308426073131957e-06,
"loss": 2.3262,
"step": 60400
},
{
"epoch": 9.53,
"learning_rate": 1.4069952305246424e-06,
"loss": 2.3878,
"step": 60450
},
{
"epoch": 9.54,
"learning_rate": 1.383147853736089e-06,
"loss": 2.2035,
"step": 60500
},
{
"epoch": 9.55,
"learning_rate": 1.3593004769475358e-06,
"loss": 2.2805,
"step": 60550
},
{
"epoch": 9.56,
"learning_rate": 1.3354531001589825e-06,
"loss": 2.2035,
"step": 60600
},
{
"epoch": 9.57,
"learning_rate": 1.3116057233704293e-06,
"loss": 2.225,
"step": 60650
},
{
"epoch": 9.57,
"learning_rate": 1.2877583465818761e-06,
"loss": 2.2499,
"step": 60700
},
{
"epoch": 9.58,
"learning_rate": 1.2639109697933228e-06,
"loss": 2.2492,
"step": 60750
},
{
"epoch": 9.59,
"learning_rate": 1.2400635930047694e-06,
"loss": 2.2315,
"step": 60800
},
{
"epoch": 9.6,
"learning_rate": 1.2162162162162162e-06,
"loss": 2.2754,
"step": 60850
},
{
"epoch": 9.61,
"learning_rate": 1.192368839427663e-06,
"loss": 2.2571,
"step": 60900
},
{
"epoch": 9.61,
"learning_rate": 1.1685214626391097e-06,
"loss": 2.3155,
"step": 60950
},
{
"epoch": 9.62,
"learning_rate": 1.1446740858505563e-06,
"loss": 2.2825,
"step": 61000
},
{
"epoch": 9.63,
"learning_rate": 1.1208267090620034e-06,
"loss": 2.2699,
"step": 61050
},
{
"epoch": 9.64,
"learning_rate": 1.09697933227345e-06,
"loss": 2.2182,
"step": 61100
},
{
"epoch": 9.65,
"learning_rate": 1.0731319554848966e-06,
"loss": 2.2889,
"step": 61150
},
{
"epoch": 9.65,
"learning_rate": 1.0492845786963433e-06,
"loss": 2.2569,
"step": 61200
},
{
"epoch": 9.66,
"learning_rate": 1.0254372019077903e-06,
"loss": 2.2386,
"step": 61250
},
{
"epoch": 9.67,
"learning_rate": 1.001589825119237e-06,
"loss": 2.3111,
"step": 61300
},
{
"epoch": 9.68,
"learning_rate": 9.777424483306836e-07,
"loss": 2.2391,
"step": 61350
},
{
"epoch": 9.68,
"learning_rate": 9.538950715421304e-07,
"loss": 2.2852,
"step": 61400
},
{
"epoch": 9.69,
"learning_rate": 9.300476947535771e-07,
"loss": 2.2392,
"step": 61450
},
{
"epoch": 9.7,
"learning_rate": 9.062003179650239e-07,
"loss": 2.3655,
"step": 61500
},
{
"epoch": 9.71,
"learning_rate": 8.823529411764706e-07,
"loss": 2.2298,
"step": 61550
},
{
"epoch": 9.72,
"learning_rate": 8.585055643879174e-07,
"loss": 2.2856,
"step": 61600
},
{
"epoch": 9.72,
"learning_rate": 8.346581875993641e-07,
"loss": 2.2957,
"step": 61650
},
{
"epoch": 9.73,
"learning_rate": 8.108108108108109e-07,
"loss": 2.3311,
"step": 61700
},
{
"epoch": 9.74,
"learning_rate": 7.869634340222576e-07,
"loss": 2.2878,
"step": 61750
},
{
"epoch": 9.75,
"learning_rate": 7.631160572337044e-07,
"loss": 2.3333,
"step": 61800
},
{
"epoch": 9.76,
"learning_rate": 7.39268680445151e-07,
"loss": 2.2988,
"step": 61850
},
{
"epoch": 9.76,
"learning_rate": 7.154213036565979e-07,
"loss": 2.3403,
"step": 61900
},
{
"epoch": 9.77,
"learning_rate": 6.915739268680445e-07,
"loss": 2.3127,
"step": 61950
},
{
"epoch": 9.78,
"learning_rate": 6.677265500794912e-07,
"loss": 2.2709,
"step": 62000
},
{
"epoch": 9.79,
"learning_rate": 6.438791732909381e-07,
"loss": 2.2767,
"step": 62050
},
{
"epoch": 9.79,
"learning_rate": 6.200317965023847e-07,
"loss": 2.2643,
"step": 62100
},
{
"epoch": 9.8,
"learning_rate": 5.961844197138315e-07,
"loss": 2.1819,
"step": 62150
},
{
"epoch": 9.81,
"learning_rate": 5.723370429252782e-07,
"loss": 2.3761,
"step": 62200
},
{
"epoch": 9.82,
"learning_rate": 5.48489666136725e-07,
"loss": 2.1973,
"step": 62250
},
{
"epoch": 9.83,
"learning_rate": 5.246422893481716e-07,
"loss": 2.3058,
"step": 62300
},
{
"epoch": 9.83,
"learning_rate": 5.007949125596185e-07,
"loss": 2.3225,
"step": 62350
},
{
"epoch": 9.84,
"learning_rate": 4.769475357710652e-07,
"loss": 2.2697,
"step": 62400
},
{
"epoch": 9.85,
"learning_rate": 4.5310015898251194e-07,
"loss": 2.2113,
"step": 62450
},
{
"epoch": 9.86,
"learning_rate": 4.292527821939587e-07,
"loss": 2.3822,
"step": 62500
},
{
"epoch": 9.87,
"learning_rate": 4.0540540540540546e-07,
"loss": 2.3111,
"step": 62550
},
{
"epoch": 9.87,
"learning_rate": 3.815580286168522e-07,
"loss": 2.3057,
"step": 62600
},
{
"epoch": 9.88,
"learning_rate": 3.5771065182829893e-07,
"loss": 2.2624,
"step": 62650
},
{
"epoch": 9.89,
"learning_rate": 3.338632750397456e-07,
"loss": 2.2167,
"step": 62700
},
{
"epoch": 9.9,
"learning_rate": 3.1001589825119235e-07,
"loss": 2.3353,
"step": 62750
},
{
"epoch": 9.91,
"learning_rate": 2.861685214626391e-07,
"loss": 2.3511,
"step": 62800
},
{
"epoch": 9.91,
"learning_rate": 2.623211446740858e-07,
"loss": 2.2372,
"step": 62850
},
{
"epoch": 9.92,
"learning_rate": 2.384737678855326e-07,
"loss": 2.2875,
"step": 62900
},
{
"epoch": 9.93,
"learning_rate": 2.1462639109697934e-07,
"loss": 2.246,
"step": 62950
},
{
"epoch": 9.94,
"learning_rate": 1.907790143084261e-07,
"loss": 2.2529,
"step": 63000
},
{
"epoch": 9.94,
"learning_rate": 1.669316375198728e-07,
"loss": 2.2318,
"step": 63050
},
{
"epoch": 9.95,
"learning_rate": 1.4308426073131954e-07,
"loss": 2.3183,
"step": 63100
},
{
"epoch": 9.96,
"learning_rate": 1.192368839427663e-07,
"loss": 2.3006,
"step": 63150
},
{
"epoch": 9.97,
"learning_rate": 9.538950715421305e-08,
"loss": 2.2809,
"step": 63200
},
{
"epoch": 9.98,
"learning_rate": 7.154213036565977e-08,
"loss": 2.2341,
"step": 63250
},
{
"epoch": 9.98,
"learning_rate": 4.7694753577106525e-08,
"loss": 2.3625,
"step": 63300
},
{
"epoch": 9.99,
"learning_rate": 2.3847376788553262e-08,
"loss": 2.2389,
"step": 63350
},
{
"epoch": 10.0,
"learning_rate": 0.0,
"loss": 2.307,
"step": 63400
},
{
"epoch": 10.0,
"step": 63400,
"total_flos": 2.706173459807863e+18,
"train_loss": 2.6413619818221132,
"train_runtime": 44032.3597,
"train_samples_per_second": 46.076,
"train_steps_per_second": 1.44
}
],
"max_steps": 63400,
"num_train_epochs": 10,
"total_flos": 2.706173459807863e+18,
"trial_name": null,
"trial_params": null
}