neural-chat-7b-v1-1 / trainer_state.json
lvkaokao's picture
add model.
dac7b46
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.3713417039942442,
"global_step": 6000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 9.278350515463919e-08,
"loss": 2.5166,
"step": 10
},
{
"epoch": 0.0,
"learning_rate": 1.9587628865979384e-07,
"loss": 1.9059,
"step": 20
},
{
"epoch": 0.0,
"learning_rate": 2.989690721649485e-07,
"loss": 1.7869,
"step": 30
},
{
"epoch": 0.0,
"learning_rate": 4.0206185567010316e-07,
"loss": 1.7863,
"step": 40
},
{
"epoch": 0.0,
"learning_rate": 5.051546391752578e-07,
"loss": 1.6946,
"step": 50
},
{
"epoch": 0.0,
"learning_rate": 6.082474226804124e-07,
"loss": 1.7419,
"step": 60
},
{
"epoch": 0.0,
"learning_rate": 7.11340206185567e-07,
"loss": 1.645,
"step": 70
},
{
"epoch": 0.0,
"learning_rate": 8.144329896907217e-07,
"loss": 1.6611,
"step": 80
},
{
"epoch": 0.01,
"learning_rate": 9.175257731958763e-07,
"loss": 1.5985,
"step": 90
},
{
"epoch": 0.01,
"learning_rate": 1.020618556701031e-06,
"loss": 1.6198,
"step": 100
},
{
"epoch": 0.01,
"learning_rate": 1.1237113402061856e-06,
"loss": 1.6259,
"step": 110
},
{
"epoch": 0.01,
"learning_rate": 1.2268041237113403e-06,
"loss": 1.5973,
"step": 120
},
{
"epoch": 0.01,
"learning_rate": 1.329896907216495e-06,
"loss": 1.5941,
"step": 130
},
{
"epoch": 0.01,
"learning_rate": 1.4329896907216496e-06,
"loss": 1.5597,
"step": 140
},
{
"epoch": 0.01,
"learning_rate": 1.5360824742268042e-06,
"loss": 1.5672,
"step": 150
},
{
"epoch": 0.01,
"learning_rate": 1.639175257731959e-06,
"loss": 1.5372,
"step": 160
},
{
"epoch": 0.01,
"learning_rate": 1.7422680412371134e-06,
"loss": 1.5715,
"step": 170
},
{
"epoch": 0.01,
"learning_rate": 1.8453608247422682e-06,
"loss": 1.5389,
"step": 180
},
{
"epoch": 0.01,
"learning_rate": 1.948453608247423e-06,
"loss": 1.5525,
"step": 190
},
{
"epoch": 0.01,
"learning_rate": 2.0515463917525773e-06,
"loss": 1.5871,
"step": 200
},
{
"epoch": 0.01,
"learning_rate": 2.1546391752577322e-06,
"loss": 1.5442,
"step": 210
},
{
"epoch": 0.01,
"learning_rate": 2.2577319587628867e-06,
"loss": 1.5335,
"step": 220
},
{
"epoch": 0.01,
"learning_rate": 2.3608247422680415e-06,
"loss": 1.5103,
"step": 230
},
{
"epoch": 0.01,
"learning_rate": 2.463917525773196e-06,
"loss": 1.5016,
"step": 240
},
{
"epoch": 0.02,
"learning_rate": 2.5670103092783504e-06,
"loss": 1.5101,
"step": 250
},
{
"epoch": 0.02,
"learning_rate": 2.6701030927835053e-06,
"loss": 1.5323,
"step": 260
},
{
"epoch": 0.02,
"learning_rate": 2.77319587628866e-06,
"loss": 1.4785,
"step": 270
},
{
"epoch": 0.02,
"learning_rate": 2.8762886597938146e-06,
"loss": 1.4132,
"step": 280
},
{
"epoch": 0.02,
"learning_rate": 2.979381443298969e-06,
"loss": 1.4733,
"step": 290
},
{
"epoch": 0.02,
"learning_rate": 3.082474226804124e-06,
"loss": 1.4258,
"step": 300
},
{
"epoch": 0.02,
"learning_rate": 3.1855670103092784e-06,
"loss": 1.4557,
"step": 310
},
{
"epoch": 0.02,
"learning_rate": 3.2886597938144333e-06,
"loss": 1.44,
"step": 320
},
{
"epoch": 0.02,
"learning_rate": 3.391752577319588e-06,
"loss": 1.4348,
"step": 330
},
{
"epoch": 0.02,
"learning_rate": 3.494845360824742e-06,
"loss": 1.406,
"step": 340
},
{
"epoch": 0.02,
"learning_rate": 3.597938144329897e-06,
"loss": 1.4239,
"step": 350
},
{
"epoch": 0.02,
"learning_rate": 3.701030927835052e-06,
"loss": 1.4185,
"step": 360
},
{
"epoch": 0.02,
"learning_rate": 3.8041237113402064e-06,
"loss": 1.3954,
"step": 370
},
{
"epoch": 0.02,
"learning_rate": 3.907216494845361e-06,
"loss": 1.3759,
"step": 380
},
{
"epoch": 0.02,
"learning_rate": 4.010309278350516e-06,
"loss": 1.3231,
"step": 390
},
{
"epoch": 0.02,
"learning_rate": 4.11340206185567e-06,
"loss": 1.3059,
"step": 400
},
{
"epoch": 0.03,
"learning_rate": 4.216494845360825e-06,
"loss": 1.3258,
"step": 410
},
{
"epoch": 0.03,
"learning_rate": 4.31958762886598e-06,
"loss": 1.3722,
"step": 420
},
{
"epoch": 0.03,
"learning_rate": 4.422680412371134e-06,
"loss": 1.2917,
"step": 430
},
{
"epoch": 0.03,
"learning_rate": 4.525773195876289e-06,
"loss": 1.2893,
"step": 440
},
{
"epoch": 0.03,
"learning_rate": 4.628865979381444e-06,
"loss": 1.2865,
"step": 450
},
{
"epoch": 0.03,
"learning_rate": 4.731958762886599e-06,
"loss": 1.28,
"step": 460
},
{
"epoch": 0.03,
"learning_rate": 4.835051546391753e-06,
"loss": 1.299,
"step": 470
},
{
"epoch": 0.03,
"learning_rate": 4.9381443298969075e-06,
"loss": 1.2949,
"step": 480
},
{
"epoch": 0.03,
"learning_rate": 5.041237113402062e-06,
"loss": 1.3078,
"step": 490
},
{
"epoch": 0.03,
"learning_rate": 5.144329896907216e-06,
"loss": 1.2242,
"step": 500
},
{
"epoch": 0.03,
"learning_rate": 5.247422680412372e-06,
"loss": 1.2714,
"step": 510
},
{
"epoch": 0.03,
"learning_rate": 5.350515463917526e-06,
"loss": 1.2482,
"step": 520
},
{
"epoch": 0.03,
"learning_rate": 5.45360824742268e-06,
"loss": 1.2703,
"step": 530
},
{
"epoch": 0.03,
"learning_rate": 5.556701030927836e-06,
"loss": 1.2578,
"step": 540
},
{
"epoch": 0.03,
"learning_rate": 5.65979381443299e-06,
"loss": 1.2989,
"step": 550
},
{
"epoch": 0.03,
"learning_rate": 5.762886597938144e-06,
"loss": 1.2853,
"step": 560
},
{
"epoch": 0.04,
"learning_rate": 5.8659793814433e-06,
"loss": 1.3022,
"step": 570
},
{
"epoch": 0.04,
"learning_rate": 5.969072164948454e-06,
"loss": 1.2871,
"step": 580
},
{
"epoch": 0.04,
"learning_rate": 6.0721649484536086e-06,
"loss": 1.2679,
"step": 590
},
{
"epoch": 0.04,
"learning_rate": 6.1752577319587634e-06,
"loss": 1.2732,
"step": 600
},
{
"epoch": 0.04,
"learning_rate": 6.278350515463918e-06,
"loss": 1.2641,
"step": 610
},
{
"epoch": 0.04,
"learning_rate": 6.381443298969072e-06,
"loss": 1.2919,
"step": 620
},
{
"epoch": 0.04,
"learning_rate": 6.484536082474227e-06,
"loss": 1.2594,
"step": 630
},
{
"epoch": 0.04,
"learning_rate": 6.587628865979382e-06,
"loss": 1.2592,
"step": 640
},
{
"epoch": 0.04,
"learning_rate": 6.690721649484536e-06,
"loss": 1.2652,
"step": 650
},
{
"epoch": 0.04,
"learning_rate": 6.793814432989692e-06,
"loss": 1.3274,
"step": 660
},
{
"epoch": 0.04,
"learning_rate": 6.896907216494846e-06,
"loss": 1.2155,
"step": 670
},
{
"epoch": 0.04,
"learning_rate": 7e-06,
"loss": 1.2837,
"step": 680
},
{
"epoch": 0.04,
"learning_rate": 7.103092783505156e-06,
"loss": 1.265,
"step": 690
},
{
"epoch": 0.04,
"learning_rate": 7.20618556701031e-06,
"loss": 1.2427,
"step": 700
},
{
"epoch": 0.04,
"learning_rate": 7.309278350515464e-06,
"loss": 1.2688,
"step": 710
},
{
"epoch": 0.04,
"learning_rate": 7.412371134020619e-06,
"loss": 1.3071,
"step": 720
},
{
"epoch": 0.05,
"learning_rate": 7.515463917525773e-06,
"loss": 1.2346,
"step": 730
},
{
"epoch": 0.05,
"learning_rate": 7.618556701030928e-06,
"loss": 1.2246,
"step": 740
},
{
"epoch": 0.05,
"learning_rate": 7.721649484536083e-06,
"loss": 1.2604,
"step": 750
},
{
"epoch": 0.05,
"learning_rate": 7.824742268041238e-06,
"loss": 1.2589,
"step": 760
},
{
"epoch": 0.05,
"learning_rate": 7.927835051546391e-06,
"loss": 1.2512,
"step": 770
},
{
"epoch": 0.05,
"learning_rate": 8.030927835051548e-06,
"loss": 1.2229,
"step": 780
},
{
"epoch": 0.05,
"learning_rate": 8.134020618556701e-06,
"loss": 1.2326,
"step": 790
},
{
"epoch": 0.05,
"learning_rate": 8.237113402061856e-06,
"loss": 1.3097,
"step": 800
},
{
"epoch": 0.05,
"learning_rate": 8.34020618556701e-06,
"loss": 1.2358,
"step": 810
},
{
"epoch": 0.05,
"learning_rate": 8.443298969072166e-06,
"loss": 1.2746,
"step": 820
},
{
"epoch": 0.05,
"learning_rate": 8.54639175257732e-06,
"loss": 1.3063,
"step": 830
},
{
"epoch": 0.05,
"learning_rate": 8.649484536082475e-06,
"loss": 1.2702,
"step": 840
},
{
"epoch": 0.05,
"learning_rate": 8.75257731958763e-06,
"loss": 1.2421,
"step": 850
},
{
"epoch": 0.05,
"learning_rate": 8.855670103092783e-06,
"loss": 1.3047,
"step": 860
},
{
"epoch": 0.05,
"learning_rate": 8.95876288659794e-06,
"loss": 1.2452,
"step": 870
},
{
"epoch": 0.05,
"learning_rate": 9.061855670103093e-06,
"loss": 1.2753,
"step": 880
},
{
"epoch": 0.06,
"learning_rate": 9.164948453608248e-06,
"loss": 1.2532,
"step": 890
},
{
"epoch": 0.06,
"learning_rate": 9.268041237113403e-06,
"loss": 1.2379,
"step": 900
},
{
"epoch": 0.06,
"learning_rate": 9.371134020618558e-06,
"loss": 1.2891,
"step": 910
},
{
"epoch": 0.06,
"learning_rate": 9.474226804123711e-06,
"loss": 1.2773,
"step": 920
},
{
"epoch": 0.06,
"learning_rate": 9.577319587628868e-06,
"loss": 1.2753,
"step": 930
},
{
"epoch": 0.06,
"learning_rate": 9.68041237113402e-06,
"loss": 1.1976,
"step": 940
},
{
"epoch": 0.06,
"learning_rate": 9.783505154639176e-06,
"loss": 1.2721,
"step": 950
},
{
"epoch": 0.06,
"learning_rate": 9.88659793814433e-06,
"loss": 1.3058,
"step": 960
},
{
"epoch": 0.06,
"learning_rate": 9.989690721649485e-06,
"loss": 1.2383,
"step": 970
},
{
"epoch": 0.06,
"learning_rate": 9.998105303046253e-06,
"loss": 1.2356,
"step": 980
},
{
"epoch": 0.06,
"learning_rate": 9.996000084208755e-06,
"loss": 1.2639,
"step": 990
},
{
"epoch": 0.06,
"learning_rate": 9.993894865371256e-06,
"loss": 1.2689,
"step": 1000
},
{
"epoch": 0.06,
"learning_rate": 9.991789646533758e-06,
"loss": 1.2764,
"step": 1010
},
{
"epoch": 0.06,
"learning_rate": 9.989684427696261e-06,
"loss": 1.2602,
"step": 1020
},
{
"epoch": 0.06,
"learning_rate": 9.987579208858762e-06,
"loss": 1.3034,
"step": 1030
},
{
"epoch": 0.06,
"learning_rate": 9.985473990021264e-06,
"loss": 1.2318,
"step": 1040
},
{
"epoch": 0.06,
"learning_rate": 9.983368771183765e-06,
"loss": 1.252,
"step": 1050
},
{
"epoch": 0.07,
"learning_rate": 9.981263552346267e-06,
"loss": 1.3111,
"step": 1060
},
{
"epoch": 0.07,
"learning_rate": 9.97915833350877e-06,
"loss": 1.255,
"step": 1070
},
{
"epoch": 0.07,
"learning_rate": 9.97705311467127e-06,
"loss": 1.2736,
"step": 1080
},
{
"epoch": 0.07,
"learning_rate": 9.974947895833773e-06,
"loss": 1.3128,
"step": 1090
},
{
"epoch": 0.07,
"learning_rate": 9.972842676996276e-06,
"loss": 1.2349,
"step": 1100
},
{
"epoch": 0.07,
"learning_rate": 9.970737458158777e-06,
"loss": 1.2797,
"step": 1110
},
{
"epoch": 0.07,
"learning_rate": 9.968632239321279e-06,
"loss": 1.2698,
"step": 1120
},
{
"epoch": 0.07,
"learning_rate": 9.96652702048378e-06,
"loss": 1.2714,
"step": 1130
},
{
"epoch": 0.07,
"learning_rate": 9.964421801646282e-06,
"loss": 1.2965,
"step": 1140
},
{
"epoch": 0.07,
"learning_rate": 9.962316582808785e-06,
"loss": 1.2764,
"step": 1150
},
{
"epoch": 0.07,
"learning_rate": 9.960211363971286e-06,
"loss": 1.2294,
"step": 1160
},
{
"epoch": 0.07,
"learning_rate": 9.958106145133788e-06,
"loss": 1.2623,
"step": 1170
},
{
"epoch": 0.07,
"learning_rate": 9.956000926296289e-06,
"loss": 1.2814,
"step": 1180
},
{
"epoch": 0.07,
"learning_rate": 9.953895707458791e-06,
"loss": 1.2793,
"step": 1190
},
{
"epoch": 0.07,
"learning_rate": 9.951790488621294e-06,
"loss": 1.2749,
"step": 1200
},
{
"epoch": 0.07,
"learning_rate": 9.949685269783795e-06,
"loss": 1.2689,
"step": 1210
},
{
"epoch": 0.08,
"learning_rate": 9.947580050946297e-06,
"loss": 1.2913,
"step": 1220
},
{
"epoch": 0.08,
"learning_rate": 9.9454748321088e-06,
"loss": 1.2386,
"step": 1230
},
{
"epoch": 0.08,
"learning_rate": 9.9433696132713e-06,
"loss": 1.2519,
"step": 1240
},
{
"epoch": 0.08,
"learning_rate": 9.941264394433803e-06,
"loss": 1.2893,
"step": 1250
},
{
"epoch": 0.08,
"learning_rate": 9.939159175596304e-06,
"loss": 1.2398,
"step": 1260
},
{
"epoch": 0.08,
"learning_rate": 9.937053956758805e-06,
"loss": 1.2836,
"step": 1270
},
{
"epoch": 0.08,
"learning_rate": 9.934948737921307e-06,
"loss": 1.2326,
"step": 1280
},
{
"epoch": 0.08,
"learning_rate": 9.93284351908381e-06,
"loss": 1.2768,
"step": 1290
},
{
"epoch": 0.08,
"learning_rate": 9.93073830024631e-06,
"loss": 1.2584,
"step": 1300
},
{
"epoch": 0.08,
"learning_rate": 9.928633081408813e-06,
"loss": 1.2738,
"step": 1310
},
{
"epoch": 0.08,
"learning_rate": 9.926527862571314e-06,
"loss": 1.2329,
"step": 1320
},
{
"epoch": 0.08,
"learning_rate": 9.924422643733816e-06,
"loss": 1.2623,
"step": 1330
},
{
"epoch": 0.08,
"learning_rate": 9.922317424896319e-06,
"loss": 1.2839,
"step": 1340
},
{
"epoch": 0.08,
"learning_rate": 9.92021220605882e-06,
"loss": 1.2806,
"step": 1350
},
{
"epoch": 0.08,
"learning_rate": 9.918106987221322e-06,
"loss": 1.232,
"step": 1360
},
{
"epoch": 0.08,
"learning_rate": 9.916001768383825e-06,
"loss": 1.256,
"step": 1370
},
{
"epoch": 0.09,
"learning_rate": 9.913896549546325e-06,
"loss": 1.225,
"step": 1380
},
{
"epoch": 0.09,
"learning_rate": 9.911791330708828e-06,
"loss": 1.2247,
"step": 1390
},
{
"epoch": 0.09,
"learning_rate": 9.909686111871329e-06,
"loss": 1.2422,
"step": 1400
},
{
"epoch": 0.09,
"learning_rate": 9.907580893033831e-06,
"loss": 1.2499,
"step": 1410
},
{
"epoch": 0.09,
"learning_rate": 9.905475674196334e-06,
"loss": 1.2569,
"step": 1420
},
{
"epoch": 0.09,
"learning_rate": 9.903370455358834e-06,
"loss": 1.2771,
"step": 1430
},
{
"epoch": 0.09,
"learning_rate": 9.901265236521337e-06,
"loss": 1.2789,
"step": 1440
},
{
"epoch": 0.09,
"learning_rate": 9.89916001768384e-06,
"loss": 1.2213,
"step": 1450
},
{
"epoch": 0.09,
"learning_rate": 9.89705479884634e-06,
"loss": 1.2393,
"step": 1460
},
{
"epoch": 0.09,
"learning_rate": 9.894949580008843e-06,
"loss": 1.257,
"step": 1470
},
{
"epoch": 0.09,
"learning_rate": 9.892844361171344e-06,
"loss": 1.2572,
"step": 1480
},
{
"epoch": 0.09,
"learning_rate": 9.890739142333846e-06,
"loss": 1.2503,
"step": 1490
},
{
"epoch": 0.09,
"learning_rate": 9.888633923496349e-06,
"loss": 1.2404,
"step": 1500
},
{
"epoch": 0.09,
"learning_rate": 9.88652870465885e-06,
"loss": 1.2847,
"step": 1510
},
{
"epoch": 0.09,
"learning_rate": 9.884423485821352e-06,
"loss": 1.2551,
"step": 1520
},
{
"epoch": 0.09,
"learning_rate": 9.882318266983854e-06,
"loss": 1.2741,
"step": 1530
},
{
"epoch": 0.1,
"learning_rate": 9.880213048146355e-06,
"loss": 1.2735,
"step": 1540
},
{
"epoch": 0.1,
"learning_rate": 9.878107829308858e-06,
"loss": 1.273,
"step": 1550
},
{
"epoch": 0.1,
"learning_rate": 9.876002610471358e-06,
"loss": 1.2357,
"step": 1560
},
{
"epoch": 0.1,
"learning_rate": 9.873897391633861e-06,
"loss": 1.1864,
"step": 1570
},
{
"epoch": 0.1,
"learning_rate": 9.871792172796363e-06,
"loss": 1.277,
"step": 1580
},
{
"epoch": 0.1,
"learning_rate": 9.869686953958864e-06,
"loss": 1.2375,
"step": 1590
},
{
"epoch": 0.1,
"learning_rate": 9.867581735121367e-06,
"loss": 1.2754,
"step": 1600
},
{
"epoch": 0.1,
"learning_rate": 9.865476516283868e-06,
"loss": 1.2051,
"step": 1610
},
{
"epoch": 0.1,
"learning_rate": 9.86337129744637e-06,
"loss": 1.2579,
"step": 1620
},
{
"epoch": 0.1,
"learning_rate": 9.861266078608873e-06,
"loss": 1.2216,
"step": 1630
},
{
"epoch": 0.1,
"learning_rate": 9.859160859771373e-06,
"loss": 1.2529,
"step": 1640
},
{
"epoch": 0.1,
"learning_rate": 9.857055640933876e-06,
"loss": 1.207,
"step": 1650
},
{
"epoch": 0.1,
"learning_rate": 9.854950422096378e-06,
"loss": 1.2275,
"step": 1660
},
{
"epoch": 0.1,
"learning_rate": 9.85284520325888e-06,
"loss": 1.2769,
"step": 1670
},
{
"epoch": 0.1,
"learning_rate": 9.850739984421382e-06,
"loss": 1.2165,
"step": 1680
},
{
"epoch": 0.1,
"learning_rate": 9.848634765583883e-06,
"loss": 1.2903,
"step": 1690
},
{
"epoch": 0.11,
"learning_rate": 9.846529546746385e-06,
"loss": 1.2548,
"step": 1700
},
{
"epoch": 0.11,
"learning_rate": 9.844424327908888e-06,
"loss": 1.2652,
"step": 1710
},
{
"epoch": 0.11,
"learning_rate": 9.842319109071388e-06,
"loss": 1.2718,
"step": 1720
},
{
"epoch": 0.11,
"learning_rate": 9.84021389023389e-06,
"loss": 1.269,
"step": 1730
},
{
"epoch": 0.11,
"learning_rate": 9.838108671396393e-06,
"loss": 1.2362,
"step": 1740
},
{
"epoch": 0.11,
"learning_rate": 9.836003452558894e-06,
"loss": 1.205,
"step": 1750
},
{
"epoch": 0.11,
"learning_rate": 9.833898233721397e-06,
"loss": 1.2649,
"step": 1760
},
{
"epoch": 0.11,
"learning_rate": 9.831793014883897e-06,
"loss": 1.2517,
"step": 1770
},
{
"epoch": 0.11,
"learning_rate": 9.8296877960464e-06,
"loss": 1.2015,
"step": 1780
},
{
"epoch": 0.11,
"learning_rate": 9.827582577208902e-06,
"loss": 1.2382,
"step": 1790
},
{
"epoch": 0.11,
"learning_rate": 9.825477358371403e-06,
"loss": 1.2236,
"step": 1800
},
{
"epoch": 0.11,
"learning_rate": 9.823372139533906e-06,
"loss": 1.2503,
"step": 1810
},
{
"epoch": 0.11,
"learning_rate": 9.821266920696407e-06,
"loss": 1.2409,
"step": 1820
},
{
"epoch": 0.11,
"learning_rate": 9.819161701858909e-06,
"loss": 1.2236,
"step": 1830
},
{
"epoch": 0.11,
"learning_rate": 9.817056483021412e-06,
"loss": 1.2246,
"step": 1840
},
{
"epoch": 0.11,
"learning_rate": 9.814951264183912e-06,
"loss": 1.234,
"step": 1850
},
{
"epoch": 0.12,
"learning_rate": 9.812846045346415e-06,
"loss": 1.2384,
"step": 1860
},
{
"epoch": 0.12,
"learning_rate": 9.810740826508917e-06,
"loss": 1.2103,
"step": 1870
},
{
"epoch": 0.12,
"learning_rate": 9.808635607671418e-06,
"loss": 1.2398,
"step": 1880
},
{
"epoch": 0.12,
"learning_rate": 9.80653038883392e-06,
"loss": 1.2246,
"step": 1890
},
{
"epoch": 0.12,
"learning_rate": 9.804425169996421e-06,
"loss": 1.1894,
"step": 1900
},
{
"epoch": 0.12,
"learning_rate": 9.802319951158924e-06,
"loss": 1.262,
"step": 1910
},
{
"epoch": 0.12,
"learning_rate": 9.800214732321426e-06,
"loss": 1.2595,
"step": 1920
},
{
"epoch": 0.12,
"learning_rate": 9.798109513483927e-06,
"loss": 1.1954,
"step": 1930
},
{
"epoch": 0.12,
"learning_rate": 9.79600429464643e-06,
"loss": 1.2578,
"step": 1940
},
{
"epoch": 0.12,
"learning_rate": 9.793899075808932e-06,
"loss": 1.2286,
"step": 1950
},
{
"epoch": 0.12,
"learning_rate": 9.791793856971433e-06,
"loss": 1.2455,
"step": 1960
},
{
"epoch": 0.12,
"learning_rate": 9.789688638133936e-06,
"loss": 1.214,
"step": 1970
},
{
"epoch": 0.12,
"learning_rate": 9.787583419296436e-06,
"loss": 1.2488,
"step": 1980
},
{
"epoch": 0.12,
"learning_rate": 9.785478200458939e-06,
"loss": 1.266,
"step": 1990
},
{
"epoch": 0.12,
"learning_rate": 9.783372981621441e-06,
"loss": 1.2139,
"step": 2000
},
{
"epoch": 0.12,
"learning_rate": 9.781267762783942e-06,
"loss": 1.2516,
"step": 2010
},
{
"epoch": 0.13,
"learning_rate": 9.779162543946445e-06,
"loss": 1.2878,
"step": 2020
},
{
"epoch": 0.13,
"learning_rate": 9.777057325108945e-06,
"loss": 1.2494,
"step": 2030
},
{
"epoch": 0.13,
"learning_rate": 9.774952106271448e-06,
"loss": 1.207,
"step": 2040
},
{
"epoch": 0.13,
"learning_rate": 9.77284688743395e-06,
"loss": 1.2796,
"step": 2050
},
{
"epoch": 0.13,
"learning_rate": 9.770741668596451e-06,
"loss": 1.2285,
"step": 2060
},
{
"epoch": 0.13,
"learning_rate": 9.768636449758954e-06,
"loss": 1.2467,
"step": 2070
},
{
"epoch": 0.13,
"learning_rate": 9.766531230921456e-06,
"loss": 1.1801,
"step": 2080
},
{
"epoch": 0.13,
"learning_rate": 9.764426012083957e-06,
"loss": 1.2399,
"step": 2090
},
{
"epoch": 0.13,
"learning_rate": 9.76232079324646e-06,
"loss": 1.2359,
"step": 2100
},
{
"epoch": 0.13,
"learning_rate": 9.76021557440896e-06,
"loss": 1.2074,
"step": 2110
},
{
"epoch": 0.13,
"learning_rate": 9.758110355571463e-06,
"loss": 1.2601,
"step": 2120
},
{
"epoch": 0.13,
"learning_rate": 9.756005136733965e-06,
"loss": 1.2456,
"step": 2130
},
{
"epoch": 0.13,
"learning_rate": 9.753899917896466e-06,
"loss": 1.2479,
"step": 2140
},
{
"epoch": 0.13,
"learning_rate": 9.751794699058969e-06,
"loss": 1.2593,
"step": 2150
},
{
"epoch": 0.13,
"learning_rate": 9.749689480221471e-06,
"loss": 1.1856,
"step": 2160
},
{
"epoch": 0.13,
"learning_rate": 9.747584261383972e-06,
"loss": 1.2634,
"step": 2170
},
{
"epoch": 0.13,
"learning_rate": 9.745479042546474e-06,
"loss": 1.2046,
"step": 2180
},
{
"epoch": 0.14,
"learning_rate": 9.743373823708975e-06,
"loss": 1.2753,
"step": 2190
},
{
"epoch": 0.14,
"learning_rate": 9.741268604871478e-06,
"loss": 1.2393,
"step": 2200
},
{
"epoch": 0.14,
"learning_rate": 9.739163386033979e-06,
"loss": 1.224,
"step": 2210
},
{
"epoch": 0.14,
"learning_rate": 9.737058167196481e-06,
"loss": 1.2767,
"step": 2220
},
{
"epoch": 0.14,
"learning_rate": 9.734952948358982e-06,
"loss": 1.2584,
"step": 2230
},
{
"epoch": 0.14,
"learning_rate": 9.732847729521484e-06,
"loss": 1.2717,
"step": 2240
},
{
"epoch": 0.14,
"learning_rate": 9.730742510683985e-06,
"loss": 1.2364,
"step": 2250
},
{
"epoch": 0.14,
"learning_rate": 9.728637291846488e-06,
"loss": 1.2354,
"step": 2260
},
{
"epoch": 0.14,
"learning_rate": 9.72653207300899e-06,
"loss": 1.2347,
"step": 2270
},
{
"epoch": 0.14,
"learning_rate": 9.724426854171491e-06,
"loss": 1.2537,
"step": 2280
},
{
"epoch": 0.14,
"learning_rate": 9.722321635333993e-06,
"loss": 1.2186,
"step": 2290
},
{
"epoch": 0.14,
"learning_rate": 9.720216416496496e-06,
"loss": 1.2351,
"step": 2300
},
{
"epoch": 0.14,
"learning_rate": 9.718111197658997e-06,
"loss": 1.2325,
"step": 2310
},
{
"epoch": 0.14,
"learning_rate": 9.7160059788215e-06,
"loss": 1.1996,
"step": 2320
},
{
"epoch": 0.14,
"learning_rate": 9.713900759984e-06,
"loss": 1.2023,
"step": 2330
},
{
"epoch": 0.14,
"learning_rate": 9.711795541146503e-06,
"loss": 1.2527,
"step": 2340
},
{
"epoch": 0.15,
"learning_rate": 9.709690322309005e-06,
"loss": 1.2281,
"step": 2350
},
{
"epoch": 0.15,
"learning_rate": 9.707585103471506e-06,
"loss": 1.2382,
"step": 2360
},
{
"epoch": 0.15,
"learning_rate": 9.705479884634008e-06,
"loss": 1.2405,
"step": 2370
},
{
"epoch": 0.15,
"learning_rate": 9.70337466579651e-06,
"loss": 1.248,
"step": 2380
},
{
"epoch": 0.15,
"learning_rate": 9.701269446959012e-06,
"loss": 1.224,
"step": 2390
},
{
"epoch": 0.15,
"learning_rate": 9.699164228121514e-06,
"loss": 1.22,
"step": 2400
},
{
"epoch": 0.15,
"learning_rate": 9.697059009284015e-06,
"loss": 1.219,
"step": 2410
},
{
"epoch": 0.15,
"learning_rate": 9.694953790446517e-06,
"loss": 1.2518,
"step": 2420
},
{
"epoch": 0.15,
"learning_rate": 9.69284857160902e-06,
"loss": 1.219,
"step": 2430
},
{
"epoch": 0.15,
"learning_rate": 9.69074335277152e-06,
"loss": 1.2168,
"step": 2440
},
{
"epoch": 0.15,
"learning_rate": 9.688638133934023e-06,
"loss": 1.2469,
"step": 2450
},
{
"epoch": 0.15,
"learning_rate": 9.686532915096524e-06,
"loss": 1.2381,
"step": 2460
},
{
"epoch": 0.15,
"learning_rate": 9.684427696259027e-06,
"loss": 1.2001,
"step": 2470
},
{
"epoch": 0.15,
"learning_rate": 9.682322477421529e-06,
"loss": 1.2004,
"step": 2480
},
{
"epoch": 0.15,
"learning_rate": 9.68021725858403e-06,
"loss": 1.2409,
"step": 2490
},
{
"epoch": 0.15,
"learning_rate": 9.678112039746532e-06,
"loss": 1.2389,
"step": 2500
},
{
"epoch": 0.16,
"learning_rate": 9.676006820909035e-06,
"loss": 1.242,
"step": 2510
},
{
"epoch": 0.16,
"learning_rate": 9.673901602071536e-06,
"loss": 1.2372,
"step": 2520
},
{
"epoch": 0.16,
"learning_rate": 9.671796383234038e-06,
"loss": 1.2223,
"step": 2530
},
{
"epoch": 0.16,
"learning_rate": 9.669691164396539e-06,
"loss": 1.2506,
"step": 2540
},
{
"epoch": 0.16,
"learning_rate": 9.667585945559041e-06,
"loss": 1.2093,
"step": 2550
},
{
"epoch": 0.16,
"learning_rate": 9.665480726721544e-06,
"loss": 1.2171,
"step": 2560
},
{
"epoch": 0.16,
"learning_rate": 9.663375507884045e-06,
"loss": 1.2363,
"step": 2570
},
{
"epoch": 0.16,
"learning_rate": 9.661270289046547e-06,
"loss": 1.2978,
"step": 2580
},
{
"epoch": 0.16,
"learning_rate": 9.65916507020905e-06,
"loss": 1.2216,
"step": 2590
},
{
"epoch": 0.16,
"learning_rate": 9.65705985137155e-06,
"loss": 1.1937,
"step": 2600
},
{
"epoch": 0.16,
"learning_rate": 9.654954632534053e-06,
"loss": 1.2366,
"step": 2610
},
{
"epoch": 0.16,
"learning_rate": 9.652849413696554e-06,
"loss": 1.2465,
"step": 2620
},
{
"epoch": 0.16,
"learning_rate": 9.650744194859056e-06,
"loss": 1.2704,
"step": 2630
},
{
"epoch": 0.16,
"learning_rate": 9.648638976021559e-06,
"loss": 1.2113,
"step": 2640
},
{
"epoch": 0.16,
"learning_rate": 9.64653375718406e-06,
"loss": 1.2679,
"step": 2650
},
{
"epoch": 0.16,
"learning_rate": 9.644428538346562e-06,
"loss": 1.2005,
"step": 2660
},
{
"epoch": 0.17,
"learning_rate": 9.642323319509063e-06,
"loss": 1.2474,
"step": 2670
},
{
"epoch": 0.17,
"learning_rate": 9.640218100671565e-06,
"loss": 1.2308,
"step": 2680
},
{
"epoch": 0.17,
"learning_rate": 9.638112881834068e-06,
"loss": 1.2391,
"step": 2690
},
{
"epoch": 0.17,
"learning_rate": 9.636007662996569e-06,
"loss": 1.1968,
"step": 2700
},
{
"epoch": 0.17,
"learning_rate": 9.633902444159071e-06,
"loss": 1.2001,
"step": 2710
},
{
"epoch": 0.17,
"learning_rate": 9.631797225321574e-06,
"loss": 1.2688,
"step": 2720
},
{
"epoch": 0.17,
"learning_rate": 9.629692006484075e-06,
"loss": 1.2646,
"step": 2730
},
{
"epoch": 0.17,
"learning_rate": 9.627586787646577e-06,
"loss": 1.2606,
"step": 2740
},
{
"epoch": 0.17,
"learning_rate": 9.625481568809078e-06,
"loss": 1.1915,
"step": 2750
},
{
"epoch": 0.17,
"learning_rate": 9.62337634997158e-06,
"loss": 1.204,
"step": 2760
},
{
"epoch": 0.17,
"learning_rate": 9.621271131134083e-06,
"loss": 1.2128,
"step": 2770
},
{
"epoch": 0.17,
"learning_rate": 9.619165912296584e-06,
"loss": 1.2116,
"step": 2780
},
{
"epoch": 0.17,
"learning_rate": 9.617060693459086e-06,
"loss": 1.2287,
"step": 2790
},
{
"epoch": 0.17,
"learning_rate": 9.614955474621589e-06,
"loss": 1.2443,
"step": 2800
},
{
"epoch": 0.17,
"learning_rate": 9.61285025578409e-06,
"loss": 1.2926,
"step": 2810
},
{
"epoch": 0.17,
"learning_rate": 9.610745036946592e-06,
"loss": 1.2195,
"step": 2820
},
{
"epoch": 0.18,
"learning_rate": 9.608639818109093e-06,
"loss": 1.2345,
"step": 2830
},
{
"epoch": 0.18,
"learning_rate": 9.606534599271595e-06,
"loss": 1.2588,
"step": 2840
},
{
"epoch": 0.18,
"learning_rate": 9.604429380434098e-06,
"loss": 1.2392,
"step": 2850
},
{
"epoch": 0.18,
"learning_rate": 9.602324161596599e-06,
"loss": 1.2529,
"step": 2860
},
{
"epoch": 0.18,
"learning_rate": 9.600218942759101e-06,
"loss": 1.2119,
"step": 2870
},
{
"epoch": 0.18,
"learning_rate": 9.598113723921602e-06,
"loss": 1.2416,
"step": 2880
},
{
"epoch": 0.18,
"learning_rate": 9.596008505084104e-06,
"loss": 1.2111,
"step": 2890
},
{
"epoch": 0.18,
"learning_rate": 9.593903286246607e-06,
"loss": 1.2493,
"step": 2900
},
{
"epoch": 0.18,
"learning_rate": 9.591798067409108e-06,
"loss": 1.2481,
"step": 2910
},
{
"epoch": 0.18,
"learning_rate": 9.58969284857161e-06,
"loss": 1.2265,
"step": 2920
},
{
"epoch": 0.18,
"learning_rate": 9.587587629734113e-06,
"loss": 1.2549,
"step": 2930
},
{
"epoch": 0.18,
"learning_rate": 9.585482410896613e-06,
"loss": 1.2474,
"step": 2940
},
{
"epoch": 0.18,
"learning_rate": 9.583377192059116e-06,
"loss": 1.1773,
"step": 2950
},
{
"epoch": 0.18,
"learning_rate": 9.581271973221617e-06,
"loss": 1.2612,
"step": 2960
},
{
"epoch": 0.18,
"learning_rate": 9.57916675438412e-06,
"loss": 1.2247,
"step": 2970
},
{
"epoch": 0.18,
"learning_rate": 9.577061535546622e-06,
"loss": 1.2075,
"step": 2980
},
{
"epoch": 0.19,
"learning_rate": 9.574956316709123e-06,
"loss": 1.1812,
"step": 2990
},
{
"epoch": 0.19,
"learning_rate": 9.572851097871625e-06,
"loss": 1.2058,
"step": 3000
},
{
"epoch": 0.19,
"learning_rate": 9.570745879034128e-06,
"loss": 1.2781,
"step": 3010
},
{
"epoch": 0.19,
"learning_rate": 9.568640660196628e-06,
"loss": 1.2572,
"step": 3020
},
{
"epoch": 0.19,
"learning_rate": 9.566535441359131e-06,
"loss": 1.2794,
"step": 3030
},
{
"epoch": 0.19,
"learning_rate": 9.564430222521632e-06,
"loss": 1.2136,
"step": 3040
},
{
"epoch": 0.19,
"learning_rate": 9.562325003684134e-06,
"loss": 1.2632,
"step": 3050
},
{
"epoch": 0.19,
"learning_rate": 9.560219784846637e-06,
"loss": 1.2584,
"step": 3060
},
{
"epoch": 0.19,
"learning_rate": 9.558114566009137e-06,
"loss": 1.286,
"step": 3070
},
{
"epoch": 0.19,
"learning_rate": 9.55600934717164e-06,
"loss": 1.247,
"step": 3080
},
{
"epoch": 0.19,
"learning_rate": 9.55390412833414e-06,
"loss": 1.2715,
"step": 3090
},
{
"epoch": 0.19,
"learning_rate": 9.551798909496643e-06,
"loss": 1.2184,
"step": 3100
},
{
"epoch": 0.19,
"learning_rate": 9.549693690659146e-06,
"loss": 1.261,
"step": 3110
},
{
"epoch": 0.19,
"learning_rate": 9.547588471821647e-06,
"loss": 1.2183,
"step": 3120
},
{
"epoch": 0.19,
"learning_rate": 9.545483252984149e-06,
"loss": 1.1887,
"step": 3130
},
{
"epoch": 0.19,
"learning_rate": 9.543378034146652e-06,
"loss": 1.2405,
"step": 3140
},
{
"epoch": 0.19,
"learning_rate": 9.541272815309152e-06,
"loss": 1.2499,
"step": 3150
},
{
"epoch": 0.2,
"learning_rate": 9.539167596471653e-06,
"loss": 1.2164,
"step": 3160
},
{
"epoch": 0.2,
"learning_rate": 9.537062377634156e-06,
"loss": 1.2614,
"step": 3170
},
{
"epoch": 0.2,
"learning_rate": 9.534957158796657e-06,
"loss": 1.2475,
"step": 3180
},
{
"epoch": 0.2,
"learning_rate": 9.532851939959159e-06,
"loss": 1.2559,
"step": 3190
},
{
"epoch": 0.2,
"learning_rate": 9.530746721121662e-06,
"loss": 1.2457,
"step": 3200
},
{
"epoch": 0.2,
"learning_rate": 9.528641502284162e-06,
"loss": 1.2228,
"step": 3210
},
{
"epoch": 0.2,
"learning_rate": 9.526536283446665e-06,
"loss": 1.219,
"step": 3220
},
{
"epoch": 0.2,
"learning_rate": 9.524431064609166e-06,
"loss": 1.2255,
"step": 3230
},
{
"epoch": 0.2,
"learning_rate": 9.522325845771668e-06,
"loss": 1.1923,
"step": 3240
},
{
"epoch": 0.2,
"learning_rate": 9.52022062693417e-06,
"loss": 1.1996,
"step": 3250
},
{
"epoch": 0.2,
"learning_rate": 9.518115408096671e-06,
"loss": 1.2186,
"step": 3260
},
{
"epoch": 0.2,
"learning_rate": 9.516010189259174e-06,
"loss": 1.2384,
"step": 3270
},
{
"epoch": 0.2,
"learning_rate": 9.513904970421676e-06,
"loss": 1.2119,
"step": 3280
},
{
"epoch": 0.2,
"learning_rate": 9.511799751584177e-06,
"loss": 1.2455,
"step": 3290
},
{
"epoch": 0.2,
"learning_rate": 9.50969453274668e-06,
"loss": 1.2314,
"step": 3300
},
{
"epoch": 0.2,
"learning_rate": 9.50758931390918e-06,
"loss": 1.1995,
"step": 3310
},
{
"epoch": 0.21,
"learning_rate": 9.505484095071683e-06,
"loss": 1.2308,
"step": 3320
},
{
"epoch": 0.21,
"learning_rate": 9.503378876234186e-06,
"loss": 1.1957,
"step": 3330
},
{
"epoch": 0.21,
"learning_rate": 9.501273657396686e-06,
"loss": 1.2557,
"step": 3340
},
{
"epoch": 0.21,
"learning_rate": 9.499168438559189e-06,
"loss": 1.2351,
"step": 3350
},
{
"epoch": 0.21,
"learning_rate": 9.497063219721691e-06,
"loss": 1.2085,
"step": 3360
},
{
"epoch": 0.21,
"learning_rate": 9.494958000884192e-06,
"loss": 1.2241,
"step": 3370
},
{
"epoch": 0.21,
"learning_rate": 9.492852782046695e-06,
"loss": 1.1909,
"step": 3380
},
{
"epoch": 0.21,
"learning_rate": 9.490747563209195e-06,
"loss": 1.1886,
"step": 3390
},
{
"epoch": 0.21,
"learning_rate": 9.488642344371698e-06,
"loss": 1.2161,
"step": 3400
},
{
"epoch": 0.21,
"learning_rate": 9.4865371255342e-06,
"loss": 1.2718,
"step": 3410
},
{
"epoch": 0.21,
"learning_rate": 9.484431906696701e-06,
"loss": 1.2007,
"step": 3420
},
{
"epoch": 0.21,
"learning_rate": 9.482326687859204e-06,
"loss": 1.2038,
"step": 3430
},
{
"epoch": 0.21,
"learning_rate": 9.480221469021705e-06,
"loss": 1.2153,
"step": 3440
},
{
"epoch": 0.21,
"learning_rate": 9.478116250184207e-06,
"loss": 1.1932,
"step": 3450
},
{
"epoch": 0.21,
"learning_rate": 9.47601103134671e-06,
"loss": 1.255,
"step": 3460
},
{
"epoch": 0.21,
"learning_rate": 9.47390581250921e-06,
"loss": 1.2738,
"step": 3470
},
{
"epoch": 0.22,
"learning_rate": 9.471800593671713e-06,
"loss": 1.2533,
"step": 3480
},
{
"epoch": 0.22,
"learning_rate": 9.469695374834215e-06,
"loss": 1.2408,
"step": 3490
},
{
"epoch": 0.22,
"learning_rate": 9.467590155996716e-06,
"loss": 1.2331,
"step": 3500
},
{
"epoch": 0.22,
"learning_rate": 9.465484937159219e-06,
"loss": 1.2205,
"step": 3510
},
{
"epoch": 0.22,
"learning_rate": 9.46337971832172e-06,
"loss": 1.2569,
"step": 3520
},
{
"epoch": 0.22,
"learning_rate": 9.461274499484222e-06,
"loss": 1.2245,
"step": 3530
},
{
"epoch": 0.22,
"learning_rate": 9.459169280646724e-06,
"loss": 1.2192,
"step": 3540
},
{
"epoch": 0.22,
"learning_rate": 9.457064061809225e-06,
"loss": 1.2518,
"step": 3550
},
{
"epoch": 0.22,
"learning_rate": 9.454958842971728e-06,
"loss": 1.2619,
"step": 3560
},
{
"epoch": 0.22,
"learning_rate": 9.45285362413423e-06,
"loss": 1.1859,
"step": 3570
},
{
"epoch": 0.22,
"learning_rate": 9.450748405296731e-06,
"loss": 1.2216,
"step": 3580
},
{
"epoch": 0.22,
"learning_rate": 9.448643186459234e-06,
"loss": 1.221,
"step": 3590
},
{
"epoch": 0.22,
"learning_rate": 9.446537967621734e-06,
"loss": 1.2224,
"step": 3600
},
{
"epoch": 0.22,
"learning_rate": 9.444432748784237e-06,
"loss": 1.2324,
"step": 3610
},
{
"epoch": 0.22,
"learning_rate": 9.44232752994674e-06,
"loss": 1.1747,
"step": 3620
},
{
"epoch": 0.22,
"learning_rate": 9.44022231110924e-06,
"loss": 1.1962,
"step": 3630
},
{
"epoch": 0.23,
"learning_rate": 9.438117092271743e-06,
"loss": 1.2235,
"step": 3640
},
{
"epoch": 0.23,
"learning_rate": 9.436011873434245e-06,
"loss": 1.2081,
"step": 3650
},
{
"epoch": 0.23,
"learning_rate": 9.433906654596746e-06,
"loss": 1.2398,
"step": 3660
},
{
"epoch": 0.23,
"learning_rate": 9.431801435759248e-06,
"loss": 1.248,
"step": 3670
},
{
"epoch": 0.23,
"learning_rate": 9.42969621692175e-06,
"loss": 1.1936,
"step": 3680
},
{
"epoch": 0.23,
"learning_rate": 9.427590998084252e-06,
"loss": 1.2317,
"step": 3690
},
{
"epoch": 0.23,
"learning_rate": 9.425485779246754e-06,
"loss": 1.2139,
"step": 3700
},
{
"epoch": 0.23,
"learning_rate": 9.423380560409255e-06,
"loss": 1.1601,
"step": 3710
},
{
"epoch": 0.23,
"learning_rate": 9.421275341571758e-06,
"loss": 1.2127,
"step": 3720
},
{
"epoch": 0.23,
"learning_rate": 9.419170122734258e-06,
"loss": 1.2082,
"step": 3730
},
{
"epoch": 0.23,
"learning_rate": 9.41706490389676e-06,
"loss": 1.1971,
"step": 3740
},
{
"epoch": 0.23,
"learning_rate": 9.414959685059263e-06,
"loss": 1.2289,
"step": 3750
},
{
"epoch": 0.23,
"learning_rate": 9.412854466221764e-06,
"loss": 1.2133,
"step": 3760
},
{
"epoch": 0.23,
"learning_rate": 9.410749247384267e-06,
"loss": 1.2111,
"step": 3770
},
{
"epoch": 0.23,
"learning_rate": 9.408644028546769e-06,
"loss": 1.2342,
"step": 3780
},
{
"epoch": 0.23,
"learning_rate": 9.40653880970927e-06,
"loss": 1.217,
"step": 3790
},
{
"epoch": 0.24,
"learning_rate": 9.404433590871772e-06,
"loss": 1.2651,
"step": 3800
},
{
"epoch": 0.24,
"learning_rate": 9.402328372034273e-06,
"loss": 1.2259,
"step": 3810
},
{
"epoch": 0.24,
"learning_rate": 9.400223153196776e-06,
"loss": 1.2434,
"step": 3820
},
{
"epoch": 0.24,
"learning_rate": 9.398117934359278e-06,
"loss": 1.2199,
"step": 3830
},
{
"epoch": 0.24,
"learning_rate": 9.396012715521779e-06,
"loss": 1.2299,
"step": 3840
},
{
"epoch": 0.24,
"learning_rate": 9.393907496684282e-06,
"loss": 1.2156,
"step": 3850
},
{
"epoch": 0.24,
"learning_rate": 9.391802277846784e-06,
"loss": 1.2402,
"step": 3860
},
{
"epoch": 0.24,
"learning_rate": 9.389697059009285e-06,
"loss": 1.237,
"step": 3870
},
{
"epoch": 0.24,
"learning_rate": 9.387591840171787e-06,
"loss": 1.2141,
"step": 3880
},
{
"epoch": 0.24,
"learning_rate": 9.385486621334288e-06,
"loss": 1.2253,
"step": 3890
},
{
"epoch": 0.24,
"learning_rate": 9.38338140249679e-06,
"loss": 1.217,
"step": 3900
},
{
"epoch": 0.24,
"learning_rate": 9.381276183659293e-06,
"loss": 1.1919,
"step": 3910
},
{
"epoch": 0.24,
"learning_rate": 9.379170964821794e-06,
"loss": 1.1663,
"step": 3920
},
{
"epoch": 0.24,
"learning_rate": 9.377065745984296e-06,
"loss": 1.2338,
"step": 3930
},
{
"epoch": 0.24,
"learning_rate": 9.374960527146797e-06,
"loss": 1.2399,
"step": 3940
},
{
"epoch": 0.24,
"learning_rate": 9.3728553083093e-06,
"loss": 1.1608,
"step": 3950
},
{
"epoch": 0.25,
"learning_rate": 9.370750089471802e-06,
"loss": 1.1752,
"step": 3960
},
{
"epoch": 0.25,
"learning_rate": 9.368644870634303e-06,
"loss": 1.2364,
"step": 3970
},
{
"epoch": 0.25,
"learning_rate": 9.366539651796806e-06,
"loss": 1.2053,
"step": 3980
},
{
"epoch": 0.25,
"learning_rate": 9.364434432959308e-06,
"loss": 1.2431,
"step": 3990
},
{
"epoch": 0.25,
"learning_rate": 9.362329214121809e-06,
"loss": 1.1948,
"step": 4000
},
{
"epoch": 0.25,
"learning_rate": 9.360223995284311e-06,
"loss": 1.2248,
"step": 4010
},
{
"epoch": 0.25,
"learning_rate": 9.358118776446812e-06,
"loss": 1.2057,
"step": 4020
},
{
"epoch": 0.25,
"learning_rate": 9.356013557609315e-06,
"loss": 1.2373,
"step": 4030
},
{
"epoch": 0.25,
"learning_rate": 9.353908338771817e-06,
"loss": 1.1993,
"step": 4040
},
{
"epoch": 0.25,
"learning_rate": 9.351803119934318e-06,
"loss": 1.1474,
"step": 4050
},
{
"epoch": 0.25,
"learning_rate": 9.34969790109682e-06,
"loss": 1.2084,
"step": 4060
},
{
"epoch": 0.25,
"learning_rate": 9.347592682259323e-06,
"loss": 1.224,
"step": 4070
},
{
"epoch": 0.25,
"learning_rate": 9.345487463421824e-06,
"loss": 1.206,
"step": 4080
},
{
"epoch": 0.25,
"learning_rate": 9.343382244584326e-06,
"loss": 1.2225,
"step": 4090
},
{
"epoch": 0.25,
"learning_rate": 9.341277025746827e-06,
"loss": 1.2189,
"step": 4100
},
{
"epoch": 0.25,
"learning_rate": 9.339171806909328e-06,
"loss": 1.25,
"step": 4110
},
{
"epoch": 0.25,
"learning_rate": 9.33706658807183e-06,
"loss": 1.251,
"step": 4120
},
{
"epoch": 0.26,
"learning_rate": 9.334961369234333e-06,
"loss": 1.2048,
"step": 4130
},
{
"epoch": 0.26,
"learning_rate": 9.332856150396834e-06,
"loss": 1.2369,
"step": 4140
},
{
"epoch": 0.26,
"learning_rate": 9.330750931559336e-06,
"loss": 1.2427,
"step": 4150
},
{
"epoch": 0.26,
"learning_rate": 9.328645712721837e-06,
"loss": 1.2873,
"step": 4160
},
{
"epoch": 0.26,
"learning_rate": 9.32654049388434e-06,
"loss": 1.1579,
"step": 4170
},
{
"epoch": 0.26,
"learning_rate": 9.324435275046842e-06,
"loss": 1.2025,
"step": 4180
},
{
"epoch": 0.26,
"learning_rate": 9.322330056209343e-06,
"loss": 1.209,
"step": 4190
},
{
"epoch": 0.26,
"learning_rate": 9.320224837371845e-06,
"loss": 1.2015,
"step": 4200
},
{
"epoch": 0.26,
"learning_rate": 9.318119618534348e-06,
"loss": 1.2509,
"step": 4210
},
{
"epoch": 0.26,
"learning_rate": 9.316014399696849e-06,
"loss": 1.2696,
"step": 4220
},
{
"epoch": 0.26,
"learning_rate": 9.313909180859351e-06,
"loss": 1.2281,
"step": 4230
},
{
"epoch": 0.26,
"learning_rate": 9.311803962021852e-06,
"loss": 1.2089,
"step": 4240
},
{
"epoch": 0.26,
"learning_rate": 9.309698743184354e-06,
"loss": 1.2831,
"step": 4250
},
{
"epoch": 0.26,
"learning_rate": 9.307593524346857e-06,
"loss": 1.2757,
"step": 4260
},
{
"epoch": 0.26,
"learning_rate": 9.305488305509358e-06,
"loss": 1.2267,
"step": 4270
},
{
"epoch": 0.26,
"learning_rate": 9.30338308667186e-06,
"loss": 1.1837,
"step": 4280
},
{
"epoch": 0.27,
"learning_rate": 9.301277867834361e-06,
"loss": 1.2245,
"step": 4290
},
{
"epoch": 0.27,
"learning_rate": 9.299172648996863e-06,
"loss": 1.157,
"step": 4300
},
{
"epoch": 0.27,
"learning_rate": 9.297067430159366e-06,
"loss": 1.215,
"step": 4310
},
{
"epoch": 0.27,
"learning_rate": 9.294962211321867e-06,
"loss": 1.2421,
"step": 4320
},
{
"epoch": 0.27,
"learning_rate": 9.29285699248437e-06,
"loss": 1.2581,
"step": 4330
},
{
"epoch": 0.27,
"learning_rate": 9.290751773646872e-06,
"loss": 1.1966,
"step": 4340
},
{
"epoch": 0.27,
"learning_rate": 9.288646554809373e-06,
"loss": 1.2494,
"step": 4350
},
{
"epoch": 0.27,
"learning_rate": 9.286541335971875e-06,
"loss": 1.1633,
"step": 4360
},
{
"epoch": 0.27,
"learning_rate": 9.284436117134376e-06,
"loss": 1.2258,
"step": 4370
},
{
"epoch": 0.27,
"learning_rate": 9.282330898296878e-06,
"loss": 1.2703,
"step": 4380
},
{
"epoch": 0.27,
"learning_rate": 9.280225679459381e-06,
"loss": 1.1973,
"step": 4390
},
{
"epoch": 0.27,
"learning_rate": 9.278120460621882e-06,
"loss": 1.2614,
"step": 4400
},
{
"epoch": 0.27,
"learning_rate": 9.276015241784384e-06,
"loss": 1.243,
"step": 4410
},
{
"epoch": 0.27,
"learning_rate": 9.273910022946887e-06,
"loss": 1.2473,
"step": 4420
},
{
"epoch": 0.27,
"learning_rate": 9.271804804109387e-06,
"loss": 1.2269,
"step": 4430
},
{
"epoch": 0.27,
"learning_rate": 9.26969958527189e-06,
"loss": 1.2466,
"step": 4440
},
{
"epoch": 0.28,
"learning_rate": 9.26759436643439e-06,
"loss": 1.2362,
"step": 4450
},
{
"epoch": 0.28,
"learning_rate": 9.265489147596893e-06,
"loss": 1.2277,
"step": 4460
},
{
"epoch": 0.28,
"learning_rate": 9.263383928759396e-06,
"loss": 1.1939,
"step": 4470
},
{
"epoch": 0.28,
"learning_rate": 9.261278709921897e-06,
"loss": 1.2013,
"step": 4480
},
{
"epoch": 0.28,
"learning_rate": 9.259173491084399e-06,
"loss": 1.2057,
"step": 4490
},
{
"epoch": 0.28,
"learning_rate": 9.257068272246902e-06,
"loss": 1.2276,
"step": 4500
},
{
"epoch": 0.28,
"learning_rate": 9.254963053409402e-06,
"loss": 1.2029,
"step": 4510
},
{
"epoch": 0.28,
"learning_rate": 9.252857834571905e-06,
"loss": 1.2285,
"step": 4520
},
{
"epoch": 0.28,
"learning_rate": 9.250752615734406e-06,
"loss": 1.2078,
"step": 4530
},
{
"epoch": 0.28,
"learning_rate": 9.248647396896908e-06,
"loss": 1.2317,
"step": 4540
},
{
"epoch": 0.28,
"learning_rate": 9.24654217805941e-06,
"loss": 1.2266,
"step": 4550
},
{
"epoch": 0.28,
"learning_rate": 9.244436959221911e-06,
"loss": 1.212,
"step": 4560
},
{
"epoch": 0.28,
"learning_rate": 9.242331740384414e-06,
"loss": 1.1849,
"step": 4570
},
{
"epoch": 0.28,
"learning_rate": 9.240226521546915e-06,
"loss": 1.2238,
"step": 4580
},
{
"epoch": 0.28,
"learning_rate": 9.238121302709417e-06,
"loss": 1.221,
"step": 4590
},
{
"epoch": 0.28,
"learning_rate": 9.23601608387192e-06,
"loss": 1.2356,
"step": 4600
},
{
"epoch": 0.29,
"learning_rate": 9.23391086503442e-06,
"loss": 1.2287,
"step": 4610
},
{
"epoch": 0.29,
"learning_rate": 9.231805646196923e-06,
"loss": 1.2226,
"step": 4620
},
{
"epoch": 0.29,
"learning_rate": 9.229700427359426e-06,
"loss": 1.159,
"step": 4630
},
{
"epoch": 0.29,
"learning_rate": 9.227595208521926e-06,
"loss": 1.2239,
"step": 4640
},
{
"epoch": 0.29,
"learning_rate": 9.225489989684429e-06,
"loss": 1.2547,
"step": 4650
},
{
"epoch": 0.29,
"learning_rate": 9.22338477084693e-06,
"loss": 1.1689,
"step": 4660
},
{
"epoch": 0.29,
"learning_rate": 9.221279552009432e-06,
"loss": 1.1546,
"step": 4670
},
{
"epoch": 0.29,
"learning_rate": 9.219174333171935e-06,
"loss": 1.2197,
"step": 4680
},
{
"epoch": 0.29,
"learning_rate": 9.217069114334436e-06,
"loss": 1.1826,
"step": 4690
},
{
"epoch": 0.29,
"learning_rate": 9.214963895496938e-06,
"loss": 1.2543,
"step": 4700
},
{
"epoch": 0.29,
"learning_rate": 9.21285867665944e-06,
"loss": 1.1747,
"step": 4710
},
{
"epoch": 0.29,
"learning_rate": 9.210753457821941e-06,
"loss": 1.2486,
"step": 4720
},
{
"epoch": 0.29,
"learning_rate": 9.208648238984444e-06,
"loss": 1.2506,
"step": 4730
},
{
"epoch": 0.29,
"learning_rate": 9.206543020146945e-06,
"loss": 1.2257,
"step": 4740
},
{
"epoch": 0.29,
"learning_rate": 9.204437801309447e-06,
"loss": 1.183,
"step": 4750
},
{
"epoch": 0.29,
"learning_rate": 9.20233258247195e-06,
"loss": 1.2092,
"step": 4760
},
{
"epoch": 0.3,
"learning_rate": 9.20022736363445e-06,
"loss": 1.1907,
"step": 4770
},
{
"epoch": 0.3,
"learning_rate": 9.198122144796953e-06,
"loss": 1.1898,
"step": 4780
},
{
"epoch": 0.3,
"learning_rate": 9.196016925959454e-06,
"loss": 1.1834,
"step": 4790
},
{
"epoch": 0.3,
"learning_rate": 9.193911707121956e-06,
"loss": 1.2316,
"step": 4800
},
{
"epoch": 0.3,
"learning_rate": 9.191806488284459e-06,
"loss": 1.2018,
"step": 4810
},
{
"epoch": 0.3,
"learning_rate": 9.18970126944696e-06,
"loss": 1.2066,
"step": 4820
},
{
"epoch": 0.3,
"learning_rate": 9.187596050609462e-06,
"loss": 1.182,
"step": 4830
},
{
"epoch": 0.3,
"learning_rate": 9.185490831771965e-06,
"loss": 1.2089,
"step": 4840
},
{
"epoch": 0.3,
"learning_rate": 9.183385612934465e-06,
"loss": 1.1919,
"step": 4850
},
{
"epoch": 0.3,
"learning_rate": 9.181280394096968e-06,
"loss": 1.2734,
"step": 4860
},
{
"epoch": 0.3,
"learning_rate": 9.179175175259469e-06,
"loss": 1.25,
"step": 4870
},
{
"epoch": 0.3,
"learning_rate": 9.177069956421971e-06,
"loss": 1.213,
"step": 4880
},
{
"epoch": 0.3,
"learning_rate": 9.174964737584474e-06,
"loss": 1.2126,
"step": 4890
},
{
"epoch": 0.3,
"learning_rate": 9.172859518746974e-06,
"loss": 1.203,
"step": 4900
},
{
"epoch": 0.3,
"learning_rate": 9.170754299909477e-06,
"loss": 1.2718,
"step": 4910
},
{
"epoch": 0.3,
"learning_rate": 9.16864908107198e-06,
"loss": 1.1845,
"step": 4920
},
{
"epoch": 0.31,
"learning_rate": 9.16654386223448e-06,
"loss": 1.1991,
"step": 4930
},
{
"epoch": 0.31,
"learning_rate": 9.164438643396983e-06,
"loss": 1.2049,
"step": 4940
},
{
"epoch": 0.31,
"learning_rate": 9.162333424559484e-06,
"loss": 1.2345,
"step": 4950
},
{
"epoch": 0.31,
"learning_rate": 9.160228205721986e-06,
"loss": 1.2284,
"step": 4960
},
{
"epoch": 0.31,
"learning_rate": 9.158122986884489e-06,
"loss": 1.2253,
"step": 4970
},
{
"epoch": 0.31,
"learning_rate": 9.15601776804699e-06,
"loss": 1.2038,
"step": 4980
},
{
"epoch": 0.31,
"learning_rate": 9.153912549209492e-06,
"loss": 1.2353,
"step": 4990
},
{
"epoch": 0.31,
"learning_rate": 9.151807330371993e-06,
"loss": 1.2029,
"step": 5000
},
{
"epoch": 0.31,
"learning_rate": 9.149702111534495e-06,
"loss": 1.1451,
"step": 5010
},
{
"epoch": 0.31,
"learning_rate": 9.147596892696998e-06,
"loss": 1.186,
"step": 5020
},
{
"epoch": 0.31,
"learning_rate": 9.145491673859498e-06,
"loss": 1.2406,
"step": 5030
},
{
"epoch": 0.31,
"learning_rate": 9.143386455022e-06,
"loss": 1.1957,
"step": 5040
},
{
"epoch": 0.31,
"learning_rate": 9.141281236184502e-06,
"loss": 1.19,
"step": 5050
},
{
"epoch": 0.31,
"learning_rate": 9.139176017347004e-06,
"loss": 1.2007,
"step": 5060
},
{
"epoch": 0.31,
"learning_rate": 9.137070798509505e-06,
"loss": 1.2259,
"step": 5070
},
{
"epoch": 0.31,
"learning_rate": 9.134965579672008e-06,
"loss": 1.2204,
"step": 5080
},
{
"epoch": 0.32,
"learning_rate": 9.132860360834508e-06,
"loss": 1.2021,
"step": 5090
},
{
"epoch": 0.32,
"learning_rate": 9.13075514199701e-06,
"loss": 1.2208,
"step": 5100
},
{
"epoch": 0.32,
"learning_rate": 9.128649923159513e-06,
"loss": 1.1702,
"step": 5110
},
{
"epoch": 0.32,
"learning_rate": 9.126544704322014e-06,
"loss": 1.2513,
"step": 5120
},
{
"epoch": 0.32,
"learning_rate": 9.124439485484517e-06,
"loss": 1.1855,
"step": 5130
},
{
"epoch": 0.32,
"learning_rate": 9.122334266647017e-06,
"loss": 1.1868,
"step": 5140
},
{
"epoch": 0.32,
"learning_rate": 9.12022904780952e-06,
"loss": 1.1861,
"step": 5150
},
{
"epoch": 0.32,
"learning_rate": 9.118123828972022e-06,
"loss": 1.184,
"step": 5160
},
{
"epoch": 0.32,
"learning_rate": 9.116018610134523e-06,
"loss": 1.2227,
"step": 5170
},
{
"epoch": 0.32,
"learning_rate": 9.113913391297026e-06,
"loss": 1.1739,
"step": 5180
},
{
"epoch": 0.32,
"learning_rate": 9.111808172459528e-06,
"loss": 1.1705,
"step": 5190
},
{
"epoch": 0.32,
"learning_rate": 9.109702953622029e-06,
"loss": 1.1681,
"step": 5200
},
{
"epoch": 0.32,
"learning_rate": 9.107597734784532e-06,
"loss": 1.2116,
"step": 5210
},
{
"epoch": 0.32,
"learning_rate": 9.105492515947032e-06,
"loss": 1.2158,
"step": 5220
},
{
"epoch": 0.32,
"learning_rate": 9.103387297109535e-06,
"loss": 1.1452,
"step": 5230
},
{
"epoch": 0.32,
"learning_rate": 9.101282078272037e-06,
"loss": 1.1695,
"step": 5240
},
{
"epoch": 0.32,
"learning_rate": 9.099176859434538e-06,
"loss": 1.2099,
"step": 5250
},
{
"epoch": 0.33,
"learning_rate": 9.09707164059704e-06,
"loss": 1.2359,
"step": 5260
},
{
"epoch": 0.33,
"learning_rate": 9.094966421759543e-06,
"loss": 1.207,
"step": 5270
},
{
"epoch": 0.33,
"learning_rate": 9.092861202922044e-06,
"loss": 1.2119,
"step": 5280
},
{
"epoch": 0.33,
"learning_rate": 9.090755984084546e-06,
"loss": 1.1408,
"step": 5290
},
{
"epoch": 0.33,
"learning_rate": 9.088650765247047e-06,
"loss": 1.1842,
"step": 5300
},
{
"epoch": 0.33,
"learning_rate": 9.08654554640955e-06,
"loss": 1.1786,
"step": 5310
},
{
"epoch": 0.33,
"learning_rate": 9.084440327572052e-06,
"loss": 1.2016,
"step": 5320
},
{
"epoch": 0.33,
"learning_rate": 9.082335108734553e-06,
"loss": 1.2011,
"step": 5330
},
{
"epoch": 0.33,
"learning_rate": 9.080229889897056e-06,
"loss": 1.235,
"step": 5340
},
{
"epoch": 0.33,
"learning_rate": 9.078124671059556e-06,
"loss": 1.2116,
"step": 5350
},
{
"epoch": 0.33,
"learning_rate": 9.076019452222059e-06,
"loss": 1.2083,
"step": 5360
},
{
"epoch": 0.33,
"learning_rate": 9.073914233384561e-06,
"loss": 1.2152,
"step": 5370
},
{
"epoch": 0.33,
"learning_rate": 9.071809014547062e-06,
"loss": 1.239,
"step": 5380
},
{
"epoch": 0.33,
"learning_rate": 9.069703795709565e-06,
"loss": 1.1989,
"step": 5390
},
{
"epoch": 0.33,
"learning_rate": 9.067598576872067e-06,
"loss": 1.2193,
"step": 5400
},
{
"epoch": 0.33,
"learning_rate": 9.065493358034568e-06,
"loss": 1.2051,
"step": 5410
},
{
"epoch": 0.34,
"learning_rate": 9.06338813919707e-06,
"loss": 1.1981,
"step": 5420
},
{
"epoch": 0.34,
"learning_rate": 9.061282920359571e-06,
"loss": 1.2218,
"step": 5430
},
{
"epoch": 0.34,
"learning_rate": 9.059177701522074e-06,
"loss": 1.2669,
"step": 5440
},
{
"epoch": 0.34,
"learning_rate": 9.057072482684576e-06,
"loss": 1.2127,
"step": 5450
},
{
"epoch": 0.34,
"learning_rate": 9.054967263847077e-06,
"loss": 1.1538,
"step": 5460
},
{
"epoch": 0.34,
"learning_rate": 9.05286204500958e-06,
"loss": 1.1664,
"step": 5470
},
{
"epoch": 0.34,
"learning_rate": 9.050756826172082e-06,
"loss": 1.1976,
"step": 5480
},
{
"epoch": 0.34,
"learning_rate": 9.048651607334583e-06,
"loss": 1.2223,
"step": 5490
},
{
"epoch": 0.34,
"learning_rate": 9.046546388497085e-06,
"loss": 1.201,
"step": 5500
},
{
"epoch": 0.34,
"learning_rate": 9.044441169659586e-06,
"loss": 1.2372,
"step": 5510
},
{
"epoch": 0.34,
"learning_rate": 9.042335950822089e-06,
"loss": 1.1758,
"step": 5520
},
{
"epoch": 0.34,
"learning_rate": 9.040230731984591e-06,
"loss": 1.1793,
"step": 5530
},
{
"epoch": 0.34,
"learning_rate": 9.038125513147092e-06,
"loss": 1.1706,
"step": 5540
},
{
"epoch": 0.34,
"learning_rate": 9.036020294309594e-06,
"loss": 1.2267,
"step": 5550
},
{
"epoch": 0.34,
"learning_rate": 9.033915075472097e-06,
"loss": 1.218,
"step": 5560
},
{
"epoch": 0.34,
"learning_rate": 9.031809856634598e-06,
"loss": 1.1789,
"step": 5570
},
{
"epoch": 0.35,
"learning_rate": 9.0297046377971e-06,
"loss": 1.2317,
"step": 5580
},
{
"epoch": 0.35,
"learning_rate": 9.027599418959601e-06,
"loss": 1.23,
"step": 5590
},
{
"epoch": 0.35,
"learning_rate": 9.025494200122104e-06,
"loss": 1.2058,
"step": 5600
},
{
"epoch": 0.35,
"learning_rate": 9.023388981284606e-06,
"loss": 1.276,
"step": 5610
},
{
"epoch": 0.35,
"learning_rate": 9.021283762447107e-06,
"loss": 1.1758,
"step": 5620
},
{
"epoch": 0.35,
"learning_rate": 9.01917854360961e-06,
"loss": 1.182,
"step": 5630
},
{
"epoch": 0.35,
"learning_rate": 9.01707332477211e-06,
"loss": 1.2027,
"step": 5640
},
{
"epoch": 0.35,
"learning_rate": 9.014968105934613e-06,
"loss": 1.2442,
"step": 5650
},
{
"epoch": 0.35,
"learning_rate": 9.012862887097115e-06,
"loss": 1.1509,
"step": 5660
},
{
"epoch": 0.35,
"learning_rate": 9.010757668259616e-06,
"loss": 1.2369,
"step": 5670
},
{
"epoch": 0.35,
"learning_rate": 9.008652449422118e-06,
"loss": 1.225,
"step": 5680
},
{
"epoch": 0.35,
"learning_rate": 9.006547230584621e-06,
"loss": 1.2575,
"step": 5690
},
{
"epoch": 0.35,
"learning_rate": 9.004442011747122e-06,
"loss": 1.1801,
"step": 5700
},
{
"epoch": 0.35,
"learning_rate": 9.002336792909624e-06,
"loss": 1.1817,
"step": 5710
},
{
"epoch": 0.35,
"learning_rate": 9.000231574072125e-06,
"loss": 1.2392,
"step": 5720
},
{
"epoch": 0.35,
"learning_rate": 8.998126355234628e-06,
"loss": 1.1718,
"step": 5730
},
{
"epoch": 0.36,
"learning_rate": 8.99602113639713e-06,
"loss": 1.2155,
"step": 5740
},
{
"epoch": 0.36,
"learning_rate": 8.993915917559631e-06,
"loss": 1.1976,
"step": 5750
},
{
"epoch": 0.36,
"learning_rate": 8.991810698722133e-06,
"loss": 1.1715,
"step": 5760
},
{
"epoch": 0.36,
"learning_rate": 8.989705479884636e-06,
"loss": 1.1555,
"step": 5770
},
{
"epoch": 0.36,
"learning_rate": 8.987600261047137e-06,
"loss": 1.2071,
"step": 5780
},
{
"epoch": 0.36,
"learning_rate": 8.98549504220964e-06,
"loss": 1.2062,
"step": 5790
},
{
"epoch": 0.36,
"learning_rate": 8.98338982337214e-06,
"loss": 1.1978,
"step": 5800
},
{
"epoch": 0.36,
"learning_rate": 8.981284604534642e-06,
"loss": 1.2125,
"step": 5810
},
{
"epoch": 0.36,
"learning_rate": 8.979179385697145e-06,
"loss": 1.1887,
"step": 5820
},
{
"epoch": 0.36,
"learning_rate": 8.977074166859646e-06,
"loss": 1.2384,
"step": 5830
},
{
"epoch": 0.36,
"learning_rate": 8.974968948022148e-06,
"loss": 1.2708,
"step": 5840
},
{
"epoch": 0.36,
"learning_rate": 8.972863729184649e-06,
"loss": 1.1985,
"step": 5850
},
{
"epoch": 0.36,
"learning_rate": 8.970758510347152e-06,
"loss": 1.2202,
"step": 5860
},
{
"epoch": 0.36,
"learning_rate": 8.968653291509654e-06,
"loss": 1.2281,
"step": 5870
},
{
"epoch": 0.36,
"learning_rate": 8.966548072672155e-06,
"loss": 1.2158,
"step": 5880
},
{
"epoch": 0.36,
"learning_rate": 8.964442853834657e-06,
"loss": 1.1912,
"step": 5890
},
{
"epoch": 0.37,
"learning_rate": 8.96233763499716e-06,
"loss": 1.1822,
"step": 5900
},
{
"epoch": 0.37,
"learning_rate": 8.96023241615966e-06,
"loss": 1.2181,
"step": 5910
},
{
"epoch": 0.37,
"learning_rate": 8.958127197322163e-06,
"loss": 1.1901,
"step": 5920
},
{
"epoch": 0.37,
"learning_rate": 8.956021978484664e-06,
"loss": 1.1742,
"step": 5930
},
{
"epoch": 0.37,
"learning_rate": 8.953916759647166e-06,
"loss": 1.1675,
"step": 5940
},
{
"epoch": 0.37,
"learning_rate": 8.951811540809669e-06,
"loss": 1.249,
"step": 5950
},
{
"epoch": 0.37,
"learning_rate": 8.94970632197217e-06,
"loss": 1.1683,
"step": 5960
},
{
"epoch": 0.37,
"learning_rate": 8.947601103134672e-06,
"loss": 1.1558,
"step": 5970
},
{
"epoch": 0.37,
"learning_rate": 8.945495884297175e-06,
"loss": 1.1685,
"step": 5980
},
{
"epoch": 0.37,
"learning_rate": 8.943390665459674e-06,
"loss": 1.1734,
"step": 5990
},
{
"epoch": 0.37,
"learning_rate": 8.941285446622176e-06,
"loss": 1.1478,
"step": 6000
}
],
"max_steps": 48471,
"num_train_epochs": 3,
"total_flos": 3.0400555051175117e+19,
"trial_name": null,
"trial_params": null
}