{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.3713417039942442, "global_step": 6000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 9.278350515463919e-08, "loss": 2.5166, "step": 10 }, { "epoch": 0.0, "learning_rate": 1.9587628865979384e-07, "loss": 1.9059, "step": 20 }, { "epoch": 0.0, "learning_rate": 2.989690721649485e-07, "loss": 1.7869, "step": 30 }, { "epoch": 0.0, "learning_rate": 4.0206185567010316e-07, "loss": 1.7863, "step": 40 }, { "epoch": 0.0, "learning_rate": 5.051546391752578e-07, "loss": 1.6946, "step": 50 }, { "epoch": 0.0, "learning_rate": 6.082474226804124e-07, "loss": 1.7419, "step": 60 }, { "epoch": 0.0, "learning_rate": 7.11340206185567e-07, "loss": 1.645, "step": 70 }, { "epoch": 0.0, "learning_rate": 8.144329896907217e-07, "loss": 1.6611, "step": 80 }, { "epoch": 0.01, "learning_rate": 9.175257731958763e-07, "loss": 1.5985, "step": 90 }, { "epoch": 0.01, "learning_rate": 1.020618556701031e-06, "loss": 1.6198, "step": 100 }, { "epoch": 0.01, "learning_rate": 1.1237113402061856e-06, "loss": 1.6259, "step": 110 }, { "epoch": 0.01, "learning_rate": 1.2268041237113403e-06, "loss": 1.5973, "step": 120 }, { "epoch": 0.01, "learning_rate": 1.329896907216495e-06, "loss": 1.5941, "step": 130 }, { "epoch": 0.01, "learning_rate": 1.4329896907216496e-06, "loss": 1.5597, "step": 140 }, { "epoch": 0.01, "learning_rate": 1.5360824742268042e-06, "loss": 1.5672, "step": 150 }, { "epoch": 0.01, "learning_rate": 1.639175257731959e-06, "loss": 1.5372, "step": 160 }, { "epoch": 0.01, "learning_rate": 1.7422680412371134e-06, "loss": 1.5715, "step": 170 }, { "epoch": 0.01, "learning_rate": 1.8453608247422682e-06, "loss": 1.5389, "step": 180 }, { "epoch": 0.01, "learning_rate": 1.948453608247423e-06, "loss": 1.5525, "step": 190 }, { "epoch": 0.01, "learning_rate": 2.0515463917525773e-06, "loss": 1.5871, "step": 200 }, { "epoch": 0.01, "learning_rate": 2.1546391752577322e-06, "loss": 1.5442, "step": 210 }, { "epoch": 0.01, "learning_rate": 2.2577319587628867e-06, "loss": 1.5335, "step": 220 }, { "epoch": 0.01, "learning_rate": 2.3608247422680415e-06, "loss": 1.5103, "step": 230 }, { "epoch": 0.01, "learning_rate": 2.463917525773196e-06, "loss": 1.5016, "step": 240 }, { "epoch": 0.02, "learning_rate": 2.5670103092783504e-06, "loss": 1.5101, "step": 250 }, { "epoch": 0.02, "learning_rate": 2.6701030927835053e-06, "loss": 1.5323, "step": 260 }, { "epoch": 0.02, "learning_rate": 2.77319587628866e-06, "loss": 1.4785, "step": 270 }, { "epoch": 0.02, "learning_rate": 2.8762886597938146e-06, "loss": 1.4132, "step": 280 }, { "epoch": 0.02, "learning_rate": 2.979381443298969e-06, "loss": 1.4733, "step": 290 }, { "epoch": 0.02, "learning_rate": 3.082474226804124e-06, "loss": 1.4258, "step": 300 }, { "epoch": 0.02, "learning_rate": 3.1855670103092784e-06, "loss": 1.4557, "step": 310 }, { "epoch": 0.02, "learning_rate": 3.2886597938144333e-06, "loss": 1.44, "step": 320 }, { "epoch": 0.02, "learning_rate": 3.391752577319588e-06, "loss": 1.4348, "step": 330 }, { "epoch": 0.02, "learning_rate": 3.494845360824742e-06, "loss": 1.406, "step": 340 }, { "epoch": 0.02, "learning_rate": 3.597938144329897e-06, "loss": 1.4239, "step": 350 }, { "epoch": 0.02, "learning_rate": 3.701030927835052e-06, "loss": 1.4185, "step": 360 }, { "epoch": 0.02, "learning_rate": 3.8041237113402064e-06, "loss": 1.3954, "step": 370 }, { "epoch": 0.02, "learning_rate": 3.907216494845361e-06, "loss": 1.3759, "step": 380 }, { "epoch": 0.02, "learning_rate": 4.010309278350516e-06, "loss": 1.3231, "step": 390 }, { "epoch": 0.02, "learning_rate": 4.11340206185567e-06, "loss": 1.3059, "step": 400 }, { "epoch": 0.03, "learning_rate": 4.216494845360825e-06, "loss": 1.3258, "step": 410 }, { "epoch": 0.03, "learning_rate": 4.31958762886598e-06, "loss": 1.3722, "step": 420 }, { "epoch": 0.03, "learning_rate": 4.422680412371134e-06, "loss": 1.2917, "step": 430 }, { "epoch": 0.03, "learning_rate": 4.525773195876289e-06, "loss": 1.2893, "step": 440 }, { "epoch": 0.03, "learning_rate": 4.628865979381444e-06, "loss": 1.2865, "step": 450 }, { "epoch": 0.03, "learning_rate": 4.731958762886599e-06, "loss": 1.28, "step": 460 }, { "epoch": 0.03, "learning_rate": 4.835051546391753e-06, "loss": 1.299, "step": 470 }, { "epoch": 0.03, "learning_rate": 4.9381443298969075e-06, "loss": 1.2949, "step": 480 }, { "epoch": 0.03, "learning_rate": 5.041237113402062e-06, "loss": 1.3078, "step": 490 }, { "epoch": 0.03, "learning_rate": 5.144329896907216e-06, "loss": 1.2242, "step": 500 }, { "epoch": 0.03, "learning_rate": 5.247422680412372e-06, "loss": 1.2714, "step": 510 }, { "epoch": 0.03, "learning_rate": 5.350515463917526e-06, "loss": 1.2482, "step": 520 }, { "epoch": 0.03, "learning_rate": 5.45360824742268e-06, "loss": 1.2703, "step": 530 }, { "epoch": 0.03, "learning_rate": 5.556701030927836e-06, "loss": 1.2578, "step": 540 }, { "epoch": 0.03, "learning_rate": 5.65979381443299e-06, "loss": 1.2989, "step": 550 }, { "epoch": 0.03, "learning_rate": 5.762886597938144e-06, "loss": 1.2853, "step": 560 }, { "epoch": 0.04, "learning_rate": 5.8659793814433e-06, "loss": 1.3022, "step": 570 }, { "epoch": 0.04, "learning_rate": 5.969072164948454e-06, "loss": 1.2871, "step": 580 }, { "epoch": 0.04, "learning_rate": 6.0721649484536086e-06, "loss": 1.2679, "step": 590 }, { "epoch": 0.04, "learning_rate": 6.1752577319587634e-06, "loss": 1.2732, "step": 600 }, { "epoch": 0.04, "learning_rate": 6.278350515463918e-06, "loss": 1.2641, "step": 610 }, { "epoch": 0.04, "learning_rate": 6.381443298969072e-06, "loss": 1.2919, "step": 620 }, { "epoch": 0.04, "learning_rate": 6.484536082474227e-06, "loss": 1.2594, "step": 630 }, { "epoch": 0.04, "learning_rate": 6.587628865979382e-06, "loss": 1.2592, "step": 640 }, { "epoch": 0.04, "learning_rate": 6.690721649484536e-06, "loss": 1.2652, "step": 650 }, { "epoch": 0.04, "learning_rate": 6.793814432989692e-06, "loss": 1.3274, "step": 660 }, { "epoch": 0.04, "learning_rate": 6.896907216494846e-06, "loss": 1.2155, "step": 670 }, { "epoch": 0.04, "learning_rate": 7e-06, "loss": 1.2837, "step": 680 }, { "epoch": 0.04, "learning_rate": 7.103092783505156e-06, "loss": 1.265, "step": 690 }, { "epoch": 0.04, "learning_rate": 7.20618556701031e-06, "loss": 1.2427, "step": 700 }, { "epoch": 0.04, "learning_rate": 7.309278350515464e-06, "loss": 1.2688, "step": 710 }, { "epoch": 0.04, "learning_rate": 7.412371134020619e-06, "loss": 1.3071, "step": 720 }, { "epoch": 0.05, "learning_rate": 7.515463917525773e-06, "loss": 1.2346, "step": 730 }, { "epoch": 0.05, "learning_rate": 7.618556701030928e-06, "loss": 1.2246, "step": 740 }, { "epoch": 0.05, "learning_rate": 7.721649484536083e-06, "loss": 1.2604, "step": 750 }, { "epoch": 0.05, "learning_rate": 7.824742268041238e-06, "loss": 1.2589, "step": 760 }, { "epoch": 0.05, "learning_rate": 7.927835051546391e-06, "loss": 1.2512, "step": 770 }, { "epoch": 0.05, "learning_rate": 8.030927835051548e-06, "loss": 1.2229, "step": 780 }, { "epoch": 0.05, "learning_rate": 8.134020618556701e-06, "loss": 1.2326, "step": 790 }, { "epoch": 0.05, "learning_rate": 8.237113402061856e-06, "loss": 1.3097, "step": 800 }, { "epoch": 0.05, "learning_rate": 8.34020618556701e-06, "loss": 1.2358, "step": 810 }, { "epoch": 0.05, "learning_rate": 8.443298969072166e-06, "loss": 1.2746, "step": 820 }, { "epoch": 0.05, "learning_rate": 8.54639175257732e-06, "loss": 1.3063, "step": 830 }, { "epoch": 0.05, "learning_rate": 8.649484536082475e-06, "loss": 1.2702, "step": 840 }, { "epoch": 0.05, "learning_rate": 8.75257731958763e-06, "loss": 1.2421, "step": 850 }, { "epoch": 0.05, "learning_rate": 8.855670103092783e-06, "loss": 1.3047, "step": 860 }, { "epoch": 0.05, "learning_rate": 8.95876288659794e-06, "loss": 1.2452, "step": 870 }, { "epoch": 0.05, "learning_rate": 9.061855670103093e-06, "loss": 1.2753, "step": 880 }, { "epoch": 0.06, "learning_rate": 9.164948453608248e-06, "loss": 1.2532, "step": 890 }, { "epoch": 0.06, "learning_rate": 9.268041237113403e-06, "loss": 1.2379, "step": 900 }, { "epoch": 0.06, "learning_rate": 9.371134020618558e-06, "loss": 1.2891, "step": 910 }, { "epoch": 0.06, "learning_rate": 9.474226804123711e-06, "loss": 1.2773, "step": 920 }, { "epoch": 0.06, "learning_rate": 9.577319587628868e-06, "loss": 1.2753, "step": 930 }, { "epoch": 0.06, "learning_rate": 9.68041237113402e-06, "loss": 1.1976, "step": 940 }, { "epoch": 0.06, "learning_rate": 9.783505154639176e-06, "loss": 1.2721, "step": 950 }, { "epoch": 0.06, "learning_rate": 9.88659793814433e-06, "loss": 1.3058, "step": 960 }, { "epoch": 0.06, "learning_rate": 9.989690721649485e-06, "loss": 1.2383, "step": 970 }, { "epoch": 0.06, "learning_rate": 9.998105303046253e-06, "loss": 1.2356, "step": 980 }, { "epoch": 0.06, "learning_rate": 9.996000084208755e-06, "loss": 1.2639, "step": 990 }, { "epoch": 0.06, "learning_rate": 9.993894865371256e-06, "loss": 1.2689, "step": 1000 }, { "epoch": 0.06, "learning_rate": 9.991789646533758e-06, "loss": 1.2764, "step": 1010 }, { "epoch": 0.06, "learning_rate": 9.989684427696261e-06, "loss": 1.2602, "step": 1020 }, { "epoch": 0.06, "learning_rate": 9.987579208858762e-06, "loss": 1.3034, "step": 1030 }, { "epoch": 0.06, "learning_rate": 9.985473990021264e-06, "loss": 1.2318, "step": 1040 }, { "epoch": 0.06, "learning_rate": 9.983368771183765e-06, "loss": 1.252, "step": 1050 }, { "epoch": 0.07, "learning_rate": 9.981263552346267e-06, "loss": 1.3111, "step": 1060 }, { "epoch": 0.07, "learning_rate": 9.97915833350877e-06, "loss": 1.255, "step": 1070 }, { "epoch": 0.07, "learning_rate": 9.97705311467127e-06, "loss": 1.2736, "step": 1080 }, { "epoch": 0.07, "learning_rate": 9.974947895833773e-06, "loss": 1.3128, "step": 1090 }, { "epoch": 0.07, "learning_rate": 9.972842676996276e-06, "loss": 1.2349, "step": 1100 }, { "epoch": 0.07, "learning_rate": 9.970737458158777e-06, "loss": 1.2797, "step": 1110 }, { "epoch": 0.07, "learning_rate": 9.968632239321279e-06, "loss": 1.2698, "step": 1120 }, { "epoch": 0.07, "learning_rate": 9.96652702048378e-06, "loss": 1.2714, "step": 1130 }, { "epoch": 0.07, "learning_rate": 9.964421801646282e-06, "loss": 1.2965, "step": 1140 }, { "epoch": 0.07, "learning_rate": 9.962316582808785e-06, "loss": 1.2764, "step": 1150 }, { "epoch": 0.07, "learning_rate": 9.960211363971286e-06, "loss": 1.2294, "step": 1160 }, { "epoch": 0.07, "learning_rate": 9.958106145133788e-06, "loss": 1.2623, "step": 1170 }, { "epoch": 0.07, "learning_rate": 9.956000926296289e-06, "loss": 1.2814, "step": 1180 }, { "epoch": 0.07, "learning_rate": 9.953895707458791e-06, "loss": 1.2793, "step": 1190 }, { "epoch": 0.07, "learning_rate": 9.951790488621294e-06, "loss": 1.2749, "step": 1200 }, { "epoch": 0.07, "learning_rate": 9.949685269783795e-06, "loss": 1.2689, "step": 1210 }, { "epoch": 0.08, "learning_rate": 9.947580050946297e-06, "loss": 1.2913, "step": 1220 }, { "epoch": 0.08, "learning_rate": 9.9454748321088e-06, "loss": 1.2386, "step": 1230 }, { "epoch": 0.08, "learning_rate": 9.9433696132713e-06, "loss": 1.2519, "step": 1240 }, { "epoch": 0.08, "learning_rate": 9.941264394433803e-06, "loss": 1.2893, "step": 1250 }, { "epoch": 0.08, "learning_rate": 9.939159175596304e-06, "loss": 1.2398, "step": 1260 }, { "epoch": 0.08, "learning_rate": 9.937053956758805e-06, "loss": 1.2836, "step": 1270 }, { "epoch": 0.08, "learning_rate": 9.934948737921307e-06, "loss": 1.2326, "step": 1280 }, { "epoch": 0.08, "learning_rate": 9.93284351908381e-06, "loss": 1.2768, "step": 1290 }, { "epoch": 0.08, "learning_rate": 9.93073830024631e-06, "loss": 1.2584, "step": 1300 }, { "epoch": 0.08, "learning_rate": 9.928633081408813e-06, "loss": 1.2738, "step": 1310 }, { "epoch": 0.08, "learning_rate": 9.926527862571314e-06, "loss": 1.2329, "step": 1320 }, { "epoch": 0.08, "learning_rate": 9.924422643733816e-06, "loss": 1.2623, "step": 1330 }, { "epoch": 0.08, "learning_rate": 9.922317424896319e-06, "loss": 1.2839, "step": 1340 }, { "epoch": 0.08, "learning_rate": 9.92021220605882e-06, "loss": 1.2806, "step": 1350 }, { "epoch": 0.08, "learning_rate": 9.918106987221322e-06, "loss": 1.232, "step": 1360 }, { "epoch": 0.08, "learning_rate": 9.916001768383825e-06, "loss": 1.256, "step": 1370 }, { "epoch": 0.09, "learning_rate": 9.913896549546325e-06, "loss": 1.225, "step": 1380 }, { "epoch": 0.09, "learning_rate": 9.911791330708828e-06, "loss": 1.2247, "step": 1390 }, { "epoch": 0.09, "learning_rate": 9.909686111871329e-06, "loss": 1.2422, "step": 1400 }, { "epoch": 0.09, "learning_rate": 9.907580893033831e-06, "loss": 1.2499, "step": 1410 }, { "epoch": 0.09, "learning_rate": 9.905475674196334e-06, "loss": 1.2569, "step": 1420 }, { "epoch": 0.09, "learning_rate": 9.903370455358834e-06, "loss": 1.2771, "step": 1430 }, { "epoch": 0.09, "learning_rate": 9.901265236521337e-06, "loss": 1.2789, "step": 1440 }, { "epoch": 0.09, "learning_rate": 9.89916001768384e-06, "loss": 1.2213, "step": 1450 }, { "epoch": 0.09, "learning_rate": 9.89705479884634e-06, "loss": 1.2393, "step": 1460 }, { "epoch": 0.09, "learning_rate": 9.894949580008843e-06, "loss": 1.257, "step": 1470 }, { "epoch": 0.09, "learning_rate": 9.892844361171344e-06, "loss": 1.2572, "step": 1480 }, { "epoch": 0.09, "learning_rate": 9.890739142333846e-06, "loss": 1.2503, "step": 1490 }, { "epoch": 0.09, "learning_rate": 9.888633923496349e-06, "loss": 1.2404, "step": 1500 }, { "epoch": 0.09, "learning_rate": 9.88652870465885e-06, "loss": 1.2847, "step": 1510 }, { "epoch": 0.09, "learning_rate": 9.884423485821352e-06, "loss": 1.2551, "step": 1520 }, { "epoch": 0.09, "learning_rate": 9.882318266983854e-06, "loss": 1.2741, "step": 1530 }, { "epoch": 0.1, "learning_rate": 9.880213048146355e-06, "loss": 1.2735, "step": 1540 }, { "epoch": 0.1, "learning_rate": 9.878107829308858e-06, "loss": 1.273, "step": 1550 }, { "epoch": 0.1, "learning_rate": 9.876002610471358e-06, "loss": 1.2357, "step": 1560 }, { "epoch": 0.1, "learning_rate": 9.873897391633861e-06, "loss": 1.1864, "step": 1570 }, { "epoch": 0.1, "learning_rate": 9.871792172796363e-06, "loss": 1.277, "step": 1580 }, { "epoch": 0.1, "learning_rate": 9.869686953958864e-06, "loss": 1.2375, "step": 1590 }, { "epoch": 0.1, "learning_rate": 9.867581735121367e-06, "loss": 1.2754, "step": 1600 }, { "epoch": 0.1, "learning_rate": 9.865476516283868e-06, "loss": 1.2051, "step": 1610 }, { "epoch": 0.1, "learning_rate": 9.86337129744637e-06, "loss": 1.2579, "step": 1620 }, { "epoch": 0.1, "learning_rate": 9.861266078608873e-06, "loss": 1.2216, "step": 1630 }, { "epoch": 0.1, "learning_rate": 9.859160859771373e-06, "loss": 1.2529, "step": 1640 }, { "epoch": 0.1, "learning_rate": 9.857055640933876e-06, "loss": 1.207, "step": 1650 }, { "epoch": 0.1, "learning_rate": 9.854950422096378e-06, "loss": 1.2275, "step": 1660 }, { "epoch": 0.1, "learning_rate": 9.85284520325888e-06, "loss": 1.2769, "step": 1670 }, { "epoch": 0.1, "learning_rate": 9.850739984421382e-06, "loss": 1.2165, "step": 1680 }, { "epoch": 0.1, "learning_rate": 9.848634765583883e-06, "loss": 1.2903, "step": 1690 }, { "epoch": 0.11, "learning_rate": 9.846529546746385e-06, "loss": 1.2548, "step": 1700 }, { "epoch": 0.11, "learning_rate": 9.844424327908888e-06, "loss": 1.2652, "step": 1710 }, { "epoch": 0.11, "learning_rate": 9.842319109071388e-06, "loss": 1.2718, "step": 1720 }, { "epoch": 0.11, "learning_rate": 9.84021389023389e-06, "loss": 1.269, "step": 1730 }, { "epoch": 0.11, "learning_rate": 9.838108671396393e-06, "loss": 1.2362, "step": 1740 }, { "epoch": 0.11, "learning_rate": 9.836003452558894e-06, "loss": 1.205, "step": 1750 }, { "epoch": 0.11, "learning_rate": 9.833898233721397e-06, "loss": 1.2649, "step": 1760 }, { "epoch": 0.11, "learning_rate": 9.831793014883897e-06, "loss": 1.2517, "step": 1770 }, { "epoch": 0.11, "learning_rate": 9.8296877960464e-06, "loss": 1.2015, "step": 1780 }, { "epoch": 0.11, "learning_rate": 9.827582577208902e-06, "loss": 1.2382, "step": 1790 }, { "epoch": 0.11, "learning_rate": 9.825477358371403e-06, "loss": 1.2236, "step": 1800 }, { "epoch": 0.11, "learning_rate": 9.823372139533906e-06, "loss": 1.2503, "step": 1810 }, { "epoch": 0.11, "learning_rate": 9.821266920696407e-06, "loss": 1.2409, "step": 1820 }, { "epoch": 0.11, "learning_rate": 9.819161701858909e-06, "loss": 1.2236, "step": 1830 }, { "epoch": 0.11, "learning_rate": 9.817056483021412e-06, "loss": 1.2246, "step": 1840 }, { "epoch": 0.11, "learning_rate": 9.814951264183912e-06, "loss": 1.234, "step": 1850 }, { "epoch": 0.12, "learning_rate": 9.812846045346415e-06, "loss": 1.2384, "step": 1860 }, { "epoch": 0.12, "learning_rate": 9.810740826508917e-06, "loss": 1.2103, "step": 1870 }, { "epoch": 0.12, "learning_rate": 9.808635607671418e-06, "loss": 1.2398, "step": 1880 }, { "epoch": 0.12, "learning_rate": 9.80653038883392e-06, "loss": 1.2246, "step": 1890 }, { "epoch": 0.12, "learning_rate": 9.804425169996421e-06, "loss": 1.1894, "step": 1900 }, { "epoch": 0.12, "learning_rate": 9.802319951158924e-06, "loss": 1.262, "step": 1910 }, { "epoch": 0.12, "learning_rate": 9.800214732321426e-06, "loss": 1.2595, "step": 1920 }, { "epoch": 0.12, "learning_rate": 9.798109513483927e-06, "loss": 1.1954, "step": 1930 }, { "epoch": 0.12, "learning_rate": 9.79600429464643e-06, "loss": 1.2578, "step": 1940 }, { "epoch": 0.12, "learning_rate": 9.793899075808932e-06, "loss": 1.2286, "step": 1950 }, { "epoch": 0.12, "learning_rate": 9.791793856971433e-06, "loss": 1.2455, "step": 1960 }, { "epoch": 0.12, "learning_rate": 9.789688638133936e-06, "loss": 1.214, "step": 1970 }, { "epoch": 0.12, "learning_rate": 9.787583419296436e-06, "loss": 1.2488, "step": 1980 }, { "epoch": 0.12, "learning_rate": 9.785478200458939e-06, "loss": 1.266, "step": 1990 }, { "epoch": 0.12, "learning_rate": 9.783372981621441e-06, "loss": 1.2139, "step": 2000 }, { "epoch": 0.12, "learning_rate": 9.781267762783942e-06, "loss": 1.2516, "step": 2010 }, { "epoch": 0.13, "learning_rate": 9.779162543946445e-06, "loss": 1.2878, "step": 2020 }, { "epoch": 0.13, "learning_rate": 9.777057325108945e-06, "loss": 1.2494, "step": 2030 }, { "epoch": 0.13, "learning_rate": 9.774952106271448e-06, "loss": 1.207, "step": 2040 }, { "epoch": 0.13, "learning_rate": 9.77284688743395e-06, "loss": 1.2796, "step": 2050 }, { "epoch": 0.13, "learning_rate": 9.770741668596451e-06, "loss": 1.2285, "step": 2060 }, { "epoch": 0.13, "learning_rate": 9.768636449758954e-06, "loss": 1.2467, "step": 2070 }, { "epoch": 0.13, "learning_rate": 9.766531230921456e-06, "loss": 1.1801, "step": 2080 }, { "epoch": 0.13, "learning_rate": 9.764426012083957e-06, "loss": 1.2399, "step": 2090 }, { "epoch": 0.13, "learning_rate": 9.76232079324646e-06, "loss": 1.2359, "step": 2100 }, { "epoch": 0.13, "learning_rate": 9.76021557440896e-06, "loss": 1.2074, "step": 2110 }, { "epoch": 0.13, "learning_rate": 9.758110355571463e-06, "loss": 1.2601, "step": 2120 }, { "epoch": 0.13, "learning_rate": 9.756005136733965e-06, "loss": 1.2456, "step": 2130 }, { "epoch": 0.13, "learning_rate": 9.753899917896466e-06, "loss": 1.2479, "step": 2140 }, { "epoch": 0.13, "learning_rate": 9.751794699058969e-06, "loss": 1.2593, "step": 2150 }, { "epoch": 0.13, "learning_rate": 9.749689480221471e-06, "loss": 1.1856, "step": 2160 }, { "epoch": 0.13, "learning_rate": 9.747584261383972e-06, "loss": 1.2634, "step": 2170 }, { "epoch": 0.13, "learning_rate": 9.745479042546474e-06, "loss": 1.2046, "step": 2180 }, { "epoch": 0.14, "learning_rate": 9.743373823708975e-06, "loss": 1.2753, "step": 2190 }, { "epoch": 0.14, "learning_rate": 9.741268604871478e-06, "loss": 1.2393, "step": 2200 }, { "epoch": 0.14, "learning_rate": 9.739163386033979e-06, "loss": 1.224, "step": 2210 }, { "epoch": 0.14, "learning_rate": 9.737058167196481e-06, "loss": 1.2767, "step": 2220 }, { "epoch": 0.14, "learning_rate": 9.734952948358982e-06, "loss": 1.2584, "step": 2230 }, { "epoch": 0.14, "learning_rate": 9.732847729521484e-06, "loss": 1.2717, "step": 2240 }, { "epoch": 0.14, "learning_rate": 9.730742510683985e-06, "loss": 1.2364, "step": 2250 }, { "epoch": 0.14, "learning_rate": 9.728637291846488e-06, "loss": 1.2354, "step": 2260 }, { "epoch": 0.14, "learning_rate": 9.72653207300899e-06, "loss": 1.2347, "step": 2270 }, { "epoch": 0.14, "learning_rate": 9.724426854171491e-06, "loss": 1.2537, "step": 2280 }, { "epoch": 0.14, "learning_rate": 9.722321635333993e-06, "loss": 1.2186, "step": 2290 }, { "epoch": 0.14, "learning_rate": 9.720216416496496e-06, "loss": 1.2351, "step": 2300 }, { "epoch": 0.14, "learning_rate": 9.718111197658997e-06, "loss": 1.2325, "step": 2310 }, { "epoch": 0.14, "learning_rate": 9.7160059788215e-06, "loss": 1.1996, "step": 2320 }, { "epoch": 0.14, "learning_rate": 9.713900759984e-06, "loss": 1.2023, "step": 2330 }, { "epoch": 0.14, "learning_rate": 9.711795541146503e-06, "loss": 1.2527, "step": 2340 }, { "epoch": 0.15, "learning_rate": 9.709690322309005e-06, "loss": 1.2281, "step": 2350 }, { "epoch": 0.15, "learning_rate": 9.707585103471506e-06, "loss": 1.2382, "step": 2360 }, { "epoch": 0.15, "learning_rate": 9.705479884634008e-06, "loss": 1.2405, "step": 2370 }, { "epoch": 0.15, "learning_rate": 9.70337466579651e-06, "loss": 1.248, "step": 2380 }, { "epoch": 0.15, "learning_rate": 9.701269446959012e-06, "loss": 1.224, "step": 2390 }, { "epoch": 0.15, "learning_rate": 9.699164228121514e-06, "loss": 1.22, "step": 2400 }, { "epoch": 0.15, "learning_rate": 9.697059009284015e-06, "loss": 1.219, "step": 2410 }, { "epoch": 0.15, "learning_rate": 9.694953790446517e-06, "loss": 1.2518, "step": 2420 }, { "epoch": 0.15, "learning_rate": 9.69284857160902e-06, "loss": 1.219, "step": 2430 }, { "epoch": 0.15, "learning_rate": 9.69074335277152e-06, "loss": 1.2168, "step": 2440 }, { "epoch": 0.15, "learning_rate": 9.688638133934023e-06, "loss": 1.2469, "step": 2450 }, { "epoch": 0.15, "learning_rate": 9.686532915096524e-06, "loss": 1.2381, "step": 2460 }, { "epoch": 0.15, "learning_rate": 9.684427696259027e-06, "loss": 1.2001, "step": 2470 }, { "epoch": 0.15, "learning_rate": 9.682322477421529e-06, "loss": 1.2004, "step": 2480 }, { "epoch": 0.15, "learning_rate": 9.68021725858403e-06, "loss": 1.2409, "step": 2490 }, { "epoch": 0.15, "learning_rate": 9.678112039746532e-06, "loss": 1.2389, "step": 2500 }, { "epoch": 0.16, "learning_rate": 9.676006820909035e-06, "loss": 1.242, "step": 2510 }, { "epoch": 0.16, "learning_rate": 9.673901602071536e-06, "loss": 1.2372, "step": 2520 }, { "epoch": 0.16, "learning_rate": 9.671796383234038e-06, "loss": 1.2223, "step": 2530 }, { "epoch": 0.16, "learning_rate": 9.669691164396539e-06, "loss": 1.2506, "step": 2540 }, { "epoch": 0.16, "learning_rate": 9.667585945559041e-06, "loss": 1.2093, "step": 2550 }, { "epoch": 0.16, "learning_rate": 9.665480726721544e-06, "loss": 1.2171, "step": 2560 }, { "epoch": 0.16, "learning_rate": 9.663375507884045e-06, "loss": 1.2363, "step": 2570 }, { "epoch": 0.16, "learning_rate": 9.661270289046547e-06, "loss": 1.2978, "step": 2580 }, { "epoch": 0.16, "learning_rate": 9.65916507020905e-06, "loss": 1.2216, "step": 2590 }, { "epoch": 0.16, "learning_rate": 9.65705985137155e-06, "loss": 1.1937, "step": 2600 }, { "epoch": 0.16, "learning_rate": 9.654954632534053e-06, "loss": 1.2366, "step": 2610 }, { "epoch": 0.16, "learning_rate": 9.652849413696554e-06, "loss": 1.2465, "step": 2620 }, { "epoch": 0.16, "learning_rate": 9.650744194859056e-06, "loss": 1.2704, "step": 2630 }, { "epoch": 0.16, "learning_rate": 9.648638976021559e-06, "loss": 1.2113, "step": 2640 }, { "epoch": 0.16, "learning_rate": 9.64653375718406e-06, "loss": 1.2679, "step": 2650 }, { "epoch": 0.16, "learning_rate": 9.644428538346562e-06, "loss": 1.2005, "step": 2660 }, { "epoch": 0.17, "learning_rate": 9.642323319509063e-06, "loss": 1.2474, "step": 2670 }, { "epoch": 0.17, "learning_rate": 9.640218100671565e-06, "loss": 1.2308, "step": 2680 }, { "epoch": 0.17, "learning_rate": 9.638112881834068e-06, "loss": 1.2391, "step": 2690 }, { "epoch": 0.17, "learning_rate": 9.636007662996569e-06, "loss": 1.1968, "step": 2700 }, { "epoch": 0.17, "learning_rate": 9.633902444159071e-06, "loss": 1.2001, "step": 2710 }, { "epoch": 0.17, "learning_rate": 9.631797225321574e-06, "loss": 1.2688, "step": 2720 }, { "epoch": 0.17, "learning_rate": 9.629692006484075e-06, "loss": 1.2646, "step": 2730 }, { "epoch": 0.17, "learning_rate": 9.627586787646577e-06, "loss": 1.2606, "step": 2740 }, { "epoch": 0.17, "learning_rate": 9.625481568809078e-06, "loss": 1.1915, "step": 2750 }, { "epoch": 0.17, "learning_rate": 9.62337634997158e-06, "loss": 1.204, "step": 2760 }, { "epoch": 0.17, "learning_rate": 9.621271131134083e-06, "loss": 1.2128, "step": 2770 }, { "epoch": 0.17, "learning_rate": 9.619165912296584e-06, "loss": 1.2116, "step": 2780 }, { "epoch": 0.17, "learning_rate": 9.617060693459086e-06, "loss": 1.2287, "step": 2790 }, { "epoch": 0.17, "learning_rate": 9.614955474621589e-06, "loss": 1.2443, "step": 2800 }, { "epoch": 0.17, "learning_rate": 9.61285025578409e-06, "loss": 1.2926, "step": 2810 }, { "epoch": 0.17, "learning_rate": 9.610745036946592e-06, "loss": 1.2195, "step": 2820 }, { "epoch": 0.18, "learning_rate": 9.608639818109093e-06, "loss": 1.2345, "step": 2830 }, { "epoch": 0.18, "learning_rate": 9.606534599271595e-06, "loss": 1.2588, "step": 2840 }, { "epoch": 0.18, "learning_rate": 9.604429380434098e-06, "loss": 1.2392, "step": 2850 }, { "epoch": 0.18, "learning_rate": 9.602324161596599e-06, "loss": 1.2529, "step": 2860 }, { "epoch": 0.18, "learning_rate": 9.600218942759101e-06, "loss": 1.2119, "step": 2870 }, { "epoch": 0.18, "learning_rate": 9.598113723921602e-06, "loss": 1.2416, "step": 2880 }, { "epoch": 0.18, "learning_rate": 9.596008505084104e-06, "loss": 1.2111, "step": 2890 }, { "epoch": 0.18, "learning_rate": 9.593903286246607e-06, "loss": 1.2493, "step": 2900 }, { "epoch": 0.18, "learning_rate": 9.591798067409108e-06, "loss": 1.2481, "step": 2910 }, { "epoch": 0.18, "learning_rate": 9.58969284857161e-06, "loss": 1.2265, "step": 2920 }, { "epoch": 0.18, "learning_rate": 9.587587629734113e-06, "loss": 1.2549, "step": 2930 }, { "epoch": 0.18, "learning_rate": 9.585482410896613e-06, "loss": 1.2474, "step": 2940 }, { "epoch": 0.18, "learning_rate": 9.583377192059116e-06, "loss": 1.1773, "step": 2950 }, { "epoch": 0.18, "learning_rate": 9.581271973221617e-06, "loss": 1.2612, "step": 2960 }, { "epoch": 0.18, "learning_rate": 9.57916675438412e-06, "loss": 1.2247, "step": 2970 }, { "epoch": 0.18, "learning_rate": 9.577061535546622e-06, "loss": 1.2075, "step": 2980 }, { "epoch": 0.19, "learning_rate": 9.574956316709123e-06, "loss": 1.1812, "step": 2990 }, { "epoch": 0.19, "learning_rate": 9.572851097871625e-06, "loss": 1.2058, "step": 3000 }, { "epoch": 0.19, "learning_rate": 9.570745879034128e-06, "loss": 1.2781, "step": 3010 }, { "epoch": 0.19, "learning_rate": 9.568640660196628e-06, "loss": 1.2572, "step": 3020 }, { "epoch": 0.19, "learning_rate": 9.566535441359131e-06, "loss": 1.2794, "step": 3030 }, { "epoch": 0.19, "learning_rate": 9.564430222521632e-06, "loss": 1.2136, "step": 3040 }, { "epoch": 0.19, "learning_rate": 9.562325003684134e-06, "loss": 1.2632, "step": 3050 }, { "epoch": 0.19, "learning_rate": 9.560219784846637e-06, "loss": 1.2584, "step": 3060 }, { "epoch": 0.19, "learning_rate": 9.558114566009137e-06, "loss": 1.286, "step": 3070 }, { "epoch": 0.19, "learning_rate": 9.55600934717164e-06, "loss": 1.247, "step": 3080 }, { "epoch": 0.19, "learning_rate": 9.55390412833414e-06, "loss": 1.2715, "step": 3090 }, { "epoch": 0.19, "learning_rate": 9.551798909496643e-06, "loss": 1.2184, "step": 3100 }, { "epoch": 0.19, "learning_rate": 9.549693690659146e-06, "loss": 1.261, "step": 3110 }, { "epoch": 0.19, "learning_rate": 9.547588471821647e-06, "loss": 1.2183, "step": 3120 }, { "epoch": 0.19, "learning_rate": 9.545483252984149e-06, "loss": 1.1887, "step": 3130 }, { "epoch": 0.19, "learning_rate": 9.543378034146652e-06, "loss": 1.2405, "step": 3140 }, { "epoch": 0.19, "learning_rate": 9.541272815309152e-06, "loss": 1.2499, "step": 3150 }, { "epoch": 0.2, "learning_rate": 9.539167596471653e-06, "loss": 1.2164, "step": 3160 }, { "epoch": 0.2, "learning_rate": 9.537062377634156e-06, "loss": 1.2614, "step": 3170 }, { "epoch": 0.2, "learning_rate": 9.534957158796657e-06, "loss": 1.2475, "step": 3180 }, { "epoch": 0.2, "learning_rate": 9.532851939959159e-06, "loss": 1.2559, "step": 3190 }, { "epoch": 0.2, "learning_rate": 9.530746721121662e-06, "loss": 1.2457, "step": 3200 }, { "epoch": 0.2, "learning_rate": 9.528641502284162e-06, "loss": 1.2228, "step": 3210 }, { "epoch": 0.2, "learning_rate": 9.526536283446665e-06, "loss": 1.219, "step": 3220 }, { "epoch": 0.2, "learning_rate": 9.524431064609166e-06, "loss": 1.2255, "step": 3230 }, { "epoch": 0.2, "learning_rate": 9.522325845771668e-06, "loss": 1.1923, "step": 3240 }, { "epoch": 0.2, "learning_rate": 9.52022062693417e-06, "loss": 1.1996, "step": 3250 }, { "epoch": 0.2, "learning_rate": 9.518115408096671e-06, "loss": 1.2186, "step": 3260 }, { "epoch": 0.2, "learning_rate": 9.516010189259174e-06, "loss": 1.2384, "step": 3270 }, { "epoch": 0.2, "learning_rate": 9.513904970421676e-06, "loss": 1.2119, "step": 3280 }, { "epoch": 0.2, "learning_rate": 9.511799751584177e-06, "loss": 1.2455, "step": 3290 }, { "epoch": 0.2, "learning_rate": 9.50969453274668e-06, "loss": 1.2314, "step": 3300 }, { "epoch": 0.2, "learning_rate": 9.50758931390918e-06, "loss": 1.1995, "step": 3310 }, { "epoch": 0.21, "learning_rate": 9.505484095071683e-06, "loss": 1.2308, "step": 3320 }, { "epoch": 0.21, "learning_rate": 9.503378876234186e-06, "loss": 1.1957, "step": 3330 }, { "epoch": 0.21, "learning_rate": 9.501273657396686e-06, "loss": 1.2557, "step": 3340 }, { "epoch": 0.21, "learning_rate": 9.499168438559189e-06, "loss": 1.2351, "step": 3350 }, { "epoch": 0.21, "learning_rate": 9.497063219721691e-06, "loss": 1.2085, "step": 3360 }, { "epoch": 0.21, "learning_rate": 9.494958000884192e-06, "loss": 1.2241, "step": 3370 }, { "epoch": 0.21, "learning_rate": 9.492852782046695e-06, "loss": 1.1909, "step": 3380 }, { "epoch": 0.21, "learning_rate": 9.490747563209195e-06, "loss": 1.1886, "step": 3390 }, { "epoch": 0.21, "learning_rate": 9.488642344371698e-06, "loss": 1.2161, "step": 3400 }, { "epoch": 0.21, "learning_rate": 9.4865371255342e-06, "loss": 1.2718, "step": 3410 }, { "epoch": 0.21, "learning_rate": 9.484431906696701e-06, "loss": 1.2007, "step": 3420 }, { "epoch": 0.21, "learning_rate": 9.482326687859204e-06, "loss": 1.2038, "step": 3430 }, { "epoch": 0.21, "learning_rate": 9.480221469021705e-06, "loss": 1.2153, "step": 3440 }, { "epoch": 0.21, "learning_rate": 9.478116250184207e-06, "loss": 1.1932, "step": 3450 }, { "epoch": 0.21, "learning_rate": 9.47601103134671e-06, "loss": 1.255, "step": 3460 }, { "epoch": 0.21, "learning_rate": 9.47390581250921e-06, "loss": 1.2738, "step": 3470 }, { "epoch": 0.22, "learning_rate": 9.471800593671713e-06, "loss": 1.2533, "step": 3480 }, { "epoch": 0.22, "learning_rate": 9.469695374834215e-06, "loss": 1.2408, "step": 3490 }, { "epoch": 0.22, "learning_rate": 9.467590155996716e-06, "loss": 1.2331, "step": 3500 }, { "epoch": 0.22, "learning_rate": 9.465484937159219e-06, "loss": 1.2205, "step": 3510 }, { "epoch": 0.22, "learning_rate": 9.46337971832172e-06, "loss": 1.2569, "step": 3520 }, { "epoch": 0.22, "learning_rate": 9.461274499484222e-06, "loss": 1.2245, "step": 3530 }, { "epoch": 0.22, "learning_rate": 9.459169280646724e-06, "loss": 1.2192, "step": 3540 }, { "epoch": 0.22, "learning_rate": 9.457064061809225e-06, "loss": 1.2518, "step": 3550 }, { "epoch": 0.22, "learning_rate": 9.454958842971728e-06, "loss": 1.2619, "step": 3560 }, { "epoch": 0.22, "learning_rate": 9.45285362413423e-06, "loss": 1.1859, "step": 3570 }, { "epoch": 0.22, "learning_rate": 9.450748405296731e-06, "loss": 1.2216, "step": 3580 }, { "epoch": 0.22, "learning_rate": 9.448643186459234e-06, "loss": 1.221, "step": 3590 }, { "epoch": 0.22, "learning_rate": 9.446537967621734e-06, "loss": 1.2224, "step": 3600 }, { "epoch": 0.22, "learning_rate": 9.444432748784237e-06, "loss": 1.2324, "step": 3610 }, { "epoch": 0.22, "learning_rate": 9.44232752994674e-06, "loss": 1.1747, "step": 3620 }, { "epoch": 0.22, "learning_rate": 9.44022231110924e-06, "loss": 1.1962, "step": 3630 }, { "epoch": 0.23, "learning_rate": 9.438117092271743e-06, "loss": 1.2235, "step": 3640 }, { "epoch": 0.23, "learning_rate": 9.436011873434245e-06, "loss": 1.2081, "step": 3650 }, { "epoch": 0.23, "learning_rate": 9.433906654596746e-06, "loss": 1.2398, "step": 3660 }, { "epoch": 0.23, "learning_rate": 9.431801435759248e-06, "loss": 1.248, "step": 3670 }, { "epoch": 0.23, "learning_rate": 9.42969621692175e-06, "loss": 1.1936, "step": 3680 }, { "epoch": 0.23, "learning_rate": 9.427590998084252e-06, "loss": 1.2317, "step": 3690 }, { "epoch": 0.23, "learning_rate": 9.425485779246754e-06, "loss": 1.2139, "step": 3700 }, { "epoch": 0.23, "learning_rate": 9.423380560409255e-06, "loss": 1.1601, "step": 3710 }, { "epoch": 0.23, "learning_rate": 9.421275341571758e-06, "loss": 1.2127, "step": 3720 }, { "epoch": 0.23, "learning_rate": 9.419170122734258e-06, "loss": 1.2082, "step": 3730 }, { "epoch": 0.23, "learning_rate": 9.41706490389676e-06, "loss": 1.1971, "step": 3740 }, { "epoch": 0.23, "learning_rate": 9.414959685059263e-06, "loss": 1.2289, "step": 3750 }, { "epoch": 0.23, "learning_rate": 9.412854466221764e-06, "loss": 1.2133, "step": 3760 }, { "epoch": 0.23, "learning_rate": 9.410749247384267e-06, "loss": 1.2111, "step": 3770 }, { "epoch": 0.23, "learning_rate": 9.408644028546769e-06, "loss": 1.2342, "step": 3780 }, { "epoch": 0.23, "learning_rate": 9.40653880970927e-06, "loss": 1.217, "step": 3790 }, { "epoch": 0.24, "learning_rate": 9.404433590871772e-06, "loss": 1.2651, "step": 3800 }, { "epoch": 0.24, "learning_rate": 9.402328372034273e-06, "loss": 1.2259, "step": 3810 }, { "epoch": 0.24, "learning_rate": 9.400223153196776e-06, "loss": 1.2434, "step": 3820 }, { "epoch": 0.24, "learning_rate": 9.398117934359278e-06, "loss": 1.2199, "step": 3830 }, { "epoch": 0.24, "learning_rate": 9.396012715521779e-06, "loss": 1.2299, "step": 3840 }, { "epoch": 0.24, "learning_rate": 9.393907496684282e-06, "loss": 1.2156, "step": 3850 }, { "epoch": 0.24, "learning_rate": 9.391802277846784e-06, "loss": 1.2402, "step": 3860 }, { "epoch": 0.24, "learning_rate": 9.389697059009285e-06, "loss": 1.237, "step": 3870 }, { "epoch": 0.24, "learning_rate": 9.387591840171787e-06, "loss": 1.2141, "step": 3880 }, { "epoch": 0.24, "learning_rate": 9.385486621334288e-06, "loss": 1.2253, "step": 3890 }, { "epoch": 0.24, "learning_rate": 9.38338140249679e-06, "loss": 1.217, "step": 3900 }, { "epoch": 0.24, "learning_rate": 9.381276183659293e-06, "loss": 1.1919, "step": 3910 }, { "epoch": 0.24, "learning_rate": 9.379170964821794e-06, "loss": 1.1663, "step": 3920 }, { "epoch": 0.24, "learning_rate": 9.377065745984296e-06, "loss": 1.2338, "step": 3930 }, { "epoch": 0.24, "learning_rate": 9.374960527146797e-06, "loss": 1.2399, "step": 3940 }, { "epoch": 0.24, "learning_rate": 9.3728553083093e-06, "loss": 1.1608, "step": 3950 }, { "epoch": 0.25, "learning_rate": 9.370750089471802e-06, "loss": 1.1752, "step": 3960 }, { "epoch": 0.25, "learning_rate": 9.368644870634303e-06, "loss": 1.2364, "step": 3970 }, { "epoch": 0.25, "learning_rate": 9.366539651796806e-06, "loss": 1.2053, "step": 3980 }, { "epoch": 0.25, "learning_rate": 9.364434432959308e-06, "loss": 1.2431, "step": 3990 }, { "epoch": 0.25, "learning_rate": 9.362329214121809e-06, "loss": 1.1948, "step": 4000 }, { "epoch": 0.25, "learning_rate": 9.360223995284311e-06, "loss": 1.2248, "step": 4010 }, { "epoch": 0.25, "learning_rate": 9.358118776446812e-06, "loss": 1.2057, "step": 4020 }, { "epoch": 0.25, "learning_rate": 9.356013557609315e-06, "loss": 1.2373, "step": 4030 }, { "epoch": 0.25, "learning_rate": 9.353908338771817e-06, "loss": 1.1993, "step": 4040 }, { "epoch": 0.25, "learning_rate": 9.351803119934318e-06, "loss": 1.1474, "step": 4050 }, { "epoch": 0.25, "learning_rate": 9.34969790109682e-06, "loss": 1.2084, "step": 4060 }, { "epoch": 0.25, "learning_rate": 9.347592682259323e-06, "loss": 1.224, "step": 4070 }, { "epoch": 0.25, "learning_rate": 9.345487463421824e-06, "loss": 1.206, "step": 4080 }, { "epoch": 0.25, "learning_rate": 9.343382244584326e-06, "loss": 1.2225, "step": 4090 }, { "epoch": 0.25, "learning_rate": 9.341277025746827e-06, "loss": 1.2189, "step": 4100 }, { "epoch": 0.25, "learning_rate": 9.339171806909328e-06, "loss": 1.25, "step": 4110 }, { "epoch": 0.25, "learning_rate": 9.33706658807183e-06, "loss": 1.251, "step": 4120 }, { "epoch": 0.26, "learning_rate": 9.334961369234333e-06, "loss": 1.2048, "step": 4130 }, { "epoch": 0.26, "learning_rate": 9.332856150396834e-06, "loss": 1.2369, "step": 4140 }, { "epoch": 0.26, "learning_rate": 9.330750931559336e-06, "loss": 1.2427, "step": 4150 }, { "epoch": 0.26, "learning_rate": 9.328645712721837e-06, "loss": 1.2873, "step": 4160 }, { "epoch": 0.26, "learning_rate": 9.32654049388434e-06, "loss": 1.1579, "step": 4170 }, { "epoch": 0.26, "learning_rate": 9.324435275046842e-06, "loss": 1.2025, "step": 4180 }, { "epoch": 0.26, "learning_rate": 9.322330056209343e-06, "loss": 1.209, "step": 4190 }, { "epoch": 0.26, "learning_rate": 9.320224837371845e-06, "loss": 1.2015, "step": 4200 }, { "epoch": 0.26, "learning_rate": 9.318119618534348e-06, "loss": 1.2509, "step": 4210 }, { "epoch": 0.26, "learning_rate": 9.316014399696849e-06, "loss": 1.2696, "step": 4220 }, { "epoch": 0.26, "learning_rate": 9.313909180859351e-06, "loss": 1.2281, "step": 4230 }, { "epoch": 0.26, "learning_rate": 9.311803962021852e-06, "loss": 1.2089, "step": 4240 }, { "epoch": 0.26, "learning_rate": 9.309698743184354e-06, "loss": 1.2831, "step": 4250 }, { "epoch": 0.26, "learning_rate": 9.307593524346857e-06, "loss": 1.2757, "step": 4260 }, { "epoch": 0.26, "learning_rate": 9.305488305509358e-06, "loss": 1.2267, "step": 4270 }, { "epoch": 0.26, "learning_rate": 9.30338308667186e-06, "loss": 1.1837, "step": 4280 }, { "epoch": 0.27, "learning_rate": 9.301277867834361e-06, "loss": 1.2245, "step": 4290 }, { "epoch": 0.27, "learning_rate": 9.299172648996863e-06, "loss": 1.157, "step": 4300 }, { "epoch": 0.27, "learning_rate": 9.297067430159366e-06, "loss": 1.215, "step": 4310 }, { "epoch": 0.27, "learning_rate": 9.294962211321867e-06, "loss": 1.2421, "step": 4320 }, { "epoch": 0.27, "learning_rate": 9.29285699248437e-06, "loss": 1.2581, "step": 4330 }, { "epoch": 0.27, "learning_rate": 9.290751773646872e-06, "loss": 1.1966, "step": 4340 }, { "epoch": 0.27, "learning_rate": 9.288646554809373e-06, "loss": 1.2494, "step": 4350 }, { "epoch": 0.27, "learning_rate": 9.286541335971875e-06, "loss": 1.1633, "step": 4360 }, { "epoch": 0.27, "learning_rate": 9.284436117134376e-06, "loss": 1.2258, "step": 4370 }, { "epoch": 0.27, "learning_rate": 9.282330898296878e-06, "loss": 1.2703, "step": 4380 }, { "epoch": 0.27, "learning_rate": 9.280225679459381e-06, "loss": 1.1973, "step": 4390 }, { "epoch": 0.27, "learning_rate": 9.278120460621882e-06, "loss": 1.2614, "step": 4400 }, { "epoch": 0.27, "learning_rate": 9.276015241784384e-06, "loss": 1.243, "step": 4410 }, { "epoch": 0.27, "learning_rate": 9.273910022946887e-06, "loss": 1.2473, "step": 4420 }, { "epoch": 0.27, "learning_rate": 9.271804804109387e-06, "loss": 1.2269, "step": 4430 }, { "epoch": 0.27, "learning_rate": 9.26969958527189e-06, "loss": 1.2466, "step": 4440 }, { "epoch": 0.28, "learning_rate": 9.26759436643439e-06, "loss": 1.2362, "step": 4450 }, { "epoch": 0.28, "learning_rate": 9.265489147596893e-06, "loss": 1.2277, "step": 4460 }, { "epoch": 0.28, "learning_rate": 9.263383928759396e-06, "loss": 1.1939, "step": 4470 }, { "epoch": 0.28, "learning_rate": 9.261278709921897e-06, "loss": 1.2013, "step": 4480 }, { "epoch": 0.28, "learning_rate": 9.259173491084399e-06, "loss": 1.2057, "step": 4490 }, { "epoch": 0.28, "learning_rate": 9.257068272246902e-06, "loss": 1.2276, "step": 4500 }, { "epoch": 0.28, "learning_rate": 9.254963053409402e-06, "loss": 1.2029, "step": 4510 }, { "epoch": 0.28, "learning_rate": 9.252857834571905e-06, "loss": 1.2285, "step": 4520 }, { "epoch": 0.28, "learning_rate": 9.250752615734406e-06, "loss": 1.2078, "step": 4530 }, { "epoch": 0.28, "learning_rate": 9.248647396896908e-06, "loss": 1.2317, "step": 4540 }, { "epoch": 0.28, "learning_rate": 9.24654217805941e-06, "loss": 1.2266, "step": 4550 }, { "epoch": 0.28, "learning_rate": 9.244436959221911e-06, "loss": 1.212, "step": 4560 }, { "epoch": 0.28, "learning_rate": 9.242331740384414e-06, "loss": 1.1849, "step": 4570 }, { "epoch": 0.28, "learning_rate": 9.240226521546915e-06, "loss": 1.2238, "step": 4580 }, { "epoch": 0.28, "learning_rate": 9.238121302709417e-06, "loss": 1.221, "step": 4590 }, { "epoch": 0.28, "learning_rate": 9.23601608387192e-06, "loss": 1.2356, "step": 4600 }, { "epoch": 0.29, "learning_rate": 9.23391086503442e-06, "loss": 1.2287, "step": 4610 }, { "epoch": 0.29, "learning_rate": 9.231805646196923e-06, "loss": 1.2226, "step": 4620 }, { "epoch": 0.29, "learning_rate": 9.229700427359426e-06, "loss": 1.159, "step": 4630 }, { "epoch": 0.29, "learning_rate": 9.227595208521926e-06, "loss": 1.2239, "step": 4640 }, { "epoch": 0.29, "learning_rate": 9.225489989684429e-06, "loss": 1.2547, "step": 4650 }, { "epoch": 0.29, "learning_rate": 9.22338477084693e-06, "loss": 1.1689, "step": 4660 }, { "epoch": 0.29, "learning_rate": 9.221279552009432e-06, "loss": 1.1546, "step": 4670 }, { "epoch": 0.29, "learning_rate": 9.219174333171935e-06, "loss": 1.2197, "step": 4680 }, { "epoch": 0.29, "learning_rate": 9.217069114334436e-06, "loss": 1.1826, "step": 4690 }, { "epoch": 0.29, "learning_rate": 9.214963895496938e-06, "loss": 1.2543, "step": 4700 }, { "epoch": 0.29, "learning_rate": 9.21285867665944e-06, "loss": 1.1747, "step": 4710 }, { "epoch": 0.29, "learning_rate": 9.210753457821941e-06, "loss": 1.2486, "step": 4720 }, { "epoch": 0.29, "learning_rate": 9.208648238984444e-06, "loss": 1.2506, "step": 4730 }, { "epoch": 0.29, "learning_rate": 9.206543020146945e-06, "loss": 1.2257, "step": 4740 }, { "epoch": 0.29, "learning_rate": 9.204437801309447e-06, "loss": 1.183, "step": 4750 }, { "epoch": 0.29, "learning_rate": 9.20233258247195e-06, "loss": 1.2092, "step": 4760 }, { "epoch": 0.3, "learning_rate": 9.20022736363445e-06, "loss": 1.1907, "step": 4770 }, { "epoch": 0.3, "learning_rate": 9.198122144796953e-06, "loss": 1.1898, "step": 4780 }, { "epoch": 0.3, "learning_rate": 9.196016925959454e-06, "loss": 1.1834, "step": 4790 }, { "epoch": 0.3, "learning_rate": 9.193911707121956e-06, "loss": 1.2316, "step": 4800 }, { "epoch": 0.3, "learning_rate": 9.191806488284459e-06, "loss": 1.2018, "step": 4810 }, { "epoch": 0.3, "learning_rate": 9.18970126944696e-06, "loss": 1.2066, "step": 4820 }, { "epoch": 0.3, "learning_rate": 9.187596050609462e-06, "loss": 1.182, "step": 4830 }, { "epoch": 0.3, "learning_rate": 9.185490831771965e-06, "loss": 1.2089, "step": 4840 }, { "epoch": 0.3, "learning_rate": 9.183385612934465e-06, "loss": 1.1919, "step": 4850 }, { "epoch": 0.3, "learning_rate": 9.181280394096968e-06, "loss": 1.2734, "step": 4860 }, { "epoch": 0.3, "learning_rate": 9.179175175259469e-06, "loss": 1.25, "step": 4870 }, { "epoch": 0.3, "learning_rate": 9.177069956421971e-06, "loss": 1.213, "step": 4880 }, { "epoch": 0.3, "learning_rate": 9.174964737584474e-06, "loss": 1.2126, "step": 4890 }, { "epoch": 0.3, "learning_rate": 9.172859518746974e-06, "loss": 1.203, "step": 4900 }, { "epoch": 0.3, "learning_rate": 9.170754299909477e-06, "loss": 1.2718, "step": 4910 }, { "epoch": 0.3, "learning_rate": 9.16864908107198e-06, "loss": 1.1845, "step": 4920 }, { "epoch": 0.31, "learning_rate": 9.16654386223448e-06, "loss": 1.1991, "step": 4930 }, { "epoch": 0.31, "learning_rate": 9.164438643396983e-06, "loss": 1.2049, "step": 4940 }, { "epoch": 0.31, "learning_rate": 9.162333424559484e-06, "loss": 1.2345, "step": 4950 }, { "epoch": 0.31, "learning_rate": 9.160228205721986e-06, "loss": 1.2284, "step": 4960 }, { "epoch": 0.31, "learning_rate": 9.158122986884489e-06, "loss": 1.2253, "step": 4970 }, { "epoch": 0.31, "learning_rate": 9.15601776804699e-06, "loss": 1.2038, "step": 4980 }, { "epoch": 0.31, "learning_rate": 9.153912549209492e-06, "loss": 1.2353, "step": 4990 }, { "epoch": 0.31, "learning_rate": 9.151807330371993e-06, "loss": 1.2029, "step": 5000 }, { "epoch": 0.31, "learning_rate": 9.149702111534495e-06, "loss": 1.1451, "step": 5010 }, { "epoch": 0.31, "learning_rate": 9.147596892696998e-06, "loss": 1.186, "step": 5020 }, { "epoch": 0.31, "learning_rate": 9.145491673859498e-06, "loss": 1.2406, "step": 5030 }, { "epoch": 0.31, "learning_rate": 9.143386455022e-06, "loss": 1.1957, "step": 5040 }, { "epoch": 0.31, "learning_rate": 9.141281236184502e-06, "loss": 1.19, "step": 5050 }, { "epoch": 0.31, "learning_rate": 9.139176017347004e-06, "loss": 1.2007, "step": 5060 }, { "epoch": 0.31, "learning_rate": 9.137070798509505e-06, "loss": 1.2259, "step": 5070 }, { "epoch": 0.31, "learning_rate": 9.134965579672008e-06, "loss": 1.2204, "step": 5080 }, { "epoch": 0.32, "learning_rate": 9.132860360834508e-06, "loss": 1.2021, "step": 5090 }, { "epoch": 0.32, "learning_rate": 9.13075514199701e-06, "loss": 1.2208, "step": 5100 }, { "epoch": 0.32, "learning_rate": 9.128649923159513e-06, "loss": 1.1702, "step": 5110 }, { "epoch": 0.32, "learning_rate": 9.126544704322014e-06, "loss": 1.2513, "step": 5120 }, { "epoch": 0.32, "learning_rate": 9.124439485484517e-06, "loss": 1.1855, "step": 5130 }, { "epoch": 0.32, "learning_rate": 9.122334266647017e-06, "loss": 1.1868, "step": 5140 }, { "epoch": 0.32, "learning_rate": 9.12022904780952e-06, "loss": 1.1861, "step": 5150 }, { "epoch": 0.32, "learning_rate": 9.118123828972022e-06, "loss": 1.184, "step": 5160 }, { "epoch": 0.32, "learning_rate": 9.116018610134523e-06, "loss": 1.2227, "step": 5170 }, { "epoch": 0.32, "learning_rate": 9.113913391297026e-06, "loss": 1.1739, "step": 5180 }, { "epoch": 0.32, "learning_rate": 9.111808172459528e-06, "loss": 1.1705, "step": 5190 }, { "epoch": 0.32, "learning_rate": 9.109702953622029e-06, "loss": 1.1681, "step": 5200 }, { "epoch": 0.32, "learning_rate": 9.107597734784532e-06, "loss": 1.2116, "step": 5210 }, { "epoch": 0.32, "learning_rate": 9.105492515947032e-06, "loss": 1.2158, "step": 5220 }, { "epoch": 0.32, "learning_rate": 9.103387297109535e-06, "loss": 1.1452, "step": 5230 }, { "epoch": 0.32, "learning_rate": 9.101282078272037e-06, "loss": 1.1695, "step": 5240 }, { "epoch": 0.32, "learning_rate": 9.099176859434538e-06, "loss": 1.2099, "step": 5250 }, { "epoch": 0.33, "learning_rate": 9.09707164059704e-06, "loss": 1.2359, "step": 5260 }, { "epoch": 0.33, "learning_rate": 9.094966421759543e-06, "loss": 1.207, "step": 5270 }, { "epoch": 0.33, "learning_rate": 9.092861202922044e-06, "loss": 1.2119, "step": 5280 }, { "epoch": 0.33, "learning_rate": 9.090755984084546e-06, "loss": 1.1408, "step": 5290 }, { "epoch": 0.33, "learning_rate": 9.088650765247047e-06, "loss": 1.1842, "step": 5300 }, { "epoch": 0.33, "learning_rate": 9.08654554640955e-06, "loss": 1.1786, "step": 5310 }, { "epoch": 0.33, "learning_rate": 9.084440327572052e-06, "loss": 1.2016, "step": 5320 }, { "epoch": 0.33, "learning_rate": 9.082335108734553e-06, "loss": 1.2011, "step": 5330 }, { "epoch": 0.33, "learning_rate": 9.080229889897056e-06, "loss": 1.235, "step": 5340 }, { "epoch": 0.33, "learning_rate": 9.078124671059556e-06, "loss": 1.2116, "step": 5350 }, { "epoch": 0.33, "learning_rate": 9.076019452222059e-06, "loss": 1.2083, "step": 5360 }, { "epoch": 0.33, "learning_rate": 9.073914233384561e-06, "loss": 1.2152, "step": 5370 }, { "epoch": 0.33, "learning_rate": 9.071809014547062e-06, "loss": 1.239, "step": 5380 }, { "epoch": 0.33, "learning_rate": 9.069703795709565e-06, "loss": 1.1989, "step": 5390 }, { "epoch": 0.33, "learning_rate": 9.067598576872067e-06, "loss": 1.2193, "step": 5400 }, { "epoch": 0.33, "learning_rate": 9.065493358034568e-06, "loss": 1.2051, "step": 5410 }, { "epoch": 0.34, "learning_rate": 9.06338813919707e-06, "loss": 1.1981, "step": 5420 }, { "epoch": 0.34, "learning_rate": 9.061282920359571e-06, "loss": 1.2218, "step": 5430 }, { "epoch": 0.34, "learning_rate": 9.059177701522074e-06, "loss": 1.2669, "step": 5440 }, { "epoch": 0.34, "learning_rate": 9.057072482684576e-06, "loss": 1.2127, "step": 5450 }, { "epoch": 0.34, "learning_rate": 9.054967263847077e-06, "loss": 1.1538, "step": 5460 }, { "epoch": 0.34, "learning_rate": 9.05286204500958e-06, "loss": 1.1664, "step": 5470 }, { "epoch": 0.34, "learning_rate": 9.050756826172082e-06, "loss": 1.1976, "step": 5480 }, { "epoch": 0.34, "learning_rate": 9.048651607334583e-06, "loss": 1.2223, "step": 5490 }, { "epoch": 0.34, "learning_rate": 9.046546388497085e-06, "loss": 1.201, "step": 5500 }, { "epoch": 0.34, "learning_rate": 9.044441169659586e-06, "loss": 1.2372, "step": 5510 }, { "epoch": 0.34, "learning_rate": 9.042335950822089e-06, "loss": 1.1758, "step": 5520 }, { "epoch": 0.34, "learning_rate": 9.040230731984591e-06, "loss": 1.1793, "step": 5530 }, { "epoch": 0.34, "learning_rate": 9.038125513147092e-06, "loss": 1.1706, "step": 5540 }, { "epoch": 0.34, "learning_rate": 9.036020294309594e-06, "loss": 1.2267, "step": 5550 }, { "epoch": 0.34, "learning_rate": 9.033915075472097e-06, "loss": 1.218, "step": 5560 }, { "epoch": 0.34, "learning_rate": 9.031809856634598e-06, "loss": 1.1789, "step": 5570 }, { "epoch": 0.35, "learning_rate": 9.0297046377971e-06, "loss": 1.2317, "step": 5580 }, { "epoch": 0.35, "learning_rate": 9.027599418959601e-06, "loss": 1.23, "step": 5590 }, { "epoch": 0.35, "learning_rate": 9.025494200122104e-06, "loss": 1.2058, "step": 5600 }, { "epoch": 0.35, "learning_rate": 9.023388981284606e-06, "loss": 1.276, "step": 5610 }, { "epoch": 0.35, "learning_rate": 9.021283762447107e-06, "loss": 1.1758, "step": 5620 }, { "epoch": 0.35, "learning_rate": 9.01917854360961e-06, "loss": 1.182, "step": 5630 }, { "epoch": 0.35, "learning_rate": 9.01707332477211e-06, "loss": 1.2027, "step": 5640 }, { "epoch": 0.35, "learning_rate": 9.014968105934613e-06, "loss": 1.2442, "step": 5650 }, { "epoch": 0.35, "learning_rate": 9.012862887097115e-06, "loss": 1.1509, "step": 5660 }, { "epoch": 0.35, "learning_rate": 9.010757668259616e-06, "loss": 1.2369, "step": 5670 }, { "epoch": 0.35, "learning_rate": 9.008652449422118e-06, "loss": 1.225, "step": 5680 }, { "epoch": 0.35, "learning_rate": 9.006547230584621e-06, "loss": 1.2575, "step": 5690 }, { "epoch": 0.35, "learning_rate": 9.004442011747122e-06, "loss": 1.1801, "step": 5700 }, { "epoch": 0.35, "learning_rate": 9.002336792909624e-06, "loss": 1.1817, "step": 5710 }, { "epoch": 0.35, "learning_rate": 9.000231574072125e-06, "loss": 1.2392, "step": 5720 }, { "epoch": 0.35, "learning_rate": 8.998126355234628e-06, "loss": 1.1718, "step": 5730 }, { "epoch": 0.36, "learning_rate": 8.99602113639713e-06, "loss": 1.2155, "step": 5740 }, { "epoch": 0.36, "learning_rate": 8.993915917559631e-06, "loss": 1.1976, "step": 5750 }, { "epoch": 0.36, "learning_rate": 8.991810698722133e-06, "loss": 1.1715, "step": 5760 }, { "epoch": 0.36, "learning_rate": 8.989705479884636e-06, "loss": 1.1555, "step": 5770 }, { "epoch": 0.36, "learning_rate": 8.987600261047137e-06, "loss": 1.2071, "step": 5780 }, { "epoch": 0.36, "learning_rate": 8.98549504220964e-06, "loss": 1.2062, "step": 5790 }, { "epoch": 0.36, "learning_rate": 8.98338982337214e-06, "loss": 1.1978, "step": 5800 }, { "epoch": 0.36, "learning_rate": 8.981284604534642e-06, "loss": 1.2125, "step": 5810 }, { "epoch": 0.36, "learning_rate": 8.979179385697145e-06, "loss": 1.1887, "step": 5820 }, { "epoch": 0.36, "learning_rate": 8.977074166859646e-06, "loss": 1.2384, "step": 5830 }, { "epoch": 0.36, "learning_rate": 8.974968948022148e-06, "loss": 1.2708, "step": 5840 }, { "epoch": 0.36, "learning_rate": 8.972863729184649e-06, "loss": 1.1985, "step": 5850 }, { "epoch": 0.36, "learning_rate": 8.970758510347152e-06, "loss": 1.2202, "step": 5860 }, { "epoch": 0.36, "learning_rate": 8.968653291509654e-06, "loss": 1.2281, "step": 5870 }, { "epoch": 0.36, "learning_rate": 8.966548072672155e-06, "loss": 1.2158, "step": 5880 }, { "epoch": 0.36, "learning_rate": 8.964442853834657e-06, "loss": 1.1912, "step": 5890 }, { "epoch": 0.37, "learning_rate": 8.96233763499716e-06, "loss": 1.1822, "step": 5900 }, { "epoch": 0.37, "learning_rate": 8.96023241615966e-06, "loss": 1.2181, "step": 5910 }, { "epoch": 0.37, "learning_rate": 8.958127197322163e-06, "loss": 1.1901, "step": 5920 }, { "epoch": 0.37, "learning_rate": 8.956021978484664e-06, "loss": 1.1742, "step": 5930 }, { "epoch": 0.37, "learning_rate": 8.953916759647166e-06, "loss": 1.1675, "step": 5940 }, { "epoch": 0.37, "learning_rate": 8.951811540809669e-06, "loss": 1.249, "step": 5950 }, { "epoch": 0.37, "learning_rate": 8.94970632197217e-06, "loss": 1.1683, "step": 5960 }, { "epoch": 0.37, "learning_rate": 8.947601103134672e-06, "loss": 1.1558, "step": 5970 }, { "epoch": 0.37, "learning_rate": 8.945495884297175e-06, "loss": 1.1685, "step": 5980 }, { "epoch": 0.37, "learning_rate": 8.943390665459674e-06, "loss": 1.1734, "step": 5990 }, { "epoch": 0.37, "learning_rate": 8.941285446622176e-06, "loss": 1.1478, "step": 6000 } ], "max_steps": 48471, "num_train_epochs": 3, "total_flos": 3.0400555051175117e+19, "trial_name": null, "trial_params": null }