{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.2000214041095891, "eval_steps": 500, "global_step": 22426, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 0.0001498796040310354, "loss": 1.2625, "step": 20 }, { "epoch": 0.0, "learning_rate": 0.00014974583073218585, "loss": 1.2637, "step": 40 }, { "epoch": 0.0, "learning_rate": 0.0001496120574333363, "loss": 1.2824, "step": 60 }, { "epoch": 0.0, "learning_rate": 0.00014947828413448673, "loss": 1.3157, "step": 80 }, { "epoch": 0.01, "learning_rate": 0.00014935119950057967, "loss": 1.2988, "step": 100 }, { "epoch": 0.01, "learning_rate": 0.00014921742620173012, "loss": 1.2871, "step": 120 }, { "epoch": 0.01, "learning_rate": 0.00014908365290288057, "loss": 1.2886, "step": 140 }, { "epoch": 0.01, "learning_rate": 0.00014894987960403102, "loss": 1.2471, "step": 160 }, { "epoch": 0.01, "learning_rate": 0.00014881610630518147, "loss": 1.3077, "step": 180 }, { "epoch": 0.01, "learning_rate": 0.00014868233300633193, "loss": 1.2709, "step": 200 }, { "epoch": 0.01, "learning_rate": 0.00014854855970748238, "loss": 1.2056, "step": 220 }, { "epoch": 0.01, "learning_rate": 0.00014841478640863283, "loss": 1.2455, "step": 240 }, { "epoch": 0.01, "learning_rate": 0.00014828101310978328, "loss": 1.2848, "step": 260 }, { "epoch": 0.01, "learning_rate": 0.00014814723981093373, "loss": 1.306, "step": 280 }, { "epoch": 0.02, "learning_rate": 0.00014801346651208418, "loss": 1.2115, "step": 300 }, { "epoch": 0.02, "learning_rate": 0.00014787969321323463, "loss": 1.3216, "step": 320 }, { "epoch": 0.02, "learning_rate": 0.00014774591991438508, "loss": 1.2745, "step": 340 }, { "epoch": 0.02, "learning_rate": 0.00014761214661553553, "loss": 1.3096, "step": 360 }, { "epoch": 0.02, "learning_rate": 0.00014747837331668598, "loss": 1.2596, "step": 380 }, { "epoch": 0.02, "learning_rate": 0.00014734460001783643, "loss": 1.2654, "step": 400 }, { "epoch": 0.02, "learning_rate": 0.00014721082671898688, "loss": 1.2926, "step": 420 }, { "epoch": 0.02, "learning_rate": 0.0001470770534201373, "loss": 1.2775, "step": 440 }, { "epoch": 0.02, "learning_rate": 0.00014694328012128778, "loss": 1.2677, "step": 460 }, { "epoch": 0.03, "learning_rate": 0.00014680950682243824, "loss": 1.2212, "step": 480 }, { "epoch": 0.03, "learning_rate": 0.00014667573352358869, "loss": 1.2541, "step": 500 }, { "epoch": 0.03, "learning_rate": 0.00014654196022473914, "loss": 1.2662, "step": 520 }, { "epoch": 0.03, "learning_rate": 0.0001464081869258896, "loss": 1.2629, "step": 540 }, { "epoch": 0.03, "learning_rate": 0.00014627441362704004, "loss": 1.265, "step": 560 }, { "epoch": 0.03, "learning_rate": 0.0001461406403281905, "loss": 1.3136, "step": 580 }, { "epoch": 0.03, "learning_rate": 0.0001460068670293409, "loss": 1.2735, "step": 600 }, { "epoch": 0.03, "learning_rate": 0.0001458730937304914, "loss": 1.2982, "step": 620 }, { "epoch": 0.03, "learning_rate": 0.00014573932043164184, "loss": 1.2514, "step": 640 }, { "epoch": 0.04, "learning_rate": 0.00014560554713279226, "loss": 1.2414, "step": 660 }, { "epoch": 0.04, "learning_rate": 0.00014547177383394274, "loss": 1.3266, "step": 680 }, { "epoch": 0.04, "learning_rate": 0.0001453380005350932, "loss": 1.3038, "step": 700 }, { "epoch": 0.04, "learning_rate": 0.00014520422723624364, "loss": 1.2789, "step": 720 }, { "epoch": 0.04, "learning_rate": 0.0001450704539373941, "loss": 1.1716, "step": 740 }, { "epoch": 0.04, "learning_rate": 0.00014493668063854452, "loss": 1.3485, "step": 760 }, { "epoch": 0.04, "learning_rate": 0.000144802907339695, "loss": 1.3455, "step": 780 }, { "epoch": 0.04, "learning_rate": 0.00014466913404084545, "loss": 1.2625, "step": 800 }, { "epoch": 0.04, "learning_rate": 0.00014453536074199587, "loss": 1.3343, "step": 820 }, { "epoch": 0.04, "learning_rate": 0.00014440158744314635, "loss": 1.2302, "step": 840 }, { "epoch": 0.05, "learning_rate": 0.00014426781414429677, "loss": 1.3047, "step": 860 }, { "epoch": 0.05, "learning_rate": 0.00014413404084544722, "loss": 1.3057, "step": 880 }, { "epoch": 0.05, "learning_rate": 0.0001440002675465977, "loss": 1.2456, "step": 900 }, { "epoch": 0.05, "learning_rate": 0.00014386649424774812, "loss": 1.3016, "step": 920 }, { "epoch": 0.05, "learning_rate": 0.0001437327209488986, "loss": 1.2667, "step": 940 }, { "epoch": 0.05, "learning_rate": 0.00014359894765004905, "loss": 1.233, "step": 960 }, { "epoch": 0.05, "learning_rate": 0.00014346517435119948, "loss": 1.2787, "step": 980 }, { "epoch": 0.05, "learning_rate": 0.00014333140105234995, "loss": 1.3219, "step": 1000 }, { "epoch": 0.05, "learning_rate": 0.00014319762775350038, "loss": 1.3008, "step": 1020 }, { "epoch": 0.06, "learning_rate": 0.00014306385445465083, "loss": 1.2843, "step": 1040 }, { "epoch": 0.06, "learning_rate": 0.0001429300811558013, "loss": 1.361, "step": 1060 }, { "epoch": 0.06, "learning_rate": 0.00014279630785695173, "loss": 1.3107, "step": 1080 }, { "epoch": 0.06, "learning_rate": 0.00014266253455810218, "loss": 1.3061, "step": 1100 }, { "epoch": 0.06, "learning_rate": 0.00014252876125925263, "loss": 1.3589, "step": 1120 }, { "epoch": 0.06, "learning_rate": 0.00014239498796040308, "loss": 1.3262, "step": 1140 }, { "epoch": 0.06, "learning_rate": 0.00014226121466155356, "loss": 1.2855, "step": 1160 }, { "epoch": 0.06, "learning_rate": 0.00014212744136270398, "loss": 1.2904, "step": 1180 }, { "epoch": 0.06, "learning_rate": 0.00014199366806385443, "loss": 1.2428, "step": 1200 }, { "epoch": 0.07, "learning_rate": 0.0001418598947650049, "loss": 1.2766, "step": 1220 }, { "epoch": 0.07, "learning_rate": 0.00014172612146615534, "loss": 1.3265, "step": 1240 }, { "epoch": 0.07, "learning_rate": 0.00014159234816730579, "loss": 1.2227, "step": 1260 }, { "epoch": 0.07, "learning_rate": 0.00014145857486845624, "loss": 1.2326, "step": 1280 }, { "epoch": 0.07, "learning_rate": 0.0001413248015696067, "loss": 1.1939, "step": 1300 }, { "epoch": 0.07, "learning_rate": 0.00014119102827075714, "loss": 1.3674, "step": 1320 }, { "epoch": 0.07, "learning_rate": 0.0001410572549719076, "loss": 1.2618, "step": 1340 }, { "epoch": 0.07, "learning_rate": 0.00014092348167305804, "loss": 1.2437, "step": 1360 }, { "epoch": 0.07, "learning_rate": 0.0001407897083742085, "loss": 1.2144, "step": 1380 }, { "epoch": 0.07, "learning_rate": 0.00014065593507535894, "loss": 1.3589, "step": 1400 }, { "epoch": 0.08, "learning_rate": 0.0001405221617765094, "loss": 1.2563, "step": 1420 }, { "epoch": 0.08, "learning_rate": 0.00014038838847765984, "loss": 1.2728, "step": 1440 }, { "epoch": 0.08, "learning_rate": 0.0001402546151788103, "loss": 1.3046, "step": 1460 }, { "epoch": 0.08, "learning_rate": 0.00014012084187996074, "loss": 1.2689, "step": 1480 }, { "epoch": 0.08, "learning_rate": 0.0001399870685811112, "loss": 1.2448, "step": 1500 }, { "epoch": 0.08, "learning_rate": 0.00013985329528226165, "loss": 1.2145, "step": 1520 }, { "epoch": 0.08, "learning_rate": 0.0001397195219834121, "loss": 1.2388, "step": 1540 }, { "epoch": 0.08, "learning_rate": 0.00013958574868456255, "loss": 1.2154, "step": 1560 }, { "epoch": 0.08, "learning_rate": 0.000139451975385713, "loss": 1.2406, "step": 1580 }, { "epoch": 0.09, "learning_rate": 0.00013931820208686345, "loss": 1.2484, "step": 1600 }, { "epoch": 0.09, "learning_rate": 0.0001391844287880139, "loss": 1.2388, "step": 1620 }, { "epoch": 0.09, "learning_rate": 0.00013905065548916435, "loss": 1.3034, "step": 1640 }, { "epoch": 0.09, "learning_rate": 0.0001389168821903148, "loss": 1.3128, "step": 1660 }, { "epoch": 0.09, "learning_rate": 0.00013878310889146525, "loss": 1.3074, "step": 1680 }, { "epoch": 0.09, "learning_rate": 0.0001386493355926157, "loss": 1.3044, "step": 1700 }, { "epoch": 0.09, "learning_rate": 0.00013851556229376615, "loss": 1.2358, "step": 1720 }, { "epoch": 0.09, "learning_rate": 0.0001383817889949166, "loss": 1.3831, "step": 1740 }, { "epoch": 0.09, "learning_rate": 0.00013824801569606705, "loss": 1.2184, "step": 1760 }, { "epoch": 0.1, "learning_rate": 0.0001381142423972175, "loss": 1.1809, "step": 1780 }, { "epoch": 0.1, "learning_rate": 0.00013798046909836796, "loss": 1.3266, "step": 1800 }, { "epoch": 0.1, "learning_rate": 0.0001378466957995184, "loss": 1.2332, "step": 1820 }, { "epoch": 0.1, "learning_rate": 0.00013771292250066886, "loss": 1.2197, "step": 1840 }, { "epoch": 0.1, "learning_rate": 0.00013758583786676178, "loss": 1.2727, "step": 1860 }, { "epoch": 0.1, "learning_rate": 0.00013745206456791223, "loss": 1.1916, "step": 1880 }, { "epoch": 0.1, "learning_rate": 0.0001373182912690627, "loss": 1.2141, "step": 1900 }, { "epoch": 0.1, "learning_rate": 0.00013718451797021313, "loss": 1.2517, "step": 1920 }, { "epoch": 0.1, "learning_rate": 0.00013705074467136358, "loss": 1.2393, "step": 1940 }, { "epoch": 0.1, "learning_rate": 0.00013691697137251403, "loss": 1.2482, "step": 1960 }, { "epoch": 0.11, "learning_rate": 0.00013678319807366448, "loss": 1.344, "step": 1980 }, { "epoch": 0.11, "learning_rate": 0.00013664942477481493, "loss": 1.2912, "step": 2000 }, { "epoch": 0.11, "learning_rate": 0.00013652234014090785, "loss": 1.2545, "step": 2020 }, { "epoch": 0.11, "learning_rate": 0.00013638856684205833, "loss": 1.3032, "step": 2040 }, { "epoch": 0.11, "learning_rate": 0.00013625479354320875, "loss": 1.3115, "step": 2060 }, { "epoch": 0.11, "learning_rate": 0.0001361210202443592, "loss": 1.2388, "step": 2080 }, { "epoch": 0.11, "learning_rate": 0.00013598724694550968, "loss": 1.2518, "step": 2100 }, { "epoch": 0.11, "learning_rate": 0.0001358534736466601, "loss": 1.2791, "step": 2120 }, { "epoch": 0.11, "learning_rate": 0.00013571970034781058, "loss": 1.3101, "step": 2140 }, { "epoch": 0.12, "learning_rate": 0.000135585927048961, "loss": 1.2393, "step": 2160 }, { "epoch": 0.12, "learning_rate": 0.00013545215375011145, "loss": 1.2455, "step": 2180 }, { "epoch": 0.12, "learning_rate": 0.00013531838045126193, "loss": 1.2799, "step": 2200 }, { "epoch": 0.12, "learning_rate": 0.00013518460715241236, "loss": 1.1852, "step": 2220 }, { "epoch": 0.12, "learning_rate": 0.0001350508338535628, "loss": 1.2493, "step": 2240 }, { "epoch": 0.12, "learning_rate": 0.00013491706055471328, "loss": 1.2487, "step": 2260 }, { "epoch": 0.12, "learning_rate": 0.0001347832872558637, "loss": 1.2668, "step": 2280 }, { "epoch": 0.12, "learning_rate": 0.00013464951395701416, "loss": 1.2786, "step": 2300 }, { "epoch": 0.12, "learning_rate": 0.0001345157406581646, "loss": 1.2678, "step": 2320 }, { "epoch": 0.13, "learning_rate": 0.00013438196735931506, "loss": 1.2539, "step": 2340 }, { "epoch": 0.13, "learning_rate": 0.0001342481940604655, "loss": 1.2445, "step": 2360 }, { "epoch": 0.13, "learning_rate": 0.00013411442076161596, "loss": 1.2144, "step": 2380 }, { "epoch": 0.13, "learning_rate": 0.0001339806474627664, "loss": 1.1921, "step": 2400 }, { "epoch": 0.13, "learning_rate": 0.00013385356282885933, "loss": 1.2193, "step": 2420 }, { "epoch": 0.13, "learning_rate": 0.0001337197895300098, "loss": 1.2188, "step": 2440 }, { "epoch": 0.13, "learning_rate": 0.00013358601623116026, "loss": 1.2526, "step": 2460 }, { "epoch": 0.13, "learning_rate": 0.00013345224293231068, "loss": 1.1961, "step": 2480 }, { "epoch": 0.13, "learning_rate": 0.00013331846963346116, "loss": 1.2479, "step": 2500 }, { "epoch": 0.13, "learning_rate": 0.0001331846963346116, "loss": 1.1856, "step": 2520 }, { "epoch": 0.14, "learning_rate": 0.00013305092303576204, "loss": 1.2656, "step": 2540 }, { "epoch": 0.14, "learning_rate": 0.00013291714973691251, "loss": 1.3054, "step": 2560 }, { "epoch": 0.14, "learning_rate": 0.00013278337643806294, "loss": 1.3605, "step": 2580 }, { "epoch": 0.14, "learning_rate": 0.0001326496031392134, "loss": 1.2771, "step": 2600 }, { "epoch": 0.14, "learning_rate": 0.00013251582984036387, "loss": 1.276, "step": 2620 }, { "epoch": 0.14, "learning_rate": 0.0001323820565415143, "loss": 1.2967, "step": 2640 }, { "epoch": 0.14, "learning_rate": 0.00013224828324266477, "loss": 1.1732, "step": 2660 }, { "epoch": 0.14, "learning_rate": 0.00013211450994381522, "loss": 1.3124, "step": 2680 }, { "epoch": 0.14, "learning_rate": 0.00013198073664496564, "loss": 1.3672, "step": 2700 }, { "epoch": 0.15, "learning_rate": 0.00013184696334611612, "loss": 1.2497, "step": 2720 }, { "epoch": 0.15, "learning_rate": 0.00013171319004726654, "loss": 1.298, "step": 2740 }, { "epoch": 0.15, "learning_rate": 0.000131579416748417, "loss": 1.2449, "step": 2760 }, { "epoch": 0.15, "learning_rate": 0.00013144564344956747, "loss": 1.3402, "step": 2780 }, { "epoch": 0.15, "learning_rate": 0.0001313118701507179, "loss": 1.289, "step": 2800 }, { "epoch": 0.15, "learning_rate": 0.00013117809685186835, "loss": 1.3015, "step": 2820 }, { "epoch": 0.15, "learning_rate": 0.0001310443235530188, "loss": 1.288, "step": 2840 }, { "epoch": 0.15, "learning_rate": 0.00013091055025416925, "loss": 1.2717, "step": 2860 }, { "epoch": 0.15, "learning_rate": 0.00013077677695531973, "loss": 1.2579, "step": 2880 }, { "epoch": 0.16, "learning_rate": 0.00013064300365647015, "loss": 1.2266, "step": 2900 }, { "epoch": 0.16, "learning_rate": 0.0001305092303576206, "loss": 1.3082, "step": 2920 }, { "epoch": 0.16, "learning_rate": 0.00013037545705877108, "loss": 1.1601, "step": 2940 }, { "epoch": 0.16, "learning_rate": 0.0001302416837599215, "loss": 1.2866, "step": 2960 }, { "epoch": 0.16, "learning_rate": 0.00013010791046107195, "loss": 1.2495, "step": 2980 }, { "epoch": 0.16, "learning_rate": 0.0001299741371622224, "loss": 1.2786, "step": 3000 }, { "epoch": 0.16, "learning_rate": 0.00012984036386337285, "loss": 1.2868, "step": 3020 }, { "epoch": 0.16, "learning_rate": 0.0001297065905645233, "loss": 1.2896, "step": 3040 }, { "epoch": 0.16, "learning_rate": 0.00012957281726567375, "loss": 1.2752, "step": 3060 }, { "epoch": 0.16, "learning_rate": 0.0001294390439668242, "loss": 1.2322, "step": 3080 }, { "epoch": 0.17, "learning_rate": 0.00012930527066797466, "loss": 1.2861, "step": 3100 }, { "epoch": 0.17, "learning_rate": 0.0001291714973691251, "loss": 1.234, "step": 3120 }, { "epoch": 0.17, "learning_rate": 0.00012903772407027556, "loss": 1.346, "step": 3140 }, { "epoch": 0.17, "learning_rate": 0.000128903950771426, "loss": 1.3317, "step": 3160 }, { "epoch": 0.17, "learning_rate": 0.00012877017747257646, "loss": 1.3054, "step": 3180 }, { "epoch": 0.17, "learning_rate": 0.0001286364041737269, "loss": 1.2268, "step": 3200 }, { "epoch": 0.17, "learning_rate": 0.00012850263087487736, "loss": 1.2361, "step": 3220 }, { "epoch": 0.17, "learning_rate": 0.0001283688575760278, "loss": 1.2153, "step": 3240 }, { "epoch": 0.17, "learning_rate": 0.00012823508427717826, "loss": 1.3076, "step": 3260 }, { "epoch": 0.18, "learning_rate": 0.0001281013109783287, "loss": 1.2096, "step": 3280 }, { "epoch": 0.18, "learning_rate": 0.00012796753767947916, "loss": 1.3716, "step": 3300 }, { "epoch": 0.18, "learning_rate": 0.00012783376438062961, "loss": 1.2961, "step": 3320 }, { "epoch": 0.18, "learning_rate": 0.00012769999108178006, "loss": 1.2442, "step": 3340 }, { "epoch": 0.18, "learning_rate": 0.00012756621778293052, "loss": 1.2021, "step": 3360 }, { "epoch": 0.18, "learning_rate": 0.00012743244448408097, "loss": 1.2581, "step": 3380 }, { "epoch": 0.18, "learning_rate": 0.00012729867118523142, "loss": 1.3217, "step": 3400 }, { "epoch": 0.18, "learning_rate": 0.00012716489788638187, "loss": 1.3133, "step": 3420 }, { "epoch": 0.18, "learning_rate": 0.00012703112458753232, "loss": 1.2535, "step": 3440 }, { "epoch": 0.19, "learning_rate": 0.00012689735128868277, "loss": 1.2532, "step": 3460 }, { "epoch": 0.19, "learning_rate": 0.00012676357798983322, "loss": 1.2102, "step": 3480 }, { "epoch": 0.19, "learning_rate": 0.00012662980469098367, "loss": 1.2741, "step": 3500 }, { "epoch": 0.19, "learning_rate": 0.00012649603139213412, "loss": 1.2778, "step": 3520 }, { "epoch": 0.19, "learning_rate": 0.00012636225809328457, "loss": 1.2423, "step": 3540 }, { "epoch": 0.19, "learning_rate": 0.00012622848479443502, "loss": 1.2752, "step": 3560 }, { "epoch": 0.19, "learning_rate": 0.00012609471149558547, "loss": 1.3005, "step": 3580 }, { "epoch": 0.19, "learning_rate": 0.00012596093819673592, "loss": 1.1945, "step": 3600 }, { "epoch": 0.19, "learning_rate": 0.00012582716489788637, "loss": 1.2263, "step": 3620 }, { "epoch": 0.19, "learning_rate": 0.00012569339159903683, "loss": 1.3261, "step": 3640 }, { "epoch": 0.2, "learning_rate": 0.00012555961830018728, "loss": 1.1964, "step": 3660 }, { "epoch": 0.2, "learning_rate": 0.00012542584500133773, "loss": 1.254, "step": 3680 }, { "epoch": 0.2, "learning_rate": 0.00012529207170248818, "loss": 1.1634, "step": 3700 }, { "epoch": 0.2, "learning_rate": 0.00012515829840363863, "loss": 1.2088, "step": 3720 }, { "epoch": 0.2, "learning_rate": 0.00012502452510478908, "loss": 1.2862, "step": 3740 }, { "epoch": 0.2, "learning_rate": 0.00012489075180593953, "loss": 1.2535, "step": 3760 }, { "epoch": 0.2, "learning_rate": 0.00012475697850708995, "loss": 1.269, "step": 3780 }, { "epoch": 0.2, "learning_rate": 0.00012462320520824043, "loss": 1.1485, "step": 3800 }, { "epoch": 0.2, "learning_rate": 0.00012448943190939088, "loss": 1.3195, "step": 3820 }, { "epoch": 0.21, "learning_rate": 0.00012435565861054133, "loss": 1.3545, "step": 3840 }, { "epoch": 0.21, "learning_rate": 0.00012422188531169178, "loss": 1.2354, "step": 3860 }, { "epoch": 0.21, "learning_rate": 0.00012408811201284223, "loss": 1.3004, "step": 3880 }, { "epoch": 0.21, "learning_rate": 0.00012395433871399268, "loss": 1.2733, "step": 3900 }, { "epoch": 0.21, "learning_rate": 0.00012382056541514314, "loss": 1.2772, "step": 3920 }, { "epoch": 0.21, "learning_rate": 0.00012368679211629356, "loss": 1.2124, "step": 3940 }, { "epoch": 0.21, "learning_rate": 0.00012355301881744404, "loss": 1.2737, "step": 3960 }, { "epoch": 0.21, "learning_rate": 0.0001234192455185945, "loss": 1.2847, "step": 3980 }, { "epoch": 0.21, "learning_rate": 0.0001232854722197449, "loss": 1.3527, "step": 4000 }, { "epoch": 0.22, "learning_rate": 0.0001231516989208954, "loss": 1.2883, "step": 4020 }, { "epoch": 0.22, "learning_rate": 0.0001230179256220458, "loss": 1.2433, "step": 4040 }, { "epoch": 0.22, "learning_rate": 0.0001228841523231963, "loss": 1.2077, "step": 4060 }, { "epoch": 0.22, "learning_rate": 0.00012275037902434674, "loss": 1.2164, "step": 4080 }, { "epoch": 0.22, "learning_rate": 0.00012261660572549716, "loss": 1.3049, "step": 4100 }, { "epoch": 0.22, "learning_rate": 0.00012248283242664764, "loss": 1.178, "step": 4120 }, { "epoch": 0.22, "learning_rate": 0.0001223490591277981, "loss": 1.3433, "step": 4140 }, { "epoch": 0.22, "learning_rate": 0.00012221528582894852, "loss": 1.2906, "step": 4160 }, { "epoch": 0.22, "learning_rate": 0.000122081512530099, "loss": 1.2444, "step": 4180 }, { "epoch": 0.22, "learning_rate": 0.00012194773923124942, "loss": 1.2907, "step": 4200 }, { "epoch": 0.23, "learning_rate": 0.00012181396593239988, "loss": 1.2569, "step": 4220 }, { "epoch": 0.23, "learning_rate": 0.00012168019263355033, "loss": 1.2712, "step": 4240 }, { "epoch": 0.23, "learning_rate": 0.00012154641933470078, "loss": 1.2258, "step": 4260 }, { "epoch": 0.23, "learning_rate": 0.00012141264603585123, "loss": 1.2425, "step": 4280 }, { "epoch": 0.23, "learning_rate": 0.00012127887273700169, "loss": 1.2629, "step": 4300 }, { "epoch": 0.23, "learning_rate": 0.00012114509943815214, "loss": 1.223, "step": 4320 }, { "epoch": 0.23, "learning_rate": 0.00012101132613930259, "loss": 1.2029, "step": 4340 }, { "epoch": 0.23, "learning_rate": 0.00012087755284045302, "loss": 1.3088, "step": 4360 }, { "epoch": 0.23, "learning_rate": 0.00012074377954160349, "loss": 1.2688, "step": 4380 }, { "epoch": 0.24, "learning_rate": 0.00012061000624275394, "loss": 1.3692, "step": 4400 }, { "epoch": 0.24, "learning_rate": 0.00012047623294390438, "loss": 1.2459, "step": 4420 }, { "epoch": 0.24, "learning_rate": 0.00012034245964505484, "loss": 1.2383, "step": 4440 }, { "epoch": 0.24, "learning_rate": 0.00012020868634620528, "loss": 1.3598, "step": 4460 }, { "epoch": 0.24, "learning_rate": 0.00012007491304735574, "loss": 1.2385, "step": 4480 }, { "epoch": 0.24, "learning_rate": 0.00011994113974850619, "loss": 1.276, "step": 4500 }, { "epoch": 0.24, "learning_rate": 0.00011980736644965663, "loss": 1.239, "step": 4520 }, { "epoch": 0.24, "learning_rate": 0.0001196735931508071, "loss": 1.2464, "step": 4540 }, { "epoch": 0.24, "learning_rate": 0.00011953981985195754, "loss": 1.2179, "step": 4560 }, { "epoch": 0.25, "learning_rate": 0.00011940604655310798, "loss": 1.3519, "step": 4580 }, { "epoch": 0.25, "learning_rate": 0.00011927227325425845, "loss": 1.2849, "step": 4600 }, { "epoch": 0.25, "learning_rate": 0.00011913849995540888, "loss": 1.3064, "step": 4620 }, { "epoch": 0.25, "learning_rate": 0.00011900472665655933, "loss": 1.2588, "step": 4640 }, { "epoch": 0.25, "learning_rate": 0.0001188709533577098, "loss": 1.3034, "step": 4660 }, { "epoch": 0.25, "learning_rate": 0.00011873718005886024, "loss": 1.1986, "step": 4680 }, { "epoch": 0.25, "learning_rate": 0.0001186034067600107, "loss": 1.2726, "step": 4700 }, { "epoch": 0.25, "learning_rate": 0.00011846963346116114, "loss": 1.2532, "step": 4720 }, { "epoch": 0.25, "learning_rate": 0.00011833586016231159, "loss": 1.2512, "step": 4740 }, { "epoch": 0.25, "learning_rate": 0.00011820208686346205, "loss": 1.2225, "step": 4760 }, { "epoch": 0.26, "learning_rate": 0.00011806831356461249, "loss": 1.191, "step": 4780 }, { "epoch": 0.26, "learning_rate": 0.00011793454026576294, "loss": 1.2679, "step": 4800 }, { "epoch": 0.26, "learning_rate": 0.0001178007669669134, "loss": 1.256, "step": 4820 }, { "epoch": 0.26, "learning_rate": 0.00011766699366806384, "loss": 1.2175, "step": 4840 }, { "epoch": 0.26, "learning_rate": 0.00011753322036921429, "loss": 1.3049, "step": 4860 }, { "epoch": 0.26, "learning_rate": 0.00011739944707036474, "loss": 1.3196, "step": 4880 }, { "epoch": 0.26, "learning_rate": 0.0001172656737715152, "loss": 1.1598, "step": 4900 }, { "epoch": 0.26, "learning_rate": 0.00011713190047266566, "loss": 1.2284, "step": 4920 }, { "epoch": 0.26, "learning_rate": 0.0001169981271738161, "loss": 1.2414, "step": 4940 }, { "epoch": 0.27, "learning_rate": 0.00011686435387496655, "loss": 1.2733, "step": 4960 }, { "epoch": 0.27, "learning_rate": 0.00011673058057611698, "loss": 1.2176, "step": 4980 }, { "epoch": 0.27, "learning_rate": 0.00011659680727726745, "loss": 1.187, "step": 5000 }, { "epoch": 0.27, "learning_rate": 0.0001164630339784179, "loss": 1.3064, "step": 5020 }, { "epoch": 0.27, "learning_rate": 0.00011632926067956835, "loss": 1.1765, "step": 5040 }, { "epoch": 0.27, "learning_rate": 0.0001161954873807188, "loss": 1.1341, "step": 5060 }, { "epoch": 0.27, "learning_rate": 0.00011606171408186925, "loss": 1.3273, "step": 5080 }, { "epoch": 0.27, "learning_rate": 0.0001159279407830197, "loss": 1.2815, "step": 5100 }, { "epoch": 0.27, "learning_rate": 0.00011579416748417015, "loss": 1.235, "step": 5120 }, { "epoch": 0.28, "learning_rate": 0.00011566708285026307, "loss": 1.2213, "step": 5140 }, { "epoch": 0.28, "learning_rate": 0.00011553330955141353, "loss": 1.327, "step": 5160 }, { "epoch": 0.28, "learning_rate": 0.00011539953625256399, "loss": 1.3128, "step": 5180 }, { "epoch": 0.28, "learning_rate": 0.00011526576295371442, "loss": 1.2475, "step": 5200 }, { "epoch": 0.28, "learning_rate": 0.00011513198965486489, "loss": 1.2233, "step": 5220 }, { "epoch": 0.28, "learning_rate": 0.00011499821635601532, "loss": 1.2747, "step": 5240 }, { "epoch": 0.28, "learning_rate": 0.00011487113172210826, "loss": 1.2768, "step": 5260 }, { "epoch": 0.28, "learning_rate": 0.0001147373584232587, "loss": 1.2503, "step": 5280 }, { "epoch": 0.28, "learning_rate": 0.00011460358512440916, "loss": 1.3333, "step": 5300 }, { "epoch": 0.28, "learning_rate": 0.00011447650049050209, "loss": 1.1691, "step": 5320 }, { "epoch": 0.29, "learning_rate": 0.00011434272719165253, "loss": 1.2045, "step": 5340 }, { "epoch": 0.29, "learning_rate": 0.00011420895389280299, "loss": 1.2205, "step": 5360 }, { "epoch": 0.29, "learning_rate": 0.00011407518059395344, "loss": 1.2251, "step": 5380 }, { "epoch": 0.29, "learning_rate": 0.00011394140729510388, "loss": 1.2454, "step": 5400 }, { "epoch": 0.29, "learning_rate": 0.00011380763399625434, "loss": 1.1692, "step": 5420 }, { "epoch": 0.29, "learning_rate": 0.0001136738606974048, "loss": 1.2115, "step": 5440 }, { "epoch": 0.29, "learning_rate": 0.00011354008739855523, "loss": 1.2443, "step": 5460 }, { "epoch": 0.29, "learning_rate": 0.0001134063140997057, "loss": 1.2777, "step": 5480 }, { "epoch": 0.29, "learning_rate": 0.00011327254080085613, "loss": 1.2883, "step": 5500 }, { "epoch": 0.3, "learning_rate": 0.00011313876750200658, "loss": 1.1527, "step": 5520 }, { "epoch": 0.3, "learning_rate": 0.00011300499420315705, "loss": 1.2568, "step": 5540 }, { "epoch": 0.3, "learning_rate": 0.00011287122090430748, "loss": 1.1976, "step": 5560 }, { "epoch": 0.3, "learning_rate": 0.00011273744760545795, "loss": 1.2965, "step": 5580 }, { "epoch": 0.3, "learning_rate": 0.0001126036743066084, "loss": 1.2291, "step": 5600 }, { "epoch": 0.3, "learning_rate": 0.00011246990100775884, "loss": 1.2249, "step": 5620 }, { "epoch": 0.3, "learning_rate": 0.0001123361277089093, "loss": 1.2766, "step": 5640 }, { "epoch": 0.3, "learning_rate": 0.00011220235441005974, "loss": 1.2865, "step": 5660 }, { "epoch": 0.3, "learning_rate": 0.00011206858111121019, "loss": 1.2298, "step": 5680 }, { "epoch": 0.31, "learning_rate": 0.00011193480781236065, "loss": 1.2428, "step": 5700 }, { "epoch": 0.31, "learning_rate": 0.00011180103451351109, "loss": 1.2475, "step": 5720 }, { "epoch": 0.31, "learning_rate": 0.00011166726121466154, "loss": 1.1985, "step": 5740 }, { "epoch": 0.31, "learning_rate": 0.00011153348791581199, "loss": 1.2568, "step": 5760 }, { "epoch": 0.31, "learning_rate": 0.00011139971461696244, "loss": 1.24, "step": 5780 }, { "epoch": 0.31, "learning_rate": 0.0001112659413181129, "loss": 1.26, "step": 5800 }, { "epoch": 0.31, "learning_rate": 0.00011113216801926334, "loss": 1.3224, "step": 5820 }, { "epoch": 0.31, "learning_rate": 0.0001109983947204138, "loss": 1.1922, "step": 5840 }, { "epoch": 0.31, "learning_rate": 0.00011086462142156426, "loss": 1.3266, "step": 5860 }, { "epoch": 0.31, "learning_rate": 0.0001107308481227147, "loss": 1.2385, "step": 5880 }, { "epoch": 0.32, "learning_rate": 0.00011059707482386515, "loss": 1.2544, "step": 5900 }, { "epoch": 0.32, "learning_rate": 0.00011046330152501558, "loss": 1.2178, "step": 5920 }, { "epoch": 0.32, "learning_rate": 0.00011032952822616605, "loss": 1.2272, "step": 5940 }, { "epoch": 0.32, "learning_rate": 0.0001101957549273165, "loss": 1.2688, "step": 5960 }, { "epoch": 0.32, "learning_rate": 0.00011006198162846695, "loss": 1.3173, "step": 5980 }, { "epoch": 0.32, "learning_rate": 0.0001099282083296174, "loss": 1.2822, "step": 6000 }, { "epoch": 0.32, "learning_rate": 0.00010979443503076784, "loss": 1.3713, "step": 6020 }, { "epoch": 0.32, "learning_rate": 0.0001096606617319183, "loss": 1.2513, "step": 6040 }, { "epoch": 0.32, "learning_rate": 0.00010952688843306875, "loss": 1.2963, "step": 6060 }, { "epoch": 0.33, "learning_rate": 0.00010939311513421919, "loss": 1.294, "step": 6080 }, { "epoch": 0.33, "learning_rate": 0.00010925934183536965, "loss": 1.2102, "step": 6100 }, { "epoch": 0.33, "learning_rate": 0.0001091255685365201, "loss": 1.2752, "step": 6120 }, { "epoch": 0.33, "learning_rate": 0.00010899179523767054, "loss": 1.3164, "step": 6140 }, { "epoch": 0.33, "learning_rate": 0.000108858021938821, "loss": 1.3003, "step": 6160 }, { "epoch": 0.33, "learning_rate": 0.00010872424863997144, "loss": 1.2282, "step": 6180 }, { "epoch": 0.33, "learning_rate": 0.00010859047534112191, "loss": 1.2504, "step": 6200 }, { "epoch": 0.33, "learning_rate": 0.00010845670204227236, "loss": 1.2112, "step": 6220 }, { "epoch": 0.33, "learning_rate": 0.0001083229287434228, "loss": 1.2239, "step": 6240 }, { "epoch": 0.33, "learning_rate": 0.00010818915544457326, "loss": 1.241, "step": 6260 }, { "epoch": 0.34, "learning_rate": 0.00010805538214572371, "loss": 1.2371, "step": 6280 }, { "epoch": 0.34, "learning_rate": 0.00010792160884687415, "loss": 1.2749, "step": 6300 }, { "epoch": 0.34, "learning_rate": 0.00010778783554802461, "loss": 1.1918, "step": 6320 }, { "epoch": 0.34, "learning_rate": 0.00010765406224917505, "loss": 1.2478, "step": 6340 }, { "epoch": 0.34, "learning_rate": 0.0001075202889503255, "loss": 1.2193, "step": 6360 }, { "epoch": 0.34, "learning_rate": 0.00010738651565147596, "loss": 1.2331, "step": 6380 }, { "epoch": 0.34, "learning_rate": 0.0001072527423526264, "loss": 1.2856, "step": 6400 }, { "epoch": 0.34, "learning_rate": 0.00010711896905377687, "loss": 1.2411, "step": 6420 }, { "epoch": 0.34, "learning_rate": 0.0001069851957549273, "loss": 1.2937, "step": 6440 }, { "epoch": 0.35, "learning_rate": 0.00010685142245607775, "loss": 1.2907, "step": 6460 }, { "epoch": 0.35, "learning_rate": 0.00010671764915722822, "loss": 1.2082, "step": 6480 }, { "epoch": 0.35, "learning_rate": 0.00010658387585837865, "loss": 1.2155, "step": 6500 }, { "epoch": 0.35, "learning_rate": 0.0001064501025595291, "loss": 1.2481, "step": 6520 }, { "epoch": 0.35, "learning_rate": 0.00010631632926067957, "loss": 1.2598, "step": 6540 }, { "epoch": 0.35, "learning_rate": 0.00010618255596183001, "loss": 1.2127, "step": 6560 }, { "epoch": 0.35, "learning_rate": 0.00010604878266298046, "loss": 1.2854, "step": 6580 }, { "epoch": 0.35, "learning_rate": 0.00010591500936413091, "loss": 1.3469, "step": 6600 }, { "epoch": 0.35, "learning_rate": 0.00010578123606528136, "loss": 1.3407, "step": 6620 }, { "epoch": 0.36, "learning_rate": 0.00010564746276643182, "loss": 1.3226, "step": 6640 }, { "epoch": 0.36, "learning_rate": 0.00010551368946758226, "loss": 1.2002, "step": 6660 }, { "epoch": 0.36, "learning_rate": 0.00010537991616873271, "loss": 1.2413, "step": 6680 }, { "epoch": 0.36, "learning_rate": 0.00010524614286988315, "loss": 1.1809, "step": 6700 }, { "epoch": 0.36, "learning_rate": 0.00010511236957103361, "loss": 1.229, "step": 6720 }, { "epoch": 0.36, "learning_rate": 0.00010497859627218406, "loss": 1.2543, "step": 6740 }, { "epoch": 0.36, "learning_rate": 0.00010484482297333451, "loss": 1.2133, "step": 6760 }, { "epoch": 0.36, "learning_rate": 0.00010471104967448496, "loss": 1.234, "step": 6780 }, { "epoch": 0.36, "learning_rate": 0.00010457727637563542, "loss": 1.175, "step": 6800 }, { "epoch": 0.36, "learning_rate": 0.00010444350307678587, "loss": 1.239, "step": 6820 }, { "epoch": 0.37, "learning_rate": 0.00010430972977793632, "loss": 1.1946, "step": 6840 }, { "epoch": 0.37, "learning_rate": 0.00010417595647908675, "loss": 1.3, "step": 6860 }, { "epoch": 0.37, "learning_rate": 0.00010404218318023722, "loss": 1.2251, "step": 6880 }, { "epoch": 0.37, "learning_rate": 0.00010390840988138767, "loss": 1.2074, "step": 6900 }, { "epoch": 0.37, "learning_rate": 0.0001037746365825381, "loss": 1.28, "step": 6920 }, { "epoch": 0.37, "learning_rate": 0.00010364086328368857, "loss": 1.2345, "step": 6940 }, { "epoch": 0.37, "learning_rate": 0.00010350708998483901, "loss": 1.2979, "step": 6960 }, { "epoch": 0.37, "learning_rate": 0.00010337331668598947, "loss": 1.2409, "step": 6980 }, { "epoch": 0.37, "learning_rate": 0.00010323954338713992, "loss": 1.2225, "step": 7000 }, { "epoch": 0.38, "learning_rate": 0.00010310577008829036, "loss": 1.3312, "step": 7020 }, { "epoch": 0.38, "learning_rate": 0.00010297199678944082, "loss": 1.3371, "step": 7040 }, { "epoch": 0.38, "learning_rate": 0.00010283822349059127, "loss": 1.2533, "step": 7060 }, { "epoch": 0.38, "learning_rate": 0.00010270445019174171, "loss": 1.1777, "step": 7080 }, { "epoch": 0.38, "learning_rate": 0.00010257067689289218, "loss": 1.3087, "step": 7100 }, { "epoch": 0.38, "learning_rate": 0.00010243690359404261, "loss": 1.2846, "step": 7120 }, { "epoch": 0.38, "learning_rate": 0.00010230313029519306, "loss": 1.285, "step": 7140 }, { "epoch": 0.38, "learning_rate": 0.00010216935699634353, "loss": 1.2389, "step": 7160 }, { "epoch": 0.38, "learning_rate": 0.00010203558369749397, "loss": 1.3075, "step": 7180 }, { "epoch": 0.39, "learning_rate": 0.00010190181039864443, "loss": 1.3009, "step": 7200 }, { "epoch": 0.39, "learning_rate": 0.00010176803709979488, "loss": 1.2662, "step": 7220 }, { "epoch": 0.39, "learning_rate": 0.00010163426380094532, "loss": 1.2465, "step": 7240 }, { "epoch": 0.39, "learning_rate": 0.00010150049050209578, "loss": 1.2734, "step": 7260 }, { "epoch": 0.39, "learning_rate": 0.00010136671720324622, "loss": 1.236, "step": 7280 }, { "epoch": 0.39, "learning_rate": 0.00010123294390439667, "loss": 1.2205, "step": 7300 }, { "epoch": 0.39, "learning_rate": 0.00010109917060554713, "loss": 1.2966, "step": 7320 }, { "epoch": 0.39, "learning_rate": 0.00010096539730669757, "loss": 1.1876, "step": 7340 }, { "epoch": 0.39, "learning_rate": 0.00010083162400784802, "loss": 1.2498, "step": 7360 }, { "epoch": 0.39, "learning_rate": 0.00010069785070899847, "loss": 1.2932, "step": 7380 }, { "epoch": 0.4, "learning_rate": 0.00010056407741014892, "loss": 1.2171, "step": 7400 }, { "epoch": 0.4, "learning_rate": 0.00010043030411129939, "loss": 1.2482, "step": 7420 }, { "epoch": 0.4, "learning_rate": 0.00010029653081244983, "loss": 1.2415, "step": 7440 }, { "epoch": 0.4, "learning_rate": 0.00010016275751360028, "loss": 1.2561, "step": 7460 }, { "epoch": 0.4, "learning_rate": 0.00010002898421475074, "loss": 1.1842, "step": 7480 }, { "epoch": 0.4, "learning_rate": 9.989521091590118e-05, "loss": 1.2193, "step": 7500 }, { "epoch": 0.4, "learning_rate": 9.976143761705163e-05, "loss": 1.2601, "step": 7520 }, { "epoch": 0.4, "learning_rate": 9.962766431820207e-05, "loss": 1.2414, "step": 7540 }, { "epoch": 0.4, "learning_rate": 9.949389101935253e-05, "loss": 1.2435, "step": 7560 }, { "epoch": 0.41, "learning_rate": 9.936011772050298e-05, "loss": 1.2397, "step": 7580 }, { "epoch": 0.41, "learning_rate": 9.922634442165343e-05, "loss": 1.2504, "step": 7600 }, { "epoch": 0.41, "learning_rate": 9.909257112280388e-05, "loss": 1.1626, "step": 7620 }, { "epoch": 0.41, "learning_rate": 9.895879782395432e-05, "loss": 1.2119, "step": 7640 }, { "epoch": 0.41, "learning_rate": 9.882502452510478e-05, "loss": 1.2643, "step": 7660 }, { "epoch": 0.41, "learning_rate": 9.869125122625523e-05, "loss": 1.2948, "step": 7680 }, { "epoch": 0.41, "learning_rate": 9.855747792740567e-05, "loss": 1.1956, "step": 7700 }, { "epoch": 0.41, "learning_rate": 9.842370462855614e-05, "loss": 1.226, "step": 7720 }, { "epoch": 0.41, "learning_rate": 9.828993132970659e-05, "loss": 1.2657, "step": 7740 }, { "epoch": 0.42, "learning_rate": 9.815615803085702e-05, "loss": 1.2514, "step": 7760 }, { "epoch": 0.42, "learning_rate": 9.802238473200749e-05, "loss": 1.2894, "step": 7780 }, { "epoch": 0.42, "learning_rate": 9.788861143315792e-05, "loss": 1.2051, "step": 7800 }, { "epoch": 0.42, "learning_rate": 9.775483813430839e-05, "loss": 1.2518, "step": 7820 }, { "epoch": 0.42, "learning_rate": 9.762775350040132e-05, "loss": 1.2349, "step": 7840 }, { "epoch": 0.42, "learning_rate": 9.749398020155176e-05, "loss": 1.2554, "step": 7860 }, { "epoch": 0.42, "learning_rate": 9.736020690270221e-05, "loss": 1.3273, "step": 7880 }, { "epoch": 0.42, "learning_rate": 9.722643360385266e-05, "loss": 1.2585, "step": 7900 }, { "epoch": 0.42, "learning_rate": 9.709266030500311e-05, "loss": 1.2452, "step": 7920 }, { "epoch": 0.42, "learning_rate": 9.695888700615357e-05, "loss": 1.239, "step": 7940 }, { "epoch": 0.43, "learning_rate": 9.682511370730401e-05, "loss": 1.2432, "step": 7960 }, { "epoch": 0.43, "learning_rate": 9.669134040845446e-05, "loss": 1.2636, "step": 7980 }, { "epoch": 0.43, "learning_rate": 9.655756710960493e-05, "loss": 1.253, "step": 8000 }, { "epoch": 0.43, "learning_rate": 9.642379381075536e-05, "loss": 1.2862, "step": 8020 }, { "epoch": 0.43, "learning_rate": 9.62967091768483e-05, "loss": 1.2261, "step": 8040 }, { "epoch": 0.43, "learning_rate": 9.616293587799873e-05, "loss": 1.2839, "step": 8060 }, { "epoch": 0.43, "learning_rate": 9.60291625791492e-05, "loss": 1.2587, "step": 8080 }, { "epoch": 0.43, "learning_rate": 9.589538928029965e-05, "loss": 1.2197, "step": 8100 }, { "epoch": 0.43, "learning_rate": 9.576161598145009e-05, "loss": 1.2388, "step": 8120 }, { "epoch": 0.44, "learning_rate": 9.562784268260055e-05, "loss": 1.2913, "step": 8140 }, { "epoch": 0.44, "learning_rate": 9.549406938375099e-05, "loss": 1.2289, "step": 8160 }, { "epoch": 0.44, "learning_rate": 9.536029608490144e-05, "loss": 1.1632, "step": 8180 }, { "epoch": 0.44, "learning_rate": 9.52265227860519e-05, "loss": 1.2719, "step": 8200 }, { "epoch": 0.44, "learning_rate": 9.509274948720234e-05, "loss": 1.2905, "step": 8220 }, { "epoch": 0.44, "learning_rate": 9.49589761883528e-05, "loss": 1.2877, "step": 8240 }, { "epoch": 0.44, "learning_rate": 9.482520288950325e-05, "loss": 1.1934, "step": 8260 }, { "epoch": 0.44, "learning_rate": 9.469142959065369e-05, "loss": 1.1927, "step": 8280 }, { "epoch": 0.44, "learning_rate": 9.455765629180416e-05, "loss": 1.2834, "step": 8300 }, { "epoch": 0.45, "learning_rate": 9.442388299295459e-05, "loss": 1.2079, "step": 8320 }, { "epoch": 0.45, "learning_rate": 9.429010969410504e-05, "loss": 1.1659, "step": 8340 }, { "epoch": 0.45, "learning_rate": 9.415633639525551e-05, "loss": 1.2628, "step": 8360 }, { "epoch": 0.45, "learning_rate": 9.402256309640594e-05, "loss": 1.3499, "step": 8380 }, { "epoch": 0.45, "learning_rate": 9.38887897975564e-05, "loss": 1.2469, "step": 8400 }, { "epoch": 0.45, "learning_rate": 9.375501649870685e-05, "loss": 1.2518, "step": 8420 }, { "epoch": 0.45, "learning_rate": 9.36212431998573e-05, "loss": 1.2798, "step": 8440 }, { "epoch": 0.45, "learning_rate": 9.348746990100776e-05, "loss": 1.2521, "step": 8460 }, { "epoch": 0.45, "learning_rate": 9.33536966021582e-05, "loss": 1.2222, "step": 8480 }, { "epoch": 0.45, "learning_rate": 9.321992330330865e-05, "loss": 1.3206, "step": 8500 }, { "epoch": 0.46, "learning_rate": 9.308615000445911e-05, "loss": 1.2281, "step": 8520 }, { "epoch": 0.46, "learning_rate": 9.295237670560955e-05, "loss": 1.2056, "step": 8540 }, { "epoch": 0.46, "learning_rate": 9.283198073664496e-05, "loss": 1.1968, "step": 8560 }, { "epoch": 0.46, "learning_rate": 9.26982074377954e-05, "loss": 1.1708, "step": 8580 }, { "epoch": 0.46, "learning_rate": 9.256443413894585e-05, "loss": 1.2011, "step": 8600 }, { "epoch": 0.46, "learning_rate": 9.243066084009632e-05, "loss": 1.2715, "step": 8620 }, { "epoch": 0.46, "learning_rate": 9.229688754124675e-05, "loss": 1.2564, "step": 8640 }, { "epoch": 0.46, "learning_rate": 9.216311424239722e-05, "loss": 1.1572, "step": 8660 }, { "epoch": 0.46, "learning_rate": 9.202934094354765e-05, "loss": 1.2291, "step": 8680 }, { "epoch": 0.47, "learning_rate": 9.18955676446981e-05, "loss": 1.296, "step": 8700 }, { "epoch": 0.47, "learning_rate": 9.176179434584857e-05, "loss": 1.2867, "step": 8720 }, { "epoch": 0.47, "learning_rate": 9.162802104699901e-05, "loss": 1.227, "step": 8740 }, { "epoch": 0.47, "learning_rate": 9.149424774814946e-05, "loss": 1.139, "step": 8760 }, { "epoch": 0.47, "learning_rate": 9.136047444929992e-05, "loss": 1.3093, "step": 8780 }, { "epoch": 0.47, "learning_rate": 9.122670115045036e-05, "loss": 1.3355, "step": 8800 }, { "epoch": 0.47, "learning_rate": 9.109292785160081e-05, "loss": 1.3257, "step": 8820 }, { "epoch": 0.47, "learning_rate": 9.095915455275126e-05, "loss": 1.2642, "step": 8840 }, { "epoch": 0.47, "learning_rate": 9.082538125390171e-05, "loss": 1.2519, "step": 8860 }, { "epoch": 0.48, "learning_rate": 9.069160795505218e-05, "loss": 1.2833, "step": 8880 }, { "epoch": 0.48, "learning_rate": 9.055783465620261e-05, "loss": 1.1774, "step": 8900 }, { "epoch": 0.48, "learning_rate": 9.042406135735306e-05, "loss": 1.2615, "step": 8920 }, { "epoch": 0.48, "learning_rate": 9.02902880585035e-05, "loss": 1.2929, "step": 8940 }, { "epoch": 0.48, "learning_rate": 9.015651475965397e-05, "loss": 1.1969, "step": 8960 }, { "epoch": 0.48, "learning_rate": 9.002274146080442e-05, "loss": 1.3023, "step": 8980 }, { "epoch": 0.48, "learning_rate": 8.988896816195487e-05, "loss": 1.388, "step": 9000 }, { "epoch": 0.48, "learning_rate": 8.975519486310532e-05, "loss": 1.3288, "step": 9020 }, { "epoch": 0.48, "learning_rate": 8.962142156425577e-05, "loss": 1.355, "step": 9040 }, { "epoch": 0.48, "learning_rate": 8.948764826540622e-05, "loss": 1.2233, "step": 9060 }, { "epoch": 0.49, "learning_rate": 8.935387496655667e-05, "loss": 1.1702, "step": 9080 }, { "epoch": 0.49, "learning_rate": 8.92201016677071e-05, "loss": 1.2287, "step": 9100 }, { "epoch": 0.49, "learning_rate": 8.908632836885757e-05, "loss": 1.1995, "step": 9120 }, { "epoch": 0.49, "learning_rate": 8.895255507000802e-05, "loss": 1.2556, "step": 9140 }, { "epoch": 0.49, "learning_rate": 8.881878177115846e-05, "loss": 1.2391, "step": 9160 }, { "epoch": 0.49, "learning_rate": 8.868500847230892e-05, "loss": 1.2031, "step": 9180 }, { "epoch": 0.49, "learning_rate": 8.855123517345936e-05, "loss": 1.2011, "step": 9200 }, { "epoch": 0.49, "learning_rate": 8.841746187460982e-05, "loss": 1.3379, "step": 9220 }, { "epoch": 0.49, "learning_rate": 8.828368857576028e-05, "loss": 1.3135, "step": 9240 }, { "epoch": 0.5, "learning_rate": 8.814991527691071e-05, "loss": 1.26, "step": 9260 }, { "epoch": 0.5, "learning_rate": 8.801614197806118e-05, "loss": 1.2027, "step": 9280 }, { "epoch": 0.5, "learning_rate": 8.788236867921163e-05, "loss": 1.2313, "step": 9300 }, { "epoch": 0.5, "learning_rate": 8.774859538036206e-05, "loss": 1.253, "step": 9320 }, { "epoch": 0.5, "learning_rate": 8.761482208151253e-05, "loss": 1.2576, "step": 9340 }, { "epoch": 0.5, "learning_rate": 8.748104878266297e-05, "loss": 1.2785, "step": 9360 }, { "epoch": 0.5, "learning_rate": 8.734727548381342e-05, "loss": 1.2572, "step": 9380 }, { "epoch": 0.5, "learning_rate": 8.721350218496388e-05, "loss": 1.2132, "step": 9400 }, { "epoch": 0.5, "learning_rate": 8.707972888611432e-05, "loss": 1.2678, "step": 9420 }, { "epoch": 0.51, "learning_rate": 8.694595558726478e-05, "loss": 1.2454, "step": 9440 }, { "epoch": 0.51, "learning_rate": 8.681218228841523e-05, "loss": 1.2681, "step": 9460 }, { "epoch": 0.51, "learning_rate": 8.667840898956567e-05, "loss": 1.3031, "step": 9480 }, { "epoch": 0.51, "learning_rate": 8.654463569071613e-05, "loss": 1.2746, "step": 9500 }, { "epoch": 0.51, "learning_rate": 8.641086239186657e-05, "loss": 1.2042, "step": 9520 }, { "epoch": 0.51, "learning_rate": 8.627708909301702e-05, "loss": 1.197, "step": 9540 }, { "epoch": 0.51, "learning_rate": 8.614331579416749e-05, "loss": 1.2036, "step": 9560 }, { "epoch": 0.51, "learning_rate": 8.600954249531792e-05, "loss": 1.2884, "step": 9580 }, { "epoch": 0.51, "learning_rate": 8.587576919646837e-05, "loss": 1.2256, "step": 9600 }, { "epoch": 0.51, "learning_rate": 8.574199589761883e-05, "loss": 1.2082, "step": 9620 }, { "epoch": 0.52, "learning_rate": 8.560822259876928e-05, "loss": 1.1717, "step": 9640 }, { "epoch": 0.52, "learning_rate": 8.547444929991974e-05, "loss": 1.2304, "step": 9660 }, { "epoch": 0.52, "learning_rate": 8.534067600107018e-05, "loss": 1.2069, "step": 9680 }, { "epoch": 0.52, "learning_rate": 8.520690270222063e-05, "loss": 1.2066, "step": 9700 }, { "epoch": 0.52, "learning_rate": 8.507312940337109e-05, "loss": 1.2089, "step": 9720 }, { "epoch": 0.52, "learning_rate": 8.493935610452153e-05, "loss": 1.2422, "step": 9740 }, { "epoch": 0.52, "learning_rate": 8.480558280567198e-05, "loss": 1.1555, "step": 9760 }, { "epoch": 0.52, "learning_rate": 8.467180950682243e-05, "loss": 1.2612, "step": 9780 }, { "epoch": 0.52, "learning_rate": 8.453803620797288e-05, "loss": 1.1586, "step": 9800 }, { "epoch": 0.53, "learning_rate": 8.440426290912333e-05, "loss": 1.271, "step": 9820 }, { "epoch": 0.53, "learning_rate": 8.427048961027378e-05, "loss": 1.221, "step": 9840 }, { "epoch": 0.53, "learning_rate": 8.413671631142423e-05, "loss": 1.1977, "step": 9860 }, { "epoch": 0.53, "learning_rate": 8.400294301257467e-05, "loss": 1.2368, "step": 9880 }, { "epoch": 0.53, "learning_rate": 8.386916971372514e-05, "loss": 1.2286, "step": 9900 }, { "epoch": 0.53, "learning_rate": 8.373539641487559e-05, "loss": 1.2463, "step": 9920 }, { "epoch": 0.53, "learning_rate": 8.360162311602602e-05, "loss": 1.2563, "step": 9940 }, { "epoch": 0.53, "learning_rate": 8.346784981717649e-05, "loss": 1.1624, "step": 9960 }, { "epoch": 0.53, "learning_rate": 8.333407651832694e-05, "loss": 1.2199, "step": 9980 }, { "epoch": 0.54, "learning_rate": 8.320030321947739e-05, "loss": 1.2817, "step": 10000 }, { "epoch": 0.54, "learning_rate": 8.306652992062784e-05, "loss": 1.2945, "step": 10020 }, { "epoch": 0.54, "learning_rate": 8.293275662177828e-05, "loss": 1.2161, "step": 10040 }, { "epoch": 0.54, "learning_rate": 8.279898332292874e-05, "loss": 1.2568, "step": 10060 }, { "epoch": 0.54, "learning_rate": 8.266521002407919e-05, "loss": 1.2738, "step": 10080 }, { "epoch": 0.54, "learning_rate": 8.253143672522963e-05, "loss": 1.2251, "step": 10100 }, { "epoch": 0.54, "learning_rate": 8.239766342638009e-05, "loss": 1.2371, "step": 10120 }, { "epoch": 0.54, "learning_rate": 8.226389012753053e-05, "loss": 1.2621, "step": 10140 }, { "epoch": 0.54, "learning_rate": 8.213011682868098e-05, "loss": 1.1993, "step": 10160 }, { "epoch": 0.54, "learning_rate": 8.199634352983145e-05, "loss": 1.2604, "step": 10180 }, { "epoch": 0.55, "learning_rate": 8.186257023098188e-05, "loss": 1.2643, "step": 10200 }, { "epoch": 0.55, "learning_rate": 8.172879693213233e-05, "loss": 1.2325, "step": 10220 }, { "epoch": 0.55, "learning_rate": 8.15950236332828e-05, "loss": 1.2499, "step": 10240 }, { "epoch": 0.55, "learning_rate": 8.146125033443323e-05, "loss": 1.27, "step": 10260 }, { "epoch": 0.55, "learning_rate": 8.13274770355837e-05, "loss": 1.2287, "step": 10280 }, { "epoch": 0.55, "learning_rate": 8.119370373673414e-05, "loss": 1.3572, "step": 10300 }, { "epoch": 0.55, "learning_rate": 8.105993043788459e-05, "loss": 1.2297, "step": 10320 }, { "epoch": 0.55, "learning_rate": 8.092615713903505e-05, "loss": 1.2549, "step": 10340 }, { "epoch": 0.55, "learning_rate": 8.079238384018549e-05, "loss": 1.1971, "step": 10360 }, { "epoch": 0.56, "learning_rate": 8.065861054133594e-05, "loss": 1.3071, "step": 10380 }, { "epoch": 0.56, "learning_rate": 8.05248372424864e-05, "loss": 1.2156, "step": 10400 }, { "epoch": 0.56, "learning_rate": 8.039106394363684e-05, "loss": 1.2563, "step": 10420 }, { "epoch": 0.56, "learning_rate": 8.025729064478729e-05, "loss": 1.3196, "step": 10440 }, { "epoch": 0.56, "learning_rate": 8.012351734593774e-05, "loss": 1.2644, "step": 10460 }, { "epoch": 0.56, "learning_rate": 7.998974404708819e-05, "loss": 1.254, "step": 10480 }, { "epoch": 0.56, "learning_rate": 7.985597074823866e-05, "loss": 1.154, "step": 10500 }, { "epoch": 0.56, "learning_rate": 7.97221974493891e-05, "loss": 1.2592, "step": 10520 }, { "epoch": 0.56, "learning_rate": 7.958842415053954e-05, "loss": 1.244, "step": 10540 }, { "epoch": 0.57, "learning_rate": 7.945465085168998e-05, "loss": 1.3127, "step": 10560 }, { "epoch": 0.57, "learning_rate": 7.932087755284045e-05, "loss": 1.2457, "step": 10580 }, { "epoch": 0.57, "learning_rate": 7.91871042539909e-05, "loss": 1.2566, "step": 10600 }, { "epoch": 0.57, "learning_rate": 7.905333095514135e-05, "loss": 1.1635, "step": 10620 }, { "epoch": 0.57, "learning_rate": 7.89195576562918e-05, "loss": 1.2568, "step": 10640 }, { "epoch": 0.57, "learning_rate": 7.879247302238473e-05, "loss": 1.2544, "step": 10660 }, { "epoch": 0.57, "learning_rate": 7.865869972353517e-05, "loss": 1.2085, "step": 10680 }, { "epoch": 0.57, "learning_rate": 7.852492642468563e-05, "loss": 1.2252, "step": 10700 }, { "epoch": 0.57, "learning_rate": 7.839115312583607e-05, "loss": 1.2473, "step": 10720 }, { "epoch": 0.57, "learning_rate": 7.825737982698653e-05, "loss": 1.1707, "step": 10740 }, { "epoch": 0.58, "learning_rate": 7.812360652813698e-05, "loss": 1.3252, "step": 10760 }, { "epoch": 0.58, "learning_rate": 7.798983322928742e-05, "loss": 1.2583, "step": 10780 }, { "epoch": 0.58, "learning_rate": 7.785605993043789e-05, "loss": 1.2885, "step": 10800 }, { "epoch": 0.58, "learning_rate": 7.772228663158832e-05, "loss": 1.3081, "step": 10820 }, { "epoch": 0.58, "learning_rate": 7.758851333273877e-05, "loss": 1.2226, "step": 10840 }, { "epoch": 0.58, "learning_rate": 7.745474003388924e-05, "loss": 1.1864, "step": 10860 }, { "epoch": 0.58, "learning_rate": 7.732096673503967e-05, "loss": 1.2069, "step": 10880 }, { "epoch": 0.58, "learning_rate": 7.718719343619013e-05, "loss": 1.18, "step": 10900 }, { "epoch": 0.58, "learning_rate": 7.705342013734059e-05, "loss": 1.2664, "step": 10920 }, { "epoch": 0.59, "learning_rate": 7.691964683849103e-05, "loss": 1.2568, "step": 10940 }, { "epoch": 0.59, "learning_rate": 7.678587353964149e-05, "loss": 1.2502, "step": 10960 }, { "epoch": 0.59, "learning_rate": 7.665210024079193e-05, "loss": 1.2499, "step": 10980 }, { "epoch": 0.59, "learning_rate": 7.651832694194238e-05, "loss": 1.3068, "step": 11000 }, { "epoch": 0.59, "learning_rate": 7.638455364309284e-05, "loss": 1.2541, "step": 11020 }, { "epoch": 0.59, "learning_rate": 7.625078034424328e-05, "loss": 1.2952, "step": 11040 }, { "epoch": 0.59, "learning_rate": 7.611700704539373e-05, "loss": 1.2259, "step": 11060 }, { "epoch": 0.59, "learning_rate": 7.598323374654418e-05, "loss": 1.2238, "step": 11080 }, { "epoch": 0.59, "learning_rate": 7.584946044769463e-05, "loss": 1.2615, "step": 11100 }, { "epoch": 0.6, "learning_rate": 7.571568714884508e-05, "loss": 1.2634, "step": 11120 }, { "epoch": 0.6, "learning_rate": 7.558191384999553e-05, "loss": 1.1873, "step": 11140 }, { "epoch": 0.6, "learning_rate": 7.544814055114598e-05, "loss": 1.2912, "step": 11160 }, { "epoch": 0.6, "learning_rate": 7.531436725229645e-05, "loss": 1.2808, "step": 11180 }, { "epoch": 0.6, "learning_rate": 7.518059395344689e-05, "loss": 1.1985, "step": 11200 }, { "epoch": 0.6, "learning_rate": 7.504682065459734e-05, "loss": 1.2727, "step": 11220 }, { "epoch": 0.6, "learning_rate": 7.491304735574779e-05, "loss": 1.2199, "step": 11240 }, { "epoch": 0.6, "learning_rate": 7.477927405689824e-05, "loss": 1.1882, "step": 11260 }, { "epoch": 0.6, "learning_rate": 7.464550075804869e-05, "loss": 1.341, "step": 11280 }, { "epoch": 0.6, "learning_rate": 7.451172745919914e-05, "loss": 1.3104, "step": 11300 }, { "epoch": 0.61, "learning_rate": 7.437795416034959e-05, "loss": 1.2509, "step": 11320 }, { "epoch": 0.61, "learning_rate": 7.424418086150004e-05, "loss": 1.2067, "step": 11340 }, { "epoch": 0.61, "learning_rate": 7.411040756265049e-05, "loss": 1.2277, "step": 11360 }, { "epoch": 0.61, "learning_rate": 7.397663426380093e-05, "loss": 1.2333, "step": 11380 }, { "epoch": 0.61, "learning_rate": 7.38428609649514e-05, "loss": 1.2209, "step": 11400 }, { "epoch": 0.61, "learning_rate": 7.370908766610184e-05, "loss": 1.2424, "step": 11420 }, { "epoch": 0.61, "learning_rate": 7.35753143672523e-05, "loss": 1.2921, "step": 11440 }, { "epoch": 0.61, "learning_rate": 7.344154106840273e-05, "loss": 1.2341, "step": 11460 }, { "epoch": 0.61, "learning_rate": 7.33077677695532e-05, "loss": 1.3227, "step": 11480 }, { "epoch": 0.62, "learning_rate": 7.317399447070365e-05, "loss": 1.2983, "step": 11500 }, { "epoch": 0.62, "learning_rate": 7.304022117185408e-05, "loss": 1.2579, "step": 11520 }, { "epoch": 0.62, "learning_rate": 7.290644787300453e-05, "loss": 1.1881, "step": 11540 }, { "epoch": 0.62, "learning_rate": 7.2772674574155e-05, "loss": 1.2218, "step": 11560 }, { "epoch": 0.62, "learning_rate": 7.263890127530545e-05, "loss": 1.2019, "step": 11580 }, { "epoch": 0.62, "learning_rate": 7.250512797645589e-05, "loss": 1.2027, "step": 11600 }, { "epoch": 0.62, "learning_rate": 7.237135467760634e-05, "loss": 1.1868, "step": 11620 }, { "epoch": 0.62, "learning_rate": 7.223758137875679e-05, "loss": 1.2663, "step": 11640 }, { "epoch": 0.62, "learning_rate": 7.210380807990725e-05, "loss": 1.2274, "step": 11660 }, { "epoch": 0.62, "learning_rate": 7.197003478105769e-05, "loss": 1.2013, "step": 11680 }, { "epoch": 0.63, "learning_rate": 7.183626148220814e-05, "loss": 1.2167, "step": 11700 }, { "epoch": 0.63, "learning_rate": 7.170248818335859e-05, "loss": 1.29, "step": 11720 }, { "epoch": 0.63, "learning_rate": 7.156871488450904e-05, "loss": 1.2494, "step": 11740 }, { "epoch": 0.63, "learning_rate": 7.143494158565949e-05, "loss": 1.2329, "step": 11760 }, { "epoch": 0.63, "learning_rate": 7.130785695175243e-05, "loss": 1.2622, "step": 11780 }, { "epoch": 0.63, "learning_rate": 7.117408365290288e-05, "loss": 1.3208, "step": 11800 }, { "epoch": 0.63, "learning_rate": 7.104031035405333e-05, "loss": 1.2568, "step": 11820 }, { "epoch": 0.63, "learning_rate": 7.091322572014626e-05, "loss": 1.2532, "step": 11840 }, { "epoch": 0.63, "learning_rate": 7.077945242129671e-05, "loss": 1.2454, "step": 11860 }, { "epoch": 0.64, "learning_rate": 7.064567912244715e-05, "loss": 1.2169, "step": 11880 }, { "epoch": 0.64, "learning_rate": 7.05119058235976e-05, "loss": 1.2065, "step": 11900 }, { "epoch": 0.64, "learning_rate": 7.038482118969053e-05, "loss": 1.2166, "step": 11920 }, { "epoch": 0.64, "learning_rate": 7.025104789084098e-05, "loss": 1.3137, "step": 11940 }, { "epoch": 0.64, "learning_rate": 7.011727459199143e-05, "loss": 1.2238, "step": 11960 }, { "epoch": 0.64, "learning_rate": 6.998350129314188e-05, "loss": 1.3668, "step": 11980 }, { "epoch": 0.64, "learning_rate": 6.984972799429233e-05, "loss": 1.1807, "step": 12000 }, { "epoch": 0.64, "learning_rate": 6.971595469544278e-05, "loss": 1.2962, "step": 12020 }, { "epoch": 0.64, "learning_rate": 6.958218139659323e-05, "loss": 1.2269, "step": 12040 }, { "epoch": 0.65, "learning_rate": 6.944840809774368e-05, "loss": 1.2349, "step": 12060 }, { "epoch": 0.65, "learning_rate": 6.931463479889414e-05, "loss": 1.2097, "step": 12080 }, { "epoch": 0.65, "learning_rate": 6.918086150004459e-05, "loss": 1.2441, "step": 12100 }, { "epoch": 0.65, "learning_rate": 6.904708820119504e-05, "loss": 1.2529, "step": 12120 }, { "epoch": 0.65, "learning_rate": 6.891331490234549e-05, "loss": 1.1557, "step": 12140 }, { "epoch": 0.65, "learning_rate": 6.877954160349594e-05, "loss": 1.2329, "step": 12160 }, { "epoch": 0.65, "learning_rate": 6.864576830464639e-05, "loss": 1.2274, "step": 12180 }, { "epoch": 0.65, "learning_rate": 6.851199500579684e-05, "loss": 1.1798, "step": 12200 }, { "epoch": 0.65, "learning_rate": 6.837822170694729e-05, "loss": 1.2672, "step": 12220 }, { "epoch": 0.65, "learning_rate": 6.824444840809774e-05, "loss": 1.208, "step": 12240 }, { "epoch": 0.66, "learning_rate": 6.811067510924819e-05, "loss": 1.1973, "step": 12260 }, { "epoch": 0.66, "learning_rate": 6.797690181039864e-05, "loss": 1.1668, "step": 12280 }, { "epoch": 0.66, "learning_rate": 6.78431285115491e-05, "loss": 1.2438, "step": 12300 }, { "epoch": 0.66, "learning_rate": 6.770935521269953e-05, "loss": 1.2651, "step": 12320 }, { "epoch": 0.66, "learning_rate": 6.757558191385e-05, "loss": 1.2528, "step": 12340 }, { "epoch": 0.66, "learning_rate": 6.744180861500045e-05, "loss": 1.2287, "step": 12360 }, { "epoch": 0.66, "learning_rate": 6.73080353161509e-05, "loss": 1.2539, "step": 12380 }, { "epoch": 0.66, "learning_rate": 6.717426201730133e-05, "loss": 1.2459, "step": 12400 }, { "epoch": 0.66, "learning_rate": 6.704048871845178e-05, "loss": 1.2592, "step": 12420 }, { "epoch": 0.67, "learning_rate": 6.690671541960225e-05, "loss": 1.2274, "step": 12440 }, { "epoch": 0.67, "learning_rate": 6.67729421207527e-05, "loss": 1.248, "step": 12460 }, { "epoch": 0.67, "learning_rate": 6.663916882190314e-05, "loss": 1.1897, "step": 12480 }, { "epoch": 0.67, "learning_rate": 6.650539552305359e-05, "loss": 1.2343, "step": 12500 }, { "epoch": 0.67, "learning_rate": 6.637162222420405e-05, "loss": 1.2702, "step": 12520 }, { "epoch": 0.67, "learning_rate": 6.623784892535449e-05, "loss": 1.2176, "step": 12540 }, { "epoch": 0.67, "learning_rate": 6.610407562650494e-05, "loss": 1.3199, "step": 12560 }, { "epoch": 0.67, "learning_rate": 6.597030232765539e-05, "loss": 1.2144, "step": 12580 }, { "epoch": 0.67, "learning_rate": 6.583652902880585e-05, "loss": 1.1113, "step": 12600 }, { "epoch": 0.68, "learning_rate": 6.570275572995629e-05, "loss": 1.2538, "step": 12620 }, { "epoch": 0.68, "learning_rate": 6.556898243110674e-05, "loss": 1.1333, "step": 12640 }, { "epoch": 0.68, "learning_rate": 6.543520913225719e-05, "loss": 1.2734, "step": 12660 }, { "epoch": 0.68, "learning_rate": 6.530143583340764e-05, "loss": 1.2098, "step": 12680 }, { "epoch": 0.68, "learning_rate": 6.51676625345581e-05, "loss": 1.2354, "step": 12700 }, { "epoch": 0.68, "learning_rate": 6.503388923570854e-05, "loss": 1.2102, "step": 12720 }, { "epoch": 0.68, "learning_rate": 6.4900115936859e-05, "loss": 1.2129, "step": 12740 }, { "epoch": 0.68, "learning_rate": 6.476634263800945e-05, "loss": 1.2735, "step": 12760 }, { "epoch": 0.68, "learning_rate": 6.46325693391599e-05, "loss": 1.2618, "step": 12780 }, { "epoch": 0.68, "learning_rate": 6.449879604031035e-05, "loss": 1.2389, "step": 12800 }, { "epoch": 0.69, "learning_rate": 6.43650227414608e-05, "loss": 1.1994, "step": 12820 }, { "epoch": 0.69, "learning_rate": 6.423124944261125e-05, "loss": 1.2052, "step": 12840 }, { "epoch": 0.69, "learning_rate": 6.40974761437617e-05, "loss": 1.2301, "step": 12860 }, { "epoch": 0.69, "learning_rate": 6.396370284491215e-05, "loss": 1.2551, "step": 12880 }, { "epoch": 0.69, "learning_rate": 6.38299295460626e-05, "loss": 1.155, "step": 12900 }, { "epoch": 0.69, "learning_rate": 6.369615624721305e-05, "loss": 1.1681, "step": 12920 }, { "epoch": 0.69, "learning_rate": 6.35623829483635e-05, "loss": 1.2225, "step": 12940 }, { "epoch": 0.69, "learning_rate": 6.342860964951395e-05, "loss": 1.2259, "step": 12960 }, { "epoch": 0.69, "learning_rate": 6.32948363506644e-05, "loss": 1.2987, "step": 12980 }, { "epoch": 0.7, "learning_rate": 6.316106305181485e-05, "loss": 1.1836, "step": 13000 }, { "epoch": 0.7, "learning_rate": 6.302728975296529e-05, "loss": 1.3246, "step": 13020 }, { "epoch": 0.7, "learning_rate": 6.289351645411576e-05, "loss": 1.2836, "step": 13040 }, { "epoch": 0.7, "learning_rate": 6.275974315526621e-05, "loss": 1.2597, "step": 13060 }, { "epoch": 0.7, "learning_rate": 6.262596985641666e-05, "loss": 1.2336, "step": 13080 }, { "epoch": 0.7, "learning_rate": 6.24921965575671e-05, "loss": 1.2291, "step": 13100 }, { "epoch": 0.7, "learning_rate": 6.235842325871756e-05, "loss": 1.2461, "step": 13120 }, { "epoch": 0.7, "learning_rate": 6.222464995986801e-05, "loss": 1.2751, "step": 13140 }, { "epoch": 0.7, "learning_rate": 6.209087666101846e-05, "loss": 1.2774, "step": 13160 }, { "epoch": 0.71, "learning_rate": 6.19571033621689e-05, "loss": 1.2045, "step": 13180 }, { "epoch": 0.71, "learning_rate": 6.182333006331936e-05, "loss": 1.1988, "step": 13200 }, { "epoch": 0.71, "learning_rate": 6.168955676446981e-05, "loss": 1.2054, "step": 13220 }, { "epoch": 0.71, "learning_rate": 6.155578346562025e-05, "loss": 1.293, "step": 13240 }, { "epoch": 0.71, "learning_rate": 6.14220101667707e-05, "loss": 1.2437, "step": 13260 }, { "epoch": 0.71, "learning_rate": 6.128823686792116e-05, "loss": 1.2393, "step": 13280 }, { "epoch": 0.71, "learning_rate": 6.115446356907162e-05, "loss": 1.2555, "step": 13300 }, { "epoch": 0.71, "learning_rate": 6.102069027022206e-05, "loss": 1.1612, "step": 13320 }, { "epoch": 0.71, "learning_rate": 6.088691697137251e-05, "loss": 1.2055, "step": 13340 }, { "epoch": 0.71, "learning_rate": 6.0753143672522954e-05, "loss": 1.2805, "step": 13360 }, { "epoch": 0.72, "learning_rate": 6.061937037367341e-05, "loss": 1.2563, "step": 13380 }, { "epoch": 0.72, "learning_rate": 6.048559707482386e-05, "loss": 1.2352, "step": 13400 }, { "epoch": 0.72, "learning_rate": 6.0351823775974306e-05, "loss": 1.2293, "step": 13420 }, { "epoch": 0.72, "learning_rate": 6.021805047712476e-05, "loss": 1.1811, "step": 13440 }, { "epoch": 0.72, "learning_rate": 6.0084277178275215e-05, "loss": 1.2481, "step": 13460 }, { "epoch": 0.72, "learning_rate": 5.9950503879425665e-05, "loss": 1.2753, "step": 13480 }, { "epoch": 0.72, "learning_rate": 5.981673058057611e-05, "loss": 1.2417, "step": 13500 }, { "epoch": 0.72, "learning_rate": 5.968295728172656e-05, "loss": 1.239, "step": 13520 }, { "epoch": 0.72, "learning_rate": 5.954918398287702e-05, "loss": 1.3171, "step": 13540 }, { "epoch": 0.73, "learning_rate": 5.941541068402746e-05, "loss": 1.1562, "step": 13560 }, { "epoch": 0.73, "learning_rate": 5.928163738517791e-05, "loss": 1.2311, "step": 13580 }, { "epoch": 0.73, "learning_rate": 5.914786408632836e-05, "loss": 1.3033, "step": 13600 }, { "epoch": 0.73, "learning_rate": 5.901409078747881e-05, "loss": 1.2635, "step": 13620 }, { "epoch": 0.73, "learning_rate": 5.8880317488629264e-05, "loss": 1.1992, "step": 13640 }, { "epoch": 0.73, "learning_rate": 5.8746544189779715e-05, "loss": 1.2307, "step": 13660 }, { "epoch": 0.73, "learning_rate": 5.8612770890930166e-05, "loss": 1.2735, "step": 13680 }, { "epoch": 0.73, "learning_rate": 5.847899759208061e-05, "loss": 1.1868, "step": 13700 }, { "epoch": 0.73, "learning_rate": 5.834522429323107e-05, "loss": 1.2114, "step": 13720 }, { "epoch": 0.74, "learning_rate": 5.821145099438152e-05, "loss": 1.2118, "step": 13740 }, { "epoch": 0.74, "learning_rate": 5.807767769553197e-05, "loss": 1.2056, "step": 13760 }, { "epoch": 0.74, "learning_rate": 5.7950593061624894e-05, "loss": 1.2697, "step": 13780 }, { "epoch": 0.74, "learning_rate": 5.781681976277535e-05, "loss": 1.2336, "step": 13800 }, { "epoch": 0.74, "learning_rate": 5.7683046463925795e-05, "loss": 1.1967, "step": 13820 }, { "epoch": 0.74, "learning_rate": 5.7549273165076246e-05, "loss": 1.2254, "step": 13840 }, { "epoch": 0.74, "learning_rate": 5.74154998662267e-05, "loss": 1.2518, "step": 13860 }, { "epoch": 0.74, "learning_rate": 5.728172656737714e-05, "loss": 1.2112, "step": 13880 }, { "epoch": 0.74, "learning_rate": 5.71479532685276e-05, "loss": 1.1781, "step": 13900 }, { "epoch": 0.74, "learning_rate": 5.701417996967805e-05, "loss": 1.2115, "step": 13920 }, { "epoch": 0.75, "learning_rate": 5.688040667082849e-05, "loss": 1.1801, "step": 13940 }, { "epoch": 0.75, "learning_rate": 5.6746633371978944e-05, "loss": 1.2324, "step": 13960 }, { "epoch": 0.75, "learning_rate": 5.66128600731294e-05, "loss": 1.1945, "step": 13980 }, { "epoch": 0.75, "learning_rate": 5.647908677427985e-05, "loss": 1.1954, "step": 14000 }, { "epoch": 0.75, "learning_rate": 5.6345313475430296e-05, "loss": 1.2072, "step": 14020 }, { "epoch": 0.75, "learning_rate": 5.6211540176580747e-05, "loss": 1.2482, "step": 14040 }, { "epoch": 0.75, "learning_rate": 5.6077766877731204e-05, "loss": 1.2325, "step": 14060 }, { "epoch": 0.75, "learning_rate": 5.5943993578881655e-05, "loss": 1.1918, "step": 14080 }, { "epoch": 0.75, "learning_rate": 5.58102202800321e-05, "loss": 1.2431, "step": 14100 }, { "epoch": 0.76, "learning_rate": 5.567644698118255e-05, "loss": 1.2266, "step": 14120 }, { "epoch": 0.76, "learning_rate": 5.554267368233301e-05, "loss": 1.2819, "step": 14140 }, { "epoch": 0.76, "learning_rate": 5.540890038348345e-05, "loss": 1.3429, "step": 14160 }, { "epoch": 0.76, "learning_rate": 5.52751270846339e-05, "loss": 1.2449, "step": 14180 }, { "epoch": 0.76, "learning_rate": 5.514135378578435e-05, "loss": 1.2349, "step": 14200 }, { "epoch": 0.76, "learning_rate": 5.5007580486934796e-05, "loss": 1.2799, "step": 14220 }, { "epoch": 0.76, "learning_rate": 5.4873807188085254e-05, "loss": 1.2754, "step": 14240 }, { "epoch": 0.76, "learning_rate": 5.474672255417818e-05, "loss": 1.2664, "step": 14260 }, { "epoch": 0.76, "learning_rate": 5.461294925532863e-05, "loss": 1.2684, "step": 14280 }, { "epoch": 0.77, "learning_rate": 5.447917595647908e-05, "loss": 1.2261, "step": 14300 }, { "epoch": 0.77, "learning_rate": 5.434540265762954e-05, "loss": 1.1966, "step": 14320 }, { "epoch": 0.77, "learning_rate": 5.421162935877998e-05, "loss": 1.1624, "step": 14340 }, { "epoch": 0.77, "learning_rate": 5.407785605993043e-05, "loss": 1.1323, "step": 14360 }, { "epoch": 0.77, "learning_rate": 5.3944082761080884e-05, "loss": 1.2306, "step": 14380 }, { "epoch": 0.77, "learning_rate": 5.381030946223134e-05, "loss": 1.2516, "step": 14400 }, { "epoch": 0.77, "learning_rate": 5.3676536163381785e-05, "loss": 1.2672, "step": 14420 }, { "epoch": 0.77, "learning_rate": 5.3542762864532236e-05, "loss": 1.1664, "step": 14440 }, { "epoch": 0.77, "learning_rate": 5.3408989565682686e-05, "loss": 1.2381, "step": 14460 }, { "epoch": 0.77, "learning_rate": 5.327521626683313e-05, "loss": 1.2, "step": 14480 }, { "epoch": 0.78, "learning_rate": 5.314144296798359e-05, "loss": 1.2055, "step": 14500 }, { "epoch": 0.78, "learning_rate": 5.300766966913404e-05, "loss": 1.2226, "step": 14520 }, { "epoch": 0.78, "learning_rate": 5.287389637028448e-05, "loss": 1.2401, "step": 14540 }, { "epoch": 0.78, "learning_rate": 5.274012307143493e-05, "loss": 1.2661, "step": 14560 }, { "epoch": 0.78, "learning_rate": 5.260634977258539e-05, "loss": 1.2433, "step": 14580 }, { "epoch": 0.78, "learning_rate": 5.247257647373584e-05, "loss": 1.1201, "step": 14600 }, { "epoch": 0.78, "learning_rate": 5.2338803174886285e-05, "loss": 1.2863, "step": 14620 }, { "epoch": 0.78, "learning_rate": 5.2205029876036736e-05, "loss": 1.1867, "step": 14640 }, { "epoch": 0.78, "learning_rate": 5.2071256577187194e-05, "loss": 1.1938, "step": 14660 }, { "epoch": 0.79, "learning_rate": 5.1937483278337644e-05, "loss": 1.183, "step": 14680 }, { "epoch": 0.79, "learning_rate": 5.180370997948809e-05, "loss": 1.1815, "step": 14700 }, { "epoch": 0.79, "learning_rate": 5.166993668063854e-05, "loss": 1.3126, "step": 14720 }, { "epoch": 0.79, "learning_rate": 5.153616338178898e-05, "loss": 1.2137, "step": 14740 }, { "epoch": 0.79, "learning_rate": 5.140239008293944e-05, "loss": 1.225, "step": 14760 }, { "epoch": 0.79, "learning_rate": 5.126861678408989e-05, "loss": 1.2115, "step": 14780 }, { "epoch": 0.79, "learning_rate": 5.113484348524034e-05, "loss": 1.2397, "step": 14800 }, { "epoch": 0.79, "learning_rate": 5.1001070186390786e-05, "loss": 1.2738, "step": 14820 }, { "epoch": 0.79, "learning_rate": 5.086729688754124e-05, "loss": 1.2477, "step": 14840 }, { "epoch": 0.8, "learning_rate": 5.0733523588691694e-05, "loss": 1.1818, "step": 14860 }, { "epoch": 0.8, "learning_rate": 5.0599750289842145e-05, "loss": 1.2929, "step": 14880 }, { "epoch": 0.8, "learning_rate": 5.046597699099259e-05, "loss": 1.2313, "step": 14900 }, { "epoch": 0.8, "learning_rate": 5.0332203692143046e-05, "loss": 1.1864, "step": 14920 }, { "epoch": 0.8, "learning_rate": 5.01984303932935e-05, "loss": 1.22, "step": 14940 }, { "epoch": 0.8, "learning_rate": 5.006465709444394e-05, "loss": 1.2563, "step": 14960 }, { "epoch": 0.8, "learning_rate": 4.993088379559439e-05, "loss": 1.2011, "step": 14980 }, { "epoch": 0.8, "learning_rate": 4.979711049674485e-05, "loss": 1.1813, "step": 15000 }, { "epoch": 0.8, "learning_rate": 4.96633371978953e-05, "loss": 1.2515, "step": 15020 }, { "epoch": 0.8, "learning_rate": 4.9529563899045744e-05, "loss": 1.2136, "step": 15040 }, { "epoch": 0.81, "learning_rate": 4.9395790600196194e-05, "loss": 1.2818, "step": 15060 }, { "epoch": 0.81, "learning_rate": 4.9262017301346645e-05, "loss": 1.2574, "step": 15080 }, { "epoch": 0.81, "learning_rate": 4.91282440024971e-05, "loss": 1.2016, "step": 15100 }, { "epoch": 0.81, "learning_rate": 4.8994470703647547e-05, "loss": 1.2288, "step": 15120 }, { "epoch": 0.81, "learning_rate": 4.8860697404798e-05, "loss": 1.2784, "step": 15140 }, { "epoch": 0.81, "learning_rate": 4.872692410594845e-05, "loss": 1.1656, "step": 15160 }, { "epoch": 0.81, "learning_rate": 4.85931508070989e-05, "loss": 1.2096, "step": 15180 }, { "epoch": 0.81, "learning_rate": 4.845937750824935e-05, "loss": 1.1541, "step": 15200 }, { "epoch": 0.81, "learning_rate": 4.83256042093998e-05, "loss": 1.2398, "step": 15220 }, { "epoch": 0.82, "learning_rate": 4.8191830910550244e-05, "loss": 1.2532, "step": 15240 }, { "epoch": 0.82, "learning_rate": 4.80580576117007e-05, "loss": 1.2496, "step": 15260 }, { "epoch": 0.82, "learning_rate": 4.792428431285115e-05, "loss": 1.2404, "step": 15280 }, { "epoch": 0.82, "learning_rate": 4.77905110140016e-05, "loss": 1.2545, "step": 15300 }, { "epoch": 0.82, "learning_rate": 4.765673771515205e-05, "loss": 1.2468, "step": 15320 }, { "epoch": 0.82, "learning_rate": 4.7522964416302504e-05, "loss": 1.1775, "step": 15340 }, { "epoch": 0.82, "learning_rate": 4.7389191117452955e-05, "loss": 1.2206, "step": 15360 }, { "epoch": 0.82, "learning_rate": 4.7255417818603406e-05, "loss": 1.2497, "step": 15380 }, { "epoch": 0.82, "learning_rate": 4.712164451975385e-05, "loss": 1.2427, "step": 15400 }, { "epoch": 0.83, "learning_rate": 4.6994559885846775e-05, "loss": 1.2162, "step": 15420 }, { "epoch": 0.83, "learning_rate": 4.686078658699723e-05, "loss": 1.2676, "step": 15440 }, { "epoch": 0.83, "learning_rate": 4.6727013288147684e-05, "loss": 1.1233, "step": 15460 }, { "epoch": 0.83, "learning_rate": 4.6593239989298134e-05, "loss": 1.2596, "step": 15480 }, { "epoch": 0.83, "learning_rate": 4.645946669044858e-05, "loss": 1.2203, "step": 15500 }, { "epoch": 0.83, "learning_rate": 4.6325693391599036e-05, "loss": 1.2786, "step": 15520 }, { "epoch": 0.83, "learning_rate": 4.6191920092749486e-05, "loss": 1.2035, "step": 15540 }, { "epoch": 0.83, "learning_rate": 4.605814679389993e-05, "loss": 1.2471, "step": 15560 }, { "epoch": 0.83, "learning_rate": 4.592437349505038e-05, "loss": 1.2783, "step": 15580 }, { "epoch": 0.83, "learning_rate": 4.579060019620084e-05, "loss": 1.196, "step": 15600 }, { "epoch": 0.84, "learning_rate": 4.565682689735129e-05, "loss": 1.2318, "step": 15620 }, { "epoch": 0.84, "learning_rate": 4.552305359850173e-05, "loss": 1.2216, "step": 15640 }, { "epoch": 0.84, "learning_rate": 4.5389280299652184e-05, "loss": 1.3276, "step": 15660 }, { "epoch": 0.84, "learning_rate": 4.5255507000802635e-05, "loss": 1.2284, "step": 15680 }, { "epoch": 0.84, "learning_rate": 4.512173370195309e-05, "loss": 1.203, "step": 15700 }, { "epoch": 0.84, "learning_rate": 4.4987960403103536e-05, "loss": 1.2161, "step": 15720 }, { "epoch": 0.84, "learning_rate": 4.485418710425399e-05, "loss": 1.179, "step": 15740 }, { "epoch": 0.84, "learning_rate": 4.472041380540444e-05, "loss": 1.2494, "step": 15760 }, { "epoch": 0.84, "learning_rate": 4.458664050655489e-05, "loss": 1.297, "step": 15780 }, { "epoch": 0.85, "learning_rate": 4.445286720770534e-05, "loss": 1.2289, "step": 15800 }, { "epoch": 0.85, "learning_rate": 4.431909390885579e-05, "loss": 1.2631, "step": 15820 }, { "epoch": 0.85, "learning_rate": 4.4185320610006234e-05, "loss": 1.3254, "step": 15840 }, { "epoch": 0.85, "learning_rate": 4.405154731115669e-05, "loss": 1.2439, "step": 15860 }, { "epoch": 0.85, "learning_rate": 4.391777401230714e-05, "loss": 1.1706, "step": 15880 }, { "epoch": 0.85, "learning_rate": 4.378400071345759e-05, "loss": 1.2148, "step": 15900 }, { "epoch": 0.85, "learning_rate": 4.3650227414608036e-05, "loss": 1.1943, "step": 15920 }, { "epoch": 0.85, "learning_rate": 4.351645411575849e-05, "loss": 1.2454, "step": 15940 }, { "epoch": 0.85, "learning_rate": 4.3382680816908945e-05, "loss": 1.2304, "step": 15960 }, { "epoch": 0.86, "learning_rate": 4.324890751805939e-05, "loss": 1.1983, "step": 15980 }, { "epoch": 0.86, "learning_rate": 4.311513421920984e-05, "loss": 1.1838, "step": 16000 }, { "epoch": 0.86, "learning_rate": 4.298136092036029e-05, "loss": 1.1953, "step": 16020 }, { "epoch": 0.86, "learning_rate": 4.284758762151075e-05, "loss": 1.3063, "step": 16040 }, { "epoch": 0.86, "learning_rate": 4.271381432266119e-05, "loss": 1.258, "step": 16060 }, { "epoch": 0.86, "learning_rate": 4.258004102381164e-05, "loss": 1.2175, "step": 16080 }, { "epoch": 0.86, "learning_rate": 4.244626772496209e-05, "loss": 1.2322, "step": 16100 }, { "epoch": 0.86, "learning_rate": 4.231249442611255e-05, "loss": 1.2267, "step": 16120 }, { "epoch": 0.86, "learning_rate": 4.2178721127262994e-05, "loss": 1.1884, "step": 16140 }, { "epoch": 0.86, "learning_rate": 4.2044947828413445e-05, "loss": 1.2357, "step": 16160 }, { "epoch": 0.87, "learning_rate": 4.1911174529563896e-05, "loss": 1.2271, "step": 16180 }, { "epoch": 0.87, "learning_rate": 4.1777401230714347e-05, "loss": 1.24, "step": 16200 }, { "epoch": 0.87, "learning_rate": 4.16436279318648e-05, "loss": 1.2626, "step": 16220 }, { "epoch": 0.87, "learning_rate": 4.150985463301525e-05, "loss": 1.116, "step": 16240 }, { "epoch": 0.87, "learning_rate": 4.137608133416569e-05, "loss": 1.2163, "step": 16260 }, { "epoch": 0.87, "learning_rate": 4.124230803531614e-05, "loss": 1.1632, "step": 16280 }, { "epoch": 0.87, "learning_rate": 4.11085347364666e-05, "loss": 1.3105, "step": 16300 }, { "epoch": 0.87, "learning_rate": 4.097476143761705e-05, "loss": 1.1595, "step": 16320 }, { "epoch": 0.87, "learning_rate": 4.0840988138767495e-05, "loss": 1.2747, "step": 16340 }, { "epoch": 0.88, "learning_rate": 4.0707214839917945e-05, "loss": 1.2553, "step": 16360 }, { "epoch": 0.88, "learning_rate": 4.05734415410684e-05, "loss": 1.1972, "step": 16380 }, { "epoch": 0.88, "learning_rate": 4.0439668242218854e-05, "loss": 1.2622, "step": 16400 }, { "epoch": 0.88, "learning_rate": 4.03058949433693e-05, "loss": 1.2472, "step": 16420 }, { "epoch": 0.88, "learning_rate": 4.017212164451975e-05, "loss": 1.1597, "step": 16440 }, { "epoch": 0.88, "learning_rate": 4.0038348345670206e-05, "loss": 1.2404, "step": 16460 }, { "epoch": 0.88, "learning_rate": 3.990457504682065e-05, "loss": 1.2105, "step": 16480 }, { "epoch": 0.88, "learning_rate": 3.97708017479711e-05, "loss": 1.1818, "step": 16500 }, { "epoch": 0.88, "learning_rate": 3.963702844912155e-05, "loss": 1.2134, "step": 16520 }, { "epoch": 0.89, "learning_rate": 3.950325515027201e-05, "loss": 1.1783, "step": 16540 }, { "epoch": 0.89, "learning_rate": 3.936948185142245e-05, "loss": 1.2554, "step": 16560 }, { "epoch": 0.89, "learning_rate": 3.9235708552572903e-05, "loss": 1.1288, "step": 16580 }, { "epoch": 0.89, "learning_rate": 3.9101935253723354e-05, "loss": 1.245, "step": 16600 }, { "epoch": 0.89, "learning_rate": 3.89681619548738e-05, "loss": 1.2326, "step": 16620 }, { "epoch": 0.89, "learning_rate": 3.8834388656024256e-05, "loss": 1.1481, "step": 16640 }, { "epoch": 0.89, "learning_rate": 3.8700615357174706e-05, "loss": 1.2072, "step": 16660 }, { "epoch": 0.89, "learning_rate": 3.856684205832516e-05, "loss": 1.2576, "step": 16680 }, { "epoch": 0.89, "learning_rate": 3.84330687594756e-05, "loss": 1.2141, "step": 16700 }, { "epoch": 0.89, "learning_rate": 3.829929546062606e-05, "loss": 1.1467, "step": 16720 }, { "epoch": 0.9, "learning_rate": 3.816552216177651e-05, "loss": 1.2243, "step": 16740 }, { "epoch": 0.9, "learning_rate": 3.803174886292695e-05, "loss": 1.2173, "step": 16760 }, { "epoch": 0.9, "learning_rate": 3.7897975564077404e-05, "loss": 1.1714, "step": 16780 }, { "epoch": 0.9, "learning_rate": 3.7770890930170336e-05, "loss": 1.267, "step": 16800 }, { "epoch": 0.9, "learning_rate": 3.763711763132079e-05, "loss": 1.1719, "step": 16820 }, { "epoch": 0.9, "learning_rate": 3.750334433247124e-05, "loss": 1.2197, "step": 16840 }, { "epoch": 0.9, "learning_rate": 3.736957103362168e-05, "loss": 1.2144, "step": 16860 }, { "epoch": 0.9, "learning_rate": 3.723579773477214e-05, "loss": 1.1774, "step": 16880 }, { "epoch": 0.9, "learning_rate": 3.710202443592258e-05, "loss": 1.2671, "step": 16900 }, { "epoch": 0.91, "learning_rate": 3.696825113707304e-05, "loss": 1.2488, "step": 16920 }, { "epoch": 0.91, "learning_rate": 3.6834477838223484e-05, "loss": 1.2163, "step": 16940 }, { "epoch": 0.91, "learning_rate": 3.670070453937394e-05, "loss": 1.2128, "step": 16960 }, { "epoch": 0.91, "learning_rate": 3.6566931240524386e-05, "loss": 1.2154, "step": 16980 }, { "epoch": 0.91, "learning_rate": 3.6433157941674836e-05, "loss": 1.2128, "step": 17000 }, { "epoch": 0.91, "learning_rate": 3.629938464282529e-05, "loss": 1.2514, "step": 17020 }, { "epoch": 0.91, "learning_rate": 3.616561134397574e-05, "loss": 1.1731, "step": 17040 }, { "epoch": 0.91, "learning_rate": 3.603183804512619e-05, "loss": 1.2039, "step": 17060 }, { "epoch": 0.91, "learning_rate": 3.589806474627664e-05, "loss": 1.1935, "step": 17080 }, { "epoch": 0.92, "learning_rate": 3.576429144742709e-05, "loss": 1.3014, "step": 17100 }, { "epoch": 0.92, "learning_rate": 3.563051814857754e-05, "loss": 1.2171, "step": 17120 }, { "epoch": 0.92, "learning_rate": 3.5503433514670466e-05, "loss": 1.1721, "step": 17140 }, { "epoch": 0.92, "learning_rate": 3.536966021582092e-05, "loss": 1.1282, "step": 17160 }, { "epoch": 0.92, "learning_rate": 3.523588691697137e-05, "loss": 1.1972, "step": 17180 }, { "epoch": 0.92, "learning_rate": 3.510211361812182e-05, "loss": 1.2744, "step": 17200 }, { "epoch": 0.92, "learning_rate": 3.496834031927227e-05, "loss": 1.2004, "step": 17220 }, { "epoch": 0.92, "learning_rate": 3.483456702042272e-05, "loss": 1.1991, "step": 17240 }, { "epoch": 0.92, "learning_rate": 3.470079372157317e-05, "loss": 1.23, "step": 17260 }, { "epoch": 0.92, "learning_rate": 3.456702042272362e-05, "loss": 1.2807, "step": 17280 }, { "epoch": 0.93, "learning_rate": 3.443324712387407e-05, "loss": 1.198, "step": 17300 }, { "epoch": 0.93, "learning_rate": 3.429947382502452e-05, "loss": 1.1389, "step": 17320 }, { "epoch": 0.93, "learning_rate": 3.4165700526174973e-05, "loss": 1.2288, "step": 17340 }, { "epoch": 0.93, "learning_rate": 3.4031927227325424e-05, "loss": 1.2574, "step": 17360 }, { "epoch": 0.93, "learning_rate": 3.389815392847587e-05, "loss": 1.2408, "step": 17380 }, { "epoch": 0.93, "learning_rate": 3.3764380629626326e-05, "loss": 1.2079, "step": 17400 }, { "epoch": 0.93, "learning_rate": 3.363060733077677e-05, "loss": 1.1988, "step": 17420 }, { "epoch": 0.93, "learning_rate": 3.349683403192723e-05, "loss": 1.1764, "step": 17440 }, { "epoch": 0.93, "learning_rate": 3.336306073307767e-05, "loss": 1.207, "step": 17460 }, { "epoch": 0.94, "learning_rate": 3.322928743422813e-05, "loss": 1.1283, "step": 17480 }, { "epoch": 0.94, "learning_rate": 3.309551413537857e-05, "loss": 1.2414, "step": 17500 }, { "epoch": 0.94, "learning_rate": 3.296174083652903e-05, "loss": 1.2783, "step": 17520 }, { "epoch": 0.94, "learning_rate": 3.2827967537679474e-05, "loss": 1.2338, "step": 17540 }, { "epoch": 0.94, "learning_rate": 3.269419423882993e-05, "loss": 1.2175, "step": 17560 }, { "epoch": 0.94, "learning_rate": 3.2560420939980375e-05, "loss": 1.1971, "step": 17580 }, { "epoch": 0.94, "learning_rate": 3.2426647641130826e-05, "loss": 1.2329, "step": 17600 }, { "epoch": 0.94, "learning_rate": 3.229287434228128e-05, "loss": 1.2419, "step": 17620 }, { "epoch": 0.94, "learning_rate": 3.215910104343173e-05, "loss": 1.1922, "step": 17640 }, { "epoch": 0.94, "learning_rate": 3.202532774458218e-05, "loss": 1.1817, "step": 17660 }, { "epoch": 0.95, "learning_rate": 3.189155444573263e-05, "loss": 1.2774, "step": 17680 }, { "epoch": 0.95, "learning_rate": 3.175778114688308e-05, "loss": 1.1617, "step": 17700 }, { "epoch": 0.95, "learning_rate": 3.162400784803353e-05, "loss": 1.2094, "step": 17720 }, { "epoch": 0.95, "learning_rate": 3.149023454918398e-05, "loss": 1.205, "step": 17740 }, { "epoch": 0.95, "learning_rate": 3.135646125033443e-05, "loss": 1.2232, "step": 17760 }, { "epoch": 0.95, "learning_rate": 3.122268795148488e-05, "loss": 1.2858, "step": 17780 }, { "epoch": 0.95, "learning_rate": 3.108891465263533e-05, "loss": 1.2897, "step": 17800 }, { "epoch": 0.95, "learning_rate": 3.0955141353785784e-05, "loss": 1.2716, "step": 17820 }, { "epoch": 0.95, "learning_rate": 3.082136805493623e-05, "loss": 1.2439, "step": 17840 }, { "epoch": 0.96, "learning_rate": 3.0687594756086685e-05, "loss": 1.3103, "step": 17860 }, { "epoch": 0.96, "learning_rate": 3.055382145723713e-05, "loss": 1.1568, "step": 17880 }, { "epoch": 0.96, "learning_rate": 3.0420048158387583e-05, "loss": 1.1713, "step": 17900 }, { "epoch": 0.96, "learning_rate": 3.0286274859538034e-05, "loss": 1.2434, "step": 17920 }, { "epoch": 0.96, "learning_rate": 3.0152501560688485e-05, "loss": 1.2869, "step": 17940 }, { "epoch": 0.96, "learning_rate": 3.0018728261838935e-05, "loss": 1.1952, "step": 17960 }, { "epoch": 0.96, "learning_rate": 2.9884954962989386e-05, "loss": 1.1948, "step": 17980 }, { "epoch": 0.96, "learning_rate": 2.9751181664139834e-05, "loss": 1.3062, "step": 18000 }, { "epoch": 0.96, "learning_rate": 2.9617408365290284e-05, "loss": 1.2624, "step": 18020 }, { "epoch": 0.97, "learning_rate": 2.9483635066440735e-05, "loss": 1.2594, "step": 18040 }, { "epoch": 0.97, "learning_rate": 2.9349861767591186e-05, "loss": 1.1556, "step": 18060 }, { "epoch": 0.97, "learning_rate": 2.9216088468741636e-05, "loss": 1.244, "step": 18080 }, { "epoch": 0.97, "learning_rate": 2.9082315169892087e-05, "loss": 1.221, "step": 18100 }, { "epoch": 0.97, "learning_rate": 2.8948541871042538e-05, "loss": 1.2647, "step": 18120 }, { "epoch": 0.97, "learning_rate": 2.8814768572192985e-05, "loss": 1.2203, "step": 18140 }, { "epoch": 0.97, "learning_rate": 2.868099527334344e-05, "loss": 1.2193, "step": 18160 }, { "epoch": 0.97, "learning_rate": 2.8553910639436365e-05, "loss": 1.1949, "step": 18180 }, { "epoch": 0.97, "learning_rate": 2.842013734058682e-05, "loss": 1.2328, "step": 18200 }, { "epoch": 0.97, "learning_rate": 2.8286364041737266e-05, "loss": 1.1338, "step": 18220 }, { "epoch": 0.98, "learning_rate": 2.8152590742887714e-05, "loss": 1.217, "step": 18240 }, { "epoch": 0.98, "learning_rate": 2.8018817444038168e-05, "loss": 1.2662, "step": 18260 }, { "epoch": 0.98, "learning_rate": 2.7885044145188615e-05, "loss": 1.1758, "step": 18280 }, { "epoch": 0.98, "learning_rate": 2.775127084633907e-05, "loss": 1.1894, "step": 18300 }, { "epoch": 0.98, "learning_rate": 2.7617497547489516e-05, "loss": 1.2418, "step": 18320 }, { "epoch": 0.98, "learning_rate": 2.748372424863997e-05, "loss": 1.2025, "step": 18340 }, { "epoch": 0.98, "learning_rate": 2.7349950949790418e-05, "loss": 1.2621, "step": 18360 }, { "epoch": 0.98, "learning_rate": 2.7216177650940872e-05, "loss": 1.2007, "step": 18380 }, { "epoch": 0.98, "learning_rate": 2.708240435209132e-05, "loss": 1.197, "step": 18400 }, { "epoch": 0.99, "learning_rate": 2.6948631053241773e-05, "loss": 1.2301, "step": 18420 }, { "epoch": 0.99, "learning_rate": 2.681485775439222e-05, "loss": 1.2122, "step": 18440 }, { "epoch": 0.99, "learning_rate": 2.668108445554267e-05, "loss": 1.2291, "step": 18460 }, { "epoch": 0.99, "learning_rate": 2.6547311156693122e-05, "loss": 1.2433, "step": 18480 }, { "epoch": 0.99, "learning_rate": 2.6413537857843573e-05, "loss": 1.2205, "step": 18500 }, { "epoch": 0.99, "learning_rate": 2.6279764558994024e-05, "loss": 1.2598, "step": 18520 }, { "epoch": 0.99, "learning_rate": 2.6145991260144474e-05, "loss": 1.2419, "step": 18540 }, { "epoch": 0.99, "learning_rate": 2.601221796129492e-05, "loss": 1.2383, "step": 18560 }, { "epoch": 0.99, "learning_rate": 2.5878444662445372e-05, "loss": 1.2396, "step": 18580 }, { "epoch": 1.0, "learning_rate": 2.5744671363595823e-05, "loss": 1.2311, "step": 18600 }, { "epoch": 1.0, "learning_rate": 2.5610898064746274e-05, "loss": 1.229, "step": 18620 }, { "epoch": 1.0, "learning_rate": 2.5477124765896724e-05, "loss": 1.2473, "step": 18640 }, { "epoch": 1.0, "learning_rate": 2.5343351467047175e-05, "loss": 1.222, "step": 18660 }, { "epoch": 1.0, "learning_rate": 2.5216266833140104e-05, "loss": 1.1945, "step": 18680 }, { "epoch": 1.0, "learning_rate": 2.508249353429055e-05, "loss": 1.2896, "step": 18700 }, { "epoch": 1.0, "learning_rate": 2.4948720235441005e-05, "loss": 1.2152, "step": 18720 }, { "epoch": 1.0, "learning_rate": 2.4814946936591453e-05, "loss": 1.1425, "step": 18740 }, { "epoch": 1.0, "learning_rate": 2.4681173637741907e-05, "loss": 1.1809, "step": 18760 }, { "epoch": 1.0, "learning_rate": 2.4547400338892354e-05, "loss": 1.2205, "step": 18780 }, { "epoch": 1.01, "learning_rate": 2.44136270400428e-05, "loss": 1.219, "step": 18800 }, { "epoch": 1.01, "learning_rate": 2.4279853741193256e-05, "loss": 1.1933, "step": 18820 }, { "epoch": 1.01, "learning_rate": 2.4146080442343703e-05, "loss": 1.2225, "step": 18840 }, { "epoch": 1.01, "learning_rate": 2.4012307143494157e-05, "loss": 1.2416, "step": 18860 }, { "epoch": 1.01, "learning_rate": 2.3885222509587083e-05, "loss": 1.2004, "step": 18880 }, { "epoch": 1.01, "learning_rate": 2.3751449210737537e-05, "loss": 1.2951, "step": 18900 }, { "epoch": 1.01, "learning_rate": 2.3617675911887984e-05, "loss": 1.2528, "step": 18920 }, { "epoch": 1.01, "learning_rate": 2.3483902613038438e-05, "loss": 1.2223, "step": 18940 }, { "epoch": 1.01, "learning_rate": 2.3350129314188885e-05, "loss": 1.2212, "step": 18960 }, { "epoch": 1.02, "learning_rate": 2.321635601533934e-05, "loss": 1.1644, "step": 18980 }, { "epoch": 1.02, "learning_rate": 2.3082582716489787e-05, "loss": 1.176, "step": 19000 }, { "epoch": 1.02, "learning_rate": 2.2948809417640238e-05, "loss": 1.234, "step": 19020 }, { "epoch": 1.02, "learning_rate": 2.281503611879069e-05, "loss": 1.2224, "step": 19040 }, { "epoch": 1.02, "learning_rate": 2.2681262819941136e-05, "loss": 1.2349, "step": 19060 }, { "epoch": 1.02, "learning_rate": 2.254748952109159e-05, "loss": 1.2823, "step": 19080 }, { "epoch": 1.02, "learning_rate": 2.2413716222242037e-05, "loss": 1.2425, "step": 19100 }, { "epoch": 1.02, "learning_rate": 2.2279942923392488e-05, "loss": 1.2006, "step": 19120 }, { "epoch": 1.02, "learning_rate": 2.214616962454294e-05, "loss": 1.1504, "step": 19140 }, { "epoch": 1.03, "learning_rate": 2.201239632569339e-05, "loss": 1.1757, "step": 19160 }, { "epoch": 1.03, "learning_rate": 2.187862302684384e-05, "loss": 1.2448, "step": 19180 }, { "epoch": 1.03, "learning_rate": 2.174484972799429e-05, "loss": 1.1635, "step": 19200 }, { "epoch": 1.03, "learning_rate": 2.161107642914474e-05, "loss": 1.1359, "step": 19220 }, { "epoch": 1.03, "learning_rate": 2.1477303130295192e-05, "loss": 1.1805, "step": 19240 }, { "epoch": 1.03, "learning_rate": 2.134352983144564e-05, "loss": 1.246, "step": 19260 }, { "epoch": 1.03, "learning_rate": 2.1209756532596094e-05, "loss": 1.1909, "step": 19280 }, { "epoch": 1.03, "learning_rate": 2.107598323374654e-05, "loss": 1.2368, "step": 19300 }, { "epoch": 1.03, "learning_rate": 2.0942209934896995e-05, "loss": 1.1308, "step": 19320 }, { "epoch": 1.03, "learning_rate": 2.0808436636047442e-05, "loss": 1.2616, "step": 19340 }, { "epoch": 1.04, "learning_rate": 2.0674663337197893e-05, "loss": 1.2484, "step": 19360 }, { "epoch": 1.04, "learning_rate": 2.0540890038348344e-05, "loss": 1.2515, "step": 19380 }, { "epoch": 1.04, "learning_rate": 2.040711673949879e-05, "loss": 1.2046, "step": 19400 }, { "epoch": 1.04, "learning_rate": 2.0273343440649245e-05, "loss": 1.1855, "step": 19420 }, { "epoch": 1.04, "learning_rate": 2.0139570141799693e-05, "loss": 1.2236, "step": 19440 }, { "epoch": 1.04, "learning_rate": 2.0005796842950147e-05, "loss": 1.1993, "step": 19460 }, { "epoch": 1.04, "learning_rate": 1.9872023544100594e-05, "loss": 1.2085, "step": 19480 }, { "epoch": 1.04, "learning_rate": 1.9738250245251048e-05, "loss": 1.2522, "step": 19500 }, { "epoch": 1.04, "learning_rate": 1.9604476946401495e-05, "loss": 1.2009, "step": 19520 }, { "epoch": 1.05, "learning_rate": 1.947070364755195e-05, "loss": 1.2322, "step": 19540 }, { "epoch": 1.05, "learning_rate": 1.9336930348702397e-05, "loss": 1.2418, "step": 19560 }, { "epoch": 1.05, "learning_rate": 1.9203157049852848e-05, "loss": 1.2864, "step": 19580 }, { "epoch": 1.05, "learning_rate": 1.9069383751003298e-05, "loss": 1.1777, "step": 19600 }, { "epoch": 1.05, "learning_rate": 1.893561045215375e-05, "loss": 1.2666, "step": 19620 }, { "epoch": 1.05, "learning_rate": 1.88018371533042e-05, "loss": 1.2063, "step": 19640 }, { "epoch": 1.05, "learning_rate": 1.866806385445465e-05, "loss": 1.149, "step": 19660 }, { "epoch": 1.05, "learning_rate": 1.85342905556051e-05, "loss": 1.2305, "step": 19680 }, { "epoch": 1.05, "learning_rate": 1.8400517256755552e-05, "loss": 1.1864, "step": 19700 }, { "epoch": 1.06, "learning_rate": 1.8266743957906e-05, "loss": 1.1943, "step": 19720 }, { "epoch": 1.06, "learning_rate": 1.813297065905645e-05, "loss": 1.1674, "step": 19740 }, { "epoch": 1.06, "learning_rate": 1.79991973602069e-05, "loss": 1.2333, "step": 19760 }, { "epoch": 1.06, "learning_rate": 1.786542406135735e-05, "loss": 1.2205, "step": 19780 }, { "epoch": 1.06, "learning_rate": 1.7731650762507802e-05, "loss": 1.274, "step": 19800 }, { "epoch": 1.06, "learning_rate": 1.7597877463658253e-05, "loss": 1.2495, "step": 19820 }, { "epoch": 1.06, "learning_rate": 1.74641041648087e-05, "loss": 1.2416, "step": 19840 }, { "epoch": 1.06, "learning_rate": 1.733033086595915e-05, "loss": 1.1099, "step": 19860 }, { "epoch": 1.06, "learning_rate": 1.71965575671096e-05, "loss": 1.2867, "step": 19880 }, { "epoch": 1.06, "learning_rate": 1.7062784268260052e-05, "loss": 1.1876, "step": 19900 }, { "epoch": 1.07, "learning_rate": 1.6929010969410503e-05, "loss": 1.1902, "step": 19920 }, { "epoch": 1.07, "learning_rate": 1.6795237670560954e-05, "loss": 1.2259, "step": 19940 }, { "epoch": 1.07, "learning_rate": 1.6661464371711404e-05, "loss": 1.2047, "step": 19960 }, { "epoch": 1.07, "learning_rate": 1.6527691072861855e-05, "loss": 1.2194, "step": 19980 }, { "epoch": 1.07, "learning_rate": 1.6393917774012306e-05, "loss": 1.1991, "step": 20000 }, { "epoch": 1.07, "learning_rate": 1.6260144475162757e-05, "loss": 1.1613, "step": 20020 }, { "epoch": 1.07, "learning_rate": 1.6133059841255685e-05, "loss": 1.2577, "step": 20040 }, { "epoch": 1.07, "learning_rate": 1.5999286542406136e-05, "loss": 1.2165, "step": 20060 }, { "epoch": 1.07, "learning_rate": 1.5865513243556583e-05, "loss": 1.233, "step": 20080 }, { "epoch": 1.08, "learning_rate": 1.5731739944707034e-05, "loss": 1.3323, "step": 20100 }, { "epoch": 1.08, "learning_rate": 1.5597966645857485e-05, "loss": 1.2724, "step": 20120 }, { "epoch": 1.08, "learning_rate": 1.5464193347007936e-05, "loss": 1.2589, "step": 20140 }, { "epoch": 1.08, "learning_rate": 1.5330420048158386e-05, "loss": 1.1968, "step": 20160 }, { "epoch": 1.08, "learning_rate": 1.5196646749308837e-05, "loss": 1.1388, "step": 20180 }, { "epoch": 1.08, "learning_rate": 1.5062873450459286e-05, "loss": 1.3557, "step": 20200 }, { "epoch": 1.08, "learning_rate": 1.4929100151609737e-05, "loss": 1.1344, "step": 20220 }, { "epoch": 1.08, "learning_rate": 1.4795326852760188e-05, "loss": 1.2463, "step": 20240 }, { "epoch": 1.08, "learning_rate": 1.4661553553910638e-05, "loss": 1.0617, "step": 20260 }, { "epoch": 1.09, "learning_rate": 1.4527780255061089e-05, "loss": 1.1622, "step": 20280 }, { "epoch": 1.09, "learning_rate": 1.439400695621154e-05, "loss": 1.2195, "step": 20300 }, { "epoch": 1.09, "learning_rate": 1.426023365736199e-05, "loss": 1.2133, "step": 20320 }, { "epoch": 1.09, "learning_rate": 1.4133149023454916e-05, "loss": 1.158, "step": 20340 }, { "epoch": 1.09, "learning_rate": 1.3999375724605367e-05, "loss": 1.1724, "step": 20360 }, { "epoch": 1.09, "learning_rate": 1.3865602425755817e-05, "loss": 1.2219, "step": 20380 }, { "epoch": 1.09, "learning_rate": 1.3731829126906268e-05, "loss": 1.2623, "step": 20400 }, { "epoch": 1.09, "learning_rate": 1.3598055828056719e-05, "loss": 1.1944, "step": 20420 }, { "epoch": 1.09, "learning_rate": 1.346428252920717e-05, "loss": 1.1636, "step": 20440 }, { "epoch": 1.09, "learning_rate": 1.333050923035762e-05, "loss": 1.1656, "step": 20460 }, { "epoch": 1.1, "learning_rate": 1.319673593150807e-05, "loss": 1.2136, "step": 20480 }, { "epoch": 1.1, "learning_rate": 1.306296263265852e-05, "loss": 1.1909, "step": 20500 }, { "epoch": 1.1, "learning_rate": 1.292918933380897e-05, "loss": 1.2206, "step": 20520 }, { "epoch": 1.1, "learning_rate": 1.2795416034959421e-05, "loss": 1.2185, "step": 20540 }, { "epoch": 1.1, "learning_rate": 1.2661642736109872e-05, "loss": 1.2264, "step": 20560 }, { "epoch": 1.1, "learning_rate": 1.2527869437260323e-05, "loss": 1.1484, "step": 20580 }, { "epoch": 1.1, "learning_rate": 1.2394096138410774e-05, "loss": 1.1531, "step": 20600 }, { "epoch": 1.1, "learning_rate": 1.2260322839561224e-05, "loss": 1.2669, "step": 20620 }, { "epoch": 1.1, "learning_rate": 1.2126549540711672e-05, "loss": 1.2263, "step": 20640 }, { "epoch": 1.11, "learning_rate": 1.1992776241862122e-05, "loss": 1.2221, "step": 20660 }, { "epoch": 1.11, "learning_rate": 1.1859002943012573e-05, "loss": 1.2543, "step": 20680 }, { "epoch": 1.11, "learning_rate": 1.1725229644163024e-05, "loss": 1.2866, "step": 20700 }, { "epoch": 1.11, "learning_rate": 1.1591456345313474e-05, "loss": 1.2002, "step": 20720 }, { "epoch": 1.11, "learning_rate": 1.1457683046463925e-05, "loss": 1.2091, "step": 20740 }, { "epoch": 1.11, "learning_rate": 1.1323909747614376e-05, "loss": 1.202, "step": 20760 }, { "epoch": 1.11, "learning_rate": 1.1190136448764825e-05, "loss": 1.2199, "step": 20780 }, { "epoch": 1.11, "learning_rate": 1.1056363149915276e-05, "loss": 1.2467, "step": 20800 }, { "epoch": 1.11, "learning_rate": 1.0922589851065726e-05, "loss": 1.1745, "step": 20820 }, { "epoch": 1.12, "learning_rate": 1.0788816552216177e-05, "loss": 1.253, "step": 20840 }, { "epoch": 1.12, "learning_rate": 1.0655043253366628e-05, "loss": 1.2218, "step": 20860 }, { "epoch": 1.12, "learning_rate": 1.0521269954517078e-05, "loss": 1.2361, "step": 20880 }, { "epoch": 1.12, "learning_rate": 1.038749665566753e-05, "loss": 1.1373, "step": 20900 }, { "epoch": 1.12, "learning_rate": 1.025372335681798e-05, "loss": 1.1563, "step": 20920 }, { "epoch": 1.12, "learning_rate": 1.0119950057968429e-05, "loss": 1.2032, "step": 20940 }, { "epoch": 1.12, "learning_rate": 9.986176759118878e-06, "loss": 1.1974, "step": 20960 }, { "epoch": 1.12, "learning_rate": 9.852403460269329e-06, "loss": 1.2113, "step": 20980 }, { "epoch": 1.12, "learning_rate": 9.71863016141978e-06, "loss": 1.2158, "step": 21000 }, { "epoch": 1.12, "learning_rate": 9.58485686257023e-06, "loss": 1.2027, "step": 21020 }, { "epoch": 1.13, "learning_rate": 9.45108356372068e-06, "loss": 1.207, "step": 21040 }, { "epoch": 1.13, "learning_rate": 9.31731026487113e-06, "loss": 1.2265, "step": 21060 }, { "epoch": 1.13, "learning_rate": 9.18353696602158e-06, "loss": 1.3103, "step": 21080 }, { "epoch": 1.13, "learning_rate": 9.049763667172031e-06, "loss": 1.2378, "step": 21100 }, { "epoch": 1.13, "learning_rate": 8.915990368322482e-06, "loss": 1.2196, "step": 21120 }, { "epoch": 1.13, "learning_rate": 8.782217069472933e-06, "loss": 1.2528, "step": 21140 }, { "epoch": 1.13, "learning_rate": 8.648443770623383e-06, "loss": 1.2906, "step": 21160 }, { "epoch": 1.13, "learning_rate": 8.514670471773834e-06, "loss": 1.2394, "step": 21180 }, { "epoch": 1.13, "learning_rate": 8.380897172924283e-06, "loss": 1.2315, "step": 21200 }, { "epoch": 1.14, "learning_rate": 8.247123874074734e-06, "loss": 1.2877, "step": 21220 }, { "epoch": 1.14, "learning_rate": 8.113350575225185e-06, "loss": 1.1724, "step": 21240 }, { "epoch": 1.14, "learning_rate": 7.979577276375635e-06, "loss": 1.1764, "step": 21260 }, { "epoch": 1.14, "learning_rate": 7.845803977526086e-06, "loss": 1.2914, "step": 21280 }, { "epoch": 1.14, "learning_rate": 7.712030678676535e-06, "loss": 1.2299, "step": 21300 }, { "epoch": 1.14, "learning_rate": 7.578257379826987e-06, "loss": 1.1829, "step": 21320 }, { "epoch": 1.14, "learning_rate": 7.444484080977436e-06, "loss": 1.2475, "step": 21340 }, { "epoch": 1.14, "learning_rate": 7.310710782127886e-06, "loss": 1.2074, "step": 21360 }, { "epoch": 1.14, "learning_rate": 7.176937483278337e-06, "loss": 1.2162, "step": 21380 }, { "epoch": 1.15, "learning_rate": 7.043164184428787e-06, "loss": 1.2401, "step": 21400 }, { "epoch": 1.15, "learning_rate": 6.909390885579238e-06, "loss": 1.2712, "step": 21420 }, { "epoch": 1.15, "learning_rate": 6.775617586729688e-06, "loss": 1.2646, "step": 21440 }, { "epoch": 1.15, "learning_rate": 6.641844287880139e-06, "loss": 1.2326, "step": 21460 }, { "epoch": 1.15, "learning_rate": 6.508070989030589e-06, "loss": 1.1431, "step": 21480 }, { "epoch": 1.15, "learning_rate": 6.374297690181039e-06, "loss": 1.21, "step": 21500 }, { "epoch": 1.15, "learning_rate": 6.24052439133149e-06, "loss": 1.193, "step": 21520 }, { "epoch": 1.15, "learning_rate": 6.1067510924819395e-06, "loss": 1.2035, "step": 21540 }, { "epoch": 1.15, "learning_rate": 5.97297779363239e-06, "loss": 1.193, "step": 21560 }, { "epoch": 1.15, "learning_rate": 5.839204494782841e-06, "loss": 1.2175, "step": 21580 }, { "epoch": 1.16, "learning_rate": 5.705431195933292e-06, "loss": 1.1968, "step": 21600 }, { "epoch": 1.16, "learning_rate": 5.5716578970837415e-06, "loss": 1.248, "step": 21620 }, { "epoch": 1.16, "learning_rate": 5.437884598234192e-06, "loss": 1.1431, "step": 21640 }, { "epoch": 1.16, "learning_rate": 5.304111299384642e-06, "loss": 1.2148, "step": 21660 }, { "epoch": 1.16, "learning_rate": 5.170338000535092e-06, "loss": 1.1803, "step": 21680 }, { "epoch": 1.16, "learning_rate": 5.036564701685543e-06, "loss": 1.2198, "step": 21700 }, { "epoch": 1.16, "learning_rate": 4.902791402835993e-06, "loss": 1.2058, "step": 21720 }, { "epoch": 1.16, "learning_rate": 4.769018103986444e-06, "loss": 1.2211, "step": 21740 }, { "epoch": 1.16, "learning_rate": 4.635244805136895e-06, "loss": 1.249, "step": 21760 }, { "epoch": 1.17, "learning_rate": 4.501471506287345e-06, "loss": 1.2054, "step": 21780 }, { "epoch": 1.17, "learning_rate": 4.3676982074377946e-06, "loss": 1.1895, "step": 21800 }, { "epoch": 1.17, "learning_rate": 4.233924908588245e-06, "loss": 1.2016, "step": 21820 }, { "epoch": 1.17, "learning_rate": 4.100151609738696e-06, "loss": 1.1959, "step": 21840 }, { "epoch": 1.17, "learning_rate": 3.966378310889146e-06, "loss": 1.2296, "step": 21860 }, { "epoch": 1.17, "learning_rate": 3.832605012039597e-06, "loss": 1.1881, "step": 21880 }, { "epoch": 1.17, "learning_rate": 3.698831713190047e-06, "loss": 1.2088, "step": 21900 }, { "epoch": 1.17, "learning_rate": 3.5650584143404976e-06, "loss": 1.299, "step": 21920 }, { "epoch": 1.17, "learning_rate": 3.4312851154909475e-06, "loss": 1.1876, "step": 21940 }, { "epoch": 1.18, "learning_rate": 3.297511816641398e-06, "loss": 1.24, "step": 21960 }, { "epoch": 1.18, "learning_rate": 3.1637385177918485e-06, "loss": 1.2123, "step": 21980 }, { "epoch": 1.18, "learning_rate": 3.029965218942299e-06, "loss": 1.1636, "step": 22000 }, { "epoch": 1.18, "learning_rate": 2.896191920092749e-06, "loss": 1.2052, "step": 22020 }, { "epoch": 1.18, "learning_rate": 2.7624186212431994e-06, "loss": 1.265, "step": 22040 }, { "epoch": 1.18, "learning_rate": 2.62864532239365e-06, "loss": 1.2587, "step": 22060 }, { "epoch": 1.18, "learning_rate": 2.501560688486578e-06, "loss": 1.2638, "step": 22080 }, { "epoch": 1.18, "learning_rate": 2.367787389637028e-06, "loss": 1.2383, "step": 22100 }, { "epoch": 1.18, "learning_rate": 2.2340140907874787e-06, "loss": 1.1438, "step": 22120 }, { "epoch": 1.18, "learning_rate": 2.100240791937929e-06, "loss": 1.2706, "step": 22140 }, { "epoch": 1.19, "learning_rate": 1.9664674930883793e-06, "loss": 1.1764, "step": 22160 }, { "epoch": 1.19, "learning_rate": 1.8326941942388298e-06, "loss": 1.3106, "step": 22180 }, { "epoch": 1.19, "learning_rate": 1.69892089538928e-06, "loss": 1.2716, "step": 22200 }, { "epoch": 1.19, "learning_rate": 1.5651475965397306e-06, "loss": 1.1863, "step": 22220 }, { "epoch": 1.19, "learning_rate": 1.4313742976901809e-06, "loss": 1.1858, "step": 22240 }, { "epoch": 1.19, "learning_rate": 1.2976009988406314e-06, "loss": 1.2174, "step": 22260 }, { "epoch": 1.19, "learning_rate": 1.1638276999910817e-06, "loss": 1.209, "step": 22280 }, { "epoch": 1.19, "learning_rate": 1.0300544011415322e-06, "loss": 1.1951, "step": 22300 }, { "epoch": 1.19, "learning_rate": 8.962811022919825e-07, "loss": 1.2038, "step": 22320 }, { "epoch": 1.2, "learning_rate": 7.625078034424329e-07, "loss": 1.1815, "step": 22340 }, { "epoch": 1.2, "learning_rate": 6.287345045928833e-07, "loss": 1.2082, "step": 22360 }, { "epoch": 1.2, "learning_rate": 4.949612057433336e-07, "loss": 1.237, "step": 22380 }, { "epoch": 1.2, "learning_rate": 3.611879068937839e-07, "loss": 1.1325, "step": 22400 }, { "epoch": 1.2, "learning_rate": 2.2741460804423435e-07, "loss": 1.2143, "step": 22420 }, { "epoch": 1.2, "step": 22426, "total_flos": 2.934269490138972e+17, "train_loss": 1.2402145965278153, "train_runtime": 8811.1645, "train_samples_per_second": 15.271, "train_steps_per_second": 2.545 } ], "logging_steps": 20, "max_steps": 22426, "num_train_epochs": 2, "save_steps": 5000, "total_flos": 2.934269490138972e+17, "trial_name": null, "trial_params": null }