diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,6016 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.2222222222222223, + "global_step": 1000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 2.9268292682926833e-06, + "loss": 2.0292, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 5.853658536585367e-06, + "loss": 2.0417, + "step": 2 + }, + { + "epoch": 0.01, + "learning_rate": 8.780487804878048e-06, + "loss": 1.5471, + "step": 3 + }, + { + "epoch": 0.01, + "learning_rate": 1.1707317073170733e-05, + "loss": 1.4769, + "step": 4 + }, + { + "epoch": 0.01, + "learning_rate": 1.4634146341463415e-05, + "loss": 1.3682, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 1.7560975609756096e-05, + "loss": 1.3256, + "step": 6 + }, + { + "epoch": 0.02, + "learning_rate": 2.048780487804878e-05, + "loss": 1.317, + "step": 7 + }, + { + "epoch": 0.02, + "learning_rate": 2.3414634146341466e-05, + "loss": 1.3441, + "step": 8 + }, + { + "epoch": 0.02, + "learning_rate": 2.6341463414634148e-05, + "loss": 1.3028, + "step": 9 + }, + { + "epoch": 0.02, + "learning_rate": 2.926829268292683e-05, + "loss": 1.3212, + "step": 10 + }, + { + "epoch": 0.02, + "learning_rate": 3.2195121951219514e-05, + "loss": 1.2535, + "step": 11 + }, + { + "epoch": 0.03, + "learning_rate": 3.512195121951219e-05, + "loss": 1.2933, + "step": 12 + }, + { + "epoch": 0.03, + "learning_rate": 3.804878048780488e-05, + "loss": 1.2811, + "step": 13 + }, + { + "epoch": 0.03, + "learning_rate": 4.097560975609756e-05, + "loss": 1.304, + "step": 14 + }, + { + "epoch": 0.03, + "learning_rate": 4.390243902439024e-05, + "loss": 1.3205, + "step": 15 + }, + { + "epoch": 0.04, + "learning_rate": 4.682926829268293e-05, + "loss": 1.2825, + "step": 16 + }, + { + "epoch": 0.04, + "learning_rate": 4.975609756097561e-05, + "loss": 1.2774, + "step": 17 + }, + { + "epoch": 0.04, + "learning_rate": 5.2682926829268296e-05, + "loss": 1.2641, + "step": 18 + }, + { + "epoch": 0.04, + "learning_rate": 5.560975609756098e-05, + "loss": 1.2429, + "step": 19 + }, + { + "epoch": 0.04, + "learning_rate": 5.853658536585366e-05, + "loss": 1.2503, + "step": 20 + }, + { + "epoch": 0.05, + "learning_rate": 6.146341463414634e-05, + "loss": 1.2882, + "step": 21 + }, + { + "epoch": 0.05, + "learning_rate": 6.439024390243903e-05, + "loss": 1.3068, + "step": 22 + }, + { + "epoch": 0.05, + "learning_rate": 6.731707317073171e-05, + "loss": 1.2592, + "step": 23 + }, + { + "epoch": 0.05, + "learning_rate": 7.024390243902439e-05, + "loss": 1.2538, + "step": 24 + }, + { + "epoch": 0.06, + "learning_rate": 7.317073170731707e-05, + "loss": 1.3129, + "step": 25 + }, + { + "epoch": 0.06, + "learning_rate": 7.609756097560976e-05, + "loss": 1.2578, + "step": 26 + }, + { + "epoch": 0.06, + "learning_rate": 7.902439024390244e-05, + "loss": 1.2534, + "step": 27 + }, + { + "epoch": 0.06, + "learning_rate": 8.195121951219513e-05, + "loss": 1.3043, + "step": 28 + }, + { + "epoch": 0.06, + "learning_rate": 8.48780487804878e-05, + "loss": 1.2677, + "step": 29 + }, + { + "epoch": 0.07, + "learning_rate": 8.780487804878048e-05, + "loss": 1.2535, + "step": 30 + }, + { + "epoch": 0.07, + "learning_rate": 9.073170731707318e-05, + "loss": 1.2639, + "step": 31 + }, + { + "epoch": 0.07, + "learning_rate": 9.365853658536587e-05, + "loss": 1.3038, + "step": 32 + }, + { + "epoch": 0.07, + "learning_rate": 9.658536585365855e-05, + "loss": 1.2966, + "step": 33 + }, + { + "epoch": 0.08, + "learning_rate": 9.951219512195122e-05, + "loss": 1.2355, + "step": 34 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001024390243902439, + "loss": 1.2861, + "step": 35 + }, + { + "epoch": 0.08, + "learning_rate": 0.00010536585365853659, + "loss": 1.3257, + "step": 36 + }, + { + "epoch": 0.08, + "learning_rate": 0.00010829268292682928, + "loss": 1.3195, + "step": 37 + }, + { + "epoch": 0.08, + "learning_rate": 0.00011121951219512196, + "loss": 1.3274, + "step": 38 + }, + { + "epoch": 0.09, + "learning_rate": 0.00011414634146341463, + "loss": 1.2388, + "step": 39 + }, + { + "epoch": 0.09, + "learning_rate": 0.00011707317073170732, + "loss": 1.2686, + "step": 40 + }, + { + "epoch": 0.09, + "learning_rate": 0.00012, + "loss": 1.274, + "step": 41 + }, + { + "epoch": 0.09, + "learning_rate": 0.00011999982720089112, + "loss": 1.2754, + "step": 42 + }, + { + "epoch": 0.1, + "learning_rate": 0.00011999930880455974, + "loss": 1.2609, + "step": 43 + }, + { + "epoch": 0.1, + "learning_rate": 0.00011999844481399185, + "loss": 1.3151, + "step": 44 + }, + { + "epoch": 0.1, + "learning_rate": 0.00011999723523416397, + "loss": 1.3002, + "step": 45 + }, + { + "epoch": 0.1, + "learning_rate": 0.00011999568007204328, + "loss": 1.2993, + "step": 46 + }, + { + "epoch": 0.1, + "learning_rate": 0.00011999377933658745, + "loss": 1.301, + "step": 47 + }, + { + "epoch": 0.11, + "learning_rate": 0.00011999153303874466, + "loss": 1.3308, + "step": 48 + }, + { + "epoch": 0.11, + "learning_rate": 0.00011998894119145353, + "loss": 1.2533, + "step": 49 + }, + { + "epoch": 0.11, + "learning_rate": 0.00011998600380964302, + "loss": 1.2932, + "step": 50 + }, + { + "epoch": 0.11, + "learning_rate": 0.00011998272091023235, + "loss": 1.2577, + "step": 51 + }, + { + "epoch": 0.12, + "learning_rate": 0.00011997909251213094, + "loss": 1.3185, + "step": 52 + }, + { + "epoch": 0.12, + "learning_rate": 0.00011997511863623823, + "loss": 1.2698, + "step": 53 + }, + { + "epoch": 0.12, + "learning_rate": 0.00011997079930544366, + "loss": 1.2681, + "step": 54 + }, + { + "epoch": 0.12, + "learning_rate": 0.00011996613454462643, + "loss": 1.2688, + "step": 55 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001199611243806554, + "loss": 1.2947, + "step": 56 + }, + { + "epoch": 0.13, + "learning_rate": 0.000119955768842389, + "loss": 1.3007, + "step": 57 + }, + { + "epoch": 0.13, + "learning_rate": 0.00011995006796067497, + "loss": 1.2609, + "step": 58 + }, + { + "epoch": 0.13, + "learning_rate": 0.00011994402176835021, + "loss": 1.3019, + "step": 59 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001199376303002406, + "loss": 1.3291, + "step": 60 + }, + { + "epoch": 0.14, + "learning_rate": 0.00011993089359316082, + "loss": 1.271, + "step": 61 + }, + { + "epoch": 0.14, + "learning_rate": 0.00011992381168591412, + "loss": 1.3238, + "step": 62 + }, + { + "epoch": 0.14, + "learning_rate": 0.00011991638461929203, + "loss": 1.325, + "step": 63 + }, + { + "epoch": 0.14, + "learning_rate": 0.00011990861243607424, + "loss": 1.249, + "step": 64 + }, + { + "epoch": 0.14, + "learning_rate": 0.00011990049518102833, + "loss": 1.2804, + "step": 65 + }, + { + "epoch": 0.15, + "learning_rate": 0.00011989203290090944, + "loss": 1.2872, + "step": 66 + }, + { + "epoch": 0.15, + "learning_rate": 0.00011988322564446003, + "loss": 1.3123, + "step": 67 + }, + { + "epoch": 0.15, + "learning_rate": 0.00011987407346240964, + "loss": 1.3108, + "step": 68 + }, + { + "epoch": 0.15, + "learning_rate": 0.00011986457640747457, + "loss": 1.3069, + "step": 69 + }, + { + "epoch": 0.16, + "learning_rate": 0.00011985473453435758, + "loss": 1.2944, + "step": 70 + }, + { + "epoch": 0.16, + "learning_rate": 0.00011984454789974758, + "loss": 1.2525, + "step": 71 + }, + { + "epoch": 0.16, + "learning_rate": 0.00011983401656231926, + "loss": 1.2582, + "step": 72 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001198231405827328, + "loss": 1.3009, + "step": 73 + }, + { + "epoch": 0.16, + "learning_rate": 0.00011981192002363357, + "loss": 1.3136, + "step": 74 + }, + { + "epoch": 0.17, + "learning_rate": 0.00011980035494965159, + "loss": 1.2795, + "step": 75 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001197884454274014, + "loss": 1.3302, + "step": 76 + }, + { + "epoch": 0.17, + "learning_rate": 0.00011977619152548147, + "loss": 1.3062, + "step": 77 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001197635933144739, + "loss": 1.2965, + "step": 78 + }, + { + "epoch": 0.18, + "learning_rate": 0.00011975065086694404, + "loss": 1.27, + "step": 79 + }, + { + "epoch": 0.18, + "learning_rate": 0.00011973736425743998, + "loss": 1.2587, + "step": 80 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001197237335624922, + "loss": 1.3065, + "step": 81 + }, + { + "epoch": 0.18, + "learning_rate": 0.00011970975886061309, + "loss": 1.2612, + "step": 82 + }, + { + "epoch": 0.18, + "learning_rate": 0.00011969544023229654, + "loss": 1.317, + "step": 83 + }, + { + "epoch": 0.19, + "learning_rate": 0.00011968077776001742, + "loss": 1.3093, + "step": 84 + }, + { + "epoch": 0.19, + "learning_rate": 0.00011966577152823111, + "loss": 1.2632, + "step": 85 + }, + { + "epoch": 0.19, + "learning_rate": 0.00011965042162337308, + "loss": 1.2846, + "step": 86 + }, + { + "epoch": 0.19, + "learning_rate": 0.00011963472813385833, + "loss": 1.3133, + "step": 87 + }, + { + "epoch": 0.2, + "learning_rate": 0.00011961869115008088, + "loss": 1.3272, + "step": 88 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001196023107644133, + "loss": 1.2728, + "step": 89 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001195855870712061, + "loss": 1.2478, + "step": 90 + }, + { + "epoch": 0.2, + "learning_rate": 0.00011956852016678727, + "loss": 1.2854, + "step": 91 + }, + { + "epoch": 0.2, + "learning_rate": 0.00011955111014946166, + "loss": 1.2712, + "step": 92 + }, + { + "epoch": 0.21, + "learning_rate": 0.00011953335711951047, + "loss": 1.2605, + "step": 93 + }, + { + "epoch": 0.21, + "learning_rate": 0.00011951526117919063, + "loss": 1.3021, + "step": 94 + }, + { + "epoch": 0.21, + "learning_rate": 0.00011949682243273419, + "loss": 1.2482, + "step": 95 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001194780409863478, + "loss": 1.2801, + "step": 96 + }, + { + "epoch": 0.22, + "learning_rate": 0.00011945891694821206, + "loss": 1.2746, + "step": 97 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001194394504284808, + "loss": 1.28, + "step": 98 + }, + { + "epoch": 0.22, + "learning_rate": 0.00011941964153928065, + "loss": 1.2296, + "step": 99 + }, + { + "epoch": 0.22, + "learning_rate": 0.00011939949039471018, + "loss": 1.2531, + "step": 100 + }, + { + "epoch": 0.22, + "learning_rate": 0.00011937899711083942, + "loss": 1.2641, + "step": 101 + }, + { + "epoch": 0.23, + "learning_rate": 0.00011935816180570905, + "loss": 1.3043, + "step": 102 + }, + { + "epoch": 0.23, + "learning_rate": 0.00011933698459932983, + "loss": 1.2511, + "step": 103 + }, + { + "epoch": 0.23, + "learning_rate": 0.00011931546561368184, + "loss": 1.2838, + "step": 104 + }, + { + "epoch": 0.23, + "learning_rate": 0.00011929360497271377, + "loss": 1.2616, + "step": 105 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001192714028023423, + "loss": 1.3006, + "step": 106 + }, + { + "epoch": 0.24, + "learning_rate": 0.00011924885923045124, + "loss": 1.2762, + "step": 107 + }, + { + "epoch": 0.24, + "learning_rate": 0.00011922597438689093, + "loss": 1.2797, + "step": 108 + }, + { + "epoch": 0.24, + "learning_rate": 0.00011920274840347734, + "loss": 1.2526, + "step": 109 + }, + { + "epoch": 0.24, + "learning_rate": 0.00011917918141399149, + "loss": 1.2742, + "step": 110 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001191552735541785, + "loss": 1.2698, + "step": 111 + }, + { + "epoch": 0.25, + "learning_rate": 0.00011913102496174698, + "loss": 1.2516, + "step": 112 + }, + { + "epoch": 0.25, + "learning_rate": 0.00011910643577636807, + "loss": 1.2814, + "step": 113 + }, + { + "epoch": 0.25, + "learning_rate": 0.00011908150613967473, + "loss": 1.3506, + "step": 114 + }, + { + "epoch": 0.26, + "learning_rate": 0.00011905623619526097, + "loss": 1.3044, + "step": 115 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001190306260886809, + "loss": 1.2427, + "step": 116 + }, + { + "epoch": 0.26, + "learning_rate": 0.00011900467596744797, + "loss": 1.3192, + "step": 117 + }, + { + "epoch": 0.26, + "learning_rate": 0.00011897838598103412, + "loss": 1.2457, + "step": 118 + }, + { + "epoch": 0.26, + "learning_rate": 0.00011895175628086887, + "loss": 1.2686, + "step": 119 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001189247870203385, + "loss": 1.2782, + "step": 120 + }, + { + "epoch": 0.27, + "learning_rate": 0.00011889747835478518, + "loss": 1.3098, + "step": 121 + }, + { + "epoch": 0.27, + "learning_rate": 0.00011886983044150598, + "loss": 1.266, + "step": 122 + }, + { + "epoch": 0.27, + "learning_rate": 0.00011884184343975209, + "loss": 1.2918, + "step": 123 + }, + { + "epoch": 0.28, + "learning_rate": 0.00011881351751072778, + "loss": 1.2611, + "step": 124 + }, + { + "epoch": 0.28, + "learning_rate": 0.00011878485281758958, + "loss": 1.2192, + "step": 125 + }, + { + "epoch": 0.28, + "learning_rate": 0.00011875584952544527, + "loss": 1.3018, + "step": 126 + }, + { + "epoch": 0.28, + "learning_rate": 0.00011872650780135294, + "loss": 1.304, + "step": 127 + }, + { + "epoch": 0.28, + "learning_rate": 0.00011869682781432005, + "loss": 1.3303, + "step": 128 + }, + { + "epoch": 0.29, + "learning_rate": 0.00011866680973530246, + "loss": 1.2869, + "step": 129 + }, + { + "epoch": 0.29, + "learning_rate": 0.00011863645373720338, + "loss": 1.2533, + "step": 130 + }, + { + "epoch": 0.29, + "learning_rate": 0.00011860575999487249, + "loss": 1.2678, + "step": 131 + }, + { + "epoch": 0.29, + "learning_rate": 0.00011857472868510483, + "loss": 1.2895, + "step": 132 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001185433599866398, + "loss": 1.3199, + "step": 133 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001185116540801602, + "loss": 1.264, + "step": 134 + }, + { + "epoch": 0.3, + "learning_rate": 0.00011847961114829109, + "loss": 1.2979, + "step": 135 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001184472313755988, + "loss": 1.2764, + "step": 136 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001184145149485899, + "loss": 1.286, + "step": 137 + }, + { + "epoch": 0.31, + "learning_rate": 0.00011838146205571, + "loss": 1.2782, + "step": 138 + }, + { + "epoch": 0.31, + "learning_rate": 0.00011834807288734277, + "loss": 1.2893, + "step": 139 + }, + { + "epoch": 0.31, + "learning_rate": 0.00011831434763580886, + "loss": 1.2874, + "step": 140 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001182802864953647, + "loss": 1.3005, + "step": 141 + }, + { + "epoch": 0.32, + "learning_rate": 0.00011824588966220147, + "loss": 1.2885, + "step": 142 + }, + { + "epoch": 0.32, + "learning_rate": 0.00011821115733444388, + "loss": 1.291, + "step": 143 + }, + { + "epoch": 0.32, + "learning_rate": 0.00011817608971214912, + "loss": 1.2475, + "step": 144 + }, + { + "epoch": 0.32, + "learning_rate": 0.00011814068699730562, + "loss": 1.2787, + "step": 145 + }, + { + "epoch": 0.32, + "learning_rate": 0.00011810494939383203, + "loss": 1.2816, + "step": 146 + }, + { + "epoch": 0.33, + "learning_rate": 0.00011806887710757583, + "loss": 1.3126, + "step": 147 + }, + { + "epoch": 0.33, + "learning_rate": 0.00011803247034631235, + "loss": 1.3111, + "step": 148 + }, + { + "epoch": 0.33, + "learning_rate": 0.00011799572931974343, + "loss": 1.2751, + "step": 149 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001179586542394963, + "loss": 1.232, + "step": 150 + }, + { + "epoch": 0.34, + "learning_rate": 0.00011792124531912233, + "loss": 1.2673, + "step": 151 + }, + { + "epoch": 0.34, + "learning_rate": 0.00011788350277409578, + "loss": 1.2299, + "step": 152 + }, + { + "epoch": 0.34, + "learning_rate": 0.00011784542682181257, + "loss": 1.2662, + "step": 153 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001178070176815891, + "loss": 1.2464, + "step": 154 + }, + { + "epoch": 0.34, + "learning_rate": 0.00011776827557466086, + "loss": 1.2414, + "step": 155 + }, + { + "epoch": 0.35, + "learning_rate": 0.00011772920072418121, + "loss": 1.254, + "step": 156 + }, + { + "epoch": 0.35, + "learning_rate": 0.00011768979335522015, + "loss": 1.2713, + "step": 157 + }, + { + "epoch": 0.35, + "learning_rate": 0.00011765005369476294, + "loss": 1.2481, + "step": 158 + }, + { + "epoch": 0.35, + "learning_rate": 0.00011760998197170885, + "loss": 1.2539, + "step": 159 + }, + { + "epoch": 0.36, + "learning_rate": 0.00011756957841686985, + "loss": 1.2687, + "step": 160 + }, + { + "epoch": 0.36, + "learning_rate": 0.00011752884326296917, + "loss": 1.2749, + "step": 161 + }, + { + "epoch": 0.36, + "learning_rate": 0.00011748777674464008, + "loss": 1.2518, + "step": 162 + }, + { + "epoch": 0.36, + "learning_rate": 0.00011744637909842455, + "loss": 1.3132, + "step": 163 + }, + { + "epoch": 0.36, + "learning_rate": 0.00011740465056277176, + "loss": 1.3026, + "step": 164 + }, + { + "epoch": 0.37, + "learning_rate": 0.00011736259137803685, + "loss": 1.2225, + "step": 165 + }, + { + "epoch": 0.37, + "learning_rate": 0.00011732020178647945, + "loss": 1.2805, + "step": 166 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001172774820322624, + "loss": 1.2332, + "step": 167 + }, + { + "epoch": 0.37, + "learning_rate": 0.00011723443236145015, + "loss": 1.2702, + "step": 168 + }, + { + "epoch": 0.38, + "learning_rate": 0.00011719105302200757, + "loss": 1.2665, + "step": 169 + }, + { + "epoch": 0.38, + "learning_rate": 0.00011714734426379837, + "loss": 1.2261, + "step": 170 + }, + { + "epoch": 0.38, + "learning_rate": 0.00011710330633858367, + "loss": 1.2395, + "step": 171 + }, + { + "epoch": 0.38, + "learning_rate": 0.00011705893950002063, + "loss": 1.2588, + "step": 172 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001170142440036609, + "loss": 1.2609, + "step": 173 + }, + { + "epoch": 0.39, + "learning_rate": 0.00011696922010694925, + "loss": 1.2454, + "step": 174 + }, + { + "epoch": 0.39, + "learning_rate": 0.00011692386806922196, + "loss": 1.2901, + "step": 175 + }, + { + "epoch": 0.39, + "learning_rate": 0.00011687818815170541, + "loss": 1.2764, + "step": 176 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001168321806175146, + "loss": 1.2632, + "step": 177 + }, + { + "epoch": 0.4, + "learning_rate": 0.00011678584573165155, + "loss": 1.2594, + "step": 178 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001167391837610038, + "loss": 1.2609, + "step": 179 + }, + { + "epoch": 0.4, + "learning_rate": 0.00011669219497434297, + "loss": 1.2928, + "step": 180 + }, + { + "epoch": 0.4, + "learning_rate": 0.00011664487964232302, + "loss": 1.2612, + "step": 181 + }, + { + "epoch": 0.4, + "learning_rate": 0.00011659723803747888, + "loss": 1.3024, + "step": 182 + }, + { + "epoch": 0.41, + "learning_rate": 0.00011654927043422479, + "loss": 1.2663, + "step": 183 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001165009771088527, + "loss": 1.2345, + "step": 184 + }, + { + "epoch": 0.41, + "learning_rate": 0.00011645235833953074, + "loss": 1.3054, + "step": 185 + }, + { + "epoch": 0.41, + "learning_rate": 0.00011640341440630155, + "loss": 1.2414, + "step": 186 + }, + { + "epoch": 0.42, + "learning_rate": 0.00011635414559108078, + "loss": 1.304, + "step": 187 + }, + { + "epoch": 0.42, + "learning_rate": 0.00011630455217765531, + "loss": 1.2611, + "step": 188 + }, + { + "epoch": 0.42, + "learning_rate": 0.00011625463445168175, + "loss": 1.2403, + "step": 189 + }, + { + "epoch": 0.42, + "learning_rate": 0.00011620439270068469, + "loss": 1.2718, + "step": 190 + }, + { + "epoch": 0.42, + "learning_rate": 0.00011615382721405513, + "loss": 1.2647, + "step": 191 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001161029382830488, + "loss": 1.2481, + "step": 192 + }, + { + "epoch": 0.43, + "learning_rate": 0.00011605172620078439, + "loss": 1.25, + "step": 193 + }, + { + "epoch": 0.43, + "learning_rate": 0.000116000191262242, + "loss": 1.2761, + "step": 194 + }, + { + "epoch": 0.43, + "learning_rate": 0.00011594833376426134, + "loss": 1.3131, + "step": 195 + }, + { + "epoch": 0.44, + "learning_rate": 0.00011589615400554007, + "loss": 1.222, + "step": 196 + }, + { + "epoch": 0.44, + "learning_rate": 0.00011584365228663202, + "loss": 1.2354, + "step": 197 + }, + { + "epoch": 0.44, + "learning_rate": 0.00011579082890994557, + "loss": 1.2224, + "step": 198 + }, + { + "epoch": 0.44, + "learning_rate": 0.00011573768417974176, + "loss": 1.2783, + "step": 199 + }, + { + "epoch": 0.44, + "learning_rate": 0.00011568421840213267, + "loss": 1.2999, + "step": 200 + }, + { + "epoch": 0.45, + "learning_rate": 0.00011563043188507961, + "loss": 1.2273, + "step": 201 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001155763249383913, + "loss": 1.2849, + "step": 202 + }, + { + "epoch": 0.45, + "learning_rate": 0.00011552189787372217, + "loss": 1.2166, + "step": 203 + }, + { + "epoch": 0.45, + "learning_rate": 0.00011546715100457046, + "loss": 1.2472, + "step": 204 + }, + { + "epoch": 0.46, + "learning_rate": 0.00011541208464627652, + "loss": 1.2523, + "step": 205 + }, + { + "epoch": 0.46, + "learning_rate": 0.00011535669911602097, + "loss": 1.2413, + "step": 206 + }, + { + "epoch": 0.46, + "learning_rate": 0.00011530099473282279, + "loss": 1.2395, + "step": 207 + }, + { + "epoch": 0.46, + "learning_rate": 0.00011524497181753759, + "loss": 1.2717, + "step": 208 + }, + { + "epoch": 0.46, + "learning_rate": 0.00011518863069285567, + "loss": 1.2152, + "step": 209 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011513197168330026, + "loss": 1.2583, + "step": 210 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011507499511522556, + "loss": 1.2311, + "step": 211 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011501770131681491, + "loss": 1.276, + "step": 212 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001149600906180789, + "loss": 1.2171, + "step": 213 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011490216335085345, + "loss": 1.2381, + "step": 214 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011484391984879785, + "loss": 1.2577, + "step": 215 + }, + { + "epoch": 0.48, + "learning_rate": 0.000114785360447393, + "loss": 1.2659, + "step": 216 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011472648548393928, + "loss": 1.2436, + "step": 217 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011466729529755472, + "loss": 1.2542, + "step": 218 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011460779022917307, + "loss": 1.2525, + "step": 219 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011454797062154173, + "loss": 1.2511, + "step": 220 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011448783681921988, + "loss": 1.2654, + "step": 221 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011442738916857643, + "loss": 1.2532, + "step": 222 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011436662801778805, + "loss": 1.2714, + "step": 223 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011430555371683716, + "loss": 1.2708, + "step": 224 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011424416661750994, + "loss": 1.1997, + "step": 225 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011418246707339422, + "loss": 1.2324, + "step": 226 + }, + { + "epoch": 0.5, + "learning_rate": 0.00011412045543987757, + "loss": 1.2788, + "step": 227 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011405813207414514, + "loss": 1.2543, + "step": 228 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001139954973351777, + "loss": 1.2854, + "step": 229 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011393255158374945, + "loss": 1.2034, + "step": 230 + }, + { + "epoch": 0.51, + "learning_rate": 0.00011386929518242606, + "loss": 1.2724, + "step": 231 + }, + { + "epoch": 0.52, + "learning_rate": 0.00011380572849556251, + "loss": 1.2631, + "step": 232 + }, + { + "epoch": 0.52, + "learning_rate": 0.00011374185188930107, + "loss": 1.281, + "step": 233 + }, + { + "epoch": 0.52, + "learning_rate": 0.00011367766573156905, + "loss": 1.2162, + "step": 234 + }, + { + "epoch": 0.52, + "learning_rate": 0.00011361317039207682, + "loss": 1.2456, + "step": 235 + }, + { + "epoch": 0.52, + "learning_rate": 0.00011354836624231564, + "loss": 1.2486, + "step": 236 + }, + { + "epoch": 0.53, + "learning_rate": 0.00011348325365555547, + "loss": 1.2923, + "step": 237 + }, + { + "epoch": 0.53, + "learning_rate": 0.00011341783300684288, + "loss": 1.2268, + "step": 238 + }, + { + "epoch": 0.53, + "learning_rate": 0.00011335210467299887, + "loss": 1.2583, + "step": 239 + }, + { + "epoch": 0.53, + "learning_rate": 0.00011328606903261669, + "loss": 1.2731, + "step": 240 + }, + { + "epoch": 0.54, + "learning_rate": 0.00011321972646605965, + "loss": 1.2224, + "step": 241 + }, + { + "epoch": 0.54, + "learning_rate": 0.00011315307735545897, + "loss": 1.2495, + "step": 242 + }, + { + "epoch": 0.54, + "learning_rate": 0.00011308612208471157, + "loss": 1.2641, + "step": 243 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001130188610394778, + "loss": 1.2308, + "step": 244 + }, + { + "epoch": 0.54, + "learning_rate": 0.00011295129460717928, + "loss": 1.2406, + "step": 245 + }, + { + "epoch": 0.55, + "learning_rate": 0.00011288342317699666, + "loss": 1.252, + "step": 246 + }, + { + "epoch": 0.55, + "learning_rate": 0.00011281524713986736, + "loss": 1.2578, + "step": 247 + }, + { + "epoch": 0.55, + "learning_rate": 0.00011274676688848332, + "loss": 1.2609, + "step": 248 + }, + { + "epoch": 0.55, + "learning_rate": 0.00011267798281728878, + "loss": 1.2398, + "step": 249 + }, + { + "epoch": 0.56, + "learning_rate": 0.00011260889532247793, + "loss": 1.2689, + "step": 250 + }, + { + "epoch": 0.56, + "learning_rate": 0.00011253950480199267, + "loss": 1.2193, + "step": 251 + }, + { + "epoch": 0.56, + "learning_rate": 0.00011246981165552038, + "loss": 1.2361, + "step": 252 + }, + { + "epoch": 0.56, + "learning_rate": 0.00011239981628449148, + "loss": 1.2999, + "step": 253 + }, + { + "epoch": 0.56, + "learning_rate": 0.00011232951909207721, + "loss": 1.2357, + "step": 254 + }, + { + "epoch": 0.57, + "learning_rate": 0.00011225892048318737, + "loss": 1.2478, + "step": 255 + }, + { + "epoch": 0.57, + "learning_rate": 0.0001121880208644678, + "loss": 1.2718, + "step": 256 + }, + { + "epoch": 0.57, + "learning_rate": 0.00011211682064429823, + "loss": 1.2786, + "step": 257 + }, + { + "epoch": 0.57, + "learning_rate": 0.00011204532023278979, + "loss": 1.2636, + "step": 258 + }, + { + "epoch": 0.58, + "learning_rate": 0.00011197352004178271, + "loss": 1.2662, + "step": 259 + }, + { + "epoch": 0.58, + "learning_rate": 0.00011190142048484403, + "loss": 1.2616, + "step": 260 + }, + { + "epoch": 0.58, + "learning_rate": 0.00011182902197726497, + "loss": 1.2511, + "step": 261 + }, + { + "epoch": 0.58, + "learning_rate": 0.00011175632493605883, + "loss": 1.2794, + "step": 262 + }, + { + "epoch": 0.58, + "learning_rate": 0.00011168332977995841, + "loss": 1.2709, + "step": 263 + }, + { + "epoch": 0.59, + "learning_rate": 0.00011161003692941364, + "loss": 1.2583, + "step": 264 + }, + { + "epoch": 0.59, + "learning_rate": 0.00011153644680658915, + "loss": 1.2573, + "step": 265 + }, + { + "epoch": 0.59, + "learning_rate": 0.00011146255983536184, + "loss": 1.2251, + "step": 266 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001113883764413185, + "loss": 1.2647, + "step": 267 + }, + { + "epoch": 0.6, + "learning_rate": 0.00011131389705175328, + "loss": 1.2252, + "step": 268 + }, + { + "epoch": 0.6, + "learning_rate": 0.00011123912209566525, + "loss": 1.2373, + "step": 269 + }, + { + "epoch": 0.6, + "learning_rate": 0.00011116405200375591, + "loss": 1.2447, + "step": 270 + }, + { + "epoch": 0.6, + "learning_rate": 0.00011108868720842679, + "loss": 1.2294, + "step": 271 + }, + { + "epoch": 0.6, + "learning_rate": 0.00011101302814377686, + "loss": 1.2497, + "step": 272 + }, + { + "epoch": 0.61, + "learning_rate": 0.00011093707524560006, + "loss": 1.2151, + "step": 273 + }, + { + "epoch": 0.61, + "learning_rate": 0.00011086082895138288, + "loss": 1.2533, + "step": 274 + }, + { + "epoch": 0.61, + "learning_rate": 0.00011078428970030167, + "loss": 1.2424, + "step": 275 + }, + { + "epoch": 0.61, + "learning_rate": 0.00011070745793322026, + "loss": 1.2656, + "step": 276 + }, + { + "epoch": 0.62, + "learning_rate": 0.00011063033409268734, + "loss": 1.2047, + "step": 277 + }, + { + "epoch": 0.62, + "learning_rate": 0.00011055291862293394, + "loss": 1.2705, + "step": 278 + }, + { + "epoch": 0.62, + "learning_rate": 0.00011047521196987087, + "loss": 1.2819, + "step": 279 + }, + { + "epoch": 0.62, + "learning_rate": 0.00011039721458108616, + "loss": 1.2561, + "step": 280 + }, + { + "epoch": 0.62, + "learning_rate": 0.00011031892690584239, + "loss": 1.2291, + "step": 281 + }, + { + "epoch": 0.63, + "learning_rate": 0.00011024034939507433, + "loss": 1.2234, + "step": 282 + }, + { + "epoch": 0.63, + "learning_rate": 0.00011016148250138605, + "loss": 1.247, + "step": 283 + }, + { + "epoch": 0.63, + "learning_rate": 0.00011008232667904853, + "loss": 1.2593, + "step": 284 + }, + { + "epoch": 0.63, + "learning_rate": 0.00011000288238399695, + "loss": 1.255, + "step": 285 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001099231500738281, + "loss": 1.2471, + "step": 286 + }, + { + "epoch": 0.64, + "learning_rate": 0.00010984313020779771, + "loss": 1.2817, + "step": 287 + }, + { + "epoch": 0.64, + "learning_rate": 0.00010976282324681785, + "loss": 1.2424, + "step": 288 + }, + { + "epoch": 0.64, + "learning_rate": 0.00010968222965345421, + "loss": 1.2042, + "step": 289 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001096013498919235, + "loss": 1.2324, + "step": 290 + }, + { + "epoch": 0.65, + "learning_rate": 0.00010952018442809074, + "loss": 1.2063, + "step": 291 + }, + { + "epoch": 0.65, + "learning_rate": 0.00010943873372946661, + "loss": 1.2345, + "step": 292 + }, + { + "epoch": 0.65, + "learning_rate": 0.00010935699826520467, + "loss": 1.2373, + "step": 293 + }, + { + "epoch": 0.65, + "learning_rate": 0.00010927497850609882, + "loss": 1.2694, + "step": 294 + }, + { + "epoch": 0.66, + "learning_rate": 0.00010919267492458041, + "loss": 1.2393, + "step": 295 + }, + { + "epoch": 0.66, + "learning_rate": 0.00010911008799471562, + "loss": 1.2307, + "step": 296 + }, + { + "epoch": 0.66, + "learning_rate": 0.00010902721819220271, + "loss": 1.2317, + "step": 297 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001089440659943693, + "loss": 1.2129, + "step": 298 + }, + { + "epoch": 0.66, + "learning_rate": 0.00010886063188016958, + "loss": 1.2341, + "step": 299 + }, + { + "epoch": 0.67, + "learning_rate": 0.00010877691633018154, + "loss": 1.2456, + "step": 300 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001086929198266043, + "loss": 1.2511, + "step": 301 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001086086428532552, + "loss": 1.247, + "step": 302 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001085240858955671, + "loss": 1.2702, + "step": 303 + }, + { + "epoch": 0.68, + "learning_rate": 0.00010843924944058557, + "loss": 1.2601, + "step": 304 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001083541339769661, + "loss": 1.1809, + "step": 305 + }, + { + "epoch": 0.68, + "learning_rate": 0.00010826873999497118, + "loss": 1.2388, + "step": 306 + }, + { + "epoch": 0.68, + "learning_rate": 0.00010818306798646766, + "loss": 1.2608, + "step": 307 + }, + { + "epoch": 0.68, + "learning_rate": 0.00010809711844492373, + "loss": 1.2479, + "step": 308 + }, + { + "epoch": 0.69, + "learning_rate": 0.00010801089186540621, + "loss": 1.2148, + "step": 309 + }, + { + "epoch": 0.69, + "learning_rate": 0.00010792438874457763, + "loss": 1.225, + "step": 310 + }, + { + "epoch": 0.69, + "learning_rate": 0.00010783760958069341, + "loss": 1.2321, + "step": 311 + }, + { + "epoch": 0.69, + "learning_rate": 0.00010775055487359894, + "loss": 1.2556, + "step": 312 + }, + { + "epoch": 0.7, + "learning_rate": 0.00010766322512472675, + "loss": 1.2122, + "step": 313 + }, + { + "epoch": 0.7, + "learning_rate": 0.00010757562083709362, + "loss": 1.2606, + "step": 314 + }, + { + "epoch": 0.7, + "learning_rate": 0.00010748774251529763, + "loss": 1.2528, + "step": 315 + }, + { + "epoch": 0.7, + "learning_rate": 0.00010739959066551528, + "loss": 1.2228, + "step": 316 + }, + { + "epoch": 0.7, + "learning_rate": 0.00010731116579549864, + "loss": 1.1934, + "step": 317 + }, + { + "epoch": 0.71, + "learning_rate": 0.00010722246841457232, + "loss": 1.2169, + "step": 318 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001071334990336306, + "loss": 1.2657, + "step": 319 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001070442581651345, + "loss": 1.2457, + "step": 320 + }, + { + "epoch": 0.71, + "learning_rate": 0.00010695474632310871, + "loss": 1.2288, + "step": 321 + }, + { + "epoch": 0.72, + "learning_rate": 0.00010686496402313882, + "loss": 1.2224, + "step": 322 + }, + { + "epoch": 0.72, + "learning_rate": 0.00010677491178236823, + "loss": 1.2083, + "step": 323 + }, + { + "epoch": 0.72, + "learning_rate": 0.00010668459011949512, + "loss": 1.2031, + "step": 324 + }, + { + "epoch": 0.72, + "learning_rate": 0.00010659399955476964, + "loss": 1.2406, + "step": 325 + }, + { + "epoch": 0.72, + "learning_rate": 0.00010650314060999073, + "loss": 1.1586, + "step": 326 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010641201380850319, + "loss": 1.2393, + "step": 327 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010632061967519473, + "loss": 1.1987, + "step": 328 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010622895873649281, + "loss": 1.1982, + "step": 329 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010613703152036172, + "loss": 1.2368, + "step": 330 + }, + { + "epoch": 0.74, + "learning_rate": 0.00010604483855629952, + "loss": 1.2315, + "step": 331 + }, + { + "epoch": 0.74, + "learning_rate": 0.00010595238037533491, + "loss": 1.1949, + "step": 332 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001058596575100243, + "loss": 1.2115, + "step": 333 + }, + { + "epoch": 0.74, + "learning_rate": 0.00010576667049444861, + "loss": 1.2225, + "step": 334 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001056734198642103, + "loss": 1.2243, + "step": 335 + }, + { + "epoch": 0.75, + "learning_rate": 0.00010557990615643023, + "loss": 1.2686, + "step": 336 + }, + { + "epoch": 0.75, + "learning_rate": 0.00010548612990974458, + "loss": 1.2224, + "step": 337 + }, + { + "epoch": 0.75, + "learning_rate": 0.00010539209166430176, + "loss": 1.2389, + "step": 338 + }, + { + "epoch": 0.75, + "learning_rate": 0.00010529779196175924, + "loss": 1.2085, + "step": 339 + }, + { + "epoch": 0.76, + "learning_rate": 0.00010520323134528051, + "loss": 1.2501, + "step": 340 + }, + { + "epoch": 0.76, + "learning_rate": 0.00010510841035953194, + "loss": 1.2202, + "step": 341 + }, + { + "epoch": 0.76, + "learning_rate": 0.00010501332955067958, + "loss": 1.2156, + "step": 342 + }, + { + "epoch": 0.76, + "learning_rate": 0.00010491798946638606, + "loss": 1.2211, + "step": 343 + }, + { + "epoch": 0.76, + "learning_rate": 0.00010482239065580742, + "loss": 1.2209, + "step": 344 + }, + { + "epoch": 0.77, + "learning_rate": 0.00010472653366958998, + "loss": 1.2249, + "step": 345 + }, + { + "epoch": 0.77, + "learning_rate": 0.00010463041905986715, + "loss": 1.1985, + "step": 346 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001045340473802562, + "loss": 1.2689, + "step": 347 + }, + { + "epoch": 0.77, + "learning_rate": 0.00010443741918585517, + "loss": 1.2237, + "step": 348 + }, + { + "epoch": 0.78, + "learning_rate": 0.00010434053503323955, + "loss": 1.2322, + "step": 349 + }, + { + "epoch": 0.78, + "learning_rate": 0.00010424339548045921, + "loss": 1.195, + "step": 350 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001041460010870351, + "loss": 1.1922, + "step": 351 + }, + { + "epoch": 0.78, + "learning_rate": 0.00010404835241395601, + "loss": 1.2476, + "step": 352 + }, + { + "epoch": 0.78, + "learning_rate": 0.00010395045002367541, + "loss": 1.2289, + "step": 353 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010385229448010814, + "loss": 1.249, + "step": 354 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010375388634862723, + "loss": 1.2138, + "step": 355 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010365522619606062, + "loss": 1.1953, + "step": 356 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010355631459068779, + "loss": 1.2433, + "step": 357 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010345715210223671, + "loss": 1.1922, + "step": 358 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010335773930188036, + "loss": 1.2071, + "step": 359 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001032580767622335, + "loss": 1.2189, + "step": 360 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001031581650573494, + "loss": 1.2081, + "step": 361 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010305800476271651, + "loss": 1.2028, + "step": 362 + }, + { + "epoch": 0.81, + "learning_rate": 0.00010295759645525515, + "loss": 1.2179, + "step": 363 + }, + { + "epoch": 0.81, + "learning_rate": 0.00010285694071331422, + "loss": 1.2041, + "step": 364 + }, + { + "epoch": 0.81, + "learning_rate": 0.00010275603811666778, + "loss": 1.2169, + "step": 365 + }, + { + "epoch": 0.81, + "learning_rate": 0.00010265488924651176, + "loss": 1.2272, + "step": 366 + }, + { + "epoch": 0.82, + "learning_rate": 0.00010255349468546072, + "loss": 1.2104, + "step": 367 + }, + { + "epoch": 0.82, + "learning_rate": 0.00010245185501754425, + "loss": 1.2381, + "step": 368 + }, + { + "epoch": 0.82, + "learning_rate": 0.00010234997082820383, + "loss": 1.2231, + "step": 369 + }, + { + "epoch": 0.82, + "learning_rate": 0.00010224784270428942, + "loss": 1.2252, + "step": 370 + }, + { + "epoch": 0.82, + "learning_rate": 0.00010214547123405592, + "loss": 1.2611, + "step": 371 + }, + { + "epoch": 0.83, + "learning_rate": 0.00010204285700715998, + "loss": 1.2557, + "step": 372 + }, + { + "epoch": 0.83, + "learning_rate": 0.00010194000061465648, + "loss": 1.2176, + "step": 373 + }, + { + "epoch": 0.83, + "learning_rate": 0.00010183690264899521, + "loss": 1.2211, + "step": 374 + }, + { + "epoch": 0.83, + "learning_rate": 0.00010173356370401741, + "loss": 1.2117, + "step": 375 + }, + { + "epoch": 0.84, + "learning_rate": 0.00010162998437495228, + "loss": 1.2404, + "step": 376 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001015261652584137, + "loss": 1.2181, + "step": 377 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001014221069523967, + "loss": 1.2391, + "step": 378 + }, + { + "epoch": 0.84, + "learning_rate": 0.00010131781005627406, + "loss": 1.2204, + "step": 379 + }, + { + "epoch": 0.84, + "learning_rate": 0.00010121327517079276, + "loss": 1.2545, + "step": 380 + }, + { + "epoch": 0.85, + "learning_rate": 0.00010110850289807066, + "loss": 1.2036, + "step": 381 + }, + { + "epoch": 0.85, + "learning_rate": 0.00010100349384159291, + "loss": 1.2642, + "step": 382 + }, + { + "epoch": 0.85, + "learning_rate": 0.00010089824860620861, + "loss": 1.2331, + "step": 383 + }, + { + "epoch": 0.85, + "learning_rate": 0.00010079276779812714, + "loss": 1.2063, + "step": 384 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010068705202491485, + "loss": 1.1969, + "step": 385 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010058110189549143, + "loss": 1.2161, + "step": 386 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010047491802012648, + "loss": 1.2371, + "step": 387 + }, + { + "epoch": 0.86, + "learning_rate": 0.000100368501010436, + "loss": 1.2154, + "step": 388 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010026185147937877, + "loss": 1.2139, + "step": 389 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010015497004125293, + "loss": 1.2437, + "step": 390 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010004785731169242, + "loss": 1.2077, + "step": 391 + }, + { + "epoch": 0.87, + "learning_rate": 9.994051390766333e-05, + "loss": 1.2164, + "step": 392 + }, + { + "epoch": 0.87, + "learning_rate": 9.983294044746051e-05, + "loss": 1.2177, + "step": 393 + }, + { + "epoch": 0.88, + "learning_rate": 9.97251375507039e-05, + "loss": 1.2081, + "step": 394 + }, + { + "epoch": 0.88, + "learning_rate": 9.961710583833494e-05, + "loss": 1.2373, + "step": 395 + }, + { + "epoch": 0.88, + "learning_rate": 9.950884593261315e-05, + "loss": 1.218, + "step": 396 + }, + { + "epoch": 0.88, + "learning_rate": 9.940035845711232e-05, + "loss": 1.24, + "step": 397 + }, + { + "epoch": 0.88, + "learning_rate": 9.929164403671711e-05, + "loss": 1.1771, + "step": 398 + }, + { + "epoch": 0.89, + "learning_rate": 9.918270329761933e-05, + "loss": 1.2131, + "step": 399 + }, + { + "epoch": 0.89, + "learning_rate": 9.907353686731444e-05, + "loss": 1.2335, + "step": 400 + }, + { + "epoch": 0.89, + "learning_rate": 9.89641453745978e-05, + "loss": 1.2427, + "step": 401 + }, + { + "epoch": 0.89, + "learning_rate": 9.885452944956118e-05, + "loss": 1.2184, + "step": 402 + }, + { + "epoch": 0.9, + "learning_rate": 9.874468972358904e-05, + "loss": 1.2157, + "step": 403 + }, + { + "epoch": 0.9, + "learning_rate": 9.863462682935493e-05, + "loss": 1.2399, + "step": 404 + }, + { + "epoch": 0.9, + "learning_rate": 9.852434140081789e-05, + "loss": 1.221, + "step": 405 + }, + { + "epoch": 0.9, + "learning_rate": 9.841383407321866e-05, + "loss": 1.2568, + "step": 406 + }, + { + "epoch": 0.9, + "learning_rate": 9.830310548307622e-05, + "loss": 1.2178, + "step": 407 + }, + { + "epoch": 0.91, + "learning_rate": 9.819215626818392e-05, + "loss": 1.2101, + "step": 408 + }, + { + "epoch": 0.91, + "learning_rate": 9.808098706760595e-05, + "loss": 1.212, + "step": 409 + }, + { + "epoch": 0.91, + "learning_rate": 9.796959852167363e-05, + "loss": 1.2028, + "step": 410 + }, + { + "epoch": 0.91, + "learning_rate": 9.785799127198162e-05, + "loss": 1.1922, + "step": 411 + }, + { + "epoch": 0.92, + "learning_rate": 9.77461659613844e-05, + "loss": 1.1668, + "step": 412 + }, + { + "epoch": 0.92, + "learning_rate": 9.763412323399245e-05, + "loss": 1.1926, + "step": 413 + }, + { + "epoch": 0.92, + "learning_rate": 9.752186373516853e-05, + "loss": 1.2085, + "step": 414 + }, + { + "epoch": 0.92, + "learning_rate": 9.740938811152401e-05, + "loss": 1.2137, + "step": 415 + }, + { + "epoch": 0.92, + "learning_rate": 9.729669701091517e-05, + "loss": 1.2701, + "step": 416 + }, + { + "epoch": 0.93, + "learning_rate": 9.718379108243939e-05, + "loss": 1.2591, + "step": 417 + }, + { + "epoch": 0.93, + "learning_rate": 9.707067097643147e-05, + "loss": 1.2277, + "step": 418 + }, + { + "epoch": 0.93, + "learning_rate": 9.695733734445982e-05, + "loss": 1.2128, + "step": 419 + }, + { + "epoch": 0.93, + "learning_rate": 9.684379083932286e-05, + "loss": 1.2091, + "step": 420 + }, + { + "epoch": 0.94, + "learning_rate": 9.673003211504503e-05, + "loss": 1.2067, + "step": 421 + }, + { + "epoch": 0.94, + "learning_rate": 9.661606182687324e-05, + "loss": 1.191, + "step": 422 + }, + { + "epoch": 0.94, + "learning_rate": 9.650188063127296e-05, + "loss": 1.1973, + "step": 423 + }, + { + "epoch": 0.94, + "learning_rate": 9.638748918592445e-05, + "loss": 1.2405, + "step": 424 + }, + { + "epoch": 0.94, + "learning_rate": 9.627288814971908e-05, + "loss": 1.2487, + "step": 425 + }, + { + "epoch": 0.95, + "learning_rate": 9.615807818275539e-05, + "loss": 1.1887, + "step": 426 + }, + { + "epoch": 0.95, + "learning_rate": 9.604305994633539e-05, + "loss": 1.2214, + "step": 427 + }, + { + "epoch": 0.95, + "learning_rate": 9.592783410296071e-05, + "loss": 1.2136, + "step": 428 + }, + { + "epoch": 0.95, + "learning_rate": 9.581240131632876e-05, + "loss": 1.2074, + "step": 429 + }, + { + "epoch": 0.96, + "learning_rate": 9.569676225132898e-05, + "loss": 1.1687, + "step": 430 + }, + { + "epoch": 0.96, + "learning_rate": 9.558091757403897e-05, + "loss": 1.1988, + "step": 431 + }, + { + "epoch": 0.96, + "learning_rate": 9.54648679517206e-05, + "loss": 1.2025, + "step": 432 + }, + { + "epoch": 0.96, + "learning_rate": 9.534861405281625e-05, + "loss": 1.2027, + "step": 433 + }, + { + "epoch": 0.96, + "learning_rate": 9.523215654694493e-05, + "loss": 1.2192, + "step": 434 + }, + { + "epoch": 0.97, + "learning_rate": 9.511549610489844e-05, + "loss": 1.2245, + "step": 435 + }, + { + "epoch": 0.97, + "learning_rate": 9.499863339863741e-05, + "loss": 1.1959, + "step": 436 + }, + { + "epoch": 0.97, + "learning_rate": 9.48815691012876e-05, + "loss": 1.2241, + "step": 437 + }, + { + "epoch": 0.97, + "learning_rate": 9.476430388713586e-05, + "loss": 1.2185, + "step": 438 + }, + { + "epoch": 0.98, + "learning_rate": 9.464683843162635e-05, + "loss": 1.202, + "step": 439 + }, + { + "epoch": 0.98, + "learning_rate": 9.45291734113566e-05, + "loss": 1.2031, + "step": 440 + }, + { + "epoch": 0.98, + "learning_rate": 9.441130950407367e-05, + "loss": 1.2341, + "step": 441 + }, + { + "epoch": 0.98, + "learning_rate": 9.42932473886701e-05, + "loss": 1.2146, + "step": 442 + }, + { + "epoch": 0.98, + "learning_rate": 9.417498774518019e-05, + "loss": 1.2221, + "step": 443 + }, + { + "epoch": 0.99, + "learning_rate": 9.4056531254776e-05, + "loss": 1.2147, + "step": 444 + }, + { + "epoch": 0.99, + "learning_rate": 9.393787859976338e-05, + "loss": 1.229, + "step": 445 + }, + { + "epoch": 0.99, + "learning_rate": 9.381903046357809e-05, + "loss": 1.2305, + "step": 446 + }, + { + "epoch": 0.99, + "learning_rate": 9.369998753078188e-05, + "loss": 1.2403, + "step": 447 + }, + { + "epoch": 1.0, + "learning_rate": 9.35807504870585e-05, + "loss": 1.2102, + "step": 448 + }, + { + "epoch": 1.0, + "learning_rate": 9.346132001920977e-05, + "loss": 1.1927, + "step": 449 + }, + { + "epoch": 1.0, + "learning_rate": 9.334169681515164e-05, + "loss": 1.1759, + "step": 450 + }, + { + "epoch": 1.0, + "learning_rate": 9.322188156391023e-05, + "loss": 0.9853, + "step": 451 + }, + { + "epoch": 1.0, + "learning_rate": 9.31018749556178e-05, + "loss": 0.9395, + "step": 452 + }, + { + "epoch": 1.01, + "learning_rate": 9.29816776815089e-05, + "loss": 0.9399, + "step": 453 + }, + { + "epoch": 1.01, + "learning_rate": 9.28612904339162e-05, + "loss": 0.8974, + "step": 454 + }, + { + "epoch": 1.01, + "learning_rate": 9.27407139062667e-05, + "loss": 0.9517, + "step": 455 + }, + { + "epoch": 1.01, + "learning_rate": 9.261994879307761e-05, + "loss": 0.889, + "step": 456 + }, + { + "epoch": 1.02, + "learning_rate": 9.24989957899524e-05, + "loss": 0.9045, + "step": 457 + }, + { + "epoch": 1.02, + "learning_rate": 9.237785559357675e-05, + "loss": 0.8965, + "step": 458 + }, + { + "epoch": 1.02, + "learning_rate": 9.225652890171464e-05, + "loss": 0.9576, + "step": 459 + }, + { + "epoch": 1.02, + "learning_rate": 9.213501641320418e-05, + "loss": 0.9374, + "step": 460 + }, + { + "epoch": 1.02, + "learning_rate": 9.20133188279537e-05, + "loss": 0.8864, + "step": 461 + }, + { + "epoch": 1.03, + "learning_rate": 9.189143684693768e-05, + "loss": 0.8912, + "step": 462 + }, + { + "epoch": 1.03, + "learning_rate": 9.176937117219272e-05, + "loss": 0.8981, + "step": 463 + }, + { + "epoch": 1.03, + "learning_rate": 9.164712250681344e-05, + "loss": 0.892, + "step": 464 + }, + { + "epoch": 1.03, + "learning_rate": 9.152469155494857e-05, + "loss": 0.8896, + "step": 465 + }, + { + "epoch": 1.04, + "learning_rate": 9.140207902179673e-05, + "loss": 0.9112, + "step": 466 + }, + { + "epoch": 1.04, + "learning_rate": 9.127928561360246e-05, + "loss": 0.8909, + "step": 467 + }, + { + "epoch": 1.04, + "learning_rate": 9.115631203765218e-05, + "loss": 0.9034, + "step": 468 + }, + { + "epoch": 1.04, + "learning_rate": 9.103315900226999e-05, + "loss": 0.903, + "step": 469 + }, + { + "epoch": 1.04, + "learning_rate": 9.090982721681376e-05, + "loss": 0.9069, + "step": 470 + }, + { + "epoch": 1.05, + "learning_rate": 9.07863173916709e-05, + "loss": 0.877, + "step": 471 + }, + { + "epoch": 1.05, + "learning_rate": 9.06626302382543e-05, + "loss": 0.874, + "step": 472 + }, + { + "epoch": 1.05, + "learning_rate": 9.05387664689983e-05, + "loss": 0.8924, + "step": 473 + }, + { + "epoch": 1.05, + "learning_rate": 9.041472679735459e-05, + "loss": 0.9288, + "step": 474 + }, + { + "epoch": 1.06, + "learning_rate": 9.029051193778793e-05, + "loss": 0.8858, + "step": 475 + }, + { + "epoch": 1.06, + "learning_rate": 9.016612260577223e-05, + "loss": 0.869, + "step": 476 + }, + { + "epoch": 1.06, + "learning_rate": 9.004155951778635e-05, + "loss": 0.8812, + "step": 477 + }, + { + "epoch": 1.06, + "learning_rate": 8.991682339130999e-05, + "loss": 0.8824, + "step": 478 + }, + { + "epoch": 1.06, + "learning_rate": 8.979191494481956e-05, + "loss": 0.891, + "step": 479 + }, + { + "epoch": 1.07, + "learning_rate": 8.966683489778394e-05, + "loss": 0.8898, + "step": 480 + }, + { + "epoch": 1.07, + "learning_rate": 8.954158397066053e-05, + "loss": 0.8971, + "step": 481 + }, + { + "epoch": 1.07, + "learning_rate": 8.941616288489093e-05, + "loss": 0.8956, + "step": 482 + }, + { + "epoch": 1.07, + "learning_rate": 8.929057236289687e-05, + "loss": 0.903, + "step": 483 + }, + { + "epoch": 1.08, + "learning_rate": 8.916481312807606e-05, + "loss": 0.9051, + "step": 484 + }, + { + "epoch": 1.08, + "learning_rate": 8.90388859047979e-05, + "loss": 0.8458, + "step": 485 + }, + { + "epoch": 1.08, + "learning_rate": 8.891279141839948e-05, + "loss": 0.8767, + "step": 486 + }, + { + "epoch": 1.08, + "learning_rate": 8.878653039518131e-05, + "loss": 0.878, + "step": 487 + }, + { + "epoch": 1.08, + "learning_rate": 8.866010356240313e-05, + "loss": 0.9164, + "step": 488 + }, + { + "epoch": 1.09, + "learning_rate": 8.853351164827973e-05, + "loss": 0.8943, + "step": 489 + }, + { + "epoch": 1.09, + "learning_rate": 8.840675538197676e-05, + "loss": 0.8845, + "step": 490 + }, + { + "epoch": 1.09, + "learning_rate": 8.827983549360659e-05, + "loss": 0.8685, + "step": 491 + }, + { + "epoch": 1.09, + "learning_rate": 8.815275271422398e-05, + "loss": 0.898, + "step": 492 + }, + { + "epoch": 1.1, + "learning_rate": 8.802550777582197e-05, + "loss": 0.8824, + "step": 493 + }, + { + "epoch": 1.1, + "learning_rate": 8.789810141132762e-05, + "loss": 0.9068, + "step": 494 + }, + { + "epoch": 1.1, + "learning_rate": 8.777053435459781e-05, + "loss": 0.9178, + "step": 495 + }, + { + "epoch": 1.1, + "learning_rate": 8.7642807340415e-05, + "loss": 0.886, + "step": 496 + }, + { + "epoch": 1.1, + "learning_rate": 8.7514921104483e-05, + "loss": 0.8724, + "step": 497 + }, + { + "epoch": 1.11, + "learning_rate": 8.738687638342273e-05, + "loss": 0.8978, + "step": 498 + }, + { + "epoch": 1.11, + "learning_rate": 8.725867391476798e-05, + "loss": 0.9092, + "step": 499 + }, + { + "epoch": 1.11, + "learning_rate": 8.713031443696114e-05, + "loss": 0.9074, + "step": 500 + }, + { + "epoch": 1.11, + "learning_rate": 8.700179868934902e-05, + "loss": 0.9011, + "step": 501 + }, + { + "epoch": 1.12, + "learning_rate": 8.687312741217851e-05, + "loss": 0.8765, + "step": 502 + }, + { + "epoch": 1.12, + "learning_rate": 8.67443013465923e-05, + "loss": 0.8833, + "step": 503 + }, + { + "epoch": 1.12, + "learning_rate": 8.661532123462474e-05, + "loss": 0.904, + "step": 504 + }, + { + "epoch": 1.12, + "learning_rate": 8.648618781919745e-05, + "loss": 0.9168, + "step": 505 + }, + { + "epoch": 1.12, + "learning_rate": 8.635690184411505e-05, + "loss": 0.9153, + "step": 506 + }, + { + "epoch": 1.13, + "learning_rate": 8.62274640540609e-05, + "loss": 0.8865, + "step": 507 + }, + { + "epoch": 1.13, + "learning_rate": 8.609787519459285e-05, + "loss": 0.9066, + "step": 508 + }, + { + "epoch": 1.13, + "learning_rate": 8.596813601213889e-05, + "loss": 0.8977, + "step": 509 + }, + { + "epoch": 1.13, + "learning_rate": 8.583824725399285e-05, + "loss": 0.8505, + "step": 510 + }, + { + "epoch": 1.14, + "learning_rate": 8.570820966831008e-05, + "loss": 0.9051, + "step": 511 + }, + { + "epoch": 1.14, + "learning_rate": 8.557802400410326e-05, + "loss": 0.8694, + "step": 512 + }, + { + "epoch": 1.14, + "learning_rate": 8.544769101123793e-05, + "loss": 0.8798, + "step": 513 + }, + { + "epoch": 1.14, + "learning_rate": 8.531721144042826e-05, + "loss": 0.8827, + "step": 514 + }, + { + "epoch": 1.14, + "learning_rate": 8.518658604323272e-05, + "loss": 0.8774, + "step": 515 + }, + { + "epoch": 1.15, + "learning_rate": 8.505581557204968e-05, + "loss": 0.9107, + "step": 516 + }, + { + "epoch": 1.15, + "learning_rate": 8.49249007801132e-05, + "loss": 0.8869, + "step": 517 + }, + { + "epoch": 1.15, + "learning_rate": 8.479384242148856e-05, + "loss": 0.9126, + "step": 518 + }, + { + "epoch": 1.15, + "learning_rate": 8.466264125106806e-05, + "loss": 0.9356, + "step": 519 + }, + { + "epoch": 1.16, + "learning_rate": 8.45312980245665e-05, + "loss": 0.8958, + "step": 520 + }, + { + "epoch": 1.16, + "learning_rate": 8.4399813498517e-05, + "loss": 0.8788, + "step": 521 + }, + { + "epoch": 1.16, + "learning_rate": 8.42681884302665e-05, + "loss": 0.8861, + "step": 522 + }, + { + "epoch": 1.16, + "learning_rate": 8.413642357797148e-05, + "loss": 0.8971, + "step": 523 + }, + { + "epoch": 1.16, + "learning_rate": 8.40045197005936e-05, + "loss": 0.882, + "step": 524 + }, + { + "epoch": 1.17, + "learning_rate": 8.387247755789525e-05, + "loss": 0.909, + "step": 525 + }, + { + "epoch": 1.17, + "learning_rate": 8.374029791043527e-05, + "loss": 0.8958, + "step": 526 + }, + { + "epoch": 1.17, + "learning_rate": 8.36079815195645e-05, + "loss": 0.9146, + "step": 527 + }, + { + "epoch": 1.17, + "learning_rate": 8.347552914742142e-05, + "loss": 0.8973, + "step": 528 + }, + { + "epoch": 1.18, + "learning_rate": 8.334294155692774e-05, + "loss": 0.9057, + "step": 529 + }, + { + "epoch": 1.18, + "learning_rate": 8.321021951178405e-05, + "loss": 0.8658, + "step": 530 + }, + { + "epoch": 1.18, + "learning_rate": 8.30773637764654e-05, + "loss": 0.9342, + "step": 531 + }, + { + "epoch": 1.18, + "learning_rate": 8.29443751162169e-05, + "loss": 0.8892, + "step": 532 + }, + { + "epoch": 1.18, + "learning_rate": 8.281125429704923e-05, + "loss": 0.9283, + "step": 533 + }, + { + "epoch": 1.19, + "learning_rate": 8.26780020857344e-05, + "loss": 0.8811, + "step": 534 + }, + { + "epoch": 1.19, + "learning_rate": 8.254461924980116e-05, + "loss": 0.8753, + "step": 535 + }, + { + "epoch": 1.19, + "learning_rate": 8.24111065575307e-05, + "loss": 0.8788, + "step": 536 + }, + { + "epoch": 1.19, + "learning_rate": 8.227746477795215e-05, + "loss": 0.9025, + "step": 537 + }, + { + "epoch": 1.2, + "learning_rate": 8.21436946808382e-05, + "loss": 0.9066, + "step": 538 + }, + { + "epoch": 1.2, + "learning_rate": 8.200979703670062e-05, + "loss": 0.8974, + "step": 539 + }, + { + "epoch": 1.2, + "learning_rate": 8.18757726167859e-05, + "loss": 0.898, + "step": 540 + }, + { + "epoch": 1.2, + "learning_rate": 8.17416221930707e-05, + "loss": 0.9442, + "step": 541 + }, + { + "epoch": 1.2, + "learning_rate": 8.160734653825743e-05, + "loss": 0.9257, + "step": 542 + }, + { + "epoch": 1.21, + "learning_rate": 8.147294642576993e-05, + "loss": 0.9328, + "step": 543 + }, + { + "epoch": 1.21, + "learning_rate": 8.133842262974885e-05, + "loss": 0.8714, + "step": 544 + }, + { + "epoch": 1.21, + "learning_rate": 8.120377592504725e-05, + "loss": 0.8962, + "step": 545 + }, + { + "epoch": 1.21, + "learning_rate": 8.106900708722612e-05, + "loss": 0.9002, + "step": 546 + }, + { + "epoch": 1.22, + "learning_rate": 8.093411689255001e-05, + "loss": 0.912, + "step": 547 + }, + { + "epoch": 1.22, + "learning_rate": 8.07991061179824e-05, + "loss": 0.8852, + "step": 548 + }, + { + "epoch": 1.22, + "learning_rate": 8.066397554118136e-05, + "loss": 0.8716, + "step": 549 + }, + { + "epoch": 1.22, + "learning_rate": 8.052872594049501e-05, + "loss": 0.9013, + "step": 550 + }, + { + "epoch": 1.22, + "learning_rate": 8.0393358094957e-05, + "loss": 0.9178, + "step": 551 + }, + { + "epoch": 1.23, + "learning_rate": 8.025787278428213e-05, + "loss": 0.9117, + "step": 552 + }, + { + "epoch": 1.23, + "learning_rate": 8.012227078886174e-05, + "loss": 0.9364, + "step": 553 + }, + { + "epoch": 1.23, + "learning_rate": 7.998655288975931e-05, + "loss": 0.886, + "step": 554 + }, + { + "epoch": 1.23, + "learning_rate": 7.985071986870591e-05, + "loss": 0.8811, + "step": 555 + }, + { + "epoch": 1.24, + "learning_rate": 7.971477250809569e-05, + "loss": 0.9123, + "step": 556 + }, + { + "epoch": 1.24, + "learning_rate": 7.957871159098143e-05, + "loss": 0.8964, + "step": 557 + }, + { + "epoch": 1.24, + "learning_rate": 7.944253790106996e-05, + "loss": 0.8677, + "step": 558 + }, + { + "epoch": 1.24, + "learning_rate": 7.930625222271768e-05, + "loss": 0.9193, + "step": 559 + }, + { + "epoch": 1.24, + "learning_rate": 7.916985534092606e-05, + "loss": 0.9202, + "step": 560 + }, + { + "epoch": 1.25, + "learning_rate": 7.903334804133711e-05, + "loss": 0.8913, + "step": 561 + }, + { + "epoch": 1.25, + "learning_rate": 7.889673111022878e-05, + "loss": 0.884, + "step": 562 + }, + { + "epoch": 1.25, + "learning_rate": 7.876000533451057e-05, + "loss": 0.8619, + "step": 563 + }, + { + "epoch": 1.25, + "learning_rate": 7.862317150171886e-05, + "loss": 0.9247, + "step": 564 + }, + { + "epoch": 1.26, + "learning_rate": 7.848623040001246e-05, + "loss": 0.9068, + "step": 565 + }, + { + "epoch": 1.26, + "learning_rate": 7.834918281816805e-05, + "loss": 0.8869, + "step": 566 + }, + { + "epoch": 1.26, + "learning_rate": 7.821202954557568e-05, + "loss": 0.8924, + "step": 567 + }, + { + "epoch": 1.26, + "learning_rate": 7.807477137223406e-05, + "loss": 0.906, + "step": 568 + }, + { + "epoch": 1.26, + "learning_rate": 7.793740908874622e-05, + "loss": 0.9001, + "step": 569 + }, + { + "epoch": 1.27, + "learning_rate": 7.779994348631484e-05, + "loss": 0.9018, + "step": 570 + }, + { + "epoch": 1.27, + "learning_rate": 7.76623753567377e-05, + "loss": 0.9202, + "step": 571 + }, + { + "epoch": 1.27, + "learning_rate": 7.752470549240314e-05, + "loss": 0.8876, + "step": 572 + }, + { + "epoch": 1.27, + "learning_rate": 7.738693468628548e-05, + "loss": 0.8735, + "step": 573 + }, + { + "epoch": 1.28, + "learning_rate": 7.724906373194049e-05, + "loss": 0.8747, + "step": 574 + }, + { + "epoch": 1.28, + "learning_rate": 7.711109342350075e-05, + "loss": 0.9046, + "step": 575 + }, + { + "epoch": 1.28, + "learning_rate": 7.697302455567116e-05, + "loss": 0.8478, + "step": 576 + }, + { + "epoch": 1.28, + "learning_rate": 7.683485792372427e-05, + "loss": 0.887, + "step": 577 + }, + { + "epoch": 1.28, + "learning_rate": 7.669659432349581e-05, + "loss": 0.8691, + "step": 578 + }, + { + "epoch": 1.29, + "learning_rate": 7.655823455137998e-05, + "loss": 0.9361, + "step": 579 + }, + { + "epoch": 1.29, + "learning_rate": 7.641977940432499e-05, + "loss": 0.8743, + "step": 580 + }, + { + "epoch": 1.29, + "learning_rate": 7.628122967982834e-05, + "loss": 0.8967, + "step": 581 + }, + { + "epoch": 1.29, + "learning_rate": 7.614258617593234e-05, + "loss": 0.8915, + "step": 582 + }, + { + "epoch": 1.3, + "learning_rate": 7.600384969121945e-05, + "loss": 0.8841, + "step": 583 + }, + { + "epoch": 1.3, + "learning_rate": 7.586502102480773e-05, + "loss": 0.8665, + "step": 584 + }, + { + "epoch": 1.3, + "learning_rate": 7.572610097634613e-05, + "loss": 0.8986, + "step": 585 + }, + { + "epoch": 1.3, + "learning_rate": 7.558709034601004e-05, + "loss": 0.9087, + "step": 586 + }, + { + "epoch": 1.3, + "learning_rate": 7.544798993449654e-05, + "loss": 0.9034, + "step": 587 + }, + { + "epoch": 1.31, + "learning_rate": 7.53088005430199e-05, + "loss": 0.8983, + "step": 588 + }, + { + "epoch": 1.31, + "learning_rate": 7.516952297330684e-05, + "loss": 0.9164, + "step": 589 + }, + { + "epoch": 1.31, + "learning_rate": 7.503015802759202e-05, + "loss": 0.8989, + "step": 590 + }, + { + "epoch": 1.31, + "learning_rate": 7.489070650861344e-05, + "loss": 0.921, + "step": 591 + }, + { + "epoch": 1.32, + "learning_rate": 7.475116921960766e-05, + "loss": 0.8921, + "step": 592 + }, + { + "epoch": 1.32, + "learning_rate": 7.461154696430534e-05, + "loss": 0.8911, + "step": 593 + }, + { + "epoch": 1.32, + "learning_rate": 7.447184054692651e-05, + "loss": 0.8925, + "step": 594 + }, + { + "epoch": 1.32, + "learning_rate": 7.4332050772176e-05, + "loss": 0.9112, + "step": 595 + }, + { + "epoch": 1.32, + "learning_rate": 7.419217844523875e-05, + "loss": 0.9141, + "step": 596 + }, + { + "epoch": 1.33, + "learning_rate": 7.405222437177523e-05, + "loss": 0.8961, + "step": 597 + }, + { + "epoch": 1.33, + "learning_rate": 7.391218935791671e-05, + "loss": 0.9301, + "step": 598 + }, + { + "epoch": 1.33, + "learning_rate": 7.377207421026074e-05, + "loss": 0.8793, + "step": 599 + }, + { + "epoch": 1.33, + "learning_rate": 7.363187973586639e-05, + "loss": 0.8918, + "step": 600 + }, + { + "epoch": 1.34, + "learning_rate": 7.349160674224967e-05, + "loss": 0.9046, + "step": 601 + }, + { + "epoch": 1.34, + "learning_rate": 7.335125603737886e-05, + "loss": 0.871, + "step": 602 + }, + { + "epoch": 1.34, + "learning_rate": 7.321082842966986e-05, + "loss": 0.8872, + "step": 603 + }, + { + "epoch": 1.34, + "learning_rate": 7.307032472798151e-05, + "loss": 0.91, + "step": 604 + }, + { + "epoch": 1.34, + "learning_rate": 7.292974574161098e-05, + "loss": 0.927, + "step": 605 + }, + { + "epoch": 1.35, + "learning_rate": 7.278909228028903e-05, + "loss": 0.9181, + "step": 606 + }, + { + "epoch": 1.35, + "learning_rate": 7.26483651541754e-05, + "loss": 0.8765, + "step": 607 + }, + { + "epoch": 1.35, + "learning_rate": 7.25075651738542e-05, + "loss": 0.9266, + "step": 608 + }, + { + "epoch": 1.35, + "learning_rate": 7.236669315032912e-05, + "loss": 0.9351, + "step": 609 + }, + { + "epoch": 1.36, + "learning_rate": 7.222574989501881e-05, + "loss": 0.8773, + "step": 610 + }, + { + "epoch": 1.36, + "learning_rate": 7.208473621975227e-05, + "loss": 0.9041, + "step": 611 + }, + { + "epoch": 1.36, + "learning_rate": 7.194365293676404e-05, + "loss": 0.9115, + "step": 612 + }, + { + "epoch": 1.36, + "learning_rate": 7.180250085868969e-05, + "loss": 0.8905, + "step": 613 + }, + { + "epoch": 1.36, + "learning_rate": 7.166128079856096e-05, + "loss": 0.883, + "step": 614 + }, + { + "epoch": 1.37, + "learning_rate": 7.151999356980121e-05, + "loss": 0.9163, + "step": 615 + }, + { + "epoch": 1.37, + "learning_rate": 7.137863998622067e-05, + "loss": 0.8888, + "step": 616 + }, + { + "epoch": 1.37, + "learning_rate": 7.123722086201181e-05, + "loss": 0.9359, + "step": 617 + }, + { + "epoch": 1.37, + "learning_rate": 7.109573701174457e-05, + "loss": 0.8758, + "step": 618 + }, + { + "epoch": 1.38, + "learning_rate": 7.09541892503617e-05, + "loss": 0.8859, + "step": 619 + }, + { + "epoch": 1.38, + "learning_rate": 7.081257839317415e-05, + "loss": 0.8991, + "step": 620 + }, + { + "epoch": 1.38, + "learning_rate": 7.067090525585621e-05, + "loss": 0.8943, + "step": 621 + }, + { + "epoch": 1.38, + "learning_rate": 7.052917065444098e-05, + "loss": 0.8796, + "step": 622 + }, + { + "epoch": 1.38, + "learning_rate": 7.03873754053155e-05, + "loss": 0.8951, + "step": 623 + }, + { + "epoch": 1.39, + "learning_rate": 7.024552032521625e-05, + "loss": 0.9414, + "step": 624 + }, + { + "epoch": 1.39, + "learning_rate": 7.010360623122425e-05, + "loss": 0.8948, + "step": 625 + }, + { + "epoch": 1.39, + "learning_rate": 6.996163394076047e-05, + "loss": 0.8822, + "step": 626 + }, + { + "epoch": 1.39, + "learning_rate": 6.98196042715811e-05, + "loss": 0.9204, + "step": 627 + }, + { + "epoch": 1.4, + "learning_rate": 6.967751804177279e-05, + "loss": 0.8788, + "step": 628 + }, + { + "epoch": 1.4, + "learning_rate": 6.953537606974799e-05, + "loss": 0.9132, + "step": 629 + }, + { + "epoch": 1.4, + "learning_rate": 6.939317917424028e-05, + "loss": 0.9086, + "step": 630 + }, + { + "epoch": 1.4, + "learning_rate": 6.925092817429956e-05, + "loss": 0.8749, + "step": 631 + }, + { + "epoch": 1.4, + "learning_rate": 6.910862388928732e-05, + "loss": 0.8853, + "step": 632 + }, + { + "epoch": 1.41, + "learning_rate": 6.896626713887203e-05, + "loss": 0.9135, + "step": 633 + }, + { + "epoch": 1.41, + "learning_rate": 6.882385874302436e-05, + "loss": 0.8551, + "step": 634 + }, + { + "epoch": 1.41, + "learning_rate": 6.868139952201243e-05, + "loss": 0.8678, + "step": 635 + }, + { + "epoch": 1.41, + "learning_rate": 6.853889029639712e-05, + "loss": 0.9216, + "step": 636 + }, + { + "epoch": 1.42, + "learning_rate": 6.839633188702733e-05, + "loss": 0.8722, + "step": 637 + }, + { + "epoch": 1.42, + "learning_rate": 6.825372511503526e-05, + "loss": 0.9105, + "step": 638 + }, + { + "epoch": 1.42, + "learning_rate": 6.811107080183171e-05, + "loss": 0.8553, + "step": 639 + }, + { + "epoch": 1.42, + "learning_rate": 6.796836976910128e-05, + "loss": 0.8705, + "step": 640 + }, + { + "epoch": 1.42, + "learning_rate": 6.782562283879765e-05, + "loss": 0.9197, + "step": 641 + }, + { + "epoch": 1.43, + "learning_rate": 6.768283083313891e-05, + "loss": 0.8856, + "step": 642 + }, + { + "epoch": 1.43, + "learning_rate": 6.753999457460279e-05, + "loss": 0.8936, + "step": 643 + }, + { + "epoch": 1.43, + "learning_rate": 6.739711488592188e-05, + "loss": 0.8796, + "step": 644 + }, + { + "epoch": 1.43, + "learning_rate": 6.725419259007895e-05, + "loss": 0.8901, + "step": 645 + }, + { + "epoch": 1.44, + "learning_rate": 6.711122851030217e-05, + "loss": 0.9187, + "step": 646 + }, + { + "epoch": 1.44, + "learning_rate": 6.696822347006038e-05, + "loss": 0.8882, + "step": 647 + }, + { + "epoch": 1.44, + "learning_rate": 6.682517829305842e-05, + "loss": 0.9129, + "step": 648 + }, + { + "epoch": 1.44, + "learning_rate": 6.668209380323221e-05, + "loss": 0.9057, + "step": 649 + }, + { + "epoch": 1.44, + "learning_rate": 6.653897082474416e-05, + "loss": 0.8826, + "step": 650 + }, + { + "epoch": 1.45, + "learning_rate": 6.639581018197841e-05, + "loss": 0.8933, + "step": 651 + }, + { + "epoch": 1.45, + "learning_rate": 6.625261269953598e-05, + "loss": 0.8698, + "step": 652 + }, + { + "epoch": 1.45, + "learning_rate": 6.610937920223014e-05, + "loss": 0.9076, + "step": 653 + }, + { + "epoch": 1.45, + "learning_rate": 6.596611051508155e-05, + "loss": 0.8564, + "step": 654 + }, + { + "epoch": 1.46, + "learning_rate": 6.58228074633136e-05, + "loss": 0.8766, + "step": 655 + }, + { + "epoch": 1.46, + "learning_rate": 6.567947087234762e-05, + "loss": 0.9037, + "step": 656 + }, + { + "epoch": 1.46, + "learning_rate": 6.553610156779812e-05, + "loss": 0.9012, + "step": 657 + }, + { + "epoch": 1.46, + "learning_rate": 6.539270037546804e-05, + "loss": 0.9266, + "step": 658 + }, + { + "epoch": 1.46, + "learning_rate": 6.524926812134396e-05, + "loss": 0.8679, + "step": 659 + }, + { + "epoch": 1.47, + "learning_rate": 6.510580563159145e-05, + "loss": 0.9083, + "step": 660 + }, + { + "epoch": 1.47, + "learning_rate": 6.496231373255014e-05, + "loss": 0.8996, + "step": 661 + }, + { + "epoch": 1.47, + "learning_rate": 6.481879325072914e-05, + "loss": 0.9023, + "step": 662 + }, + { + "epoch": 1.47, + "learning_rate": 6.467524501280213e-05, + "loss": 0.9016, + "step": 663 + }, + { + "epoch": 1.48, + "learning_rate": 6.453166984560274e-05, + "loss": 0.8643, + "step": 664 + }, + { + "epoch": 1.48, + "learning_rate": 6.438806857611963e-05, + "loss": 0.8907, + "step": 665 + }, + { + "epoch": 1.48, + "learning_rate": 6.424444203149187e-05, + "loss": 0.8973, + "step": 666 + }, + { + "epoch": 1.48, + "learning_rate": 6.410079103900409e-05, + "loss": 0.9139, + "step": 667 + }, + { + "epoch": 1.48, + "learning_rate": 6.395711642608172e-05, + "loss": 0.8845, + "step": 668 + }, + { + "epoch": 1.49, + "learning_rate": 6.381341902028629e-05, + "loss": 0.8828, + "step": 669 + }, + { + "epoch": 1.49, + "learning_rate": 6.366969964931058e-05, + "loss": 0.882, + "step": 670 + }, + { + "epoch": 1.49, + "learning_rate": 6.352595914097388e-05, + "loss": 0.8749, + "step": 671 + }, + { + "epoch": 1.49, + "learning_rate": 6.338219832321725e-05, + "loss": 0.9002, + "step": 672 + }, + { + "epoch": 1.5, + "learning_rate": 6.323841802409875e-05, + "loss": 0.8694, + "step": 673 + }, + { + "epoch": 1.5, + "learning_rate": 6.309461907178863e-05, + "loss": 0.9151, + "step": 674 + }, + { + "epoch": 1.5, + "learning_rate": 6.295080229456456e-05, + "loss": 0.9025, + "step": 675 + }, + { + "epoch": 1.5, + "learning_rate": 6.280696852080694e-05, + "loss": 0.8891, + "step": 676 + }, + { + "epoch": 1.5, + "learning_rate": 6.2663118578994e-05, + "loss": 0.9178, + "step": 677 + }, + { + "epoch": 1.51, + "learning_rate": 6.251925329769718e-05, + "loss": 0.9171, + "step": 678 + }, + { + "epoch": 1.51, + "learning_rate": 6.237537350557617e-05, + "loss": 0.9079, + "step": 679 + }, + { + "epoch": 1.51, + "learning_rate": 6.223148003137435e-05, + "loss": 0.8745, + "step": 680 + }, + { + "epoch": 1.51, + "learning_rate": 6.208757370391379e-05, + "loss": 0.9134, + "step": 681 + }, + { + "epoch": 1.52, + "learning_rate": 6.194365535209074e-05, + "loss": 0.8587, + "step": 682 + }, + { + "epoch": 1.52, + "learning_rate": 6.179972580487057e-05, + "loss": 0.9039, + "step": 683 + }, + { + "epoch": 1.52, + "learning_rate": 6.165578589128323e-05, + "loss": 0.9315, + "step": 684 + }, + { + "epoch": 1.52, + "learning_rate": 6.151183644041834e-05, + "loss": 0.9108, + "step": 685 + }, + { + "epoch": 1.52, + "learning_rate": 6.136787828142047e-05, + "loss": 0.8866, + "step": 686 + }, + { + "epoch": 1.53, + "learning_rate": 6.122391224348433e-05, + "loss": 0.8872, + "step": 687 + }, + { + "epoch": 1.53, + "learning_rate": 6.107993915585001e-05, + "loss": 0.9115, + "step": 688 + }, + { + "epoch": 1.53, + "learning_rate": 6.0935959847798226e-05, + "loss": 0.8776, + "step": 689 + }, + { + "epoch": 1.53, + "learning_rate": 6.079197514864554e-05, + "loss": 0.8889, + "step": 690 + }, + { + "epoch": 1.54, + "learning_rate": 6.064798588773952e-05, + "loss": 0.922, + "step": 691 + }, + { + "epoch": 1.54, + "learning_rate": 6.0503992894454006e-05, + "loss": 0.907, + "step": 692 + }, + { + "epoch": 1.54, + "learning_rate": 6.035999699818442e-05, + "loss": 0.9033, + "step": 693 + }, + { + "epoch": 1.54, + "learning_rate": 6.02159990283428e-05, + "loss": 0.8618, + "step": 694 + }, + { + "epoch": 1.54, + "learning_rate": 6.0071999814353204e-05, + "loss": 0.8837, + "step": 695 + }, + { + "epoch": 1.55, + "learning_rate": 5.99280001856468e-05, + "loss": 0.8993, + "step": 696 + }, + { + "epoch": 1.55, + "learning_rate": 5.97840009716572e-05, + "loss": 0.8508, + "step": 697 + }, + { + "epoch": 1.55, + "learning_rate": 5.964000300181559e-05, + "loss": 0.9043, + "step": 698 + }, + { + "epoch": 1.55, + "learning_rate": 5.9496007105546004e-05, + "loss": 0.9256, + "step": 699 + }, + { + "epoch": 1.56, + "learning_rate": 5.935201411226049e-05, + "loss": 0.8824, + "step": 700 + }, + { + "epoch": 1.56, + "learning_rate": 5.920802485135447e-05, + "loss": 0.9082, + "step": 701 + }, + { + "epoch": 1.56, + "learning_rate": 5.9064040152201777e-05, + "loss": 0.9122, + "step": 702 + }, + { + "epoch": 1.56, + "learning_rate": 5.892006084415001e-05, + "loss": 0.8684, + "step": 703 + }, + { + "epoch": 1.56, + "learning_rate": 5.87760877565157e-05, + "loss": 0.8681, + "step": 704 + }, + { + "epoch": 1.57, + "learning_rate": 5.863212171857953e-05, + "loss": 0.8688, + "step": 705 + }, + { + "epoch": 1.57, + "learning_rate": 5.8488163559581656e-05, + "loss": 0.867, + "step": 706 + }, + { + "epoch": 1.57, + "learning_rate": 5.8344214108716775e-05, + "loss": 0.8632, + "step": 707 + }, + { + "epoch": 1.57, + "learning_rate": 5.820027419512944e-05, + "loss": 0.8988, + "step": 708 + }, + { + "epoch": 1.58, + "learning_rate": 5.805634464790927e-05, + "loss": 0.8976, + "step": 709 + }, + { + "epoch": 1.58, + "learning_rate": 5.791242629608622e-05, + "loss": 0.8957, + "step": 710 + }, + { + "epoch": 1.58, + "learning_rate": 5.7768519968625685e-05, + "loss": 0.8449, + "step": 711 + }, + { + "epoch": 1.58, + "learning_rate": 5.7624626494423846e-05, + "loss": 0.9322, + "step": 712 + }, + { + "epoch": 1.58, + "learning_rate": 5.748074670230282e-05, + "loss": 0.8835, + "step": 713 + }, + { + "epoch": 1.59, + "learning_rate": 5.733688142100598e-05, + "loss": 0.9181, + "step": 714 + }, + { + "epoch": 1.59, + "learning_rate": 5.7193031479193065e-05, + "loss": 0.8979, + "step": 715 + }, + { + "epoch": 1.59, + "learning_rate": 5.704919770543544e-05, + "loss": 0.8659, + "step": 716 + }, + { + "epoch": 1.59, + "learning_rate": 5.690538092821139e-05, + "loss": 0.8898, + "step": 717 + }, + { + "epoch": 1.6, + "learning_rate": 5.6761581975901255e-05, + "loss": 0.8836, + "step": 718 + }, + { + "epoch": 1.6, + "learning_rate": 5.661780167678277e-05, + "loss": 0.9026, + "step": 719 + }, + { + "epoch": 1.6, + "learning_rate": 5.6474040859026145e-05, + "loss": 0.9006, + "step": 720 + }, + { + "epoch": 1.6, + "learning_rate": 5.633030035068945e-05, + "loss": 0.8909, + "step": 721 + }, + { + "epoch": 1.6, + "learning_rate": 5.6186580979713706e-05, + "loss": 0.9137, + "step": 722 + }, + { + "epoch": 1.61, + "learning_rate": 5.604288357391828e-05, + "loss": 0.912, + "step": 723 + }, + { + "epoch": 1.61, + "learning_rate": 5.5899208960995915e-05, + "loss": 0.9013, + "step": 724 + }, + { + "epoch": 1.61, + "learning_rate": 5.575555796850813e-05, + "loss": 0.8905, + "step": 725 + }, + { + "epoch": 1.61, + "learning_rate": 5.561193142388037e-05, + "loss": 0.9419, + "step": 726 + }, + { + "epoch": 1.62, + "learning_rate": 5.546833015439727e-05, + "loss": 0.9136, + "step": 727 + }, + { + "epoch": 1.62, + "learning_rate": 5.5324754987197876e-05, + "loss": 0.8949, + "step": 728 + }, + { + "epoch": 1.62, + "learning_rate": 5.518120674927088e-05, + "loss": 0.8903, + "step": 729 + }, + { + "epoch": 1.62, + "learning_rate": 5.5037686267449886e-05, + "loss": 0.8621, + "step": 730 + }, + { + "epoch": 1.62, + "learning_rate": 5.489419436840856e-05, + "loss": 0.873, + "step": 731 + }, + { + "epoch": 1.63, + "learning_rate": 5.475073187865603e-05, + "loss": 0.8902, + "step": 732 + }, + { + "epoch": 1.63, + "learning_rate": 5.460729962453197e-05, + "loss": 0.8985, + "step": 733 + }, + { + "epoch": 1.63, + "learning_rate": 5.446389843220189e-05, + "loss": 0.8906, + "step": 734 + }, + { + "epoch": 1.63, + "learning_rate": 5.4320529127652394e-05, + "loss": 0.8643, + "step": 735 + }, + { + "epoch": 1.64, + "learning_rate": 5.417719253668641e-05, + "loss": 0.8695, + "step": 736 + }, + { + "epoch": 1.64, + "learning_rate": 5.4033889484918476e-05, + "loss": 0.8997, + "step": 737 + }, + { + "epoch": 1.64, + "learning_rate": 5.3890620797769894e-05, + "loss": 0.8748, + "step": 738 + }, + { + "epoch": 1.64, + "learning_rate": 5.374738730046401e-05, + "loss": 0.8905, + "step": 739 + }, + { + "epoch": 1.64, + "learning_rate": 5.360418981802159e-05, + "loss": 0.9044, + "step": 740 + }, + { + "epoch": 1.65, + "learning_rate": 5.3461029175255834e-05, + "loss": 0.9008, + "step": 741 + }, + { + "epoch": 1.65, + "learning_rate": 5.33179061967678e-05, + "loss": 0.8648, + "step": 742 + }, + { + "epoch": 1.65, + "learning_rate": 5.317482170694159e-05, + "loss": 0.8987, + "step": 743 + }, + { + "epoch": 1.65, + "learning_rate": 5.303177652993962e-05, + "loss": 0.9102, + "step": 744 + }, + { + "epoch": 1.66, + "learning_rate": 5.288877148969784e-05, + "loss": 0.8639, + "step": 745 + }, + { + "epoch": 1.66, + "learning_rate": 5.274580740992107e-05, + "loss": 0.8713, + "step": 746 + }, + { + "epoch": 1.66, + "learning_rate": 5.260288511407814e-05, + "loss": 0.8878, + "step": 747 + }, + { + "epoch": 1.66, + "learning_rate": 5.246000542539721e-05, + "loss": 0.888, + "step": 748 + }, + { + "epoch": 1.66, + "learning_rate": 5.2317169166861096e-05, + "loss": 0.8598, + "step": 749 + }, + { + "epoch": 1.67, + "learning_rate": 5.217437716120237e-05, + "loss": 0.9094, + "step": 750 + }, + { + "epoch": 1.67, + "learning_rate": 5.203163023089874e-05, + "loss": 0.8648, + "step": 751 + }, + { + "epoch": 1.67, + "learning_rate": 5.18889291981683e-05, + "loss": 0.8844, + "step": 752 + }, + { + "epoch": 1.67, + "learning_rate": 5.174627488496475e-05, + "loss": 0.8593, + "step": 753 + }, + { + "epoch": 1.68, + "learning_rate": 5.160366811297269e-05, + "loss": 0.8916, + "step": 754 + }, + { + "epoch": 1.68, + "learning_rate": 5.14611097036029e-05, + "loss": 0.8647, + "step": 755 + }, + { + "epoch": 1.68, + "learning_rate": 5.131860047798759e-05, + "loss": 0.8975, + "step": 756 + }, + { + "epoch": 1.68, + "learning_rate": 5.117614125697564e-05, + "loss": 0.898, + "step": 757 + }, + { + "epoch": 1.68, + "learning_rate": 5.103373286112797e-05, + "loss": 0.8703, + "step": 758 + }, + { + "epoch": 1.69, + "learning_rate": 5.089137611071269e-05, + "loss": 0.865, + "step": 759 + }, + { + "epoch": 1.69, + "learning_rate": 5.0749071825700455e-05, + "loss": 0.888, + "step": 760 + }, + { + "epoch": 1.69, + "learning_rate": 5.060682082575972e-05, + "loss": 0.8707, + "step": 761 + }, + { + "epoch": 1.69, + "learning_rate": 5.046462393025202e-05, + "loss": 0.8743, + "step": 762 + }, + { + "epoch": 1.7, + "learning_rate": 5.032248195822724e-05, + "loss": 0.8957, + "step": 763 + }, + { + "epoch": 1.7, + "learning_rate": 5.0180395728418923e-05, + "loss": 0.8876, + "step": 764 + }, + { + "epoch": 1.7, + "learning_rate": 5.003836605923952e-05, + "loss": 0.8571, + "step": 765 + }, + { + "epoch": 1.7, + "learning_rate": 4.989639376877574e-05, + "loss": 0.8655, + "step": 766 + }, + { + "epoch": 1.7, + "learning_rate": 4.9754479674783744e-05, + "loss": 0.8823, + "step": 767 + }, + { + "epoch": 1.71, + "learning_rate": 4.96126245946845e-05, + "loss": 0.8753, + "step": 768 + }, + { + "epoch": 1.71, + "learning_rate": 4.9470829345559044e-05, + "loss": 0.8835, + "step": 769 + }, + { + "epoch": 1.71, + "learning_rate": 4.93290947441438e-05, + "loss": 0.8921, + "step": 770 + }, + { + "epoch": 1.71, + "learning_rate": 4.918742160682586e-05, + "loss": 0.876, + "step": 771 + }, + { + "epoch": 1.72, + "learning_rate": 4.9045810749638305e-05, + "loss": 0.912, + "step": 772 + }, + { + "epoch": 1.72, + "learning_rate": 4.890426298825546e-05, + "loss": 0.9259, + "step": 773 + }, + { + "epoch": 1.72, + "learning_rate": 4.876277913798819e-05, + "loss": 0.8822, + "step": 774 + }, + { + "epoch": 1.72, + "learning_rate": 4.862136001377932e-05, + "loss": 0.8957, + "step": 775 + }, + { + "epoch": 1.72, + "learning_rate": 4.84800064301988e-05, + "loss": 0.8935, + "step": 776 + }, + { + "epoch": 1.73, + "learning_rate": 4.833871920143905e-05, + "loss": 0.8739, + "step": 777 + }, + { + "epoch": 1.73, + "learning_rate": 4.819749914131032e-05, + "loss": 0.88, + "step": 778 + }, + { + "epoch": 1.73, + "learning_rate": 4.805634706323596e-05, + "loss": 0.8859, + "step": 779 + }, + { + "epoch": 1.73, + "learning_rate": 4.7915263780247744e-05, + "loss": 0.852, + "step": 780 + }, + { + "epoch": 1.74, + "learning_rate": 4.77742501049812e-05, + "loss": 0.8774, + "step": 781 + }, + { + "epoch": 1.74, + "learning_rate": 4.7633306849670906e-05, + "loss": 0.9166, + "step": 782 + }, + { + "epoch": 1.74, + "learning_rate": 4.74924348261458e-05, + "loss": 0.8773, + "step": 783 + }, + { + "epoch": 1.74, + "learning_rate": 4.73516348458246e-05, + "loss": 0.9, + "step": 784 + }, + { + "epoch": 1.74, + "learning_rate": 4.7210907719710985e-05, + "loss": 0.8678, + "step": 785 + }, + { + "epoch": 1.75, + "learning_rate": 4.707025425838904e-05, + "loss": 0.8743, + "step": 786 + }, + { + "epoch": 1.75, + "learning_rate": 4.692967527201849e-05, + "loss": 0.8748, + "step": 787 + }, + { + "epoch": 1.75, + "learning_rate": 4.678917157033015e-05, + "loss": 0.8972, + "step": 788 + }, + { + "epoch": 1.75, + "learning_rate": 4.664874396262114e-05, + "loss": 0.9149, + "step": 789 + }, + { + "epoch": 1.76, + "learning_rate": 4.650839325775035e-05, + "loss": 0.8609, + "step": 790 + }, + { + "epoch": 1.76, + "learning_rate": 4.636812026413362e-05, + "loss": 0.8849, + "step": 791 + }, + { + "epoch": 1.76, + "learning_rate": 4.622792578973926e-05, + "loss": 0.8396, + "step": 792 + }, + { + "epoch": 1.76, + "learning_rate": 4.608781064208329e-05, + "loss": 0.8749, + "step": 793 + }, + { + "epoch": 1.76, + "learning_rate": 4.594777562822478e-05, + "loss": 0.8876, + "step": 794 + }, + { + "epoch": 1.77, + "learning_rate": 4.580782155476124e-05, + "loss": 0.8608, + "step": 795 + }, + { + "epoch": 1.77, + "learning_rate": 4.566794922782401e-05, + "loss": 0.8541, + "step": 796 + }, + { + "epoch": 1.77, + "learning_rate": 4.55281594530735e-05, + "loss": 0.8943, + "step": 797 + }, + { + "epoch": 1.77, + "learning_rate": 4.538845303569468e-05, + "loss": 0.8855, + "step": 798 + }, + { + "epoch": 1.78, + "learning_rate": 4.524883078039236e-05, + "loss": 0.9044, + "step": 799 + }, + { + "epoch": 1.78, + "learning_rate": 4.5109293491386574e-05, + "loss": 0.8696, + "step": 800 + }, + { + "epoch": 1.78, + "learning_rate": 4.4969841972407975e-05, + "loss": 0.8856, + "step": 801 + }, + { + "epoch": 1.78, + "learning_rate": 4.4830477026693174e-05, + "loss": 0.8855, + "step": 802 + }, + { + "epoch": 1.78, + "learning_rate": 4.4691199456980124e-05, + "loss": 0.8757, + "step": 803 + }, + { + "epoch": 1.79, + "learning_rate": 4.455201006550346e-05, + "loss": 0.8659, + "step": 804 + }, + { + "epoch": 1.79, + "learning_rate": 4.441290965398997e-05, + "loss": 0.8783, + "step": 805 + }, + { + "epoch": 1.79, + "learning_rate": 4.427389902365389e-05, + "loss": 0.8709, + "step": 806 + }, + { + "epoch": 1.79, + "learning_rate": 4.413497897519229e-05, + "loss": 0.8902, + "step": 807 + }, + { + "epoch": 1.8, + "learning_rate": 4.399615030878056e-05, + "loss": 0.8592, + "step": 808 + }, + { + "epoch": 1.8, + "learning_rate": 4.385741382406768e-05, + "loss": 0.8872, + "step": 809 + }, + { + "epoch": 1.8, + "learning_rate": 4.3718770320171675e-05, + "loss": 0.8412, + "step": 810 + }, + { + "epoch": 1.8, + "learning_rate": 4.3580220595675034e-05, + "loss": 0.8991, + "step": 811 + }, + { + "epoch": 1.8, + "learning_rate": 4.344176544862003e-05, + "loss": 0.8699, + "step": 812 + }, + { + "epoch": 1.81, + "learning_rate": 4.330340567650421e-05, + "loss": 0.899, + "step": 813 + }, + { + "epoch": 1.81, + "learning_rate": 4.316514207627574e-05, + "loss": 0.8972, + "step": 814 + }, + { + "epoch": 1.81, + "learning_rate": 4.302697544432887e-05, + "loss": 0.9027, + "step": 815 + }, + { + "epoch": 1.81, + "learning_rate": 4.288890657649926e-05, + "loss": 0.8791, + "step": 816 + }, + { + "epoch": 1.82, + "learning_rate": 4.275093626805952e-05, + "loss": 0.8567, + "step": 817 + }, + { + "epoch": 1.82, + "learning_rate": 4.261306531371451e-05, + "loss": 0.8879, + "step": 818 + }, + { + "epoch": 1.82, + "learning_rate": 4.247529450759686e-05, + "loss": 0.8643, + "step": 819 + }, + { + "epoch": 1.82, + "learning_rate": 4.2337624643262304e-05, + "loss": 0.8892, + "step": 820 + }, + { + "epoch": 1.82, + "learning_rate": 4.220005651368516e-05, + "loss": 0.8808, + "step": 821 + }, + { + "epoch": 1.83, + "learning_rate": 4.2062590911253785e-05, + "loss": 0.8825, + "step": 822 + }, + { + "epoch": 1.83, + "learning_rate": 4.192522862776596e-05, + "loss": 0.9095, + "step": 823 + }, + { + "epoch": 1.83, + "learning_rate": 4.178797045442435e-05, + "loss": 0.8632, + "step": 824 + }, + { + "epoch": 1.83, + "learning_rate": 4.165081718183195e-05, + "loss": 0.8327, + "step": 825 + }, + { + "epoch": 1.84, + "learning_rate": 4.151376959998756e-05, + "loss": 0.8498, + "step": 826 + }, + { + "epoch": 1.84, + "learning_rate": 4.1376828498281155e-05, + "loss": 0.8993, + "step": 827 + }, + { + "epoch": 1.84, + "learning_rate": 4.123999466548944e-05, + "loss": 0.8822, + "step": 828 + }, + { + "epoch": 1.84, + "learning_rate": 4.110326888977122e-05, + "loss": 0.8647, + "step": 829 + }, + { + "epoch": 1.84, + "learning_rate": 4.09666519586629e-05, + "loss": 0.8726, + "step": 830 + }, + { + "epoch": 1.85, + "learning_rate": 4.083014465907394e-05, + "loss": 0.8798, + "step": 831 + }, + { + "epoch": 1.85, + "learning_rate": 4.0693747777282334e-05, + "loss": 0.909, + "step": 832 + }, + { + "epoch": 1.85, + "learning_rate": 4.0557462098930055e-05, + "loss": 0.8665, + "step": 833 + }, + { + "epoch": 1.85, + "learning_rate": 4.042128840901858e-05, + "loss": 0.8626, + "step": 834 + }, + { + "epoch": 1.86, + "learning_rate": 4.028522749190431e-05, + "loss": 0.8765, + "step": 835 + }, + { + "epoch": 1.86, + "learning_rate": 4.014928013129409e-05, + "loss": 0.8295, + "step": 836 + }, + { + "epoch": 1.86, + "learning_rate": 4.001344711024069e-05, + "loss": 0.8936, + "step": 837 + }, + { + "epoch": 1.86, + "learning_rate": 3.987772921113826e-05, + "loss": 0.893, + "step": 838 + }, + { + "epoch": 1.86, + "learning_rate": 3.974212721571788e-05, + "loss": 0.8726, + "step": 839 + }, + { + "epoch": 1.87, + "learning_rate": 3.960664190504301e-05, + "loss": 0.8881, + "step": 840 + }, + { + "epoch": 1.87, + "learning_rate": 3.9471274059505005e-05, + "loss": 0.8708, + "step": 841 + }, + { + "epoch": 1.87, + "learning_rate": 3.9336024458818635e-05, + "loss": 0.8602, + "step": 842 + }, + { + "epoch": 1.87, + "learning_rate": 3.920089388201759e-05, + "loss": 0.8906, + "step": 843 + }, + { + "epoch": 1.88, + "learning_rate": 3.906588310745e-05, + "loss": 0.8987, + "step": 844 + }, + { + "epoch": 1.88, + "learning_rate": 3.893099291277388e-05, + "loss": 0.898, + "step": 845 + }, + { + "epoch": 1.88, + "learning_rate": 3.879622407495277e-05, + "loss": 0.8516, + "step": 846 + }, + { + "epoch": 1.88, + "learning_rate": 3.866157737025116e-05, + "loss": 0.8571, + "step": 847 + }, + { + "epoch": 1.88, + "learning_rate": 3.852705357423007e-05, + "loss": 0.8516, + "step": 848 + }, + { + "epoch": 1.89, + "learning_rate": 3.8392653461742577e-05, + "loss": 0.8529, + "step": 849 + }, + { + "epoch": 1.89, + "learning_rate": 3.825837780692934e-05, + "loss": 0.8345, + "step": 850 + }, + { + "epoch": 1.89, + "learning_rate": 3.812422738321411e-05, + "loss": 0.8737, + "step": 851 + }, + { + "epoch": 1.89, + "learning_rate": 3.7990202963299366e-05, + "loss": 0.8744, + "step": 852 + }, + { + "epoch": 1.9, + "learning_rate": 3.785630531916181e-05, + "loss": 0.8445, + "step": 853 + }, + { + "epoch": 1.9, + "learning_rate": 3.772253522204784e-05, + "loss": 0.8953, + "step": 854 + }, + { + "epoch": 1.9, + "learning_rate": 3.7588893442469306e-05, + "loss": 0.8855, + "step": 855 + }, + { + "epoch": 1.9, + "learning_rate": 3.7455380750198846e-05, + "loss": 0.865, + "step": 856 + }, + { + "epoch": 1.9, + "learning_rate": 3.7321997914265616e-05, + "loss": 0.8641, + "step": 857 + }, + { + "epoch": 1.91, + "learning_rate": 3.718874570295078e-05, + "loss": 0.8741, + "step": 858 + }, + { + "epoch": 1.91, + "learning_rate": 3.705562488378312e-05, + "loss": 0.855, + "step": 859 + }, + { + "epoch": 1.91, + "learning_rate": 3.69226362235346e-05, + "loss": 0.8534, + "step": 860 + }, + { + "epoch": 1.91, + "learning_rate": 3.678978048821595e-05, + "loss": 0.8412, + "step": 861 + }, + { + "epoch": 1.92, + "learning_rate": 3.665705844307227e-05, + "loss": 0.8799, + "step": 862 + }, + { + "epoch": 1.92, + "learning_rate": 3.65244708525786e-05, + "loss": 0.88, + "step": 863 + }, + { + "epoch": 1.92, + "learning_rate": 3.6392018480435505e-05, + "loss": 0.8754, + "step": 864 + }, + { + "epoch": 1.92, + "learning_rate": 3.6259702089564735e-05, + "loss": 0.9, + "step": 865 + }, + { + "epoch": 1.92, + "learning_rate": 3.612752244210476e-05, + "loss": 0.8999, + "step": 866 + }, + { + "epoch": 1.93, + "learning_rate": 3.599548029940642e-05, + "loss": 0.8757, + "step": 867 + }, + { + "epoch": 1.93, + "learning_rate": 3.5863576422028536e-05, + "loss": 0.9028, + "step": 868 + }, + { + "epoch": 1.93, + "learning_rate": 3.573181156973351e-05, + "loss": 0.8594, + "step": 869 + }, + { + "epoch": 1.93, + "learning_rate": 3.560018650148302e-05, + "loss": 0.8576, + "step": 870 + }, + { + "epoch": 1.94, + "learning_rate": 3.5468701975433504e-05, + "loss": 0.8628, + "step": 871 + }, + { + "epoch": 1.94, + "learning_rate": 3.5337358748931946e-05, + "loss": 0.8762, + "step": 872 + }, + { + "epoch": 1.94, + "learning_rate": 3.520615757851144e-05, + "loss": 0.8671, + "step": 873 + }, + { + "epoch": 1.94, + "learning_rate": 3.507509921988682e-05, + "loss": 0.8806, + "step": 874 + }, + { + "epoch": 1.94, + "learning_rate": 3.4944184427950325e-05, + "loss": 0.8895, + "step": 875 + }, + { + "epoch": 1.95, + "learning_rate": 3.4813413956767295e-05, + "loss": 0.8812, + "step": 876 + }, + { + "epoch": 1.95, + "learning_rate": 3.468278855957174e-05, + "loss": 0.8756, + "step": 877 + }, + { + "epoch": 1.95, + "learning_rate": 3.455230898876205e-05, + "loss": 0.8863, + "step": 878 + }, + { + "epoch": 1.95, + "learning_rate": 3.4421975995896726e-05, + "loss": 0.8704, + "step": 879 + }, + { + "epoch": 1.96, + "learning_rate": 3.429179033168992e-05, + "loss": 0.8714, + "step": 880 + }, + { + "epoch": 1.96, + "learning_rate": 3.416175274600717e-05, + "loss": 0.8869, + "step": 881 + }, + { + "epoch": 1.96, + "learning_rate": 3.403186398786112e-05, + "loss": 0.8608, + "step": 882 + }, + { + "epoch": 1.96, + "learning_rate": 3.3902124805407154e-05, + "loss": 0.8984, + "step": 883 + }, + { + "epoch": 1.96, + "learning_rate": 3.377253594593912e-05, + "loss": 0.8477, + "step": 884 + }, + { + "epoch": 1.97, + "learning_rate": 3.364309815588499e-05, + "loss": 0.8735, + "step": 885 + }, + { + "epoch": 1.97, + "learning_rate": 3.351381218080258e-05, + "loss": 0.8592, + "step": 886 + }, + { + "epoch": 1.97, + "learning_rate": 3.3384678765375257e-05, + "loss": 0.837, + "step": 887 + }, + { + "epoch": 1.97, + "learning_rate": 3.325569865340771e-05, + "loss": 0.8653, + "step": 888 + }, + { + "epoch": 1.98, + "learning_rate": 3.312687258782151e-05, + "loss": 0.8385, + "step": 889 + }, + { + "epoch": 1.98, + "learning_rate": 3.2998201310650995e-05, + "loss": 0.8794, + "step": 890 + }, + { + "epoch": 1.98, + "learning_rate": 3.286968556303887e-05, + "loss": 0.8761, + "step": 891 + }, + { + "epoch": 1.98, + "learning_rate": 3.274132608523204e-05, + "loss": 0.882, + "step": 892 + }, + { + "epoch": 1.98, + "learning_rate": 3.261312361657727e-05, + "loss": 0.8849, + "step": 893 + }, + { + "epoch": 1.99, + "learning_rate": 3.2485078895517e-05, + "loss": 0.8571, + "step": 894 + }, + { + "epoch": 1.99, + "learning_rate": 3.235719265958498e-05, + "loss": 0.8554, + "step": 895 + }, + { + "epoch": 1.99, + "learning_rate": 3.222946564540217e-05, + "loss": 0.8459, + "step": 896 + }, + { + "epoch": 1.99, + "learning_rate": 3.210189858867238e-05, + "loss": 0.8627, + "step": 897 + }, + { + "epoch": 2.0, + "learning_rate": 3.197449222417804e-05, + "loss": 0.8543, + "step": 898 + }, + { + "epoch": 2.0, + "learning_rate": 3.184724728577603e-05, + "loss": 0.8538, + "step": 899 + }, + { + "epoch": 2.0, + "learning_rate": 3.1720164506393426e-05, + "loss": 0.8591, + "step": 900 + }, + { + "epoch": 2.0, + "learning_rate": 3.1593244618023246e-05, + "loss": 0.4844, + "step": 901 + }, + { + "epoch": 2.0, + "learning_rate": 3.14664883517203e-05, + "loss": 0.4434, + "step": 902 + }, + { + "epoch": 2.01, + "learning_rate": 3.13398964375969e-05, + "loss": 0.4157, + "step": 903 + }, + { + "epoch": 2.01, + "learning_rate": 3.121346960481869e-05, + "loss": 0.392, + "step": 904 + }, + { + "epoch": 2.01, + "learning_rate": 3.108720858160052e-05, + "loss": 0.4112, + "step": 905 + }, + { + "epoch": 2.01, + "learning_rate": 3.0961114095202115e-05, + "loss": 0.4132, + "step": 906 + }, + { + "epoch": 2.02, + "learning_rate": 3.083518687192397e-05, + "loss": 0.4043, + "step": 907 + }, + { + "epoch": 2.02, + "learning_rate": 3.070942763710314e-05, + "loss": 0.3904, + "step": 908 + }, + { + "epoch": 2.02, + "learning_rate": 3.0583837115109085e-05, + "loss": 0.3975, + "step": 909 + }, + { + "epoch": 2.02, + "learning_rate": 3.045841602933947e-05, + "loss": 0.3928, + "step": 910 + }, + { + "epoch": 2.02, + "learning_rate": 3.0333165102216057e-05, + "loss": 0.3887, + "step": 911 + }, + { + "epoch": 2.03, + "learning_rate": 3.020808505518045e-05, + "loss": 0.3765, + "step": 912 + }, + { + "epoch": 2.03, + "learning_rate": 3.0083176608689983e-05, + "loss": 0.3679, + "step": 913 + }, + { + "epoch": 2.03, + "learning_rate": 2.9958440482213635e-05, + "loss": 0.4016, + "step": 914 + }, + { + "epoch": 2.03, + "learning_rate": 2.9833877394227778e-05, + "loss": 0.3845, + "step": 915 + }, + { + "epoch": 2.04, + "learning_rate": 2.9709488062212084e-05, + "loss": 0.3582, + "step": 916 + }, + { + "epoch": 2.04, + "learning_rate": 2.9585273202645425e-05, + "loss": 0.3709, + "step": 917 + }, + { + "epoch": 2.04, + "learning_rate": 2.9461233531001697e-05, + "loss": 0.3715, + "step": 918 + }, + { + "epoch": 2.04, + "learning_rate": 2.9337369761745715e-05, + "loss": 0.3608, + "step": 919 + }, + { + "epoch": 2.04, + "learning_rate": 2.9213682608329127e-05, + "loss": 0.3733, + "step": 920 + }, + { + "epoch": 2.05, + "learning_rate": 2.9090172783186234e-05, + "loss": 0.3788, + "step": 921 + }, + { + "epoch": 2.05, + "learning_rate": 2.8966840997729996e-05, + "loss": 0.3656, + "step": 922 + }, + { + "epoch": 2.05, + "learning_rate": 2.884368796234783e-05, + "loss": 0.3637, + "step": 923 + }, + { + "epoch": 2.05, + "learning_rate": 2.8720714386397548e-05, + "loss": 0.3615, + "step": 924 + }, + { + "epoch": 2.06, + "learning_rate": 2.8597920978203288e-05, + "loss": 0.3734, + "step": 925 + }, + { + "epoch": 2.06, + "learning_rate": 2.8475308445051456e-05, + "loss": 0.3603, + "step": 926 + }, + { + "epoch": 2.06, + "learning_rate": 2.835287749318658e-05, + "loss": 0.3749, + "step": 927 + }, + { + "epoch": 2.06, + "learning_rate": 2.8230628827807295e-05, + "loss": 0.3683, + "step": 928 + }, + { + "epoch": 2.06, + "learning_rate": 2.8108563153062326e-05, + "loss": 0.3673, + "step": 929 + }, + { + "epoch": 2.07, + "learning_rate": 2.7986681172046306e-05, + "loss": 0.3817, + "step": 930 + }, + { + "epoch": 2.07, + "learning_rate": 2.7864983586795808e-05, + "loss": 0.3785, + "step": 931 + }, + { + "epoch": 2.07, + "learning_rate": 2.774347109828535e-05, + "loss": 0.3589, + "step": 932 + }, + { + "epoch": 2.07, + "learning_rate": 2.7622144406423235e-05, + "loss": 0.3586, + "step": 933 + }, + { + "epoch": 2.08, + "learning_rate": 2.7501004210047614e-05, + "loss": 0.3766, + "step": 934 + }, + { + "epoch": 2.08, + "learning_rate": 2.73800512069224e-05, + "loss": 0.359, + "step": 935 + }, + { + "epoch": 2.08, + "learning_rate": 2.7259286093733313e-05, + "loss": 0.3778, + "step": 936 + }, + { + "epoch": 2.08, + "learning_rate": 2.7138709566083814e-05, + "loss": 0.3664, + "step": 937 + }, + { + "epoch": 2.08, + "learning_rate": 2.701832231849112e-05, + "loss": 0.3484, + "step": 938 + }, + { + "epoch": 2.09, + "learning_rate": 2.6898125044382186e-05, + "loss": 0.3625, + "step": 939 + }, + { + "epoch": 2.09, + "learning_rate": 2.6778118436089773e-05, + "loss": 0.3585, + "step": 940 + }, + { + "epoch": 2.09, + "learning_rate": 2.6658303184848363e-05, + "loss": 0.3774, + "step": 941 + }, + { + "epoch": 2.09, + "learning_rate": 2.6538679980790244e-05, + "loss": 0.3574, + "step": 942 + }, + { + "epoch": 2.1, + "learning_rate": 2.6419249512941523e-05, + "loss": 0.342, + "step": 943 + }, + { + "epoch": 2.1, + "learning_rate": 2.630001246921814e-05, + "loss": 0.3684, + "step": 944 + }, + { + "epoch": 2.1, + "learning_rate": 2.6180969536421928e-05, + "loss": 0.373, + "step": 945 + }, + { + "epoch": 2.1, + "learning_rate": 2.6062121400236623e-05, + "loss": 0.3772, + "step": 946 + }, + { + "epoch": 2.1, + "learning_rate": 2.5943468745224004e-05, + "loss": 0.3624, + "step": 947 + }, + { + "epoch": 2.11, + "learning_rate": 2.5825012254819793e-05, + "loss": 0.3545, + "step": 948 + }, + { + "epoch": 2.11, + "learning_rate": 2.5706752611329903e-05, + "loss": 0.3572, + "step": 949 + }, + { + "epoch": 2.11, + "learning_rate": 2.5588690495926343e-05, + "loss": 0.3541, + "step": 950 + }, + { + "epoch": 2.11, + "learning_rate": 2.5470826588643393e-05, + "loss": 0.3597, + "step": 951 + }, + { + "epoch": 2.12, + "learning_rate": 2.5353161568373653e-05, + "loss": 0.3567, + "step": 952 + }, + { + "epoch": 2.12, + "learning_rate": 2.523569611286415e-05, + "loss": 0.3684, + "step": 953 + }, + { + "epoch": 2.12, + "learning_rate": 2.5118430898712418e-05, + "loss": 0.3586, + "step": 954 + }, + { + "epoch": 2.12, + "learning_rate": 2.5001366601362606e-05, + "loss": 0.3616, + "step": 955 + }, + { + "epoch": 2.12, + "learning_rate": 2.488450389510157e-05, + "loss": 0.3661, + "step": 956 + }, + { + "epoch": 2.13, + "learning_rate": 2.4767843453055065e-05, + "loss": 0.3443, + "step": 957 + }, + { + "epoch": 2.13, + "learning_rate": 2.4651385947183756e-05, + "loss": 0.3586, + "step": 958 + }, + { + "epoch": 2.13, + "learning_rate": 2.4535132048279413e-05, + "loss": 0.3522, + "step": 959 + }, + { + "epoch": 2.13, + "learning_rate": 2.4419082425961047e-05, + "loss": 0.3683, + "step": 960 + }, + { + "epoch": 2.14, + "learning_rate": 2.4303237748671032e-05, + "loss": 0.3437, + "step": 961 + }, + { + "epoch": 2.14, + "learning_rate": 2.418759868367126e-05, + "loss": 0.3581, + "step": 962 + }, + { + "epoch": 2.14, + "learning_rate": 2.4072165897039327e-05, + "loss": 0.376, + "step": 963 + }, + { + "epoch": 2.14, + "learning_rate": 2.3956940053664616e-05, + "loss": 0.3558, + "step": 964 + }, + { + "epoch": 2.14, + "learning_rate": 2.384192181724462e-05, + "loss": 0.3449, + "step": 965 + }, + { + "epoch": 2.15, + "learning_rate": 2.3727111850280917e-05, + "loss": 0.3712, + "step": 966 + }, + { + "epoch": 2.15, + "learning_rate": 2.361251081407555e-05, + "loss": 0.3631, + "step": 967 + }, + { + "epoch": 2.15, + "learning_rate": 2.3498119368727052e-05, + "loss": 0.3532, + "step": 968 + }, + { + "epoch": 2.15, + "learning_rate": 2.3383938173126764e-05, + "loss": 0.3545, + "step": 969 + }, + { + "epoch": 2.16, + "learning_rate": 2.3269967884954974e-05, + "loss": 0.3638, + "step": 970 + }, + { + "epoch": 2.16, + "learning_rate": 2.315620916067716e-05, + "loss": 0.3532, + "step": 971 + }, + { + "epoch": 2.16, + "learning_rate": 2.304266265554019e-05, + "loss": 0.3485, + "step": 972 + }, + { + "epoch": 2.16, + "learning_rate": 2.2929329023568543e-05, + "loss": 0.3604, + "step": 973 + }, + { + "epoch": 2.16, + "learning_rate": 2.281620891756061e-05, + "loss": 0.3547, + "step": 974 + }, + { + "epoch": 2.17, + "learning_rate": 2.2703302989084833e-05, + "loss": 0.3621, + "step": 975 + }, + { + "epoch": 2.17, + "learning_rate": 2.2590611888475993e-05, + "loss": 0.3608, + "step": 976 + }, + { + "epoch": 2.17, + "learning_rate": 2.2478136264831488e-05, + "loss": 0.3623, + "step": 977 + }, + { + "epoch": 2.17, + "learning_rate": 2.236587676600757e-05, + "loss": 0.3622, + "step": 978 + }, + { + "epoch": 2.18, + "learning_rate": 2.225383403861562e-05, + "loss": 0.3664, + "step": 979 + }, + { + "epoch": 2.18, + "learning_rate": 2.214200872801841e-05, + "loss": 0.3603, + "step": 980 + }, + { + "epoch": 2.18, + "learning_rate": 2.2030401478326398e-05, + "loss": 0.3437, + "step": 981 + }, + { + "epoch": 2.18, + "learning_rate": 2.191901293239406e-05, + "loss": 0.3715, + "step": 982 + }, + { + "epoch": 2.18, + "learning_rate": 2.180784373181609e-05, + "loss": 0.3613, + "step": 983 + }, + { + "epoch": 2.19, + "learning_rate": 2.1696894516923776e-05, + "loss": 0.3566, + "step": 984 + }, + { + "epoch": 2.19, + "learning_rate": 2.1586165926781324e-05, + "loss": 0.3476, + "step": 985 + }, + { + "epoch": 2.19, + "learning_rate": 2.147565859918211e-05, + "loss": 0.3506, + "step": 986 + }, + { + "epoch": 2.19, + "learning_rate": 2.1365373170645063e-05, + "loss": 0.3543, + "step": 987 + }, + { + "epoch": 2.2, + "learning_rate": 2.1255310276410968e-05, + "loss": 0.3517, + "step": 988 + }, + { + "epoch": 2.2, + "learning_rate": 2.114547055043883e-05, + "loss": 0.3567, + "step": 989 + }, + { + "epoch": 2.2, + "learning_rate": 2.103585462540221e-05, + "loss": 0.3647, + "step": 990 + }, + { + "epoch": 2.2, + "learning_rate": 2.0926463132685555e-05, + "loss": 0.364, + "step": 991 + }, + { + "epoch": 2.2, + "learning_rate": 2.0817296702380662e-05, + "loss": 0.3489, + "step": 992 + }, + { + "epoch": 2.21, + "learning_rate": 2.07083559632829e-05, + "loss": 0.3516, + "step": 993 + }, + { + "epoch": 2.21, + "learning_rate": 2.059964154288769e-05, + "loss": 0.3576, + "step": 994 + }, + { + "epoch": 2.21, + "learning_rate": 2.049115406738687e-05, + "loss": 0.3573, + "step": 995 + }, + { + "epoch": 2.21, + "learning_rate": 2.0382894161665065e-05, + "loss": 0.3637, + "step": 996 + }, + { + "epoch": 2.22, + "learning_rate": 2.0274862449296132e-05, + "loss": 0.348, + "step": 997 + }, + { + "epoch": 2.22, + "learning_rate": 2.016705955253951e-05, + "loss": 0.361, + "step": 998 + }, + { + "epoch": 2.22, + "learning_rate": 2.005948609233668e-05, + "loss": 0.3559, + "step": 999 + }, + { + "epoch": 2.22, + "learning_rate": 1.99521426883076e-05, + "loss": 0.3608, + "step": 1000 + } + ], + "max_steps": 1350, + "num_train_epochs": 3, + "total_flos": 9.936494717011231e+17, + "trial_name": null, + "trial_params": null +}