diff --git "a/last-checkpoint/trainer_state.json" "b/last-checkpoint/trainer_state.json" --- "a/last-checkpoint/trainer_state.json" +++ "b/last-checkpoint/trainer_state.json" @@ -1,8 +1,8 @@ { - "best_metric": 1.9827316999435425, - "best_model_checkpoint": "output/checkpoint-2200", - "epoch": 1.5041047745712288, - "global_step": 2200, + "best_metric": 2.021120071411133, + "best_model_checkpoint": "output/checkpoint-400", + "epoch": 0.27347359537658705, + "global_step": 400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, @@ -10,13295 +10,2415 @@ { "epoch": 0.0, "learning_rate": 2.9999999999999997e-06, - "loss": 2.1853, + "loss": 1.8876, "step": 1 }, { "epoch": 0.0, "learning_rate": 5.999999999999999e-06, - "loss": 2.3789, + "loss": 2.0725, "step": 2 }, { "epoch": 0.0, "learning_rate": 8.999999999999999e-06, - "loss": 2.3312, + "loss": 2.0423, "step": 3 }, { "epoch": 0.0, "learning_rate": 1.1999999999999999e-05, - "loss": 2.2895, + "loss": 2.0022, "step": 4 }, { "epoch": 0.0, "learning_rate": 1.4999999999999999e-05, - "loss": 2.3325, + "loss": 2.0503, "step": 5 }, { "epoch": 0.0, "learning_rate": 1.7999999999999997e-05, - "loss": 2.3354, + "loss": 2.0501, "step": 6 }, { "epoch": 0.0, "learning_rate": 2.1e-05, - "loss": 2.3068, + "loss": 2.0322, "step": 7 }, { "epoch": 0.01, "learning_rate": 2.3999999999999997e-05, - "loss": 2.3766, + "loss": 2.0892, "step": 8 }, { "epoch": 0.01, "learning_rate": 2.6999999999999996e-05, - "loss": 2.3448, + "loss": 2.0786, "step": 9 }, { "epoch": 0.01, "learning_rate": 2.9999999999999997e-05, - "loss": 2.2754, + "loss": 2.0111, "step": 10 }, { "epoch": 0.01, "learning_rate": 3.2999999999999996e-05, - "loss": 2.2119, + "loss": 1.9539, "step": 11 }, { "epoch": 0.01, "learning_rate": 3.5999999999999994e-05, - "loss": 2.3228, + "loss": 2.0585, "step": 12 }, { "epoch": 0.01, "learning_rate": 3.9e-05, - "loss": 2.2545, + "loss": 2.0, "step": 13 }, { "epoch": 0.01, "learning_rate": 4.2e-05, - "loss": 2.2331, + "loss": 1.9882, "step": 14 }, { "epoch": 0.01, "learning_rate": 4.4999999999999996e-05, - "loss": 2.1402, + "loss": 1.8964, "step": 15 }, { "epoch": 0.01, "learning_rate": 4.7999999999999994e-05, - "loss": 2.2808, + "loss": 2.0331, "step": 16 }, { "epoch": 0.01, "learning_rate": 5.1e-05, - "loss": 2.3187, + "loss": 2.0569, "step": 17 }, { "epoch": 0.01, "learning_rate": 5.399999999999999e-05, - "loss": 2.2397, + "loss": 1.9941, "step": 18 }, { "epoch": 0.01, "learning_rate": 5.6999999999999996e-05, - "loss": 2.1723, + "loss": 1.9494, "step": 19 }, { "epoch": 0.01, "learning_rate": 5.9999999999999995e-05, - "loss": 2.2569, + "loss": 2.0266, "step": 20 }, { "epoch": 0.01, "learning_rate": 6.299999999999999e-05, - "loss": 2.1196, + "loss": 1.9042, "step": 21 }, { "epoch": 0.02, "learning_rate": 6.599999999999999e-05, - "loss": 2.2139, + "loss": 2.009, "step": 22 }, { "epoch": 0.02, "learning_rate": 6.9e-05, - "loss": 2.1917, + "loss": 1.9669, "step": 23 }, { "epoch": 0.02, "learning_rate": 7.199999999999999e-05, - "loss": 2.2482, + "loss": 2.0222, "step": 24 }, { "epoch": 0.02, "learning_rate": 7.5e-05, - "loss": 2.099, + "loss": 1.8443, "step": 25 }, { "epoch": 0.02, "learning_rate": 7.8e-05, - "loss": 2.1668, + "loss": 1.9351, "step": 26 }, { "epoch": 0.02, "learning_rate": 8.1e-05, - "loss": 2.2079, + "loss": 1.9858, "step": 27 }, { "epoch": 0.02, "learning_rate": 8.4e-05, - "loss": 2.2618, + "loss": 2.0311, "step": 28 }, { "epoch": 0.02, "learning_rate": 8.699999999999999e-05, - "loss": 2.2295, + "loss": 1.9912, "step": 29 }, { "epoch": 0.02, "learning_rate": 8.999999999999999e-05, - "loss": 2.2095, + "loss": 2.0, "step": 30 }, { "epoch": 0.02, "learning_rate": 9.3e-05, - "loss": 2.219, + "loss": 2.0135, "step": 31 }, { "epoch": 0.02, "learning_rate": 9.599999999999999e-05, - "loss": 2.172, + "loss": 1.9629, "step": 32 }, { "epoch": 0.02, "learning_rate": 9.9e-05, - "loss": 2.2062, + "loss": 1.9932, "step": 33 }, { "epoch": 0.02, "learning_rate": 0.000102, - "loss": 2.2535, + "loss": 2.0303, "step": 34 }, { "epoch": 0.02, "learning_rate": 0.00010499999999999999, - "loss": 2.164, + "loss": 1.9356, "step": 35 }, { "epoch": 0.02, "learning_rate": 0.00010799999999999998, - "loss": 2.2069, + "loss": 1.9696, "step": 36 }, { "epoch": 0.03, "learning_rate": 0.00011099999999999999, - "loss": 2.1966, + "loss": 1.9832, "step": 37 }, { "epoch": 0.03, "learning_rate": 0.00011399999999999999, - "loss": 2.1929, + "loss": 1.9901, "step": 38 }, { "epoch": 0.03, "learning_rate": 0.000117, - "loss": 2.2215, + "loss": 2.017, "step": 39 }, { "epoch": 0.03, "learning_rate": 0.00011999999999999999, - "loss": 2.2541, + "loss": 2.0139, "step": 40 }, { "epoch": 0.03, "learning_rate": 0.00012299999999999998, - "loss": 2.1316, + "loss": 1.9377, "step": 41 }, { "epoch": 0.03, "learning_rate": 0.00012599999999999997, - "loss": 2.0486, + "loss": 1.8405, "step": 42 }, { "epoch": 0.03, "learning_rate": 0.000129, - "loss": 2.2175, + "loss": 1.9986, "step": 43 }, { "epoch": 0.03, "learning_rate": 0.00013199999999999998, - "loss": 2.2277, + "loss": 2.0137, "step": 44 }, { "epoch": 0.03, "learning_rate": 0.000135, - "loss": 2.2629, + "loss": 2.0437, "step": 45 }, { "epoch": 0.03, "learning_rate": 0.000138, - "loss": 2.2549, + "loss": 2.0392, "step": 46 }, { "epoch": 0.03, "learning_rate": 0.00014099999999999998, - "loss": 2.1836, + "loss": 1.964, "step": 47 }, { "epoch": 0.03, "learning_rate": 0.00014399999999999998, - "loss": 2.1772, + "loss": 1.9871, "step": 48 }, { "epoch": 0.03, "learning_rate": 0.000147, - "loss": 2.2013, + "loss": 1.9833, "step": 49 }, { "epoch": 0.03, "learning_rate": 0.00015, - "loss": 2.2103, + "loss": 2.0132, "step": 50 }, { "epoch": 0.03, "learning_rate": 0.00015299999999999998, - "loss": 2.244, + "loss": 2.0262, "step": 51 }, { "epoch": 0.04, "learning_rate": 0.000156, - "loss": 2.178, + "loss": 1.9727, "step": 52 }, { "epoch": 0.04, "learning_rate": 0.000159, - "loss": 2.1942, + "loss": 1.9904, "step": 53 }, { "epoch": 0.04, "learning_rate": 0.000162, - "loss": 2.171, + "loss": 1.955, "step": 54 }, { "epoch": 0.04, "learning_rate": 0.000165, - "loss": 2.2166, + "loss": 1.9937, "step": 55 }, { "epoch": 0.04, "learning_rate": 0.000168, - "loss": 2.2737, + "loss": 2.0414, "step": 56 }, { "epoch": 0.04, "learning_rate": 0.00017099999999999998, - "loss": 2.179, + "loss": 1.9754, "step": 57 }, { "epoch": 0.04, "learning_rate": 0.00017399999999999997, - "loss": 2.173, + "loss": 1.9559, "step": 58 }, { "epoch": 0.04, "learning_rate": 0.00017699999999999997, - "loss": 2.1753, + "loss": 1.971, "step": 59 }, { "epoch": 0.04, "learning_rate": 0.00017999999999999998, - "loss": 2.1591, + "loss": 1.959, "step": 60 }, { "epoch": 0.04, "learning_rate": 0.00018299999999999998, - "loss": 2.1936, + "loss": 1.973, "step": 61 }, { "epoch": 0.04, "learning_rate": 0.000186, - "loss": 2.1971, + "loss": 1.9736, "step": 62 }, { "epoch": 0.04, "learning_rate": 0.00018899999999999999, - "loss": 2.209, + "loss": 2.01, "step": 63 }, { "epoch": 0.04, "learning_rate": 0.00019199999999999998, - "loss": 2.2032, + "loss": 1.9916, "step": 64 }, { "epoch": 0.04, "learning_rate": 0.000195, - "loss": 2.0609, + "loss": 1.8412, "step": 65 }, { "epoch": 0.05, "learning_rate": 0.000198, - "loss": 2.1394, + "loss": 1.9188, "step": 66 }, { "epoch": 0.05, "learning_rate": 0.000201, - "loss": 2.1906, + "loss": 1.9836, "step": 67 }, { "epoch": 0.05, "learning_rate": 0.000204, - "loss": 2.1658, + "loss": 1.9659, "step": 68 }, { "epoch": 0.05, "learning_rate": 0.00020699999999999996, - "loss": 2.0702, + "loss": 1.8651, "step": 69 }, { "epoch": 0.05, "learning_rate": 0.00020999999999999998, - "loss": 2.0302, + "loss": 1.8155, "step": 70 }, { "epoch": 0.05, "learning_rate": 0.00021299999999999997, - "loss": 2.1205, + "loss": 1.9043, "step": 71 }, { "epoch": 0.05, "learning_rate": 0.00021599999999999996, - "loss": 2.1048, + "loss": 1.9087, "step": 72 }, { "epoch": 0.05, "learning_rate": 0.00021899999999999998, - "loss": 2.2125, + "loss": 2.0042, "step": 73 }, { "epoch": 0.05, "learning_rate": 0.00022199999999999998, - "loss": 2.1554, + "loss": 1.9537, "step": 74 }, { "epoch": 0.05, "learning_rate": 0.000225, - "loss": 2.1242, + "loss": 1.9211, "step": 75 }, { "epoch": 0.05, "learning_rate": 0.00022799999999999999, - "loss": 2.1383, + "loss": 1.9345, "step": 76 }, { "epoch": 0.05, "learning_rate": 0.00023099999999999998, - "loss": 2.0299, + "loss": 1.84, "step": 77 }, { "epoch": 0.05, "learning_rate": 0.000234, - "loss": 2.1578, + "loss": 1.9432, "step": 78 }, { "epoch": 0.05, "learning_rate": 0.000237, - "loss": 2.1432, + "loss": 1.9381, "step": 79 }, { "epoch": 0.05, "learning_rate": 0.00023999999999999998, - "loss": 2.1129, + "loss": 1.9052, "step": 80 }, { "epoch": 0.06, "learning_rate": 0.000243, - "loss": 2.1672, + "loss": 1.9634, "step": 81 }, { "epoch": 0.06, "learning_rate": 0.00024599999999999996, - "loss": 2.1425, + "loss": 1.9323, "step": 82 }, { "epoch": 0.06, "learning_rate": 0.000249, - "loss": 2.2758, + "loss": 2.0616, "step": 83 }, { "epoch": 0.06, "learning_rate": 0.00025199999999999995, - "loss": 2.173, + "loss": 1.971, "step": 84 }, { "epoch": 0.06, "learning_rate": 0.00025499999999999996, - "loss": 2.1262, + "loss": 1.9197, "step": 85 }, { "epoch": 0.06, "learning_rate": 0.000258, - "loss": 2.1657, + "loss": 1.9734, "step": 86 }, { "epoch": 0.06, "learning_rate": 0.000261, - "loss": 2.0322, + "loss": 1.8279, "step": 87 }, { "epoch": 0.06, "learning_rate": 0.00026399999999999997, - "loss": 2.0887, + "loss": 1.8787, "step": 88 }, { "epoch": 0.06, "learning_rate": 0.000267, - "loss": 2.135, + "loss": 1.9271, "step": 89 }, { "epoch": 0.06, "learning_rate": 0.00027, - "loss": 2.1048, + "loss": 1.8918, "step": 90 }, { "epoch": 0.06, "learning_rate": 0.00027299999999999997, - "loss": 2.1313, + "loss": 1.9327, "step": 91 }, { "epoch": 0.06, "learning_rate": 0.000276, - "loss": 2.0808, + "loss": 1.8631, "step": 92 }, { "epoch": 0.06, "learning_rate": 0.000279, - "loss": 2.0489, + "loss": 1.8385, "step": 93 }, { "epoch": 0.06, "learning_rate": 0.00028199999999999997, - "loss": 2.1537, + "loss": 1.9437, "step": 94 }, { "epoch": 0.06, "learning_rate": 0.000285, - "loss": 2.1017, + "loss": 1.8879, "step": 95 }, { "epoch": 0.07, "learning_rate": 0.00028799999999999995, - "loss": 2.1312, + "loss": 1.9186, "step": 96 }, { "epoch": 0.07, "learning_rate": 0.00029099999999999997, - "loss": 2.1248, + "loss": 1.9182, "step": 97 }, { "epoch": 0.07, "learning_rate": 0.000294, - "loss": 2.0856, + "loss": 1.8664, "step": 98 }, { "epoch": 0.07, "learning_rate": 0.00029699999999999996, - "loss": 2.1286, + "loss": 1.9239, "step": 99 }, { "epoch": 0.07, "learning_rate": 0.0003, - "loss": 2.202, + "loss": 1.9942, "step": 100 }, { "epoch": 0.07, "learning_rate": 0.000303, - "loss": 2.0967, + "loss": 1.8882, "step": 101 }, { "epoch": 0.07, "learning_rate": 0.00030599999999999996, - "loss": 2.1731, + "loss": 1.9568, "step": 102 }, { "epoch": 0.07, "learning_rate": 0.000309, - "loss": 2.1321, + "loss": 1.9213, "step": 103 }, { "epoch": 0.07, "learning_rate": 0.000312, - "loss": 2.0159, + "loss": 1.8019, "step": 104 }, { "epoch": 0.07, "learning_rate": 0.00031499999999999996, - "loss": 2.071, + "loss": 1.8682, "step": 105 }, { "epoch": 0.07, "learning_rate": 0.000318, - "loss": 2.0542, + "loss": 1.8517, "step": 106 }, { "epoch": 0.07, "learning_rate": 0.000321, - "loss": 2.1085, + "loss": 1.8922, "step": 107 }, { "epoch": 0.07, "learning_rate": 0.000324, - "loss": 2.1758, + "loss": 1.9581, "step": 108 }, { "epoch": 0.07, "learning_rate": 0.000327, - "loss": 2.1243, + "loss": 1.9073, "step": 109 }, { "epoch": 0.08, "learning_rate": 0.00033, - "loss": 2.0375, + "loss": 1.8199, "step": 110 }, { "epoch": 0.08, "learning_rate": 0.000333, - "loss": 2.1409, + "loss": 1.933, "step": 111 }, { "epoch": 0.08, "learning_rate": 0.000336, - "loss": 2.1071, + "loss": 1.8907, "step": 112 }, { "epoch": 0.08, "learning_rate": 0.00033899999999999995, - "loss": 2.1259, + "loss": 1.9093, "step": 113 }, { "epoch": 0.08, "learning_rate": 0.00034199999999999996, - "loss": 2.0782, + "loss": 1.8501, "step": 114 }, { "epoch": 0.08, "learning_rate": 0.00034499999999999993, - "loss": 2.2005, + "loss": 1.968, "step": 115 }, { "epoch": 0.08, "learning_rate": 0.00034799999999999995, - "loss": 2.1199, + "loss": 1.8998, "step": 116 }, { "epoch": 0.08, "learning_rate": 0.00035099999999999997, - "loss": 2.1757, + "loss": 1.9588, "step": 117 }, { "epoch": 0.08, "learning_rate": 0.00035399999999999993, - "loss": 2.0705, + "loss": 1.8592, "step": 118 }, { "epoch": 0.08, "learning_rate": 0.00035699999999999995, - "loss": 2.1261, + "loss": 1.9135, "step": 119 }, { "epoch": 0.08, "learning_rate": 0.00035999999999999997, - "loss": 2.0633, + "loss": 1.8535, "step": 120 }, { "epoch": 0.08, "learning_rate": 0.00036299999999999993, - "loss": 2.1012, + "loss": 1.8781, "step": 121 }, { "epoch": 0.08, "learning_rate": 0.00036599999999999995, - "loss": 2.1077, + "loss": 1.8987, "step": 122 }, { "epoch": 0.08, "learning_rate": 0.00036899999999999997, - "loss": 2.0978, + "loss": 1.8756, "step": 123 }, { "epoch": 0.08, "learning_rate": 0.000372, - "loss": 2.0924, + "loss": 1.8677, "step": 124 }, { "epoch": 0.09, "learning_rate": 0.00037499999999999995, - "loss": 2.0565, + "loss": 1.8222, "step": 125 }, { "epoch": 0.09, "learning_rate": 0.00037799999999999997, - "loss": 2.1612, + "loss": 1.9473, "step": 126 }, { "epoch": 0.09, "learning_rate": 0.000381, - "loss": 2.0699, + "loss": 1.8606, "step": 127 }, { "epoch": 0.09, "learning_rate": 0.00038399999999999996, - "loss": 2.1886, + "loss": 1.9605, "step": 128 }, { "epoch": 0.09, "learning_rate": 0.000387, - "loss": 2.1232, + "loss": 1.9135, "step": 129 }, { "epoch": 0.09, "learning_rate": 0.00039, - "loss": 2.1499, + "loss": 1.9203, "step": 130 }, { "epoch": 0.09, "learning_rate": 0.00039299999999999996, - "loss": 2.1805, + "loss": 1.9645, "step": 131 }, { "epoch": 0.09, "learning_rate": 0.000396, - "loss": 2.0487, + "loss": 1.8207, "step": 132 }, { "epoch": 0.09, "learning_rate": 0.000399, - "loss": 2.1278, + "loss": 1.9054, "step": 133 }, { "epoch": 0.09, "learning_rate": 0.000402, - "loss": 2.0348, + "loss": 1.8238, "step": 134 }, { "epoch": 0.09, "learning_rate": 0.000405, - "loss": 2.1963, + "loss": 1.9795, "step": 135 }, { "epoch": 0.09, "learning_rate": 0.000408, - "loss": 2.1453, + "loss": 1.923, "step": 136 }, { "epoch": 0.09, "learning_rate": 0.000411, - "loss": 2.035, + "loss": 1.8111, "step": 137 }, { "epoch": 0.09, "learning_rate": 0.0004139999999999999, - "loss": 2.0129, + "loss": 1.8022, "step": 138 }, { "epoch": 0.1, "learning_rate": 0.00041699999999999994, - "loss": 2.0832, + "loss": 1.8601, "step": 139 }, { "epoch": 0.1, "learning_rate": 0.00041999999999999996, - "loss": 2.1616, + "loss": 1.9157, "step": 140 }, { "epoch": 0.1, "learning_rate": 0.00042299999999999993, - "loss": 2.1153, + "loss": 1.8795, "step": 141 }, { "epoch": 0.1, "learning_rate": 0.00042599999999999995, - "loss": 2.0437, + "loss": 1.8175, "step": 142 }, { "epoch": 0.1, "learning_rate": 0.00042899999999999997, - "loss": 2.1931, + "loss": 1.9633, "step": 143 }, { "epoch": 0.1, "learning_rate": 0.00043199999999999993, - "loss": 1.9648, + "loss": 1.7384, "step": 144 }, { "epoch": 0.1, "learning_rate": 0.00043499999999999995, - "loss": 2.0986, + "loss": 1.862, "step": 145 }, { "epoch": 0.1, "learning_rate": 0.00043799999999999997, - "loss": 2.0978, + "loss": 1.8612, "step": 146 }, { "epoch": 0.1, "learning_rate": 0.00044099999999999993, - "loss": 2.0683, + "loss": 1.8592, "step": 147 }, { "epoch": 0.1, "learning_rate": 0.00044399999999999995, - "loss": 2.0536, + "loss": 1.8175, "step": 148 }, { "epoch": 0.1, "learning_rate": 0.00044699999999999997, - "loss": 2.1095, + "loss": 1.8907, "step": 149 }, { "epoch": 0.1, "learning_rate": 0.00045, - "loss": 2.154, + "loss": 1.9142, "step": 150 }, { "epoch": 0.1, "learning_rate": 0.00045299999999999995, - "loss": 2.1351, + "loss": 1.8859, "step": 151 }, { "epoch": 0.1, "learning_rate": 0.00045599999999999997, - "loss": 2.0517, + "loss": 1.8369, "step": 152 }, { "epoch": 0.1, "learning_rate": 0.000459, - "loss": 2.1572, + "loss": 1.914, "step": 153 }, { "epoch": 0.11, "learning_rate": 0.00046199999999999995, - "loss": 2.0666, + "loss": 1.8381, "step": 154 }, { "epoch": 0.11, "learning_rate": 0.00046499999999999997, - "loss": 2.195, + "loss": 1.96, "step": 155 }, { "epoch": 0.11, "learning_rate": 0.000468, - "loss": 1.9685, + "loss": 1.7527, "step": 156 }, { "epoch": 0.11, "learning_rate": 0.00047099999999999996, - "loss": 2.11, + "loss": 1.8762, "step": 157 }, { "epoch": 0.11, "learning_rate": 0.000474, - "loss": 2.1476, + "loss": 1.9295, "step": 158 }, { "epoch": 0.11, "learning_rate": 0.000477, - "loss": 2.0957, + "loss": 1.8433, "step": 159 }, { "epoch": 0.11, "learning_rate": 0.00047999999999999996, - "loss": 1.9753, + "loss": 1.7407, "step": 160 }, { "epoch": 0.11, "learning_rate": 0.000483, - "loss": 2.1659, + "loss": 1.9237, "step": 161 }, { "epoch": 0.11, "learning_rate": 0.000486, - "loss": 2.0978, + "loss": 1.8505, "step": 162 }, { "epoch": 0.11, "learning_rate": 0.000489, - "loss": 2.0742, + "loss": 1.8386, "step": 163 }, { "epoch": 0.11, "learning_rate": 0.0004919999999999999, - "loss": 2.0865, + "loss": 1.8489, "step": 164 }, { "epoch": 0.11, "learning_rate": 0.0004949999999999999, - "loss": 2.0911, + "loss": 1.8447, "step": 165 }, { "epoch": 0.11, "learning_rate": 0.000498, - "loss": 2.1147, + "loss": 1.8709, "step": 166 }, { "epoch": 0.11, "learning_rate": 0.0005009999999999999, - "loss": 1.9884, + "loss": 1.7427, "step": 167 }, { "epoch": 0.11, "learning_rate": 0.0005039999999999999, - "loss": 1.9497, + "loss": 1.7115, "step": 168 }, { "epoch": 0.12, "learning_rate": 0.000507, - "loss": 2.1134, + "loss": 1.8692, "step": 169 }, { "epoch": 0.12, "learning_rate": 0.0005099999999999999, - "loss": 2.1097, + "loss": 1.8863, "step": 170 }, { "epoch": 0.12, "learning_rate": 0.0005129999999999999, - "loss": 2.0186, + "loss": 1.7856, "step": 171 }, { "epoch": 0.12, "learning_rate": 0.000516, - "loss": 2.0772, + "loss": 1.8338, "step": 172 }, { "epoch": 0.12, "learning_rate": 0.0005189999999999999, - "loss": 2.1284, + "loss": 1.9064, "step": 173 }, { "epoch": 0.12, "learning_rate": 0.000522, - "loss": 1.9895, + "loss": 1.7566, "step": 174 }, { "epoch": 0.12, "learning_rate": 0.000525, - "loss": 2.0206, + "loss": 1.7792, "step": 175 }, { "epoch": 0.12, "learning_rate": 0.0005279999999999999, - "loss": 1.9765, + "loss": 1.7444, "step": 176 }, { "epoch": 0.12, "learning_rate": 0.000531, - "loss": 2.0687, + "loss": 1.8172, "step": 177 }, { "epoch": 0.12, "learning_rate": 0.000534, - "loss": 2.079, + "loss": 1.8232, "step": 178 }, { "epoch": 0.12, "learning_rate": 0.0005369999999999999, - "loss": 2.0544, + "loss": 1.808, "step": 179 }, { "epoch": 0.12, "learning_rate": 0.00054, - "loss": 1.9787, + "loss": 1.7316, "step": 180 }, { "epoch": 0.12, "learning_rate": 0.000543, - "loss": 2.0328, + "loss": 1.7889, "step": 181 }, { "epoch": 0.12, "learning_rate": 0.0005459999999999999, - "loss": 2.0692, + "loss": 1.8263, "step": 182 }, { "epoch": 0.13, "learning_rate": 0.000549, - "loss": 2.1062, + "loss": 1.8702, "step": 183 }, { "epoch": 0.13, "learning_rate": 0.000552, - "loss": 2.0996, + "loss": 1.8295, "step": 184 }, { "epoch": 0.13, "learning_rate": 0.0005549999999999999, - "loss": 1.9696, + "loss": 1.7151, "step": 185 }, { "epoch": 0.13, "learning_rate": 0.000558, - "loss": 2.0402, + "loss": 1.7877, "step": 186 }, { "epoch": 0.13, "learning_rate": 0.000561, - "loss": 2.0141, + "loss": 1.7469, "step": 187 }, { "epoch": 0.13, "learning_rate": 0.0005639999999999999, - "loss": 2.1041, + "loss": 1.8627, "step": 188 }, { "epoch": 0.13, "learning_rate": 0.0005669999999999999, - "loss": 1.9539, + "loss": 1.6922, "step": 189 }, { "epoch": 0.13, "learning_rate": 0.00057, - "loss": 2.0689, + "loss": 1.8159, "step": 190 }, { "epoch": 0.13, "learning_rate": 0.0005729999999999999, - "loss": 2.047, + "loss": 1.7839, "step": 191 }, { "epoch": 0.13, "learning_rate": 0.0005759999999999999, - "loss": 2.0751, + "loss": 1.8165, "step": 192 }, { "epoch": 0.13, "learning_rate": 0.000579, - "loss": 2.0649, + "loss": 1.8228, "step": 193 }, { "epoch": 0.13, "learning_rate": 0.0005819999999999999, - "loss": 2.1111, + "loss": 1.8615, "step": 194 }, { "epoch": 0.13, "learning_rate": 0.0005849999999999999, - "loss": 2.0669, + "loss": 1.8149, "step": 195 }, { "epoch": 0.13, "learning_rate": 0.000588, - "loss": 2.1075, + "loss": 1.8531, "step": 196 }, { "epoch": 0.13, "learning_rate": 0.0005909999999999999, - "loss": 2.0398, + "loss": 1.7961, "step": 197 }, { "epoch": 0.14, "learning_rate": 0.0005939999999999999, - "loss": 2.0357, + "loss": 1.7878, "step": 198 }, { "epoch": 0.14, "learning_rate": 0.000597, - "loss": 1.953, + "loss": 1.7078, "step": 199 }, { "epoch": 0.14, "learning_rate": 0.0006, - "loss": 2.026, - "step": 200 - }, - { - "epoch": 0.14, - "eval_loss": 2.0633840560913086, - "eval_runtime": 1727.3111, - "eval_samples_per_second": 5.789, - "eval_steps_per_second": 5.789, + "loss": 1.77, "step": 200 }, { "epoch": 0.14, - "learning_rate": 0.0005998566650740563, - "loss": 2.0193, + "learning_rate": 0.0005998937677053823, + "loss": 1.7704, "step": 201 }, { "epoch": 0.14, - "learning_rate": 0.0005997133301481127, - "loss": 2.0543, + "learning_rate": 0.0005997875354107648, + "loss": 1.8041, "step": 202 }, { "epoch": 0.14, - "learning_rate": 0.0005995699952221691, - "loss": 2.0542, + "learning_rate": 0.0005996813031161472, + "loss": 1.791, "step": 203 }, { "epoch": 0.14, - "learning_rate": 0.0005994266602962255, - "loss": 2.1345, + "learning_rate": 0.0005995750708215297, + "loss": 1.8864, "step": 204 }, { "epoch": 0.14, - "learning_rate": 0.0005992833253702819, - "loss": 2.1243, + "learning_rate": 0.0005994688385269121, + "loss": 1.8871, "step": 205 }, { "epoch": 0.14, - "learning_rate": 0.0005991399904443383, - "loss": 2.0224, + "learning_rate": 0.0005993626062322946, + "loss": 1.781, "step": 206 }, { "epoch": 0.14, - "learning_rate": 0.0005989966555183946, - "loss": 2.0536, + "learning_rate": 0.000599256373937677, + "loss": 1.8129, "step": 207 }, { "epoch": 0.14, - "learning_rate": 0.0005988533205924509, - "loss": 2.073, + "learning_rate": 0.0005991501416430595, + "loss": 1.8208, "step": 208 }, { "epoch": 0.14, - "learning_rate": 0.0005987099856665074, - "loss": 1.9553, + "learning_rate": 0.0005990439093484419, + "loss": 1.6943, "step": 209 }, { "epoch": 0.14, - "learning_rate": 0.0005985666507405637, - "loss": 2.0866, + "learning_rate": 0.0005989376770538244, + "loss": 1.8093, "step": 210 }, { "epoch": 0.14, - "learning_rate": 0.0005984233158146202, - "loss": 2.0656, + "learning_rate": 0.0005988314447592068, + "loss": 1.8158, "step": 211 }, { "epoch": 0.14, - "learning_rate": 0.0005982799808886764, - "loss": 2.0459, + "learning_rate": 0.0005987252124645891, + "loss": 1.7879, "step": 212 }, { "epoch": 0.15, - "learning_rate": 0.0005981366459627329, - "loss": 1.9813, + "learning_rate": 0.0005986189801699716, + "loss": 1.7495, "step": 213 }, { "epoch": 0.15, - "learning_rate": 0.0005979933110367892, - "loss": 2.0579, + "learning_rate": 0.000598512747875354, + "loss": 1.8075, "step": 214 }, { "epoch": 0.15, - "learning_rate": 0.0005978499761108456, - "loss": 2.1568, + "learning_rate": 0.0005984065155807364, + "loss": 1.897, "step": 215 }, { "epoch": 0.15, - "learning_rate": 0.000597706641184902, - "loss": 2.0049, + "learning_rate": 0.0005983002832861189, + "loss": 1.7493, "step": 216 }, { "epoch": 0.15, - "learning_rate": 0.0005975633062589584, - "loss": 1.9582, + "learning_rate": 0.0005981940509915014, + "loss": 1.7134, "step": 217 }, { "epoch": 0.15, - "learning_rate": 0.0005974199713330147, - "loss": 1.9647, + "learning_rate": 0.0005980878186968838, + "loss": 1.7252, "step": 218 }, { "epoch": 0.15, - "learning_rate": 0.0005972766364070711, - "loss": 2.0564, + "learning_rate": 0.0005979815864022663, + "loss": 1.8076, "step": 219 }, { "epoch": 0.15, - "learning_rate": 0.0005971333014811275, - "loss": 2.0314, + "learning_rate": 0.0005978753541076487, + "loss": 1.7753, "step": 220 }, { "epoch": 0.15, - "learning_rate": 0.0005969899665551839, - "loss": 1.9784, + "learning_rate": 0.0005977691218130311, + "loss": 1.7346, "step": 221 }, { "epoch": 0.15, - "learning_rate": 0.0005968466316292403, - "loss": 2.015, + "learning_rate": 0.0005976628895184136, + "loss": 1.7701, "step": 222 }, { "epoch": 0.15, - "learning_rate": 0.0005967032967032967, - "loss": 1.9988, + "learning_rate": 0.000597556657223796, + "loss": 1.7343, "step": 223 }, { "epoch": 0.15, - "learning_rate": 0.000596559961777353, - "loss": 2.1571, + "learning_rate": 0.0005974504249291785, + "loss": 1.9214, "step": 224 }, { "epoch": 0.15, - "learning_rate": 0.0005964166268514094, - "loss": 2.0942, + "learning_rate": 0.0005973441926345608, + "loss": 1.8475, "step": 225 }, { "epoch": 0.15, - "learning_rate": 0.0005962732919254658, - "loss": 2.0249, + "learning_rate": 0.0005972379603399432, + "loss": 1.7976, "step": 226 }, { "epoch": 0.16, - "learning_rate": 0.0005961299569995222, - "loss": 2.0765, + "learning_rate": 0.0005971317280453257, + "loss": 1.8312, "step": 227 }, { "epoch": 0.16, - "learning_rate": 0.0005959866220735786, - "loss": 2.0339, + "learning_rate": 0.0005970254957507082, + "loss": 1.7942, "step": 228 }, { "epoch": 0.16, - "learning_rate": 0.0005958432871476349, - "loss": 1.9932, + "learning_rate": 0.0005969192634560906, + "loss": 1.7478, "step": 229 }, { "epoch": 0.16, - "learning_rate": 0.0005956999522216914, - "loss": 2.0667, + "learning_rate": 0.0005968130311614731, + "loss": 1.8265, "step": 230 }, { "epoch": 0.16, - "learning_rate": 0.0005955566172957476, - "loss": 1.9975, + "learning_rate": 0.0005967067988668555, + "loss": 1.743, "step": 231 }, { "epoch": 0.16, - "learning_rate": 0.0005954132823698041, - "loss": 2.1712, + "learning_rate": 0.0005966005665722379, + "loss": 1.924, "step": 232 }, { "epoch": 0.16, - "learning_rate": 0.0005952699474438604, - "loss": 2.0982, + "learning_rate": 0.0005964943342776204, + "loss": 1.8448, "step": 233 }, { "epoch": 0.16, - "learning_rate": 0.0005951266125179169, - "loss": 2.0666, + "learning_rate": 0.0005963881019830028, + "loss": 1.8081, "step": 234 }, { "epoch": 0.16, - "learning_rate": 0.0005949832775919732, - "loss": 2.0398, + "learning_rate": 0.0005962818696883852, + "loss": 1.7978, "step": 235 }, { "epoch": 0.16, - "learning_rate": 0.0005948399426660295, - "loss": 2.148, + "learning_rate": 0.0005961756373937677, + "loss": 1.9161, "step": 236 }, { "epoch": 0.16, - "learning_rate": 0.0005946966077400859, - "loss": 2.129, + "learning_rate": 0.00059606940509915, + "loss": 1.8836, "step": 237 }, { "epoch": 0.16, - "learning_rate": 0.0005945532728141423, - "loss": 1.9516, + "learning_rate": 0.0005959631728045325, + "loss": 1.6924, "step": 238 }, { "epoch": 0.16, - "learning_rate": 0.0005944099378881987, - "loss": 2.1107, + "learning_rate": 0.000595856940509915, + "loss": 1.8725, "step": 239 }, { "epoch": 0.16, - "learning_rate": 0.0005942666029622551, - "loss": 2.0135, + "learning_rate": 0.0005957507082152974, + "loss": 1.7645, "step": 240 }, { "epoch": 0.16, - "learning_rate": 0.0005941232680363115, - "loss": 2.1016, + "learning_rate": 0.0005956444759206798, + "loss": 1.8543, "step": 241 }, { "epoch": 0.17, - "learning_rate": 0.0005939799331103678, - "loss": 2.1468, + "learning_rate": 0.0005955382436260623, + "loss": 1.8915, "step": 242 }, { "epoch": 0.17, - "learning_rate": 0.0005938365981844242, - "loss": 1.9971, + "learning_rate": 0.0005954320113314447, + "loss": 1.7467, "step": 243 }, { "epoch": 0.17, - "learning_rate": 0.0005936932632584806, - "loss": 1.9989, + "learning_rate": 0.0005953257790368272, + "loss": 1.7581, "step": 244 }, { "epoch": 0.17, - "learning_rate": 0.000593549928332537, - "loss": 2.0357, + "learning_rate": 0.0005952195467422096, + "loss": 1.7859, "step": 245 }, { "epoch": 0.17, - "learning_rate": 0.0005934065934065934, - "loss": 2.0284, + "learning_rate": 0.000595113314447592, + "loss": 1.7575, "step": 246 }, { "epoch": 0.17, - "learning_rate": 0.0005932632584806498, - "loss": 1.9888, + "learning_rate": 0.0005950070821529745, + "loss": 1.7291, "step": 247 }, { "epoch": 0.17, - "learning_rate": 0.0005931199235547062, - "loss": 1.9282, + "learning_rate": 0.0005949008498583569, + "loss": 1.692, "step": 248 }, { "epoch": 0.17, - "learning_rate": 0.0005929765886287625, - "loss": 2.1674, + "learning_rate": 0.0005947946175637393, + "loss": 1.9056, "step": 249 }, { "epoch": 0.17, - "learning_rate": 0.0005928332537028188, - "loss": 2.1181, + "learning_rate": 0.0005946883852691218, + "loss": 1.8572, "step": 250 }, { "epoch": 0.17, - "learning_rate": 0.0005926899187768753, - "loss": 2.0121, + "learning_rate": 0.0005945821529745042, + "loss": 1.7458, "step": 251 }, { "epoch": 0.17, - "learning_rate": 0.0005925465838509316, - "loss": 2.0473, + "learning_rate": 0.0005944759206798866, + "loss": 1.8021, "step": 252 }, { "epoch": 0.17, - "learning_rate": 0.0005924032489249881, - "loss": 2.1176, + "learning_rate": 0.0005943696883852691, + "loss": 1.8534, "step": 253 }, { "epoch": 0.17, - "learning_rate": 0.0005922599139990443, - "loss": 2.0193, + "learning_rate": 0.0005942634560906515, + "loss": 1.7671, "step": 254 }, { "epoch": 0.17, - "learning_rate": 0.0005921165790731007, - "loss": 2.1572, + "learning_rate": 0.0005941572237960339, + "loss": 1.9101, "step": 255 }, { "epoch": 0.18, - "learning_rate": 0.0005919732441471571, - "loss": 2.0089, + "learning_rate": 0.0005940509915014164, + "loss": 1.7567, "step": 256 }, { "epoch": 0.18, - "learning_rate": 0.0005918299092212135, - "loss": 2.0258, + "learning_rate": 0.0005939447592067988, + "loss": 1.7771, "step": 257 }, { "epoch": 0.18, - "learning_rate": 0.0005916865742952699, - "loss": 2.1088, + "learning_rate": 0.0005938385269121813, + "loss": 1.8691, "step": 258 }, { "epoch": 0.18, - "learning_rate": 0.0005915432393693263, - "loss": 2.0018, + "learning_rate": 0.0005937322946175637, + "loss": 1.764, "step": 259 }, { "epoch": 0.18, - "learning_rate": 0.0005913999044433826, - "loss": 2.0369, + "learning_rate": 0.000593626062322946, + "loss": 1.7978, "step": 260 }, { "epoch": 0.18, - "learning_rate": 0.000591256569517439, - "loss": 2.119, + "learning_rate": 0.0005935198300283285, + "loss": 1.8609, "step": 261 }, { "epoch": 0.18, - "learning_rate": 0.0005911132345914954, - "loss": 2.1527, + "learning_rate": 0.000593413597733711, + "loss": 1.9074, "step": 262 }, { "epoch": 0.18, - "learning_rate": 0.0005909698996655518, - "loss": 2.0851, + "learning_rate": 0.0005933073654390934, + "loss": 1.8182, "step": 263 }, { "epoch": 0.18, - "learning_rate": 0.0005908265647396082, - "loss": 2.1476, + "learning_rate": 0.0005932011331444759, + "loss": 1.9095, "step": 264 }, { "epoch": 0.18, - "learning_rate": 0.0005906832298136646, - "loss": 2.1613, + "learning_rate": 0.0005930949008498583, + "loss": 1.9103, "step": 265 }, { "epoch": 0.18, - "learning_rate": 0.000590539894887721, - "loss": 2.2171, + "learning_rate": 0.0005929886685552407, + "loss": 1.969, "step": 266 }, { "epoch": 0.18, - "learning_rate": 0.0005903965599617773, - "loss": 2.1749, + "learning_rate": 0.0005928824362606232, + "loss": 1.9142, "step": 267 }, { "epoch": 0.18, - "learning_rate": 0.0005902532250358337, - "loss": 2.0579, + "learning_rate": 0.0005927762039660056, + "loss": 1.8281, "step": 268 }, { "epoch": 0.18, - "learning_rate": 0.00059010989010989, - "loss": 2.0551, + "learning_rate": 0.0005926699716713881, + "loss": 1.8105, "step": 269 }, { "epoch": 0.18, - "learning_rate": 0.0005899665551839465, - "loss": 2.0846, + "learning_rate": 0.0005925637393767705, + "loss": 1.854, "step": 270 }, { "epoch": 0.19, - "learning_rate": 0.0005898232202580028, - "loss": 2.03, + "learning_rate": 0.0005924575070821529, + "loss": 1.7758, "step": 271 }, { "epoch": 0.19, - "learning_rate": 0.0005896798853320593, - "loss": 1.9923, + "learning_rate": 0.0005923512747875354, + "loss": 1.7467, "step": 272 }, { "epoch": 0.19, - "learning_rate": 0.0005895365504061155, - "loss": 2.0308, + "learning_rate": 0.0005922450424929179, + "loss": 1.8032, "step": 273 }, { "epoch": 0.19, - "learning_rate": 0.000589393215480172, - "loss": 1.9601, + "learning_rate": 0.0005921388101983002, + "loss": 1.7267, "step": 274 }, { "epoch": 0.19, - "learning_rate": 0.0005892498805542283, - "loss": 2.0328, + "learning_rate": 0.0005920325779036826, + "loss": 1.7873, "step": 275 }, { "epoch": 0.19, - "learning_rate": 0.0005891065456282847, - "loss": 2.0893, + "learning_rate": 0.0005919263456090651, + "loss": 1.8578, "step": 276 }, { "epoch": 0.19, - "learning_rate": 0.0005889632107023411, - "loss": 2.1038, + "learning_rate": 0.0005918201133144475, + "loss": 1.8684, "step": 277 }, { "epoch": 0.19, - "learning_rate": 0.0005888198757763974, - "loss": 2.0607, + "learning_rate": 0.00059171388101983, + "loss": 1.8266, "step": 278 }, { "epoch": 0.19, - "learning_rate": 0.0005886765408504538, - "loss": 2.054, + "learning_rate": 0.0005916076487252124, + "loss": 1.7916, "step": 279 }, { "epoch": 0.19, - "learning_rate": 0.0005885332059245102, - "loss": 2.151, + "learning_rate": 0.0005915014164305948, + "loss": 1.9168, "step": 280 }, { "epoch": 0.19, - "learning_rate": 0.0005883898709985666, - "loss": 2.1348, + "learning_rate": 0.0005913951841359773, + "loss": 1.8837, "step": 281 }, { "epoch": 0.19, - "learning_rate": 0.000588246536072623, - "loss": 2.0293, + "learning_rate": 0.0005912889518413597, + "loss": 1.7875, "step": 282 }, { "epoch": 0.19, - "learning_rate": 0.0005881032011466794, - "loss": 2.1417, + "learning_rate": 0.0005911827195467422, + "loss": 1.9046, "step": 283 }, { "epoch": 0.19, - "learning_rate": 0.0005879598662207358, - "loss": 1.9914, + "learning_rate": 0.0005910764872521247, + "loss": 1.7552, "step": 284 }, { "epoch": 0.19, - "learning_rate": 0.0005878165312947921, - "loss": 2.0846, + "learning_rate": 0.000590970254957507, + "loss": 1.8291, "step": 285 }, { "epoch": 0.2, - "learning_rate": 0.0005876731963688485, - "loss": 2.1063, + "learning_rate": 0.0005908640226628894, + "loss": 1.8531, "step": 286 }, { "epoch": 0.2, - "learning_rate": 0.0005875298614429049, - "loss": 2.0454, + "learning_rate": 0.0005907577903682719, + "loss": 1.7991, "step": 287 }, { "epoch": 0.2, - "learning_rate": 0.0005873865265169613, - "loss": 2.0268, + "learning_rate": 0.0005906515580736543, + "loss": 1.7896, "step": 288 }, { "epoch": 0.2, - "learning_rate": 0.0005872431915910177, - "loss": 2.0762, + "learning_rate": 0.0005905453257790368, + "loss": 1.8406, "step": 289 }, { "epoch": 0.2, - "learning_rate": 0.0005870998566650739, - "loss": 2.0691, + "learning_rate": 0.0005904390934844192, + "loss": 1.8222, "step": 290 }, { "epoch": 0.2, - "learning_rate": 0.0005869565217391304, - "loss": 2.0831, + "learning_rate": 0.0005903328611898016, + "loss": 1.8473, "step": 291 }, { "epoch": 0.2, - "learning_rate": 0.0005868131868131867, - "loss": 2.0361, + "learning_rate": 0.0005902266288951841, + "loss": 1.7958, "step": 292 }, { "epoch": 0.2, - "learning_rate": 0.0005866698518872432, - "loss": 2.041, + "learning_rate": 0.0005901203966005665, + "loss": 1.7879, "step": 293 }, { "epoch": 0.2, - "learning_rate": 0.0005865265169612995, - "loss": 2.0438, + "learning_rate": 0.000590014164305949, + "loss": 1.8077, "step": 294 }, { "epoch": 0.2, - "learning_rate": 0.000586383182035356, - "loss": 2.0392, + "learning_rate": 0.0005899079320113314, + "loss": 1.7829, "step": 295 }, { "epoch": 0.2, - "learning_rate": 0.0005862398471094122, - "loss": 2.0769, + "learning_rate": 0.0005898016997167139, + "loss": 1.8282, "step": 296 }, { "epoch": 0.2, - "learning_rate": 0.0005860965121834686, - "loss": 2.0878, + "learning_rate": 0.0005896954674220963, + "loss": 1.8525, "step": 297 }, { "epoch": 0.2, - "learning_rate": 0.000585953177257525, - "loss": 2.1053, + "learning_rate": 0.0005895892351274787, + "loss": 1.8552, "step": 298 }, { "epoch": 0.2, - "learning_rate": 0.0005858098423315814, - "loss": 2.1494, + "learning_rate": 0.0005894830028328611, + "loss": 1.9035, "step": 299 }, { "epoch": 0.21, - "learning_rate": 0.0005856665074056378, - "loss": 2.0596, + "learning_rate": 0.0005893767705382435, + "loss": 1.8043, "step": 300 }, { "epoch": 0.21, - "learning_rate": 0.0005855231724796942, - "loss": 2.0303, + "learning_rate": 0.000589270538243626, + "loss": 1.7884, "step": 301 }, { "epoch": 0.21, - "learning_rate": 0.0005853798375537506, - "loss": 2.0842, + "learning_rate": 0.0005891643059490084, + "loss": 1.9804, "step": 302 }, { "epoch": 0.21, - "learning_rate": 0.0005852365026278069, - "loss": 2.1627, + "learning_rate": 0.0005890580736543909, + "loss": 1.9464, "step": 303 }, { "epoch": 0.21, - "learning_rate": 0.0005850931677018633, - "loss": 2.0082, + "learning_rate": 0.0005889518413597733, + "loss": 1.7684, "step": 304 }, { "epoch": 0.21, - "learning_rate": 0.0005849498327759197, - "loss": 2.062, + "learning_rate": 0.0005888456090651558, + "loss": 1.8019, "step": 305 }, { "epoch": 0.21, - "learning_rate": 0.0005848064978499761, - "loss": 2.0595, + "learning_rate": 0.0005887393767705382, + "loss": 1.8127, "step": 306 }, { "epoch": 0.21, - "learning_rate": 0.0005846631629240325, - "loss": 2.0231, + "learning_rate": 0.0005886331444759207, + "loss": 1.7938, "step": 307 }, { "epoch": 0.21, - "learning_rate": 0.0005845198279980889, - "loss": 2.0741, + "learning_rate": 0.0005885269121813031, + "loss": 1.8452, "step": 308 }, { "epoch": 0.21, - "learning_rate": 0.0005843764930721452, - "loss": 2.032, + "learning_rate": 0.0005884206798866856, + "loss": 1.7761, "step": 309 }, { "epoch": 0.21, - "learning_rate": 0.0005842331581462016, - "loss": 2.0719, + "learning_rate": 0.000588314447592068, + "loss": 1.8166, "step": 310 }, { "epoch": 0.21, - "learning_rate": 0.0005840898232202579, - "loss": 2.2077, + "learning_rate": 0.0005882082152974503, + "loss": 1.9685, "step": 311 }, { "epoch": 0.21, - "learning_rate": 0.0005839464882943144, - "loss": 2.0157, + "learning_rate": 0.0005881019830028328, + "loss": 1.7582, "step": 312 }, { "epoch": 0.21, - "learning_rate": 0.0005838031533683707, - "loss": 2.0623, + "learning_rate": 0.0005879957507082152, + "loss": 1.8127, "step": 313 }, { "epoch": 0.21, - "learning_rate": 0.0005836598184424272, - "loss": 2.109, + "learning_rate": 0.0005878895184135976, + "loss": 1.875, "step": 314 }, { "epoch": 0.22, - "learning_rate": 0.0005835164835164834, - "loss": 1.9956, + "learning_rate": 0.0005877832861189801, + "loss": 1.7606, "step": 315 }, { "epoch": 0.22, - "learning_rate": 0.0005833731485905399, - "loss": 2.037, + "learning_rate": 0.0005876770538243626, + "loss": 1.8047, "step": 316 }, { "epoch": 0.22, - "learning_rate": 0.0005832298136645962, - "loss": 2.1446, + "learning_rate": 0.000587570821529745, + "loss": 1.8821, "step": 317 }, { "epoch": 0.22, - "learning_rate": 0.0005830864787386526, - "loss": 2.1719, + "learning_rate": 0.0005874645892351275, + "loss": 1.9359, "step": 318 }, { "epoch": 0.22, - "learning_rate": 0.000582943143812709, - "loss": 2.1727, + "learning_rate": 0.0005873583569405099, + "loss": 1.9346, "step": 319 }, { "epoch": 0.22, - "learning_rate": 0.0005827998088867654, - "loss": 2.1275, + "learning_rate": 0.0005872521246458923, + "loss": 1.8984, "step": 320 }, { "epoch": 0.22, - "learning_rate": 0.0005826564739608217, - "loss": 2.0181, + "learning_rate": 0.0005871458923512748, + "loss": 1.7759, "step": 321 }, { "epoch": 0.22, - "learning_rate": 0.0005825131390348781, - "loss": 2.0416, + "learning_rate": 0.0005870396600566571, + "loss": 1.7961, "step": 322 }, { "epoch": 0.22, - "learning_rate": 0.0005823698041089345, - "loss": 2.1615, + "learning_rate": 0.0005869334277620396, + "loss": 1.9148, "step": 323 }, { "epoch": 0.22, - "learning_rate": 0.0005822264691829909, - "loss": 2.1476, + "learning_rate": 0.000586827195467422, + "loss": 1.898, "step": 324 }, { "epoch": 0.22, - "learning_rate": 0.0005820831342570473, - "loss": 2.0338, + "learning_rate": 0.0005867209631728044, + "loss": 1.7916, "step": 325 }, { "epoch": 0.22, - "learning_rate": 0.0005819397993311037, - "loss": 2.1117, + "learning_rate": 0.0005866147308781869, + "loss": 1.8758, "step": 326 }, { "epoch": 0.22, - "learning_rate": 0.00058179646440516, - "loss": 2.0487, + "learning_rate": 0.0005865084985835694, + "loss": 1.8184, "step": 327 }, { "epoch": 0.22, - "learning_rate": 0.0005816531294792164, - "loss": 2.03, + "learning_rate": 0.0005864022662889518, + "loss": 1.7792, "step": 328 }, { "epoch": 0.22, - "learning_rate": 0.0005815097945532728, - "loss": 2.06, + "learning_rate": 0.0005862960339943343, + "loss": 1.8199, "step": 329 }, { "epoch": 0.23, - "learning_rate": 0.0005813664596273292, - "loss": 2.0517, + "learning_rate": 0.0005861898016997167, + "loss": 1.8013, "step": 330 }, { "epoch": 0.23, - "learning_rate": 0.0005812231247013856, - "loss": 2.1105, + "learning_rate": 0.0005860835694050991, + "loss": 1.8723, "step": 331 }, { "epoch": 0.23, - "learning_rate": 0.0005810797897754418, - "loss": 2.0244, + "learning_rate": 0.0005859773371104816, + "loss": 1.7972, "step": 332 }, { "epoch": 0.23, - "learning_rate": 0.0005809364548494983, - "loss": 2.1025, + "learning_rate": 0.000585871104815864, + "loss": 1.8765, "step": 333 }, { "epoch": 0.23, - "learning_rate": 0.0005807931199235546, - "loss": 2.0371, + "learning_rate": 0.0005857648725212463, + "loss": 1.8028, "step": 334 }, { "epoch": 0.23, - "learning_rate": 0.0005806497849976111, - "loss": 2.083, + "learning_rate": 0.0005856586402266288, + "loss": 1.8377, "step": 335 }, { "epoch": 0.23, - "learning_rate": 0.0005805064500716674, - "loss": 1.9636, + "learning_rate": 0.0005855524079320112, + "loss": 1.7413, "step": 336 }, { "epoch": 0.23, - "learning_rate": 0.0005803631151457239, - "loss": 2.1417, + "learning_rate": 0.0005854461756373937, + "loss": 1.8906, "step": 337 }, { "epoch": 0.23, - "learning_rate": 0.0005802197802197801, - "loss": 2.0717, + "learning_rate": 0.0005853399433427762, + "loss": 1.8192, "step": 338 }, { "epoch": 0.23, - "learning_rate": 0.0005800764452938365, - "loss": 2.0832, + "learning_rate": 0.0005852337110481586, + "loss": 1.8595, "step": 339 }, { "epoch": 0.23, - "learning_rate": 0.0005799331103678929, - "loss": 2.0748, + "learning_rate": 0.000585127478753541, + "loss": 1.8473, "step": 340 }, { "epoch": 0.23, - "learning_rate": 0.0005797897754419493, - "loss": 2.086, + "learning_rate": 0.0005850212464589235, + "loss": 1.861, "step": 341 }, { "epoch": 0.23, - "learning_rate": 0.0005796464405160057, - "loss": 2.075, + "learning_rate": 0.0005849150141643059, + "loss": 1.8337, "step": 342 }, { "epoch": 0.23, - "learning_rate": 0.0005795031055900621, - "loss": 2.1223, + "learning_rate": 0.0005848087818696884, + "loss": 1.8747, "step": 343 }, { "epoch": 0.24, - "learning_rate": 0.0005793597706641185, - "loss": 2.0627, + "learning_rate": 0.0005847025495750708, + "loss": 1.8202, "step": 344 }, { "epoch": 0.24, - "learning_rate": 0.0005792164357381748, - "loss": 2.069, + "learning_rate": 0.0005845963172804532, + "loss": 1.8235, "step": 345 }, { "epoch": 0.24, - "learning_rate": 0.0005790731008122312, - "loss": 2.1574, + "learning_rate": 0.0005844900849858357, + "loss": 1.915, "step": 346 }, { "epoch": 0.24, - "learning_rate": 0.0005789297658862876, - "loss": 2.0647, + "learning_rate": 0.000584383852691218, + "loss": 1.8257, "step": 347 }, { "epoch": 0.24, - "learning_rate": 0.000578786430960344, - "loss": 2.1713, + "learning_rate": 0.0005842776203966005, + "loss": 1.9219, "step": 348 }, { "epoch": 0.24, - "learning_rate": 0.0005786430960344004, - "loss": 2.0984, + "learning_rate": 0.000584171388101983, + "loss": 1.8675, "step": 349 }, { "epoch": 0.24, - "learning_rate": 0.0005784997611084568, - "loss": 2.1219, + "learning_rate": 0.0005840651558073654, + "loss": 1.8938, "step": 350 }, { "epoch": 0.24, - "learning_rate": 0.000578356426182513, - "loss": 1.9956, + "learning_rate": 0.0005839589235127478, + "loss": 1.7652, "step": 351 }, { "epoch": 0.24, - "learning_rate": 0.0005782130912565695, - "loss": 2.1522, + "learning_rate": 0.0005838526912181303, + "loss": 1.9201, "step": 352 }, { "epoch": 0.24, - "learning_rate": 0.0005780697563306258, - "loss": 2.0521, + "learning_rate": 0.0005837464589235127, + "loss": 1.8092, "step": 353 }, { "epoch": 0.24, - "learning_rate": 0.0005779264214046823, - "loss": 1.9579, + "learning_rate": 0.0005836402266288951, + "loss": 1.7251, "step": 354 }, { "epoch": 0.24, - "learning_rate": 0.0005777830864787386, - "loss": 2.0615, + "learning_rate": 0.0005835339943342776, + "loss": 1.8227, "step": 355 }, { "epoch": 0.24, - "learning_rate": 0.0005776397515527951, - "loss": 2.0337, + "learning_rate": 0.00058342776203966, + "loss": 1.7901, "step": 356 }, { "epoch": 0.24, - "learning_rate": 0.0005774964166268513, - "loss": 2.043, + "learning_rate": 0.0005833215297450425, + "loss": 1.7998, "step": 357 }, { "epoch": 0.24, - "learning_rate": 0.0005773530817009077, - "loss": 2.0453, + "learning_rate": 0.0005832152974504249, + "loss": 1.8229, "step": 358 }, { "epoch": 0.25, - "learning_rate": 0.0005772097467749641, - "loss": 2.0554, + "learning_rate": 0.0005831090651558072, + "loss": 1.8179, "step": 359 }, { "epoch": 0.25, - "learning_rate": 0.0005770664118490205, - "loss": 2.0509, + "learning_rate": 0.0005830028328611897, + "loss": 1.8142, "step": 360 }, { "epoch": 0.25, - "learning_rate": 0.0005769230769230769, - "loss": 1.9859, + "learning_rate": 0.0005828966005665722, + "loss": 1.7447, "step": 361 }, { "epoch": 0.25, - "learning_rate": 0.0005767797419971333, - "loss": 2.0209, + "learning_rate": 0.0005827903682719546, + "loss": 1.7978, "step": 362 }, { "epoch": 0.25, - "learning_rate": 0.0005766364070711896, - "loss": 1.9763, + "learning_rate": 0.0005826841359773371, + "loss": 1.7471, "step": 363 }, { "epoch": 0.25, - "learning_rate": 0.000576493072145246, - "loss": 2.1231, + "learning_rate": 0.0005825779036827195, + "loss": 1.9006, "step": 364 }, { "epoch": 0.25, - "learning_rate": 0.0005763497372193024, - "loss": 2.0418, + "learning_rate": 0.0005824716713881019, + "loss": 1.8023, "step": 365 }, { "epoch": 0.25, - "learning_rate": 0.0005762064022933588, - "loss": 1.9824, + "learning_rate": 0.0005823654390934844, + "loss": 1.7522, "step": 366 }, { "epoch": 0.25, - "learning_rate": 0.0005760630673674152, - "loss": 2.1536, + "learning_rate": 0.0005822592067988668, + "loss": 1.8935, "step": 367 }, { "epoch": 0.25, - "learning_rate": 0.0005759197324414716, - "loss": 2.0565, + "learning_rate": 0.0005821529745042493, + "loss": 1.8137, "step": 368 }, { "epoch": 0.25, - "learning_rate": 0.0005757763975155279, - "loss": 2.0413, + "learning_rate": 0.0005820467422096317, + "loss": 1.8023, "step": 369 }, { "epoch": 0.25, - "learning_rate": 0.0005756330625895843, - "loss": 2.0659, + "learning_rate": 0.000581940509915014, + "loss": 1.8378, "step": 370 }, { "epoch": 0.25, - "learning_rate": 0.0005754897276636407, - "loss": 2.018, + "learning_rate": 0.0005818342776203965, + "loss": 1.7871, "step": 371 }, { "epoch": 0.25, - "learning_rate": 0.000575346392737697, - "loss": 2.0262, + "learning_rate": 0.000581728045325779, + "loss": 1.793, "step": 372 }, { "epoch": 0.26, - "learning_rate": 0.0005752030578117535, - "loss": 2.1005, + "learning_rate": 0.0005816218130311614, + "loss": 1.8656, "step": 373 }, { "epoch": 0.26, - "learning_rate": 0.0005750597228858097, - "loss": 2.0796, + "learning_rate": 0.0005815155807365438, + "loss": 1.8213, "step": 374 }, { "epoch": 0.26, - "learning_rate": 0.0005749163879598662, - "loss": 2.1344, + "learning_rate": 0.0005814093484419263, + "loss": 1.8686, "step": 375 }, { "epoch": 0.26, - "learning_rate": 0.0005747730530339225, - "loss": 1.9448, + "learning_rate": 0.0005813031161473087, + "loss": 1.7107, "step": 376 }, { "epoch": 0.26, - "learning_rate": 0.0005746297181079789, - "loss": 2.0658, + "learning_rate": 0.0005811968838526912, + "loss": 1.8303, "step": 377 }, { "epoch": 0.26, - "learning_rate": 0.0005744863831820353, - "loss": 2.0931, + "learning_rate": 0.0005810906515580736, + "loss": 1.8433, "step": 378 }, { "epoch": 0.26, - "learning_rate": 0.0005743430482560917, - "loss": 2.1048, + "learning_rate": 0.000580984419263456, + "loss": 1.8635, "step": 379 }, { "epoch": 0.26, - "learning_rate": 0.000574199713330148, - "loss": 2.0263, + "learning_rate": 0.0005808781869688385, + "loss": 1.8097, "step": 380 }, { "epoch": 0.26, - "learning_rate": 0.0005740563784042044, - "loss": 2.1217, + "learning_rate": 0.0005807719546742209, + "loss": 1.8751, "step": 381 }, { "epoch": 0.26, - "learning_rate": 0.0005739130434782608, - "loss": 2.1169, + "learning_rate": 0.0005806657223796034, + "loss": 1.8784, "step": 382 }, { "epoch": 0.26, - "learning_rate": 0.0005737697085523172, - "loss": 2.1247, + "learning_rate": 0.0005805594900849859, + "loss": 1.8826, "step": 383 }, { "epoch": 0.26, - "learning_rate": 0.0005736263736263736, - "loss": 2.0325, + "learning_rate": 0.0005804532577903682, + "loss": 1.7963, "step": 384 }, { "epoch": 0.26, - "learning_rate": 0.00057348303870043, - "loss": 2.0885, + "learning_rate": 0.0005803470254957506, + "loss": 1.8452, "step": 385 }, { "epoch": 0.26, - "learning_rate": 0.0005733397037744864, - "loss": 1.9968, + "learning_rate": 0.0005802407932011331, + "loss": 1.7549, "step": 386 }, { "epoch": 0.26, - "learning_rate": 0.0005731963688485427, - "loss": 2.0911, + "learning_rate": 0.0005801345609065155, + "loss": 1.851, "step": 387 }, { "epoch": 0.27, - "learning_rate": 0.0005730530339225991, - "loss": 2.0654, + "learning_rate": 0.000580028328611898, + "loss": 1.8198, "step": 388 }, { "epoch": 0.27, - "learning_rate": 0.0005729096989966555, - "loss": 2.1938, + "learning_rate": 0.0005799220963172804, + "loss": 1.956, "step": 389 }, { "epoch": 0.27, - "learning_rate": 0.0005727663640707119, - "loss": 2.1723, + "learning_rate": 0.0005798158640226628, + "loss": 1.9291, "step": 390 }, { "epoch": 0.27, - "learning_rate": 0.0005726230291447683, - "loss": 2.0289, + "learning_rate": 0.0005797096317280453, + "loss": 1.784, "step": 391 }, { "epoch": 0.27, - "learning_rate": 0.0005724796942188245, - "loss": 2.1383, + "learning_rate": 0.0005796033994334277, + "loss": 1.894, "step": 392 }, { "epoch": 0.27, - "learning_rate": 0.0005723363592928809, - "loss": 2.1457, + "learning_rate": 0.0005794971671388101, + "loss": 1.886, "step": 393 }, { "epoch": 0.27, - "learning_rate": 0.0005721930243669373, - "loss": 2.0625, + "learning_rate": 0.0005793909348441927, + "loss": 1.826, "step": 394 }, { "epoch": 0.27, - "learning_rate": 0.0005720496894409937, - "loss": 2.0906, + "learning_rate": 0.000579284702549575, + "loss": 1.8398, "step": 395 }, { "epoch": 0.27, - "learning_rate": 0.0005719063545150501, - "loss": 2.0087, + "learning_rate": 0.0005791784702549574, + "loss": 1.7506, "step": 396 }, { "epoch": 0.27, - "learning_rate": 0.0005717630195891065, - "loss": 2.1128, + "learning_rate": 0.0005790722379603399, + "loss": 1.8669, "step": 397 }, { "epoch": 0.27, - "learning_rate": 0.0005716196846631629, - "loss": 2.1217, + "learning_rate": 0.0005789660056657223, + "loss": 1.8566, "step": 398 }, { "epoch": 0.27, - "learning_rate": 0.0005714763497372192, - "loss": 2.1162, + "learning_rate": 0.0005788597733711047, + "loss": 1.8618, "step": 399 }, { "epoch": 0.27, - "learning_rate": 0.0005713330148112756, - "loss": 2.1342, + "learning_rate": 0.0005787535410764872, + "loss": 1.8632, "step": 400 }, { "epoch": 0.27, - "eval_loss": 2.0627405643463135, - "eval_runtime": 1739.0775, - "eval_samples_per_second": 5.75, - "eval_steps_per_second": 5.75, + "eval_loss": 2.021120071411133, + "eval_runtime": 1471.972, + "eval_samples_per_second": 9.879, + "eval_steps_per_second": 9.879, "step": 400 - }, - { - "epoch": 0.27, - "learning_rate": 0.000571189679885332, - "loss": 2.1854, - "step": 401 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005710463449593884, - "loss": 2.1751, - "step": 402 - }, - { - "epoch": 0.28, - "learning_rate": 0.0005709030100334448, - "loss": 2.2177, - "step": 403 - }, - { - "epoch": 0.28, - "learning_rate": 0.0005707596751075012, - "loss": 2.1151, - "step": 404 - }, - { - "epoch": 0.28, - "learning_rate": 0.0005706163401815575, - "loss": 2.1397, - "step": 405 - }, - { - "epoch": 0.28, - "learning_rate": 0.0005704730052556139, - "loss": 2.1307, - "step": 406 - }, - { - "epoch": 0.28, - "learning_rate": 0.0005703296703296703, - "loss": 2.1163, - "step": 407 - }, - { - "epoch": 0.28, - "learning_rate": 0.0005701863354037267, - "loss": 2.2228, - "step": 408 - }, - { - "epoch": 0.28, - "learning_rate": 0.000570043000477783, - "loss": 2.0841, - "step": 409 - }, - { - "epoch": 0.28, - "learning_rate": 0.0005698996655518395, - "loss": 2.2314, - "step": 410 - }, - { - "epoch": 0.28, - "learning_rate": 0.0005697563306258957, - "loss": 2.2701, - "step": 411 - }, - { - "epoch": 0.28, - "learning_rate": 0.0005696129956999522, - "loss": 2.1293, - "step": 412 - }, - { - "epoch": 0.28, - "learning_rate": 0.0005694696607740085, - "loss": 2.1884, - "step": 413 - }, - { - "epoch": 0.28, - "learning_rate": 0.0005693263258480649, - "loss": 2.1422, - "step": 414 - }, - { - "epoch": 0.28, - "learning_rate": 0.0005691829909221213, - "loss": 2.1939, - "step": 415 - }, - { - "epoch": 0.28, - "learning_rate": 0.0005690396559961777, - "loss": 2.1862, - "step": 416 - }, - { - "epoch": 0.29, - "learning_rate": 0.000568896321070234, - "loss": 2.2965, - "step": 417 - }, - { - "epoch": 0.29, - "learning_rate": 0.0005687529861442904, - "loss": 2.2042, - "step": 418 - }, - { - "epoch": 0.29, - "learning_rate": 0.0005686096512183468, - "loss": 2.1242, - "step": 419 - }, - { - "epoch": 0.29, - "learning_rate": 0.0005684663162924032, - "loss": 2.1837, - "step": 420 - }, - { - "epoch": 0.29, - "learning_rate": 0.0005683229813664596, - "loss": 2.1068, - "step": 421 - }, - { - "epoch": 0.29, - "learning_rate": 0.000568179646440516, - "loss": 2.2236, - "step": 422 - }, - { - "epoch": 0.29, - "learning_rate": 0.0005680363115145723, - "loss": 2.1684, - "step": 423 - }, - { - "epoch": 0.29, - "learning_rate": 0.0005678929765886287, - "loss": 2.1441, - "step": 424 - }, - { - "epoch": 0.29, - "learning_rate": 0.0005677496416626851, - "loss": 2.1091, - "step": 425 - }, - { - "epoch": 0.29, - "learning_rate": 0.0005676063067367415, - "loss": 2.2047, - "step": 426 - }, - { - "epoch": 0.29, - "learning_rate": 0.0005674629718107979, - "loss": 2.1858, - "step": 427 - }, - { - "epoch": 0.29, - "learning_rate": 0.0005673196368848541, - "loss": 2.141, - "step": 428 - }, - { - "epoch": 0.29, - "learning_rate": 0.0005671763019589106, - "loss": 2.1447, - "step": 429 - }, - { - "epoch": 0.29, - "learning_rate": 0.0005670329670329669, - "loss": 2.1216, - "step": 430 - }, - { - "epoch": 0.29, - "learning_rate": 0.0005668896321070234, - "loss": 2.1843, - "step": 431 - }, - { - "epoch": 0.3, - "learning_rate": 0.0005667462971810797, - "loss": 2.3159, - "step": 432 - }, - { - "epoch": 0.3, - "learning_rate": 0.0005666029622551362, - "loss": 2.2143, - "step": 433 - }, - { - "epoch": 0.3, - "learning_rate": 0.0005664596273291925, - "loss": 2.1456, - "step": 434 - }, - { - "epoch": 0.3, - "learning_rate": 0.0005663162924032488, - "loss": 2.1061, - "step": 435 - }, - { - "epoch": 0.3, - "learning_rate": 0.0005661729574773052, - "loss": 2.2449, - "step": 436 - }, - { - "epoch": 0.3, - "learning_rate": 0.0005660296225513616, - "loss": 2.1625, - "step": 437 - }, - { - "epoch": 0.3, - "learning_rate": 0.000565886287625418, - "loss": 2.1141, - "step": 438 - }, - { - "epoch": 0.3, - "learning_rate": 0.0005657429526994744, - "loss": 2.1314, - "step": 439 - }, - { - "epoch": 0.3, - "learning_rate": 0.0005655996177735308, - "loss": 2.1726, - "step": 440 - }, - { - "epoch": 0.3, - "learning_rate": 0.0005654562828475871, - "loss": 2.1889, - "step": 441 - }, - { - "epoch": 0.3, - "learning_rate": 0.0005653129479216435, - "loss": 2.2172, - "step": 442 - }, - { - "epoch": 0.3, - "learning_rate": 0.0005651696129956999, - "loss": 2.1766, - "step": 443 - }, - { - "epoch": 0.3, - "learning_rate": 0.0005650262780697563, - "loss": 2.0534, - "step": 444 - }, - { - "epoch": 0.3, - "learning_rate": 0.0005648829431438127, - "loss": 2.1665, - "step": 445 - }, - { - "epoch": 0.3, - "learning_rate": 0.0005647396082178691, - "loss": 2.1383, - "step": 446 - }, - { - "epoch": 0.31, - "learning_rate": 0.0005645962732919254, - "loss": 2.1737, - "step": 447 - }, - { - "epoch": 0.31, - "learning_rate": 0.0005644529383659818, - "loss": 2.1792, - "step": 448 - }, - { - "epoch": 0.31, - "learning_rate": 0.0005643096034400381, - "loss": 2.2596, - "step": 449 - }, - { - "epoch": 0.31, - "learning_rate": 0.0005641662685140946, - "loss": 2.1804, - "step": 450 - }, - { - "epoch": 0.31, - "learning_rate": 0.0005640229335881509, - "loss": 2.0674, - "step": 451 - }, - { - "epoch": 0.31, - "learning_rate": 0.0005638795986622074, - "loss": 2.0845, - "step": 452 - }, - { - "epoch": 0.31, - "learning_rate": 0.0005637362637362636, - "loss": 2.0754, - "step": 453 - }, - { - "epoch": 0.31, - "learning_rate": 0.00056359292881032, - "loss": 2.1973, - "step": 454 - }, - { - "epoch": 0.31, - "learning_rate": 0.0005634495938843764, - "loss": 2.1183, - "step": 455 - }, - { - "epoch": 0.31, - "learning_rate": 0.0005633062589584328, - "loss": 2.1111, - "step": 456 - }, - { - "epoch": 0.31, - "learning_rate": 0.0005631629240324892, - "loss": 2.1094, - "step": 457 - }, - { - "epoch": 0.31, - "learning_rate": 0.0005630195891065456, - "loss": 2.0889, - "step": 458 - }, - { - "epoch": 0.31, - "learning_rate": 0.0005628762541806019, - "loss": 2.2136, - "step": 459 - }, - { - "epoch": 0.31, - "learning_rate": 0.0005627329192546583, - "loss": 2.1521, - "step": 460 - }, - { - "epoch": 0.32, - "learning_rate": 0.0005625895843287147, - "loss": 2.1277, - "step": 461 - }, - { - "epoch": 0.32, - "learning_rate": 0.0005624462494027711, - "loss": 2.2086, - "step": 462 - }, - { - "epoch": 0.32, - "learning_rate": 0.0005623029144768275, - "loss": 2.139, - "step": 463 - }, - { - "epoch": 0.32, - "learning_rate": 0.0005621595795508839, - "loss": 2.0627, - "step": 464 - }, - { - "epoch": 0.32, - "learning_rate": 0.0005620162446249402, - "loss": 2.2356, - "step": 465 - }, - { - "epoch": 0.32, - "learning_rate": 0.0005618729096989966, - "loss": 2.2947, - "step": 466 - }, - { - "epoch": 0.32, - "learning_rate": 0.000561729574773053, - "loss": 2.1619, - "step": 467 - }, - { - "epoch": 0.32, - "learning_rate": 0.0005615862398471093, - "loss": 2.2515, - "step": 468 - }, - { - "epoch": 0.32, - "learning_rate": 0.0005614429049211658, - "loss": 2.1746, - "step": 469 - }, - { - "epoch": 0.32, - "learning_rate": 0.000561299569995222, - "loss": 2.259, - "step": 470 - }, - { - "epoch": 0.32, - "learning_rate": 0.0005611562350692785, - "loss": 2.2093, - "step": 471 - }, - { - "epoch": 0.32, - "learning_rate": 0.0005610129001433348, - "loss": 2.2077, - "step": 472 - }, - { - "epoch": 0.32, - "learning_rate": 0.0005608695652173913, - "loss": 2.2464, - "step": 473 - }, - { - "epoch": 0.32, - "learning_rate": 0.0005607262302914476, - "loss": 2.0556, - "step": 474 - }, - { - "epoch": 0.32, - "learning_rate": 0.000560582895365504, - "loss": 2.135, - "step": 475 - }, - { - "epoch": 0.33, - "learning_rate": 0.0005604395604395604, - "loss": 2.1812, - "step": 476 - }, - { - "epoch": 0.33, - "learning_rate": 0.0005602962255136167, - "loss": 2.1082, - "step": 477 - }, - { - "epoch": 0.33, - "learning_rate": 0.0005601528905876731, - "loss": 2.0997, - "step": 478 - }, - { - "epoch": 0.33, - "learning_rate": 0.0005600095556617295, - "loss": 2.1532, - "step": 479 - }, - { - "epoch": 0.33, - "learning_rate": 0.0005598662207357859, - "loss": 2.1389, - "step": 480 - }, - { - "epoch": 0.33, - "learning_rate": 0.0005597228858098423, - "loss": 2.1122, - "step": 481 - }, - { - "epoch": 0.33, - "learning_rate": 0.0005595795508838987, - "loss": 2.1091, - "step": 482 - }, - { - "epoch": 0.33, - "learning_rate": 0.000559436215957955, - "loss": 2.1699, - "step": 483 - }, - { - "epoch": 0.33, - "learning_rate": 0.0005592928810320114, - "loss": 2.1043, - "step": 484 - }, - { - "epoch": 0.33, - "learning_rate": 0.0005591495461060678, - "loss": 2.143, - "step": 485 - }, - { - "epoch": 0.33, - "learning_rate": 0.0005590062111801242, - "loss": 2.1184, - "step": 486 - }, - { - "epoch": 0.33, - "learning_rate": 0.0005588628762541806, - "loss": 2.0185, - "step": 487 - }, - { - "epoch": 0.33, - "learning_rate": 0.000558719541328237, - "loss": 2.2015, - "step": 488 - }, - { - "epoch": 0.33, - "learning_rate": 0.0005585762064022932, - "loss": 2.2501, - "step": 489 - }, - { - "epoch": 0.34, - "learning_rate": 0.0005584328714763497, - "loss": 2.1088, - "step": 490 - }, - { - "epoch": 0.34, - "learning_rate": 0.000558289536550406, - "loss": 2.0932, - "step": 491 - }, - { - "epoch": 0.34, - "learning_rate": 0.0005581462016244625, - "loss": 2.1717, - "step": 492 - }, - { - "epoch": 0.34, - "learning_rate": 0.0005580028666985188, - "loss": 2.1509, - "step": 493 - }, - { - "epoch": 0.34, - "learning_rate": 0.0005578595317725753, - "loss": 2.1354, - "step": 494 - }, - { - "epoch": 0.34, - "learning_rate": 0.0005577161968466315, - "loss": 2.2007, - "step": 495 - }, - { - "epoch": 0.34, - "learning_rate": 0.0005575728619206879, - "loss": 2.0769, - "step": 496 - }, - { - "epoch": 0.34, - "learning_rate": 0.0005574295269947443, - "loss": 2.1375, - "step": 497 - }, - { - "epoch": 0.34, - "learning_rate": 0.0005572861920688007, - "loss": 2.1743, - "step": 498 - }, - { - "epoch": 0.34, - "learning_rate": 0.0005571428571428571, - "loss": 2.1998, - "step": 499 - }, - { - "epoch": 0.34, - "learning_rate": 0.0005569995222169135, - "loss": 2.2041, - "step": 500 - }, - { - "epoch": 0.34, - "learning_rate": 0.0005568561872909698, - "loss": 2.1778, - "step": 501 - }, - { - "epoch": 0.34, - "learning_rate": 0.0005567128523650262, - "loss": 2.2309, - "step": 502 - }, - { - "epoch": 0.34, - "learning_rate": 0.0005565695174390826, - "loss": 2.1322, - "step": 503 - }, - { - "epoch": 0.34, - "learning_rate": 0.000556426182513139, - "loss": 2.1951, - "step": 504 - }, - { - "epoch": 0.35, - "learning_rate": 0.0005562828475871954, - "loss": 2.2756, - "step": 505 - }, - { - "epoch": 0.35, - "learning_rate": 0.0005561395126612518, - "loss": 2.0972, - "step": 506 - }, - { - "epoch": 0.35, - "learning_rate": 0.0005559961777353081, - "loss": 2.138, - "step": 507 - }, - { - "epoch": 0.35, - "learning_rate": 0.0005558528428093645, - "loss": 2.1627, - "step": 508 - }, - { - "epoch": 0.35, - "learning_rate": 0.0005557095078834209, - "loss": 2.1876, - "step": 509 - }, - { - "epoch": 0.35, - "learning_rate": 0.0005555661729574772, - "loss": 2.1359, - "step": 510 - }, - { - "epoch": 0.35, - "learning_rate": 0.0005554228380315337, - "loss": 2.2495, - "step": 511 - }, - { - "epoch": 0.35, - "learning_rate": 0.00055527950310559, - "loss": 2.1951, - "step": 512 - }, - { - "epoch": 0.35, - "learning_rate": 0.0005551361681796464, - "loss": 2.1939, - "step": 513 - }, - { - "epoch": 0.35, - "learning_rate": 0.0005549928332537027, - "loss": 2.12, - "step": 514 - }, - { - "epoch": 0.35, - "learning_rate": 0.0005548494983277592, - "loss": 2.1258, - "step": 515 - }, - { - "epoch": 0.35, - "learning_rate": 0.0005547061634018155, - "loss": 2.2273, - "step": 516 - }, - { - "epoch": 0.35, - "learning_rate": 0.0005545628284758719, - "loss": 2.1856, - "step": 517 - }, - { - "epoch": 0.35, - "learning_rate": 0.0005544194935499283, - "loss": 2.0875, - "step": 518 - }, - { - "epoch": 0.35, - "learning_rate": 0.0005542761586239846, - "loss": 2.0916, - "step": 519 - }, - { - "epoch": 0.36, - "learning_rate": 0.000554132823698041, - "loss": 2.094, - "step": 520 - }, - { - "epoch": 0.36, - "learning_rate": 0.0005539894887720974, - "loss": 2.1244, - "step": 521 - }, - { - "epoch": 0.36, - "learning_rate": 0.0005538461538461538, - "loss": 2.1668, - "step": 522 - }, - { - "epoch": 0.36, - "learning_rate": 0.0005537028189202102, - "loss": 2.1785, - "step": 523 - }, - { - "epoch": 0.36, - "learning_rate": 0.0005535594839942666, - "loss": 2.1497, - "step": 524 - }, - { - "epoch": 0.36, - "learning_rate": 0.0005534161490683229, - "loss": 2.2136, - "step": 525 - }, - { - "epoch": 0.36, - "learning_rate": 0.0005532728141423793, - "loss": 2.1554, - "step": 526 - }, - { - "epoch": 0.36, - "learning_rate": 0.0005531294792164357, - "loss": 2.1288, - "step": 527 - }, - { - "epoch": 0.36, - "learning_rate": 0.0005529861442904921, - "loss": 2.1221, - "step": 528 - }, - { - "epoch": 0.36, - "learning_rate": 0.0005528428093645485, - "loss": 2.1583, - "step": 529 - }, - { - "epoch": 0.36, - "learning_rate": 0.0005526994744386049, - "loss": 2.1514, - "step": 530 - }, - { - "epoch": 0.36, - "learning_rate": 0.0005525561395126611, - "loss": 2.2256, - "step": 531 - }, - { - "epoch": 0.36, - "learning_rate": 0.0005524128045867176, - "loss": 2.1435, - "step": 532 - }, - { - "epoch": 0.36, - "learning_rate": 0.0005522694696607739, - "loss": 2.169, - "step": 533 - }, - { - "epoch": 0.37, - "learning_rate": 0.0005521261347348304, - "loss": 2.1734, - "step": 534 - }, - { - "epoch": 0.37, - "learning_rate": 0.0005519827998088867, - "loss": 2.0882, - "step": 535 - }, - { - "epoch": 0.37, - "learning_rate": 0.0005518394648829432, - "loss": 2.1364, - "step": 536 - }, - { - "epoch": 0.37, - "learning_rate": 0.0005516961299569994, - "loss": 2.1544, - "step": 537 - }, - { - "epoch": 0.37, - "learning_rate": 0.0005515527950310558, - "loss": 2.1356, - "step": 538 - }, - { - "epoch": 0.37, - "learning_rate": 0.0005514094601051122, - "loss": 2.019, - "step": 539 - }, - { - "epoch": 0.37, - "learning_rate": 0.0005512661251791686, - "loss": 2.1198, - "step": 540 - }, - { - "epoch": 0.37, - "learning_rate": 0.000551122790253225, - "loss": 2.1896, - "step": 541 - }, - { - "epoch": 0.37, - "learning_rate": 0.0005509794553272814, - "loss": 2.1615, - "step": 542 - }, - { - "epoch": 0.37, - "learning_rate": 0.0005508361204013377, - "loss": 2.1163, - "step": 543 - }, - { - "epoch": 0.37, - "learning_rate": 0.0005506927854753941, - "loss": 2.2056, - "step": 544 - }, - { - "epoch": 0.37, - "learning_rate": 0.0005505494505494505, - "loss": 2.2128, - "step": 545 - }, - { - "epoch": 0.37, - "learning_rate": 0.0005504061156235069, - "loss": 2.195, - "step": 546 - }, - { - "epoch": 0.37, - "learning_rate": 0.0005502627806975633, - "loss": 2.0768, - "step": 547 - }, - { - "epoch": 0.37, - "learning_rate": 0.0005501194457716197, - "loss": 2.1667, - "step": 548 - }, - { - "epoch": 0.38, - "learning_rate": 0.000549976110845676, - "loss": 2.1035, - "step": 549 - }, - { - "epoch": 0.38, - "learning_rate": 0.0005498327759197323, - "loss": 2.1628, - "step": 550 - }, - { - "epoch": 0.38, - "learning_rate": 0.0005496894409937888, - "loss": 2.1491, - "step": 551 - }, - { - "epoch": 0.38, - "learning_rate": 0.0005495461060678451, - "loss": 2.0676, - "step": 552 - }, - { - "epoch": 0.38, - "learning_rate": 0.0005494027711419016, - "loss": 2.3462, - "step": 553 - }, - { - "epoch": 0.38, - "learning_rate": 0.0005492594362159579, - "loss": 2.107, - "step": 554 - }, - { - "epoch": 0.38, - "learning_rate": 0.0005491161012900143, - "loss": 2.1844, - "step": 555 - }, - { - "epoch": 0.38, - "learning_rate": 0.0005489727663640706, - "loss": 2.1328, - "step": 556 - }, - { - "epoch": 0.38, - "learning_rate": 0.000548829431438127, - "loss": 2.1467, - "step": 557 - }, - { - "epoch": 0.38, - "learning_rate": 0.0005486860965121834, - "loss": 2.1706, - "step": 558 - }, - { - "epoch": 0.38, - "learning_rate": 0.0005485427615862398, - "loss": 2.1649, - "step": 559 - }, - { - "epoch": 0.38, - "learning_rate": 0.0005483994266602962, - "loss": 2.154, - "step": 560 - }, - { - "epoch": 0.38, - "learning_rate": 0.0005482560917343525, - "loss": 2.1095, - "step": 561 - }, - { - "epoch": 0.38, - "learning_rate": 0.0005481127568084089, - "loss": 2.1706, - "step": 562 - }, - { - "epoch": 0.38, - "learning_rate": 0.0005479694218824653, - "loss": 2.2179, - "step": 563 - }, - { - "epoch": 0.39, - "learning_rate": 0.0005478260869565217, - "loss": 2.2188, - "step": 564 - }, - { - "epoch": 0.39, - "learning_rate": 0.0005476827520305781, - "loss": 2.1167, - "step": 565 - }, - { - "epoch": 0.39, - "learning_rate": 0.0005475394171046345, - "loss": 2.1275, - "step": 566 - }, - { - "epoch": 0.39, - "learning_rate": 0.0005473960821786908, - "loss": 2.0766, - "step": 567 - }, - { - "epoch": 0.39, - "learning_rate": 0.0005472527472527472, - "loss": 2.1719, - "step": 568 - }, - { - "epoch": 0.39, - "learning_rate": 0.0005471094123268036, - "loss": 2.2378, - "step": 569 - }, - { - "epoch": 0.39, - "learning_rate": 0.00054696607740086, - "loss": 2.1612, - "step": 570 - }, - { - "epoch": 0.39, - "learning_rate": 0.0005468227424749163, - "loss": 2.0903, - "step": 571 - }, - { - "epoch": 0.39, - "learning_rate": 0.0005466794075489728, - "loss": 2.2279, - "step": 572 - }, - { - "epoch": 0.39, - "learning_rate": 0.000546536072623029, - "loss": 2.1084, - "step": 573 - }, - { - "epoch": 0.39, - "learning_rate": 0.0005463927376970855, - "loss": 2.1652, - "step": 574 - }, - { - "epoch": 0.39, - "learning_rate": 0.0005462494027711418, - "loss": 2.1035, - "step": 575 - }, - { - "epoch": 0.39, - "learning_rate": 0.0005461060678451983, - "loss": 2.2028, - "step": 576 - }, - { - "epoch": 0.39, - "learning_rate": 0.0005459627329192546, - "loss": 2.2347, - "step": 577 - }, - { - "epoch": 0.4, - "learning_rate": 0.000545819397993311, - "loss": 2.1609, - "step": 578 - }, - { - "epoch": 0.4, - "learning_rate": 0.0005456760630673673, - "loss": 2.0815, - "step": 579 - }, - { - "epoch": 0.4, - "learning_rate": 0.0005455327281414237, - "loss": 2.0575, - "step": 580 - }, - { - "epoch": 0.4, - "learning_rate": 0.0005453893932154801, - "loss": 2.1104, - "step": 581 - }, - { - "epoch": 0.4, - "learning_rate": 0.0005452460582895365, - "loss": 2.1784, - "step": 582 - }, - { - "epoch": 0.4, - "learning_rate": 0.0005451027233635929, - "loss": 2.117, - "step": 583 - }, - { - "epoch": 0.4, - "learning_rate": 0.0005449593884376493, - "loss": 2.0947, - "step": 584 - }, - { - "epoch": 0.4, - "learning_rate": 0.0005448160535117056, - "loss": 2.1204, - "step": 585 - }, - { - "epoch": 0.4, - "learning_rate": 0.000544672718585762, - "loss": 2.0614, - "step": 586 - }, - { - "epoch": 0.4, - "learning_rate": 0.0005445293836598184, - "loss": 2.1227, - "step": 587 - }, - { - "epoch": 0.4, - "learning_rate": 0.0005443860487338748, - "loss": 2.1831, - "step": 588 - }, - { - "epoch": 0.4, - "learning_rate": 0.0005442427138079312, - "loss": 2.2267, - "step": 589 - }, - { - "epoch": 0.4, - "learning_rate": 0.0005440993788819876, - "loss": 2.1008, - "step": 590 - }, - { - "epoch": 0.4, - "learning_rate": 0.0005439560439560439, - "loss": 2.1126, - "step": 591 - }, - { - "epoch": 0.4, - "learning_rate": 0.0005438127090301002, - "loss": 2.2081, - "step": 592 - }, - { - "epoch": 0.41, - "learning_rate": 0.0005436693741041567, - "loss": 2.2372, - "step": 593 - }, - { - "epoch": 0.41, - "learning_rate": 0.000543526039178213, - "loss": 2.1783, - "step": 594 - }, - { - "epoch": 0.41, - "learning_rate": 0.0005433827042522695, - "loss": 2.1631, - "step": 595 - }, - { - "epoch": 0.41, - "learning_rate": 0.0005432393693263258, - "loss": 2.2263, - "step": 596 - }, - { - "epoch": 0.41, - "learning_rate": 0.0005430960344003822, - "loss": 2.2438, - "step": 597 - }, - { - "epoch": 0.41, - "learning_rate": 0.0005429526994744385, - "loss": 2.125, - "step": 598 - }, - { - "epoch": 0.41, - "learning_rate": 0.0005428093645484949, - "loss": 2.1655, - "step": 599 - }, - { - "epoch": 0.41, - "learning_rate": 0.0005426660296225513, - "loss": 2.241, - "step": 600 - }, - { - "epoch": 0.41, - "eval_loss": 2.047600030899048, - "eval_runtime": 1736.5603, - "eval_samples_per_second": 5.759, - "eval_steps_per_second": 5.759, - "step": 600 - }, - { - "epoch": 0.41, - "learning_rate": 0.0005425226946966077, - "loss": 2.0728, - "step": 601 - }, - { - "epoch": 0.41, - "learning_rate": 0.0005423793597706641, - "loss": 2.051, - "step": 602 - }, - { - "epoch": 0.41, - "learning_rate": 0.0005422360248447204, - "loss": 2.1657, - "step": 603 - }, - { - "epoch": 0.41, - "learning_rate": 0.0005420926899187768, - "loss": 2.1084, - "step": 604 - }, - { - "epoch": 0.41, - "learning_rate": 0.0005419493549928332, - "loss": 2.1312, - "step": 605 - }, - { - "epoch": 0.41, - "learning_rate": 0.0005418060200668896, - "loss": 2.0724, - "step": 606 - }, - { - "epoch": 0.41, - "learning_rate": 0.000541662685140946, - "loss": 2.0999, - "step": 607 - }, - { - "epoch": 0.42, - "learning_rate": 0.0005415193502150024, - "loss": 2.1649, - "step": 608 - }, - { - "epoch": 0.42, - "learning_rate": 0.0005413760152890587, - "loss": 2.1458, - "step": 609 - }, - { - "epoch": 0.42, - "learning_rate": 0.0005412326803631151, - "loss": 2.071, - "step": 610 - }, - { - "epoch": 0.42, - "learning_rate": 0.0005410893454371715, - "loss": 2.1931, - "step": 611 - }, - { - "epoch": 0.42, - "learning_rate": 0.0005409460105112279, - "loss": 2.1968, - "step": 612 - }, - { - "epoch": 0.42, - "learning_rate": 0.0005408026755852842, - "loss": 2.105, - "step": 613 - }, - { - "epoch": 0.42, - "learning_rate": 0.0005406593406593407, - "loss": 2.1429, - "step": 614 - }, - { - "epoch": 0.42, - "learning_rate": 0.0005405160057333969, - "loss": 2.0812, - "step": 615 - }, - { - "epoch": 0.42, - "learning_rate": 0.0005403726708074534, - "loss": 2.1389, - "step": 616 - }, - { - "epoch": 0.42, - "learning_rate": 0.0005402293358815097, - "loss": 2.1685, - "step": 617 - }, - { - "epoch": 0.42, - "learning_rate": 0.0005400860009555662, - "loss": 2.2248, - "step": 618 - }, - { - "epoch": 0.42, - "learning_rate": 0.0005399426660296225, - "loss": 2.1771, - "step": 619 - }, - { - "epoch": 0.42, - "learning_rate": 0.0005397993311036789, - "loss": 2.1094, - "step": 620 - }, - { - "epoch": 0.42, - "learning_rate": 0.0005396559961777352, - "loss": 2.141, - "step": 621 - }, - { - "epoch": 0.43, - "learning_rate": 0.0005395126612517916, - "loss": 2.2442, - "step": 622 - }, - { - "epoch": 0.43, - "learning_rate": 0.000539369326325848, - "loss": 2.1703, - "step": 623 - }, - { - "epoch": 0.43, - "learning_rate": 0.0005392259913999044, - "loss": 2.1351, - "step": 624 - }, - { - "epoch": 0.43, - "learning_rate": 0.0005390826564739608, - "loss": 2.2008, - "step": 625 - }, - { - "epoch": 0.43, - "learning_rate": 0.0005389393215480172, - "loss": 2.2114, - "step": 626 - }, - { - "epoch": 0.43, - "learning_rate": 0.0005387959866220735, - "loss": 2.1368, - "step": 627 - }, - { - "epoch": 0.43, - "learning_rate": 0.0005386526516961299, - "loss": 2.2127, - "step": 628 - }, - { - "epoch": 0.43, - "learning_rate": 0.0005385093167701863, - "loss": 2.2554, - "step": 629 - }, - { - "epoch": 0.43, - "learning_rate": 0.0005383659818442427, - "loss": 2.143, - "step": 630 - }, - { - "epoch": 0.43, - "learning_rate": 0.0005382226469182991, - "loss": 2.1306, - "step": 631 - }, - { - "epoch": 0.43, - "learning_rate": 0.0005380793119923555, - "loss": 2.1257, - "step": 632 - }, - { - "epoch": 0.43, - "learning_rate": 0.0005379359770664118, - "loss": 2.2013, - "step": 633 - }, - { - "epoch": 0.43, - "learning_rate": 0.0005377926421404681, - "loss": 2.1856, - "step": 634 - }, - { - "epoch": 0.43, - "learning_rate": 0.0005376493072145246, - "loss": 2.1917, - "step": 635 - }, - { - "epoch": 0.43, - "learning_rate": 0.0005375059722885809, - "loss": 2.1044, - "step": 636 - }, - { - "epoch": 0.44, - "learning_rate": 0.0005373626373626374, - "loss": 2.1909, - "step": 637 - }, - { - "epoch": 0.44, - "learning_rate": 0.0005372193024366937, - "loss": 2.0706, - "step": 638 - }, - { - "epoch": 0.44, - "learning_rate": 0.0005370759675107501, - "loss": 2.142, - "step": 639 - }, - { - "epoch": 0.44, - "learning_rate": 0.0005369326325848064, - "loss": 2.1813, - "step": 640 - }, - { - "epoch": 0.44, - "learning_rate": 0.0005367892976588628, - "loss": 2.1802, - "step": 641 - }, - { - "epoch": 0.44, - "learning_rate": 0.0005366459627329192, - "loss": 2.1461, - "step": 642 - }, - { - "epoch": 0.44, - "learning_rate": 0.0005365026278069756, - "loss": 2.045, - "step": 643 - }, - { - "epoch": 0.44, - "learning_rate": 0.000536359292881032, - "loss": 2.1449, - "step": 644 - }, - { - "epoch": 0.44, - "learning_rate": 0.0005362159579550883, - "loss": 2.0893, - "step": 645 - }, - { - "epoch": 0.44, - "learning_rate": 0.0005360726230291447, - "loss": 2.1477, - "step": 646 - }, - { - "epoch": 0.44, - "learning_rate": 0.0005359292881032011, - "loss": 2.172, - "step": 647 - }, - { - "epoch": 0.44, - "learning_rate": 0.0005357859531772575, - "loss": 2.2154, - "step": 648 - }, - { - "epoch": 0.44, - "learning_rate": 0.0005356426182513139, - "loss": 2.0651, - "step": 649 - }, - { - "epoch": 0.44, - "learning_rate": 0.0005354992833253703, - "loss": 2.2091, - "step": 650 - }, - { - "epoch": 0.45, - "learning_rate": 0.0005353559483994266, - "loss": 2.1892, - "step": 651 - }, - { - "epoch": 0.45, - "learning_rate": 0.000535212613473483, - "loss": 2.1727, - "step": 652 - }, - { - "epoch": 0.45, - "learning_rate": 0.0005350692785475393, - "loss": 2.1532, - "step": 653 - }, - { - "epoch": 0.45, - "learning_rate": 0.0005349259436215958, - "loss": 2.0929, - "step": 654 - }, - { - "epoch": 0.45, - "learning_rate": 0.0005347826086956521, - "loss": 2.1364, - "step": 655 - }, - { - "epoch": 0.45, - "learning_rate": 0.0005346392737697086, - "loss": 2.1704, - "step": 656 - }, - { - "epoch": 0.45, - "learning_rate": 0.0005344959388437648, - "loss": 2.116, - "step": 657 - }, - { - "epoch": 0.45, - "learning_rate": 0.0005343526039178213, - "loss": 2.2498, - "step": 658 - }, - { - "epoch": 0.45, - "learning_rate": 0.0005342092689918776, - "loss": 2.2484, - "step": 659 - }, - { - "epoch": 0.45, - "learning_rate": 0.000534065934065934, - "loss": 2.0954, - "step": 660 - }, - { - "epoch": 0.45, - "learning_rate": 0.0005339225991399904, - "loss": 2.1747, - "step": 661 - }, - { - "epoch": 0.45, - "learning_rate": 0.0005337792642140468, - "loss": 2.1775, - "step": 662 - }, - { - "epoch": 0.45, - "learning_rate": 0.0005336359292881031, - "loss": 2.2085, - "step": 663 - }, - { - "epoch": 0.45, - "learning_rate": 0.0005334925943621595, - "loss": 2.1576, - "step": 664 - }, - { - "epoch": 0.45, - "learning_rate": 0.0005333492594362159, - "loss": 2.1618, - "step": 665 - }, - { - "epoch": 0.46, - "learning_rate": 0.0005332059245102723, - "loss": 2.1005, - "step": 666 - }, - { - "epoch": 0.46, - "learning_rate": 0.0005330625895843287, - "loss": 2.1803, - "step": 667 - }, - { - "epoch": 0.46, - "learning_rate": 0.0005329192546583851, - "loss": 2.036, - "step": 668 - }, - { - "epoch": 0.46, - "learning_rate": 0.0005327759197324414, - "loss": 2.1275, - "step": 669 - }, - { - "epoch": 0.46, - "learning_rate": 0.0005326325848064978, - "loss": 2.1677, - "step": 670 - }, - { - "epoch": 0.46, - "learning_rate": 0.0005324892498805542, - "loss": 2.1649, - "step": 671 - }, - { - "epoch": 0.46, - "learning_rate": 0.0005323459149546106, - "loss": 2.1187, - "step": 672 - }, - { - "epoch": 0.46, - "learning_rate": 0.000532202580028667, - "loss": 2.0904, - "step": 673 - }, - { - "epoch": 0.46, - "learning_rate": 0.0005320592451027233, - "loss": 2.1749, - "step": 674 - }, - { - "epoch": 0.46, - "learning_rate": 0.0005319159101767797, - "loss": 2.1062, - "step": 675 - }, - { - "epoch": 0.46, - "learning_rate": 0.000531772575250836, - "loss": 2.1854, - "step": 676 - }, - { - "epoch": 0.46, - "learning_rate": 0.0005316292403248925, - "loss": 2.1493, - "step": 677 - }, - { - "epoch": 0.46, - "learning_rate": 0.0005314859053989488, - "loss": 2.2119, - "step": 678 - }, - { - "epoch": 0.46, - "learning_rate": 0.0005313425704730053, - "loss": 2.104, - "step": 679 - }, - { - "epoch": 0.46, - "learning_rate": 0.0005311992355470616, - "loss": 2.2132, - "step": 680 - }, - { - "epoch": 0.47, - "learning_rate": 0.0005310559006211179, - "loss": 2.2275, - "step": 681 - }, - { - "epoch": 0.47, - "learning_rate": 0.0005309125656951743, - "loss": 2.146, - "step": 682 - }, - { - "epoch": 0.47, - "learning_rate": 0.0005307692307692307, - "loss": 2.2401, - "step": 683 - }, - { - "epoch": 0.47, - "learning_rate": 0.0005306258958432871, - "loss": 2.1453, - "step": 684 - }, - { - "epoch": 0.47, - "learning_rate": 0.0005304825609173435, - "loss": 2.1154, - "step": 685 - }, - { - "epoch": 0.47, - "learning_rate": 0.0005303392259913999, - "loss": 2.2073, - "step": 686 - }, - { - "epoch": 0.47, - "learning_rate": 0.0005301958910654562, - "loss": 2.3166, - "step": 687 - }, - { - "epoch": 0.47, - "learning_rate": 0.0005300525561395126, - "loss": 2.1223, - "step": 688 - }, - { - "epoch": 0.47, - "learning_rate": 0.000529909221213569, - "loss": 2.1857, - "step": 689 - }, - { - "epoch": 0.47, - "learning_rate": 0.0005297658862876254, - "loss": 2.1932, - "step": 690 - }, - { - "epoch": 0.47, - "learning_rate": 0.0005296225513616818, - "loss": 2.1361, - "step": 691 - }, - { - "epoch": 0.47, - "learning_rate": 0.0005294792164357382, - "loss": 2.1133, - "step": 692 - }, - { - "epoch": 0.47, - "learning_rate": 0.0005293358815097945, - "loss": 2.137, - "step": 693 - }, - { - "epoch": 0.47, - "learning_rate": 0.0005291925465838509, - "loss": 2.1077, - "step": 694 - }, - { - "epoch": 0.48, - "learning_rate": 0.0005290492116579072, - "loss": 2.2026, - "step": 695 - }, - { - "epoch": 0.48, - "learning_rate": 0.0005289058767319637, - "loss": 2.1443, - "step": 696 - }, - { - "epoch": 0.48, - "learning_rate": 0.00052876254180602, - "loss": 2.0933, - "step": 697 - }, - { - "epoch": 0.48, - "learning_rate": 0.0005286192068800765, - "loss": 2.1728, - "step": 698 - }, - { - "epoch": 0.48, - "learning_rate": 0.0005284758719541327, - "loss": 2.1026, - "step": 699 - }, - { - "epoch": 0.48, - "learning_rate": 0.0005283325370281892, - "loss": 2.1373, - "step": 700 - }, - { - "epoch": 0.48, - "learning_rate": 0.0005281892021022455, - "loss": 2.0439, - "step": 701 - }, - { - "epoch": 0.48, - "learning_rate": 0.0005280458671763019, - "loss": 2.1893, - "step": 702 - }, - { - "epoch": 0.48, - "learning_rate": 0.0005279025322503583, - "loss": 2.2672, - "step": 703 - }, - { - "epoch": 0.48, - "learning_rate": 0.0005277591973244147, - "loss": 2.176, - "step": 704 - }, - { - "epoch": 0.48, - "learning_rate": 0.000527615862398471, - "loss": 2.1351, - "step": 705 - }, - { - "epoch": 0.48, - "learning_rate": 0.0005274725274725274, - "loss": 2.0995, - "step": 706 - }, - { - "epoch": 0.48, - "learning_rate": 0.0005273291925465838, - "loss": 2.1155, - "step": 707 - }, - { - "epoch": 0.48, - "learning_rate": 0.0005271858576206402, - "loss": 2.1424, - "step": 708 - }, - { - "epoch": 0.48, - "learning_rate": 0.0005270425226946966, - "loss": 2.2112, - "step": 709 - }, - { - "epoch": 0.49, - "learning_rate": 0.000526899187768753, - "loss": 2.1792, - "step": 710 - }, - { - "epoch": 0.49, - "learning_rate": 0.0005267558528428093, - "loss": 2.133, - "step": 711 - }, - { - "epoch": 0.49, - "learning_rate": 0.0005266125179168657, - "loss": 2.1542, - "step": 712 - }, - { - "epoch": 0.49, - "learning_rate": 0.0005264691829909221, - "loss": 2.1184, - "step": 713 - }, - { - "epoch": 0.49, - "learning_rate": 0.0005263258480649785, - "loss": 2.0157, - "step": 714 - }, - { - "epoch": 0.49, - "learning_rate": 0.0005261825131390349, - "loss": 2.1661, - "step": 715 - }, - { - "epoch": 0.49, - "learning_rate": 0.0005260391782130912, - "loss": 2.2518, - "step": 716 - }, - { - "epoch": 0.49, - "learning_rate": 0.0005258958432871477, - "loss": 2.0832, - "step": 717 - }, - { - "epoch": 0.49, - "learning_rate": 0.0005257525083612039, - "loss": 2.2078, - "step": 718 - }, - { - "epoch": 0.49, - "learning_rate": 0.0005256091734352604, - "loss": 2.085, - "step": 719 - }, - { - "epoch": 0.49, - "learning_rate": 0.0005254658385093167, - "loss": 2.1, - "step": 720 - }, - { - "epoch": 0.49, - "learning_rate": 0.0005253225035833732, - "loss": 2.0984, - "step": 721 - }, - { - "epoch": 0.49, - "learning_rate": 0.0005251791686574295, - "loss": 2.1313, - "step": 722 - }, - { - "epoch": 0.49, - "learning_rate": 0.0005250358337314858, - "loss": 2.0344, - "step": 723 - }, - { - "epoch": 0.49, - "learning_rate": 0.0005248924988055422, - "loss": 2.2206, - "step": 724 - }, - { - "epoch": 0.5, - "learning_rate": 0.0005247491638795986, - "loss": 2.2121, - "step": 725 - }, - { - "epoch": 0.5, - "learning_rate": 0.000524605828953655, - "loss": 2.1267, - "step": 726 - }, - { - "epoch": 0.5, - "learning_rate": 0.0005244624940277114, - "loss": 2.0358, - "step": 727 - }, - { - "epoch": 0.5, - "learning_rate": 0.0005243191591017678, - "loss": 2.1301, - "step": 728 - }, - { - "epoch": 0.5, - "learning_rate": 0.0005241758241758241, - "loss": 2.1678, - "step": 729 - }, - { - "epoch": 0.5, - "learning_rate": 0.0005240324892498805, - "loss": 2.0717, - "step": 730 - }, - { - "epoch": 0.5, - "learning_rate": 0.0005238891543239369, - "loss": 2.1041, - "step": 731 - }, - { - "epoch": 0.5, - "learning_rate": 0.0005237458193979933, - "loss": 2.1661, - "step": 732 - }, - { - "epoch": 0.5, - "learning_rate": 0.0005236024844720497, - "loss": 2.2038, - "step": 733 - }, - { - "epoch": 0.5, - "learning_rate": 0.0005234591495461061, - "loss": 2.0797, - "step": 734 - }, - { - "epoch": 0.5, - "learning_rate": 0.0005233158146201624, - "loss": 1.9513, - "step": 735 - }, - { - "epoch": 0.5, - "learning_rate": 0.0005231724796942188, - "loss": 2.0952, - "step": 736 - }, - { - "epoch": 0.5, - "learning_rate": 0.0005230291447682751, - "loss": 2.0112, - "step": 737 - }, - { - "epoch": 0.5, - "learning_rate": 0.0005228858098423316, - "loss": 2.1552, - "step": 738 - }, - { - "epoch": 0.51, - "learning_rate": 0.0005227424749163879, - "loss": 2.1163, - "step": 739 - }, - { - "epoch": 0.51, - "learning_rate": 0.0005225991399904444, - "loss": 2.1879, - "step": 740 - }, - { - "epoch": 0.51, - "learning_rate": 0.0005224558050645006, - "loss": 2.1494, - "step": 741 - }, - { - "epoch": 0.51, - "learning_rate": 0.000522312470138557, - "loss": 2.1554, - "step": 742 - }, - { - "epoch": 0.51, - "learning_rate": 0.0005221691352126134, - "loss": 2.1672, - "step": 743 - }, - { - "epoch": 0.51, - "learning_rate": 0.0005220258002866698, - "loss": 2.1297, - "step": 744 - }, - { - "epoch": 0.51, - "learning_rate": 0.0005218824653607262, - "loss": 2.1553, - "step": 745 - }, - { - "epoch": 0.51, - "learning_rate": 0.0005217391304347826, - "loss": 2.1184, - "step": 746 - }, - { - "epoch": 0.51, - "learning_rate": 0.000521595795508839, - "loss": 2.1945, - "step": 747 - }, - { - "epoch": 0.51, - "learning_rate": 0.0005214524605828953, - "loss": 2.1973, - "step": 748 - }, - { - "epoch": 0.51, - "learning_rate": 0.0005213091256569517, - "loss": 2.076, - "step": 749 - }, - { - "epoch": 0.51, - "learning_rate": 0.0005211657907310081, - "loss": 2.1019, - "step": 750 - }, - { - "epoch": 0.51, - "learning_rate": 0.0005210224558050645, - "loss": 2.1234, - "step": 751 - }, - { - "epoch": 0.51, - "learning_rate": 0.0005208791208791209, - "loss": 2.221, - "step": 752 - }, - { - "epoch": 0.51, - "learning_rate": 0.0005207357859531772, - "loss": 2.0994, - "step": 753 - }, - { - "epoch": 0.52, - "learning_rate": 0.0005205924510272336, - "loss": 2.0699, - "step": 754 - }, - { - "epoch": 0.52, - "learning_rate": 0.00052044911610129, - "loss": 2.1554, - "step": 755 - }, - { - "epoch": 0.52, - "learning_rate": 0.0005203057811753463, - "loss": 2.1254, - "step": 756 - }, - { - "epoch": 0.52, - "learning_rate": 0.0005201624462494028, - "loss": 2.2119, - "step": 757 - }, - { - "epoch": 0.52, - "learning_rate": 0.0005200191113234591, - "loss": 2.0509, - "step": 758 - }, - { - "epoch": 0.52, - "learning_rate": 0.0005198757763975154, - "loss": 2.2616, - "step": 759 - }, - { - "epoch": 0.52, - "learning_rate": 0.0005197324414715718, - "loss": 2.0417, - "step": 760 - }, - { - "epoch": 0.52, - "learning_rate": 0.0005195891065456282, - "loss": 2.1356, - "step": 761 - }, - { - "epoch": 0.52, - "learning_rate": 0.0005194457716196846, - "loss": 2.0701, - "step": 762 - }, - { - "epoch": 0.52, - "learning_rate": 0.000519302436693741, - "loss": 2.1677, - "step": 763 - }, - { - "epoch": 0.52, - "learning_rate": 0.0005191591017677974, - "loss": 2.0766, - "step": 764 - }, - { - "epoch": 0.52, - "learning_rate": 0.0005190157668418537, - "loss": 2.1733, - "step": 765 - }, - { - "epoch": 0.52, - "learning_rate": 0.0005188724319159101, - "loss": 2.1307, - "step": 766 - }, - { - "epoch": 0.52, - "learning_rate": 0.0005187290969899665, - "loss": 2.0885, - "step": 767 - }, - { - "epoch": 0.53, - "learning_rate": 0.0005185857620640229, - "loss": 2.1836, - "step": 768 - }, - { - "epoch": 0.53, - "learning_rate": 0.0005184424271380793, - "loss": 2.1477, - "step": 769 - }, - { - "epoch": 0.53, - "learning_rate": 0.0005182990922121357, - "loss": 2.1539, - "step": 770 - }, - { - "epoch": 0.53, - "learning_rate": 0.000518155757286192, - "loss": 2.1302, - "step": 771 - }, - { - "epoch": 0.53, - "learning_rate": 0.0005180124223602484, - "loss": 2.1686, - "step": 772 - }, - { - "epoch": 0.53, - "learning_rate": 0.0005178690874343048, - "loss": 2.1822, - "step": 773 - }, - { - "epoch": 0.53, - "learning_rate": 0.0005177257525083612, - "loss": 2.1349, - "step": 774 - }, - { - "epoch": 0.53, - "learning_rate": 0.0005175824175824176, - "loss": 2.1254, - "step": 775 - }, - { - "epoch": 0.53, - "learning_rate": 0.0005174390826564739, - "loss": 2.1617, - "step": 776 - }, - { - "epoch": 0.53, - "learning_rate": 0.0005172957477305302, - "loss": 2.0366, - "step": 777 - }, - { - "epoch": 0.53, - "learning_rate": 0.0005171524128045866, - "loss": 2.1776, - "step": 778 - }, - { - "epoch": 0.53, - "learning_rate": 0.000517009077878643, - "loss": 2.0636, - "step": 779 - }, - { - "epoch": 0.53, - "learning_rate": 0.0005168657429526994, - "loss": 2.0966, - "step": 780 - }, - { - "epoch": 0.53, - "learning_rate": 0.0005167224080267558, - "loss": 2.0935, - "step": 781 - }, - { - "epoch": 0.53, - "learning_rate": 0.0005165790731008122, - "loss": 2.0194, - "step": 782 - }, - { - "epoch": 0.54, - "learning_rate": 0.0005164357381748685, - "loss": 2.1995, - "step": 783 - }, - { - "epoch": 0.54, - "learning_rate": 0.0005162924032489249, - "loss": 2.0758, - "step": 784 - }, - { - "epoch": 0.54, - "learning_rate": 0.0005161490683229813, - "loss": 2.134, - "step": 785 - }, - { - "epoch": 0.54, - "learning_rate": 0.0005160057333970377, - "loss": 2.1708, - "step": 786 - }, - { - "epoch": 0.54, - "learning_rate": 0.0005158623984710941, - "loss": 2.155, - "step": 787 - }, - { - "epoch": 0.54, - "learning_rate": 0.0005157190635451505, - "loss": 2.2275, - "step": 788 - }, - { - "epoch": 0.54, - "learning_rate": 0.0005155757286192068, - "loss": 2.151, - "step": 789 - }, - { - "epoch": 0.54, - "learning_rate": 0.0005154323936932632, - "loss": 2.1853, - "step": 790 - }, - { - "epoch": 0.54, - "learning_rate": 0.0005152890587673196, - "loss": 2.1455, - "step": 791 - }, - { - "epoch": 0.54, - "learning_rate": 0.000515145723841376, - "loss": 2.1934, - "step": 792 - }, - { - "epoch": 0.54, - "learning_rate": 0.0005150023889154323, - "loss": 2.0501, - "step": 793 - }, - { - "epoch": 0.54, - "learning_rate": 0.0005148590539894888, - "loss": 2.197, - "step": 794 - }, - { - "epoch": 0.54, - "learning_rate": 0.000514715719063545, - "loss": 2.0713, - "step": 795 - }, - { - "epoch": 0.54, - "learning_rate": 0.0005145723841376015, - "loss": 2.1562, - "step": 796 - }, - { - "epoch": 0.54, - "learning_rate": 0.0005144290492116578, - "loss": 2.158, - "step": 797 - }, - { - "epoch": 0.55, - "learning_rate": 0.0005142857142857142, - "loss": 2.0456, - "step": 798 - }, - { - "epoch": 0.55, - "learning_rate": 0.0005141423793597706, - "loss": 2.157, - "step": 799 - }, - { - "epoch": 0.55, - "learning_rate": 0.000513999044433827, - "loss": 2.0934, - "step": 800 - }, - { - "epoch": 0.55, - "eval_loss": 2.0316452980041504, - "eval_runtime": 1689.8048, - "eval_samples_per_second": 5.918, - "eval_steps_per_second": 5.918, - "step": 800 - }, - { - "epoch": 0.55, - "learning_rate": 0.0005138557095078833, - "loss": 2.1501, - "step": 801 - }, - { - "epoch": 0.55, - "learning_rate": 0.0005137123745819397, - "loss": 2.0977, - "step": 802 - }, - { - "epoch": 0.55, - "learning_rate": 0.0005135690396559961, - "loss": 2.2608, - "step": 803 - }, - { - "epoch": 0.55, - "learning_rate": 0.0005134257047300525, - "loss": 2.0765, - "step": 804 - }, - { - "epoch": 0.55, - "learning_rate": 0.0005132823698041089, - "loss": 2.1414, - "step": 805 - }, - { - "epoch": 0.55, - "learning_rate": 0.0005131390348781653, - "loss": 2.2186, - "step": 806 - }, - { - "epoch": 0.55, - "learning_rate": 0.0005129956999522216, - "loss": 2.1596, - "step": 807 - }, - { - "epoch": 0.55, - "learning_rate": 0.000512852365026278, - "loss": 2.1241, - "step": 808 - }, - { - "epoch": 0.55, - "learning_rate": 0.0005127090301003344, - "loss": 2.0742, - "step": 809 - }, - { - "epoch": 0.55, - "learning_rate": 0.0005125656951743908, - "loss": 2.0797, - "step": 810 - }, - { - "epoch": 0.55, - "learning_rate": 0.0005124223602484472, - "loss": 2.1866, - "step": 811 - }, - { - "epoch": 0.56, - "learning_rate": 0.0005122790253225035, - "loss": 2.1895, - "step": 812 - }, - { - "epoch": 0.56, - "learning_rate": 0.00051213569039656, - "loss": 2.0698, - "step": 813 - }, - { - "epoch": 0.56, - "learning_rate": 0.0005119923554706162, - "loss": 2.1982, - "step": 814 - }, - { - "epoch": 0.56, - "learning_rate": 0.0005118490205446727, - "loss": 2.1768, - "step": 815 - }, - { - "epoch": 0.56, - "learning_rate": 0.000511705685618729, - "loss": 2.0943, - "step": 816 - }, - { - "epoch": 0.56, - "learning_rate": 0.0005115623506927855, - "loss": 2.067, - "step": 817 - }, - { - "epoch": 0.56, - "learning_rate": 0.0005114190157668418, - "loss": 2.0416, - "step": 818 - }, - { - "epoch": 0.56, - "learning_rate": 0.0005112756808408981, - "loss": 2.1266, - "step": 819 - }, - { - "epoch": 0.56, - "learning_rate": 0.0005111323459149545, - "loss": 2.1835, - "step": 820 - }, - { - "epoch": 0.56, - "learning_rate": 0.0005109890109890109, - "loss": 2.1033, - "step": 821 - }, - { - "epoch": 0.56, - "learning_rate": 0.0005108456760630673, - "loss": 2.1953, - "step": 822 - }, - { - "epoch": 0.56, - "learning_rate": 0.0005107023411371237, - "loss": 2.2002, - "step": 823 - }, - { - "epoch": 0.56, - "learning_rate": 0.0005105590062111801, - "loss": 2.0887, - "step": 824 - }, - { - "epoch": 0.56, - "learning_rate": 0.0005104156712852364, - "loss": 2.1419, - "step": 825 - }, - { - "epoch": 0.56, - "learning_rate": 0.0005102723363592928, - "loss": 2.0993, - "step": 826 - }, - { - "epoch": 0.57, - "learning_rate": 0.0005101290014333492, - "loss": 2.1632, - "step": 827 - }, - { - "epoch": 0.57, - "learning_rate": 0.0005099856665074056, - "loss": 2.11, - "step": 828 - }, - { - "epoch": 0.57, - "learning_rate": 0.000509842331581462, - "loss": 2.1375, - "step": 829 - }, - { - "epoch": 0.57, - "learning_rate": 0.0005096989966555184, - "loss": 2.0241, - "step": 830 - }, - { - "epoch": 0.57, - "learning_rate": 0.0005095556617295748, - "loss": 1.9997, - "step": 831 - }, - { - "epoch": 0.57, - "learning_rate": 0.0005094123268036311, - "loss": 2.095, - "step": 832 - }, - { - "epoch": 0.57, - "learning_rate": 0.0005092689918776874, - "loss": 2.114, - "step": 833 - }, - { - "epoch": 0.57, - "learning_rate": 0.0005091256569517439, - "loss": 2.0887, - "step": 834 - }, - { - "epoch": 0.57, - "learning_rate": 0.0005089823220258002, - "loss": 2.1666, - "step": 835 - }, - { - "epoch": 0.57, - "learning_rate": 0.0005088389870998567, - "loss": 2.1407, - "step": 836 - }, - { - "epoch": 0.57, - "learning_rate": 0.0005086956521739129, - "loss": 2.2634, - "step": 837 - }, - { - "epoch": 0.57, - "learning_rate": 0.0005085523172479694, - "loss": 2.2145, - "step": 838 - }, - { - "epoch": 0.57, - "learning_rate": 0.0005084089823220257, - "loss": 2.0569, - "step": 839 - }, - { - "epoch": 0.57, - "learning_rate": 0.0005082656473960821, - "loss": 2.1156, - "step": 840 - }, - { - "epoch": 0.57, - "learning_rate": 0.0005081223124701385, - "loss": 2.1948, - "step": 841 - }, - { - "epoch": 0.58, - "learning_rate": 0.0005079789775441949, - "loss": 2.1315, - "step": 842 - }, - { - "epoch": 0.58, - "learning_rate": 0.0005078356426182512, - "loss": 2.1392, - "step": 843 - }, - { - "epoch": 0.58, - "learning_rate": 0.0005076923076923076, - "loss": 2.1225, - "step": 844 - }, - { - "epoch": 0.58, - "learning_rate": 0.000507548972766364, - "loss": 2.1076, - "step": 845 - }, - { - "epoch": 0.58, - "learning_rate": 0.0005074056378404204, - "loss": 2.0865, - "step": 846 - }, - { - "epoch": 0.58, - "learning_rate": 0.0005072623029144768, - "loss": 2.159, - "step": 847 - }, - { - "epoch": 0.58, - "learning_rate": 0.0005071189679885332, - "loss": 2.0958, - "step": 848 - }, - { - "epoch": 0.58, - "learning_rate": 0.0005069756330625896, - "loss": 2.1292, - "step": 849 - }, - { - "epoch": 0.58, - "learning_rate": 0.0005068322981366459, - "loss": 2.1155, - "step": 850 - }, - { - "epoch": 0.58, - "learning_rate": 0.0005066889632107023, - "loss": 2.0926, - "step": 851 - }, - { - "epoch": 0.58, - "learning_rate": 0.0005065456282847586, - "loss": 2.1385, - "step": 852 - }, - { - "epoch": 0.58, - "learning_rate": 0.0005064022933588151, - "loss": 2.1187, - "step": 853 - }, - { - "epoch": 0.58, - "learning_rate": 0.0005062589584328714, - "loss": 2.0747, - "step": 854 - }, - { - "epoch": 0.58, - "learning_rate": 0.0005061156235069279, - "loss": 2.147, - "step": 855 - }, - { - "epoch": 0.59, - "learning_rate": 0.0005059722885809841, - "loss": 2.2023, - "step": 856 - }, - { - "epoch": 0.59, - "learning_rate": 0.0005058289536550406, - "loss": 2.0863, - "step": 857 - }, - { - "epoch": 0.59, - "learning_rate": 0.0005056856187290969, - "loss": 2.1402, - "step": 858 - }, - { - "epoch": 0.59, - "learning_rate": 0.0005055422838031533, - "loss": 2.1758, - "step": 859 - }, - { - "epoch": 0.59, - "learning_rate": 0.0005053989488772097, - "loss": 2.1767, - "step": 860 - }, - { - "epoch": 0.59, - "learning_rate": 0.000505255613951266, - "loss": 2.2041, - "step": 861 - }, - { - "epoch": 0.59, - "learning_rate": 0.0005051122790253224, - "loss": 2.1227, - "step": 862 - }, - { - "epoch": 0.59, - "learning_rate": 0.0005049689440993788, - "loss": 2.1992, - "step": 863 - }, - { - "epoch": 0.59, - "learning_rate": 0.0005048256091734352, - "loss": 2.0532, - "step": 864 - }, - { - "epoch": 0.59, - "learning_rate": 0.0005046822742474916, - "loss": 2.1845, - "step": 865 - }, - { - "epoch": 0.59, - "learning_rate": 0.000504538939321548, - "loss": 2.1466, - "step": 866 - }, - { - "epoch": 0.59, - "learning_rate": 0.0005043956043956043, - "loss": 2.0371, - "step": 867 - }, - { - "epoch": 0.59, - "learning_rate": 0.0005042522694696607, - "loss": 2.0987, - "step": 868 - }, - { - "epoch": 0.59, - "learning_rate": 0.0005041089345437171, - "loss": 2.1345, - "step": 869 - }, - { - "epoch": 0.59, - "learning_rate": 0.0005039655996177735, - "loss": 2.2204, - "step": 870 - }, - { - "epoch": 0.6, - "learning_rate": 0.0005038222646918299, - "loss": 2.1403, - "step": 871 - }, - { - "epoch": 0.6, - "learning_rate": 0.0005036789297658863, - "loss": 2.0582, - "step": 872 - }, - { - "epoch": 0.6, - "learning_rate": 0.0005035355948399425, - "loss": 2.1603, - "step": 873 - }, - { - "epoch": 0.6, - "learning_rate": 0.000503392259913999, - "loss": 2.0686, - "step": 874 - }, - { - "epoch": 0.6, - "learning_rate": 0.0005032489249880553, - "loss": 2.0596, - "step": 875 - }, - { - "epoch": 0.6, - "learning_rate": 0.0005031055900621118, - "loss": 2.1168, - "step": 876 - }, - { - "epoch": 0.6, - "learning_rate": 0.0005029622551361681, - "loss": 2.19, - "step": 877 - }, - { - "epoch": 0.6, - "learning_rate": 0.0005028189202102246, - "loss": 2.2216, - "step": 878 - }, - { - "epoch": 0.6, - "learning_rate": 0.0005026755852842808, - "loss": 2.1723, - "step": 879 - }, - { - "epoch": 0.6, - "learning_rate": 0.0005025322503583372, - "loss": 2.1189, - "step": 880 - }, - { - "epoch": 0.6, - "learning_rate": 0.0005023889154323936, - "loss": 2.1425, - "step": 881 - }, - { - "epoch": 0.6, - "learning_rate": 0.00050224558050645, - "loss": 2.2003, - "step": 882 - }, - { - "epoch": 0.6, - "learning_rate": 0.0005021022455805064, - "loss": 2.211, - "step": 883 - }, - { - "epoch": 0.6, - "learning_rate": 0.0005019589106545628, - "loss": 2.1302, - "step": 884 - }, - { - "epoch": 0.61, - "learning_rate": 0.0005018155757286191, - "loss": 2.1729, - "step": 885 - }, - { - "epoch": 0.61, - "learning_rate": 0.0005016722408026755, - "loss": 2.1203, - "step": 886 - }, - { - "epoch": 0.61, - "learning_rate": 0.0005015289058767319, - "loss": 2.0511, - "step": 887 - }, - { - "epoch": 0.61, - "learning_rate": 0.0005013855709507883, - "loss": 1.9758, - "step": 888 - }, - { - "epoch": 0.61, - "learning_rate": 0.0005012422360248447, - "loss": 2.1151, - "step": 889 - }, - { - "epoch": 0.61, - "learning_rate": 0.0005010989010989011, - "loss": 2.0998, - "step": 890 - }, - { - "epoch": 0.61, - "learning_rate": 0.0005009555661729575, - "loss": 2.0739, - "step": 891 - }, - { - "epoch": 0.61, - "learning_rate": 0.0005008122312470138, - "loss": 2.0421, - "step": 892 - }, - { - "epoch": 0.61, - "learning_rate": 0.0005006688963210702, - "loss": 2.0808, - "step": 893 - }, - { - "epoch": 0.61, - "learning_rate": 0.0005005255613951265, - "loss": 2.1301, - "step": 894 - }, - { - "epoch": 0.61, - "learning_rate": 0.000500382226469183, - "loss": 2.1207, - "step": 895 - }, - { - "epoch": 0.61, - "learning_rate": 0.0005002388915432393, - "loss": 2.1089, - "step": 896 - }, - { - "epoch": 0.61, - "learning_rate": 0.0005000955566172958, - "loss": 2.1796, - "step": 897 - }, - { - "epoch": 0.61, - "learning_rate": 0.000499952221691352, - "loss": 2.1123, - "step": 898 - }, - { - "epoch": 0.61, - "learning_rate": 0.0004998088867654085, - "loss": 2.1304, - "step": 899 - }, - { - "epoch": 0.62, - "learning_rate": 0.0004996655518394648, - "loss": 2.0691, - "step": 900 - }, - { - "epoch": 0.62, - "learning_rate": 0.0004995222169135212, - "loss": 2.215, - "step": 901 - }, - { - "epoch": 0.62, - "learning_rate": 0.0004993788819875776, - "loss": 2.1303, - "step": 902 - }, - { - "epoch": 0.62, - "learning_rate": 0.000499235547061634, - "loss": 2.0983, - "step": 903 - }, - { - "epoch": 0.62, - "learning_rate": 0.0004990922121356903, - "loss": 2.0761, - "step": 904 - }, - { - "epoch": 0.62, - "learning_rate": 0.0004989488772097467, - "loss": 2.1889, - "step": 905 - }, - { - "epoch": 0.62, - "learning_rate": 0.0004988055422838031, - "loss": 2.0557, - "step": 906 - }, - { - "epoch": 0.62, - "learning_rate": 0.0004986622073578595, - "loss": 2.1591, - "step": 907 - }, - { - "epoch": 0.62, - "learning_rate": 0.0004985188724319159, - "loss": 2.1217, - "step": 908 - }, - { - "epoch": 0.62, - "learning_rate": 0.0004983755375059723, - "loss": 2.0165, - "step": 909 - }, - { - "epoch": 0.62, - "learning_rate": 0.0004982322025800286, - "loss": 2.2102, - "step": 910 - }, - { - "epoch": 0.62, - "learning_rate": 0.000498088867654085, - "loss": 2.187, - "step": 911 - }, - { - "epoch": 0.62, - "learning_rate": 0.0004979455327281414, - "loss": 2.0758, - "step": 912 - }, - { - "epoch": 0.62, - "learning_rate": 0.0004978021978021978, - "loss": 2.2606, - "step": 913 - }, - { - "epoch": 0.62, - "learning_rate": 0.0004976588628762542, - "loss": 1.9795, - "step": 914 - }, - { - "epoch": 0.63, - "learning_rate": 0.0004975155279503104, - "loss": 2.1235, - "step": 915 - }, - { - "epoch": 0.63, - "learning_rate": 0.0004973721930243669, - "loss": 2.2042, - "step": 916 - }, - { - "epoch": 0.63, - "learning_rate": 0.0004972288580984232, - "loss": 2.1169, - "step": 917 - }, - { - "epoch": 0.63, - "learning_rate": 0.0004970855231724797, - "loss": 2.082, - "step": 918 - }, - { - "epoch": 0.63, - "learning_rate": 0.000496942188246536, - "loss": 2.0924, - "step": 919 - }, - { - "epoch": 0.63, - "learning_rate": 0.0004967988533205925, - "loss": 2.1969, - "step": 920 - }, - { - "epoch": 0.63, - "learning_rate": 0.0004966555183946487, - "loss": 2.1238, - "step": 921 - }, - { - "epoch": 0.63, - "learning_rate": 0.0004965121834687051, - "loss": 2.1776, - "step": 922 - }, - { - "epoch": 0.63, - "learning_rate": 0.0004963688485427615, - "loss": 2.1328, - "step": 923 - }, - { - "epoch": 0.63, - "learning_rate": 0.0004962255136168179, - "loss": 2.0723, - "step": 924 - }, - { - "epoch": 0.63, - "learning_rate": 0.0004960821786908743, - "loss": 2.2225, - "step": 925 - }, - { - "epoch": 0.63, - "learning_rate": 0.0004959388437649307, - "loss": 2.1698, - "step": 926 - }, - { - "epoch": 0.63, - "learning_rate": 0.000495795508838987, - "loss": 2.1764, - "step": 927 - }, - { - "epoch": 0.63, - "learning_rate": 0.0004956521739130434, - "loss": 2.2167, - "step": 928 - }, - { - "epoch": 0.64, - "learning_rate": 0.0004955088389870998, - "loss": 2.2408, - "step": 929 - }, - { - "epoch": 0.64, - "learning_rate": 0.0004953655040611562, - "loss": 2.049, - "step": 930 - }, - { - "epoch": 0.64, - "learning_rate": 0.0004952221691352126, - "loss": 2.0461, - "step": 931 - }, - { - "epoch": 0.64, - "learning_rate": 0.000495078834209269, - "loss": 2.1895, - "step": 932 - }, - { - "epoch": 0.64, - "learning_rate": 0.0004949354992833254, - "loss": 2.1358, - "step": 933 - }, - { - "epoch": 0.64, - "learning_rate": 0.0004947921643573817, - "loss": 2.1586, - "step": 934 - }, - { - "epoch": 0.64, - "learning_rate": 0.0004946488294314381, - "loss": 2.0463, - "step": 935 - }, - { - "epoch": 0.64, - "learning_rate": 0.0004945054945054944, - "loss": 2.006, - "step": 936 - }, - { - "epoch": 0.64, - "learning_rate": 0.0004943621595795509, - "loss": 2.0742, - "step": 937 - }, - { - "epoch": 0.64, - "learning_rate": 0.0004942188246536072, - "loss": 2.2008, - "step": 938 - }, - { - "epoch": 0.64, - "learning_rate": 0.0004940754897276637, - "loss": 2.1771, - "step": 939 - }, - { - "epoch": 0.64, - "learning_rate": 0.0004939321548017199, - "loss": 2.1418, - "step": 940 - }, - { - "epoch": 0.64, - "learning_rate": 0.0004937888198757764, - "loss": 2.1089, - "step": 941 - }, - { - "epoch": 0.64, - "learning_rate": 0.0004936454849498327, - "loss": 2.1641, - "step": 942 - }, - { - "epoch": 0.64, - "learning_rate": 0.0004935021500238891, - "loss": 2.2123, - "step": 943 - }, - { - "epoch": 0.65, - "learning_rate": 0.0004933588150979455, - "loss": 2.083, - "step": 944 - }, - { - "epoch": 0.65, - "learning_rate": 0.0004932154801720019, - "loss": 2.0856, - "step": 945 - }, - { - "epoch": 0.65, - "learning_rate": 0.0004930721452460582, - "loss": 2.0576, - "step": 946 - }, - { - "epoch": 0.65, - "learning_rate": 0.0004929288103201146, - "loss": 1.9976, - "step": 947 - }, - { - "epoch": 0.65, - "learning_rate": 0.000492785475394171, - "loss": 2.1001, - "step": 948 - }, - { - "epoch": 0.65, - "learning_rate": 0.0004926421404682274, - "loss": 2.1942, - "step": 949 - }, - { - "epoch": 0.65, - "learning_rate": 0.0004924988055422838, - "loss": 2.0722, - "step": 950 - }, - { - "epoch": 0.65, - "learning_rate": 0.0004923554706163402, - "loss": 2.1542, - "step": 951 - }, - { - "epoch": 0.65, - "learning_rate": 0.0004922121356903965, - "loss": 2.1446, - "step": 952 - }, - { - "epoch": 0.65, - "learning_rate": 0.0004920688007644529, - "loss": 2.1166, - "step": 953 - }, - { - "epoch": 0.65, - "learning_rate": 0.0004919254658385093, - "loss": 2.1406, - "step": 954 - }, - { - "epoch": 0.65, - "learning_rate": 0.0004917821309125656, - "loss": 2.1346, - "step": 955 - }, - { - "epoch": 0.65, - "learning_rate": 0.0004916387959866221, - "loss": 2.063, - "step": 956 - }, - { - "epoch": 0.65, - "learning_rate": 0.0004914954610606783, - "loss": 2.0332, - "step": 957 - }, - { - "epoch": 0.65, - "learning_rate": 0.0004913521261347348, - "loss": 2.0845, - "step": 958 - }, - { - "epoch": 0.66, - "learning_rate": 0.0004912087912087911, - "loss": 2.2177, - "step": 959 - }, - { - "epoch": 0.66, - "learning_rate": 0.0004910654562828476, - "loss": 2.1375, - "step": 960 - }, - { - "epoch": 0.66, - "learning_rate": 0.0004909221213569039, - "loss": 2.1171, - "step": 961 - }, - { - "epoch": 0.66, - "learning_rate": 0.0004907787864309603, - "loss": 2.0483, - "step": 962 - }, - { - "epoch": 0.66, - "learning_rate": 0.0004906354515050167, - "loss": 2.1325, - "step": 963 - }, - { - "epoch": 0.66, - "learning_rate": 0.000490492116579073, - "loss": 2.0789, - "step": 964 - }, - { - "epoch": 0.66, - "learning_rate": 0.0004903487816531294, - "loss": 2.1548, - "step": 965 - }, - { - "epoch": 0.66, - "learning_rate": 0.0004902054467271858, - "loss": 2.2216, - "step": 966 - }, - { - "epoch": 0.66, - "learning_rate": 0.0004900621118012422, - "loss": 2.0969, - "step": 967 - }, - { - "epoch": 0.66, - "learning_rate": 0.0004899187768752986, - "loss": 2.025, - "step": 968 - }, - { - "epoch": 0.66, - "learning_rate": 0.000489775441949355, - "loss": 2.2016, - "step": 969 - }, - { - "epoch": 0.66, - "learning_rate": 0.0004896321070234113, - "loss": 2.1138, - "step": 970 - }, - { - "epoch": 0.66, - "learning_rate": 0.0004894887720974677, - "loss": 2.0992, - "step": 971 - }, - { - "epoch": 0.66, - "learning_rate": 0.0004893454371715241, - "loss": 2.1381, - "step": 972 - }, - { - "epoch": 0.67, - "learning_rate": 0.0004892021022455805, - "loss": 2.0633, - "step": 973 - }, - { - "epoch": 0.67, - "learning_rate": 0.0004890587673196369, - "loss": 2.1169, - "step": 974 - }, - { - "epoch": 0.67, - "learning_rate": 0.0004889154323936933, - "loss": 2.0574, - "step": 975 - }, - { - "epoch": 0.67, - "learning_rate": 0.0004887720974677495, - "loss": 2.1371, - "step": 976 - }, - { - "epoch": 0.67, - "learning_rate": 0.000488628762541806, - "loss": 2.178, - "step": 977 - }, - { - "epoch": 0.67, - "learning_rate": 0.0004884854276158623, - "loss": 2.1797, - "step": 978 - }, - { - "epoch": 0.67, - "learning_rate": 0.0004883420926899188, - "loss": 2.0527, - "step": 979 - }, - { - "epoch": 0.67, - "learning_rate": 0.0004881987577639751, - "loss": 2.2112, - "step": 980 - }, - { - "epoch": 0.67, - "learning_rate": 0.0004880554228380315, - "loss": 2.1351, - "step": 981 - }, - { - "epoch": 0.67, - "learning_rate": 0.0004879120879120879, - "loss": 2.0178, - "step": 982 - }, - { - "epoch": 0.67, - "learning_rate": 0.0004877687529861442, - "loss": 2.1084, - "step": 983 - }, - { - "epoch": 0.67, - "learning_rate": 0.00048762541806020066, - "loss": 2.0922, - "step": 984 - }, - { - "epoch": 0.67, - "learning_rate": 0.000487482083134257, - "loss": 2.027, - "step": 985 - }, - { - "epoch": 0.67, - "learning_rate": 0.0004873387482083134, - "loss": 2.1061, - "step": 986 - }, - { - "epoch": 0.67, - "learning_rate": 0.00048719541328236975, - "loss": 2.0671, - "step": 987 - }, - { - "epoch": 0.68, - "learning_rate": 0.0004870520783564262, - "loss": 2.112, - "step": 988 - }, - { - "epoch": 0.68, - "learning_rate": 0.0004869087434304825, - "loss": 2.1656, - "step": 989 - }, - { - "epoch": 0.68, - "learning_rate": 0.00048676540850453885, - "loss": 2.0472, - "step": 990 - }, - { - "epoch": 0.68, - "learning_rate": 0.0004866220735785953, - "loss": 2.0411, - "step": 991 - }, - { - "epoch": 0.68, - "learning_rate": 0.0004864787386526516, - "loss": 2.0884, - "step": 992 - }, - { - "epoch": 0.68, - "learning_rate": 0.00048633540372670806, - "loss": 2.1037, - "step": 993 - }, - { - "epoch": 0.68, - "learning_rate": 0.0004861920688007644, - "loss": 2.1009, - "step": 994 - }, - { - "epoch": 0.68, - "learning_rate": 0.0004860487338748208, - "loss": 2.1671, - "step": 995 - }, - { - "epoch": 0.68, - "learning_rate": 0.00048590539894887715, - "loss": 2.0654, - "step": 996 - }, - { - "epoch": 0.68, - "learning_rate": 0.00048576206402293354, - "loss": 2.0784, - "step": 997 - }, - { - "epoch": 0.68, - "learning_rate": 0.0004856187290969899, - "loss": 2.1642, - "step": 998 - }, - { - "epoch": 0.68, - "learning_rate": 0.0004854753941710463, - "loss": 2.1117, - "step": 999 - }, - { - "epoch": 0.68, - "learning_rate": 0.0004853320592451027, - "loss": 2.0927, - "step": 1000 - }, - { - "epoch": 0.68, - "eval_loss": 2.0208685398101807, - "eval_runtime": 1688.1492, - "eval_samples_per_second": 5.924, - "eval_steps_per_second": 5.924, - "step": 1000 - }, - { - "epoch": 0.68, - "learning_rate": 0.00048518872431915907, - "loss": 2.1515, - "step": 1001 - }, - { - "epoch": 0.69, - "learning_rate": 0.00048504538939321545, - "loss": 2.0814, - "step": 1002 - }, - { - "epoch": 0.69, - "learning_rate": 0.00048490205446727184, - "loss": 2.0079, - "step": 1003 - }, - { - "epoch": 0.69, - "learning_rate": 0.00048475871954132817, - "loss": 2.1011, - "step": 1004 - }, - { - "epoch": 0.69, - "learning_rate": 0.0004846153846153846, - "loss": 2.1068, - "step": 1005 - }, - { - "epoch": 0.69, - "learning_rate": 0.00048447204968944094, - "loss": 1.9542, - "step": 1006 - }, - { - "epoch": 0.69, - "learning_rate": 0.0004843287147634974, - "loss": 2.0243, - "step": 1007 - }, - { - "epoch": 0.69, - "learning_rate": 0.0004841853798375537, - "loss": 2.0099, - "step": 1008 - }, - { - "epoch": 0.69, - "learning_rate": 0.00048404204491161014, - "loss": 2.0935, - "step": 1009 - }, - { - "epoch": 0.69, - "learning_rate": 0.00048389870998566647, - "loss": 2.1306, - "step": 1010 - }, - { - "epoch": 0.69, - "learning_rate": 0.0004837553750597228, - "loss": 2.1464, - "step": 1011 - }, - { - "epoch": 0.69, - "learning_rate": 0.00048361204013377924, - "loss": 2.1239, - "step": 1012 - }, - { - "epoch": 0.69, - "learning_rate": 0.00048346870520783557, - "loss": 2.0952, - "step": 1013 - }, - { - "epoch": 0.69, - "learning_rate": 0.000483325370281892, - "loss": 2.1171, - "step": 1014 - }, - { - "epoch": 0.69, - "learning_rate": 0.00048318203535594834, - "loss": 2.1482, - "step": 1015 - }, - { - "epoch": 0.69, - "learning_rate": 0.0004830387004300048, - "loss": 2.1371, - "step": 1016 - }, - { - "epoch": 0.7, - "learning_rate": 0.0004828953655040611, - "loss": 2.0772, - "step": 1017 - }, - { - "epoch": 0.7, - "learning_rate": 0.0004827520305781175, - "loss": 2.2531, - "step": 1018 - }, - { - "epoch": 0.7, - "learning_rate": 0.00048260869565217387, - "loss": 2.157, - "step": 1019 - }, - { - "epoch": 0.7, - "learning_rate": 0.00048246536072623025, - "loss": 2.105, - "step": 1020 - }, - { - "epoch": 0.7, - "learning_rate": 0.00048232202580028664, - "loss": 2.1585, - "step": 1021 - }, - { - "epoch": 0.7, - "learning_rate": 0.000482178690874343, - "loss": 2.1916, - "step": 1022 - }, - { - "epoch": 0.7, - "learning_rate": 0.0004820353559483994, - "loss": 2.0452, - "step": 1023 - }, - { - "epoch": 0.7, - "learning_rate": 0.0004818920210224558, - "loss": 2.0769, - "step": 1024 - }, - { - "epoch": 0.7, - "learning_rate": 0.0004817486860965121, - "loss": 2.2082, - "step": 1025 - }, - { - "epoch": 0.7, - "learning_rate": 0.00048160535117056856, - "loss": 2.1002, - "step": 1026 - }, - { - "epoch": 0.7, - "learning_rate": 0.0004814620162446249, - "loss": 2.1154, - "step": 1027 - }, - { - "epoch": 0.7, - "learning_rate": 0.0004813186813186813, - "loss": 1.9913, - "step": 1028 - }, - { - "epoch": 0.7, - "learning_rate": 0.00048117534639273765, - "loss": 2.1603, - "step": 1029 - }, - { - "epoch": 0.7, - "learning_rate": 0.00048103201146679404, - "loss": 2.0342, - "step": 1030 - }, - { - "epoch": 0.7, - "learning_rate": 0.0004808886765408504, - "loss": 2.1135, - "step": 1031 - }, - { - "epoch": 0.71, - "learning_rate": 0.00048074534161490675, - "loss": 2.0988, - "step": 1032 - }, - { - "epoch": 0.71, - "learning_rate": 0.0004806020066889632, - "loss": 2.093, - "step": 1033 - }, - { - "epoch": 0.71, - "learning_rate": 0.0004804586717630195, - "loss": 2.1204, - "step": 1034 - }, - { - "epoch": 0.71, - "learning_rate": 0.00048031533683707596, - "loss": 2.1183, - "step": 1035 - }, - { - "epoch": 0.71, - "learning_rate": 0.0004801720019111323, - "loss": 2.101, - "step": 1036 - }, - { - "epoch": 0.71, - "learning_rate": 0.0004800286669851887, - "loss": 2.1722, - "step": 1037 - }, - { - "epoch": 0.71, - "learning_rate": 0.00047988533205924505, - "loss": 2.122, - "step": 1038 - }, - { - "epoch": 0.71, - "learning_rate": 0.00047974199713330144, - "loss": 2.1153, - "step": 1039 - }, - { - "epoch": 0.71, - "learning_rate": 0.0004795986622073578, - "loss": 2.0863, - "step": 1040 - }, - { - "epoch": 0.71, - "learning_rate": 0.0004794553272814142, - "loss": 2.0038, - "step": 1041 - }, - { - "epoch": 0.71, - "learning_rate": 0.0004793119923554706, - "loss": 2.1352, - "step": 1042 - }, - { - "epoch": 0.71, - "learning_rate": 0.000479168657429527, - "loss": 2.0381, - "step": 1043 - }, - { - "epoch": 0.71, - "learning_rate": 0.00047902532250358336, - "loss": 2.1007, - "step": 1044 - }, - { - "epoch": 0.71, - "learning_rate": 0.00047888198757763974, - "loss": 2.0599, - "step": 1045 - }, - { - "epoch": 0.72, - "learning_rate": 0.00047873865265169607, - "loss": 2.0975, - "step": 1046 - }, - { - "epoch": 0.72, - "learning_rate": 0.00047859531772575245, - "loss": 2.1381, - "step": 1047 - }, - { - "epoch": 0.72, - "learning_rate": 0.00047845198279980884, - "loss": 2.145, - "step": 1048 - }, - { - "epoch": 0.72, - "learning_rate": 0.0004783086478738652, - "loss": 2.1833, - "step": 1049 - }, - { - "epoch": 0.72, - "learning_rate": 0.0004781653129479216, - "loss": 2.0726, - "step": 1050 - }, - { - "epoch": 0.72, - "learning_rate": 0.00047802197802197793, - "loss": 2.1836, - "step": 1051 - }, - { - "epoch": 0.72, - "learning_rate": 0.00047787864309603437, - "loss": 2.128, - "step": 1052 - }, - { - "epoch": 0.72, - "learning_rate": 0.0004777353081700907, - "loss": 2.0933, - "step": 1053 - }, - { - "epoch": 0.72, - "learning_rate": 0.00047759197324414714, - "loss": 2.0189, - "step": 1054 - }, - { - "epoch": 0.72, - "learning_rate": 0.00047744863831820347, - "loss": 2.0973, - "step": 1055 - }, - { - "epoch": 0.72, - "learning_rate": 0.0004773053033922599, - "loss": 2.0629, - "step": 1056 - }, - { - "epoch": 0.72, - "learning_rate": 0.00047716196846631624, - "loss": 2.1493, - "step": 1057 - }, - { - "epoch": 0.72, - "learning_rate": 0.0004770186335403726, - "loss": 2.0983, - "step": 1058 - }, - { - "epoch": 0.72, - "learning_rate": 0.000476875298614429, - "loss": 2.0396, - "step": 1059 - }, - { - "epoch": 0.72, - "learning_rate": 0.0004767319636884854, - "loss": 2.1371, - "step": 1060 - }, - { - "epoch": 0.73, - "learning_rate": 0.00047658862876254177, - "loss": 2.0203, - "step": 1061 - }, - { - "epoch": 0.73, - "learning_rate": 0.00047644529383659816, - "loss": 2.0575, - "step": 1062 - }, - { - "epoch": 0.73, - "learning_rate": 0.00047630195891065454, - "loss": 2.1714, - "step": 1063 - }, - { - "epoch": 0.73, - "learning_rate": 0.00047615862398471087, - "loss": 2.0467, - "step": 1064 - }, - { - "epoch": 0.73, - "learning_rate": 0.00047601528905876725, - "loss": 2.1803, - "step": 1065 - }, - { - "epoch": 0.73, - "learning_rate": 0.00047587195413282364, - "loss": 2.0905, - "step": 1066 - }, - { - "epoch": 0.73, - "learning_rate": 0.00047572861920688, - "loss": 2.1515, - "step": 1067 - }, - { - "epoch": 0.73, - "learning_rate": 0.0004755852842809364, - "loss": 2.052, - "step": 1068 - }, - { - "epoch": 0.73, - "learning_rate": 0.0004754419493549928, - "loss": 2.0514, - "step": 1069 - }, - { - "epoch": 0.73, - "learning_rate": 0.00047529861442904917, - "loss": 2.2441, - "step": 1070 - }, - { - "epoch": 0.73, - "learning_rate": 0.00047515527950310556, - "loss": 2.1311, - "step": 1071 - }, - { - "epoch": 0.73, - "learning_rate": 0.0004750119445771619, - "loss": 2.0485, - "step": 1072 - }, - { - "epoch": 0.73, - "learning_rate": 0.0004748686096512183, - "loss": 2.0591, - "step": 1073 - }, - { - "epoch": 0.73, - "learning_rate": 0.00047472527472527465, - "loss": 2.1058, - "step": 1074 - }, - { - "epoch": 0.73, - "learning_rate": 0.0004745819397993311, - "loss": 2.1968, - "step": 1075 - }, - { - "epoch": 0.74, - "learning_rate": 0.0004744386048733874, - "loss": 2.1501, - "step": 1076 - }, - { - "epoch": 0.74, - "learning_rate": 0.00047429526994744386, - "loss": 2.0594, - "step": 1077 - }, - { - "epoch": 0.74, - "learning_rate": 0.0004741519350215002, - "loss": 2.1584, - "step": 1078 - }, - { - "epoch": 0.74, - "learning_rate": 0.00047400860009555657, - "loss": 2.1617, - "step": 1079 - }, - { - "epoch": 0.74, - "learning_rate": 0.00047386526516961296, - "loss": 2.1563, - "step": 1080 - }, - { - "epoch": 0.74, - "learning_rate": 0.0004737219302436693, - "loss": 2.1209, - "step": 1081 - }, - { - "epoch": 0.74, - "learning_rate": 0.0004735785953177257, - "loss": 2.0942, - "step": 1082 - }, - { - "epoch": 0.74, - "learning_rate": 0.00047343526039178205, - "loss": 2.0905, - "step": 1083 - }, - { - "epoch": 0.74, - "learning_rate": 0.0004732919254658385, - "loss": 2.1083, - "step": 1084 - }, - { - "epoch": 0.74, - "learning_rate": 0.0004731485905398948, - "loss": 2.1352, - "step": 1085 - }, - { - "epoch": 0.74, - "learning_rate": 0.0004730052556139512, - "loss": 2.1182, - "step": 1086 - }, - { - "epoch": 0.74, - "learning_rate": 0.0004728619206880076, - "loss": 2.0542, - "step": 1087 - }, - { - "epoch": 0.74, - "learning_rate": 0.00047271858576206397, - "loss": 2.0865, - "step": 1088 - }, - { - "epoch": 0.74, - "learning_rate": 0.00047257525083612036, - "loss": 2.0584, - "step": 1089 - }, - { - "epoch": 0.75, - "learning_rate": 0.00047243191591017674, - "loss": 2.079, - "step": 1090 - }, - { - "epoch": 0.75, - "learning_rate": 0.0004722885809842331, - "loss": 2.0989, - "step": 1091 - }, - { - "epoch": 0.75, - "learning_rate": 0.0004721452460582895, - "loss": 2.0882, - "step": 1092 - }, - { - "epoch": 0.75, - "learning_rate": 0.00047200191113234584, - "loss": 2.1022, - "step": 1093 - }, - { - "epoch": 0.75, - "learning_rate": 0.0004718585762064023, - "loss": 2.1201, - "step": 1094 - }, - { - "epoch": 0.75, - "learning_rate": 0.0004717152412804586, - "loss": 2.1393, - "step": 1095 - }, - { - "epoch": 0.75, - "learning_rate": 0.00047157190635451504, - "loss": 2.0398, - "step": 1096 - }, - { - "epoch": 0.75, - "learning_rate": 0.00047142857142857137, - "loss": 2.1704, - "step": 1097 - }, - { - "epoch": 0.75, - "learning_rate": 0.0004712852365026278, - "loss": 2.057, - "step": 1098 - }, - { - "epoch": 0.75, - "learning_rate": 0.00047114190157668414, - "loss": 2.0615, - "step": 1099 - }, - { - "epoch": 0.75, - "learning_rate": 0.00047099856665074047, - "loss": 2.0887, - "step": 1100 - }, - { - "epoch": 0.75, - "learning_rate": 0.0004708552317247969, - "loss": 2.1307, - "step": 1101 - }, - { - "epoch": 0.75, - "learning_rate": 0.00047071189679885324, - "loss": 2.0054, - "step": 1102 - }, - { - "epoch": 0.75, - "learning_rate": 0.0004705685618729097, - "loss": 2.1443, - "step": 1103 - }, - { - "epoch": 0.75, - "learning_rate": 0.000470425226946966, - "loss": 2.0008, - "step": 1104 - }, - { - "epoch": 0.76, - "learning_rate": 0.00047028189202102244, - "loss": 2.0476, - "step": 1105 - }, - { - "epoch": 0.76, - "learning_rate": 0.00047013855709507877, - "loss": 2.0807, - "step": 1106 - }, - { - "epoch": 0.76, - "learning_rate": 0.00046999522216913516, - "loss": 2.0908, - "step": 1107 - }, - { - "epoch": 0.76, - "learning_rate": 0.00046985188724319154, - "loss": 2.1526, - "step": 1108 - }, - { - "epoch": 0.76, - "learning_rate": 0.0004697085523172479, - "loss": 2.0984, - "step": 1109 - }, - { - "epoch": 0.76, - "learning_rate": 0.0004695652173913043, - "loss": 2.173, - "step": 1110 - }, - { - "epoch": 0.76, - "learning_rate": 0.0004694218824653607, - "loss": 2.1548, - "step": 1111 - }, - { - "epoch": 0.76, - "learning_rate": 0.0004692785475394171, - "loss": 2.1361, - "step": 1112 - }, - { - "epoch": 0.76, - "learning_rate": 0.00046913521261347346, - "loss": 2.1679, - "step": 1113 - }, - { - "epoch": 0.76, - "learning_rate": 0.0004689918776875298, - "loss": 2.0432, - "step": 1114 - }, - { - "epoch": 0.76, - "learning_rate": 0.0004688485427615862, - "loss": 2.1054, - "step": 1115 - }, - { - "epoch": 0.76, - "learning_rate": 0.00046870520783564256, - "loss": 2.1144, - "step": 1116 - }, - { - "epoch": 0.76, - "learning_rate": 0.000468561872909699, - "loss": 2.1629, - "step": 1117 - }, - { - "epoch": 0.76, - "learning_rate": 0.0004684185379837553, - "loss": 2.1008, - "step": 1118 - }, - { - "epoch": 0.77, - "learning_rate": 0.00046827520305781176, - "loss": 2.2149, - "step": 1119 - }, - { - "epoch": 0.77, - "learning_rate": 0.0004681318681318681, - "loss": 2.1446, - "step": 1120 - }, - { - "epoch": 0.77, - "learning_rate": 0.0004679885332059244, - "loss": 2.1551, - "step": 1121 - }, - { - "epoch": 0.77, - "learning_rate": 0.00046784519827998086, - "loss": 2.1443, - "step": 1122 - }, - { - "epoch": 0.77, - "learning_rate": 0.0004677018633540372, - "loss": 2.1708, - "step": 1123 - }, - { - "epoch": 0.77, - "learning_rate": 0.0004675585284280936, - "loss": 2.0796, - "step": 1124 - }, - { - "epoch": 0.77, - "learning_rate": 0.00046741519350214995, - "loss": 2.1689, - "step": 1125 - }, - { - "epoch": 0.77, - "learning_rate": 0.0004672718585762064, - "loss": 2.0454, - "step": 1126 - }, - { - "epoch": 0.77, - "learning_rate": 0.0004671285236502627, - "loss": 2.1714, - "step": 1127 - }, - { - "epoch": 0.77, - "learning_rate": 0.0004669851887243191, - "loss": 2.0867, - "step": 1128 - }, - { - "epoch": 0.77, - "learning_rate": 0.0004668418537983755, - "loss": 2.0246, - "step": 1129 - }, - { - "epoch": 0.77, - "learning_rate": 0.0004666985188724319, - "loss": 2.1147, - "step": 1130 - }, - { - "epoch": 0.77, - "learning_rate": 0.00046655518394648826, - "loss": 2.1384, - "step": 1131 - }, - { - "epoch": 0.77, - "learning_rate": 0.00046641184902054464, - "loss": 2.035, - "step": 1132 - }, - { - "epoch": 0.77, - "learning_rate": 0.000466268514094601, - "loss": 2.1166, - "step": 1133 - }, - { - "epoch": 0.78, - "learning_rate": 0.0004661251791686574, - "loss": 2.0252, - "step": 1134 - }, - { - "epoch": 0.78, - "learning_rate": 0.00046598184424271374, - "loss": 2.0221, - "step": 1135 - }, - { - "epoch": 0.78, - "learning_rate": 0.0004658385093167702, - "loss": 2.1787, - "step": 1136 - }, - { - "epoch": 0.78, - "learning_rate": 0.0004656951743908265, - "loss": 2.1729, - "step": 1137 - }, - { - "epoch": 0.78, - "learning_rate": 0.00046555183946488294, - "loss": 2.1452, - "step": 1138 - }, - { - "epoch": 0.78, - "learning_rate": 0.0004654085045389393, - "loss": 2.0615, - "step": 1139 - }, - { - "epoch": 0.78, - "learning_rate": 0.0004652651696129957, - "loss": 2.0503, - "step": 1140 - }, - { - "epoch": 0.78, - "learning_rate": 0.00046512183468705204, - "loss": 2.048, - "step": 1141 - }, - { - "epoch": 0.78, - "learning_rate": 0.00046497849976110837, - "loss": 2.043, - "step": 1142 - }, - { - "epoch": 0.78, - "learning_rate": 0.0004648351648351648, - "loss": 2.18, - "step": 1143 - }, - { - "epoch": 0.78, - "learning_rate": 0.00046469182990922114, - "loss": 2.128, - "step": 1144 - }, - { - "epoch": 0.78, - "learning_rate": 0.0004645484949832776, - "loss": 2.1708, - "step": 1145 - }, - { - "epoch": 0.78, - "learning_rate": 0.0004644051600573339, - "loss": 2.0475, - "step": 1146 - }, - { - "epoch": 0.78, - "learning_rate": 0.0004642618251313903, - "loss": 2.0693, - "step": 1147 - }, - { - "epoch": 0.78, - "learning_rate": 0.0004641184902054467, - "loss": 2.1685, - "step": 1148 - }, - { - "epoch": 0.79, - "learning_rate": 0.00046397515527950306, - "loss": 2.1782, - "step": 1149 - }, - { - "epoch": 0.79, - "learning_rate": 0.00046383182035355944, - "loss": 2.0848, - "step": 1150 - }, - { - "epoch": 0.79, - "learning_rate": 0.0004636884854276158, - "loss": 2.1424, - "step": 1151 - }, - { - "epoch": 0.79, - "learning_rate": 0.0004635451505016722, - "loss": 2.0642, - "step": 1152 - }, - { - "epoch": 0.79, - "learning_rate": 0.0004634018155757286, - "loss": 1.9898, - "step": 1153 - }, - { - "epoch": 0.79, - "learning_rate": 0.0004632584806497849, - "loss": 2.0492, - "step": 1154 - }, - { - "epoch": 0.79, - "learning_rate": 0.00046311514572384136, - "loss": 2.1056, - "step": 1155 - }, - { - "epoch": 0.79, - "learning_rate": 0.0004629718107978977, - "loss": 2.1412, - "step": 1156 - }, - { - "epoch": 0.79, - "learning_rate": 0.00046282847587195413, - "loss": 2.0486, - "step": 1157 - }, - { - "epoch": 0.79, - "learning_rate": 0.00046268514094601046, - "loss": 2.1234, - "step": 1158 - }, - { - "epoch": 0.79, - "learning_rate": 0.0004625418060200669, - "loss": 2.1705, - "step": 1159 - }, - { - "epoch": 0.79, - "learning_rate": 0.0004623984710941232, - "loss": 2.122, - "step": 1160 - }, - { - "epoch": 0.79, - "learning_rate": 0.00046225513616817955, - "loss": 2.0406, - "step": 1161 - }, - { - "epoch": 0.79, - "learning_rate": 0.000462111801242236, - "loss": 2.1442, - "step": 1162 - }, - { - "epoch": 0.8, - "learning_rate": 0.0004619684663162923, - "loss": 2.052, - "step": 1163 - }, - { - "epoch": 0.8, - "learning_rate": 0.00046182513139034876, - "loss": 2.0891, - "step": 1164 - }, - { - "epoch": 0.8, - "learning_rate": 0.0004616817964644051, - "loss": 2.0678, - "step": 1165 - }, - { - "epoch": 0.8, - "learning_rate": 0.00046153846153846153, - "loss": 2.1483, - "step": 1166 - }, - { - "epoch": 0.8, - "learning_rate": 0.00046139512661251786, - "loss": 2.0679, - "step": 1167 - }, - { - "epoch": 0.8, - "learning_rate": 0.00046125179168657424, - "loss": 2.1211, - "step": 1168 - }, - { - "epoch": 0.8, - "learning_rate": 0.0004611084567606306, - "loss": 2.1161, - "step": 1169 - }, - { - "epoch": 0.8, - "learning_rate": 0.000460965121834687, - "loss": 2.1112, - "step": 1170 - }, - { - "epoch": 0.8, - "learning_rate": 0.0004608217869087434, - "loss": 2.1506, - "step": 1171 - }, - { - "epoch": 0.8, - "learning_rate": 0.0004606784519827998, - "loss": 2.0986, - "step": 1172 - }, - { - "epoch": 0.8, - "learning_rate": 0.00046053511705685616, - "loss": 2.0766, - "step": 1173 - }, - { - "epoch": 0.8, - "learning_rate": 0.00046039178213091254, - "loss": 2.115, - "step": 1174 - }, - { - "epoch": 0.8, - "learning_rate": 0.00046024844720496887, - "loss": 2.1063, - "step": 1175 - }, - { - "epoch": 0.8, - "learning_rate": 0.0004601051122790253, - "loss": 2.0088, - "step": 1176 - }, - { - "epoch": 0.8, - "learning_rate": 0.00045996177735308164, - "loss": 2.0694, - "step": 1177 - }, - { - "epoch": 0.81, - "learning_rate": 0.0004598184424271381, - "loss": 2.1738, - "step": 1178 - }, - { - "epoch": 0.81, - "learning_rate": 0.0004596751075011944, - "loss": 2.0662, - "step": 1179 - }, - { - "epoch": 0.81, - "learning_rate": 0.00045953177257525085, - "loss": 2.1288, - "step": 1180 - }, - { - "epoch": 0.81, - "learning_rate": 0.0004593884376493072, - "loss": 2.1382, - "step": 1181 - }, - { - "epoch": 0.81, - "learning_rate": 0.0004592451027233635, - "loss": 2.1263, - "step": 1182 - }, - { - "epoch": 0.81, - "learning_rate": 0.00045910176779741994, - "loss": 1.9623, - "step": 1183 - }, - { - "epoch": 0.81, - "learning_rate": 0.00045895843287147627, - "loss": 2.2013, - "step": 1184 - }, - { - "epoch": 0.81, - "learning_rate": 0.0004588150979455327, - "loss": 2.2295, - "step": 1185 - }, - { - "epoch": 0.81, - "learning_rate": 0.00045867176301958904, - "loss": 2.1698, - "step": 1186 - }, - { - "epoch": 0.81, - "learning_rate": 0.0004585284280936455, - "loss": 2.1143, - "step": 1187 - }, - { - "epoch": 0.81, - "learning_rate": 0.0004583850931677018, - "loss": 2.0956, - "step": 1188 - }, - { - "epoch": 0.81, - "learning_rate": 0.0004582417582417582, - "loss": 2.0658, - "step": 1189 - }, - { - "epoch": 0.81, - "learning_rate": 0.0004580984233158146, - "loss": 2.1017, - "step": 1190 - }, - { - "epoch": 0.81, - "learning_rate": 0.00045795508838987096, - "loss": 2.1492, - "step": 1191 - }, - { - "epoch": 0.81, - "learning_rate": 0.00045781175346392734, - "loss": 2.2528, - "step": 1192 - }, - { - "epoch": 0.82, - "learning_rate": 0.0004576684185379837, - "loss": 2.0356, - "step": 1193 - }, - { - "epoch": 0.82, - "learning_rate": 0.0004575250836120401, - "loss": 2.0912, - "step": 1194 - }, - { - "epoch": 0.82, - "learning_rate": 0.0004573817486860965, - "loss": 2.1458, - "step": 1195 - }, - { - "epoch": 0.82, - "learning_rate": 0.0004572384137601528, - "loss": 2.0456, - "step": 1196 - }, - { - "epoch": 0.82, - "learning_rate": 0.00045709507883420926, - "loss": 2.0842, - "step": 1197 - }, - { - "epoch": 0.82, - "learning_rate": 0.0004569517439082656, - "loss": 2.1316, - "step": 1198 - }, - { - "epoch": 0.82, - "learning_rate": 0.00045680840898232203, - "loss": 2.1506, - "step": 1199 - }, - { - "epoch": 0.82, - "learning_rate": 0.00045666507405637836, - "loss": 2.1229, - "step": 1200 - }, - { - "epoch": 0.82, - "eval_loss": 2.010464906692505, - "eval_runtime": 1649.1596, - "eval_samples_per_second": 6.064, - "eval_steps_per_second": 6.064, - "step": 1200 - }, - { - "epoch": 0.82, - "learning_rate": 0.0004565217391304348, - "loss": 2.0344, - "step": 1201 - }, - { - "epoch": 0.82, - "learning_rate": 0.0004563784042044911, - "loss": 2.1438, - "step": 1202 - }, - { - "epoch": 0.82, - "learning_rate": 0.00045623506927854746, - "loss": 1.9869, - "step": 1203 - }, - { - "epoch": 0.82, - "learning_rate": 0.0004560917343526039, - "loss": 2.2059, - "step": 1204 - }, - { - "epoch": 0.82, - "learning_rate": 0.0004559483994266602, - "loss": 1.9896, - "step": 1205 - }, - { - "epoch": 0.82, - "learning_rate": 0.00045580506450071666, - "loss": 2.0367, - "step": 1206 - }, - { - "epoch": 0.83, - "learning_rate": 0.000455661729574773, - "loss": 2.0457, - "step": 1207 - }, - { - "epoch": 0.83, - "learning_rate": 0.00045551839464882943, - "loss": 1.9596, - "step": 1208 - }, - { - "epoch": 0.83, - "learning_rate": 0.00045537505972288576, - "loss": 2.0869, - "step": 1209 - }, - { - "epoch": 0.83, - "learning_rate": 0.00045523172479694214, - "loss": 2.1679, - "step": 1210 - }, - { - "epoch": 0.83, - "learning_rate": 0.0004550883898709985, - "loss": 2.0897, - "step": 1211 - }, - { - "epoch": 0.83, - "learning_rate": 0.0004549450549450549, - "loss": 2.0965, - "step": 1212 - }, - { - "epoch": 0.83, - "learning_rate": 0.0004548017200191113, - "loss": 2.1179, - "step": 1213 - }, - { - "epoch": 0.83, - "learning_rate": 0.0004546583850931677, - "loss": 2.1033, - "step": 1214 - }, - { - "epoch": 0.83, - "learning_rate": 0.00045451505016722406, - "loss": 2.0576, - "step": 1215 - }, - { - "epoch": 0.83, - "learning_rate": 0.00045437171524128045, - "loss": 2.1722, - "step": 1216 - }, - { - "epoch": 0.83, - "learning_rate": 0.0004542283803153368, - "loss": 2.0906, - "step": 1217 - }, - { - "epoch": 0.83, - "learning_rate": 0.0004540850453893932, - "loss": 2.07, - "step": 1218 - }, - { - "epoch": 0.83, - "learning_rate": 0.00045394171046344954, - "loss": 2.1588, - "step": 1219 - }, - { - "epoch": 0.83, - "learning_rate": 0.000453798375537506, - "loss": 2.1003, - "step": 1220 - }, - { - "epoch": 0.83, - "learning_rate": 0.0004536550406115623, - "loss": 2.1694, - "step": 1221 - }, - { - "epoch": 0.84, - "learning_rate": 0.0004535117056856187, - "loss": 2.09, - "step": 1222 - }, - { - "epoch": 0.84, - "learning_rate": 0.0004533683707596751, - "loss": 2.1133, - "step": 1223 - }, - { - "epoch": 0.84, - "learning_rate": 0.0004532250358337314, - "loss": 2.1067, - "step": 1224 - }, - { - "epoch": 0.84, - "learning_rate": 0.00045308170090778784, - "loss": 2.1304, - "step": 1225 - }, - { - "epoch": 0.84, - "learning_rate": 0.0004529383659818442, - "loss": 2.1474, - "step": 1226 - }, - { - "epoch": 0.84, - "learning_rate": 0.0004527950310559006, - "loss": 2.0828, - "step": 1227 - }, - { - "epoch": 0.84, - "learning_rate": 0.00045265169612995694, - "loss": 2.1004, - "step": 1228 - }, - { - "epoch": 0.84, - "learning_rate": 0.0004525083612040134, - "loss": 2.0719, - "step": 1229 - }, - { - "epoch": 0.84, - "learning_rate": 0.0004523650262780697, - "loss": 2.0861, - "step": 1230 - }, - { - "epoch": 0.84, - "learning_rate": 0.0004522216913521261, - "loss": 2.136, - "step": 1231 - }, - { - "epoch": 0.84, - "learning_rate": 0.0004520783564261825, - "loss": 2.1708, - "step": 1232 - }, - { - "epoch": 0.84, - "learning_rate": 0.00045193502150023886, - "loss": 2.0946, - "step": 1233 - }, - { - "epoch": 0.84, - "learning_rate": 0.00045179168657429524, - "loss": 2.1396, - "step": 1234 - }, - { - "epoch": 0.84, - "learning_rate": 0.00045164835164835163, - "loss": 2.1104, - "step": 1235 - }, - { - "epoch": 0.85, - "learning_rate": 0.000451505016722408, - "loss": 2.1906, - "step": 1236 - }, - { - "epoch": 0.85, - "learning_rate": 0.0004513616817964644, - "loss": 2.1554, - "step": 1237 - }, - { - "epoch": 0.85, - "learning_rate": 0.0004512183468705207, - "loss": 2.1313, - "step": 1238 - }, - { - "epoch": 0.85, - "learning_rate": 0.0004510750119445771, - "loss": 2.1129, - "step": 1239 - }, - { - "epoch": 0.85, - "learning_rate": 0.0004509316770186335, - "loss": 1.9433, - "step": 1240 - }, - { - "epoch": 0.85, - "learning_rate": 0.0004507883420926899, - "loss": 2.0958, - "step": 1241 - }, - { - "epoch": 0.85, - "learning_rate": 0.00045064500716674626, - "loss": 2.0529, - "step": 1242 - }, - { - "epoch": 0.85, - "learning_rate": 0.00045050167224080264, - "loss": 2.1668, - "step": 1243 - }, - { - "epoch": 0.85, - "learning_rate": 0.00045035833731485903, - "loss": 2.1123, - "step": 1244 - }, - { - "epoch": 0.85, - "learning_rate": 0.00045021500238891536, - "loss": 1.9926, - "step": 1245 - }, - { - "epoch": 0.85, - "learning_rate": 0.0004500716674629718, - "loss": 2.0264, - "step": 1246 - }, - { - "epoch": 0.85, - "learning_rate": 0.0004499283325370281, - "loss": 2.0906, - "step": 1247 - }, - { - "epoch": 0.85, - "learning_rate": 0.00044978499761108456, - "loss": 2.066, - "step": 1248 - }, - { - "epoch": 0.85, - "learning_rate": 0.0004496416626851409, - "loss": 2.0788, - "step": 1249 - }, - { - "epoch": 0.85, - "learning_rate": 0.0004494983277591973, - "loss": 2.0461, - "step": 1250 - }, - { - "epoch": 0.86, - "learning_rate": 0.00044935499283325366, - "loss": 2.0026, - "step": 1251 - }, - { - "epoch": 0.86, - "learning_rate": 0.00044921165790731004, - "loss": 2.0602, - "step": 1252 - }, - { - "epoch": 0.86, - "learning_rate": 0.00044906832298136643, - "loss": 2.1321, - "step": 1253 - }, - { - "epoch": 0.86, - "learning_rate": 0.0004489249880554228, - "loss": 2.0282, - "step": 1254 - }, - { - "epoch": 0.86, - "learning_rate": 0.0004487816531294792, - "loss": 2.042, - "step": 1255 - }, - { - "epoch": 0.86, - "learning_rate": 0.0004486383182035355, - "loss": 2.0985, - "step": 1256 - }, - { - "epoch": 0.86, - "learning_rate": 0.0004484949832775919, - "loss": 2.0782, - "step": 1257 - }, - { - "epoch": 0.86, - "learning_rate": 0.0004483516483516483, - "loss": 2.0993, - "step": 1258 - }, - { - "epoch": 0.86, - "learning_rate": 0.0004482083134257047, - "loss": 2.0959, - "step": 1259 - }, - { - "epoch": 0.86, - "learning_rate": 0.00044806497849976106, - "loss": 2.0444, - "step": 1260 - }, - { - "epoch": 0.86, - "learning_rate": 0.00044792164357381744, - "loss": 2.0952, - "step": 1261 - }, - { - "epoch": 0.86, - "learning_rate": 0.00044777830864787383, - "loss": 2.077, - "step": 1262 - }, - { - "epoch": 0.86, - "learning_rate": 0.0004476349737219302, - "loss": 2.2078, - "step": 1263 - }, - { - "epoch": 0.86, - "learning_rate": 0.00044749163879598654, - "loss": 2.1267, - "step": 1264 - }, - { - "epoch": 0.86, - "learning_rate": 0.000447348303870043, - "loss": 2.0193, - "step": 1265 - }, - { - "epoch": 0.87, - "learning_rate": 0.0004472049689440993, - "loss": 2.118, - "step": 1266 - }, - { - "epoch": 0.87, - "learning_rate": 0.00044706163401815575, - "loss": 2.0803, - "step": 1267 - }, - { - "epoch": 0.87, - "learning_rate": 0.0004469182990922121, - "loss": 2.1214, - "step": 1268 - }, - { - "epoch": 0.87, - "learning_rate": 0.0004467749641662685, - "loss": 2.0874, - "step": 1269 - }, - { - "epoch": 0.87, - "learning_rate": 0.00044663162924032484, - "loss": 2.1083, - "step": 1270 - }, - { - "epoch": 0.87, - "learning_rate": 0.00044648829431438123, - "loss": 2.0644, - "step": 1271 - }, - { - "epoch": 0.87, - "learning_rate": 0.0004463449593884376, - "loss": 1.9959, - "step": 1272 - }, - { - "epoch": 0.87, - "learning_rate": 0.00044620162446249394, - "loss": 2.04, - "step": 1273 - }, - { - "epoch": 0.87, - "learning_rate": 0.0004460582895365504, - "loss": 2.1078, - "step": 1274 - }, - { - "epoch": 0.87, - "learning_rate": 0.0004459149546106067, - "loss": 2.0223, - "step": 1275 - }, - { - "epoch": 0.87, - "learning_rate": 0.00044577161968466315, - "loss": 2.0671, - "step": 1276 - }, - { - "epoch": 0.87, - "learning_rate": 0.0004456282847587195, - "loss": 2.1617, - "step": 1277 - }, - { - "epoch": 0.87, - "learning_rate": 0.00044548494983277586, - "loss": 2.0564, - "step": 1278 - }, - { - "epoch": 0.87, - "learning_rate": 0.00044534161490683224, - "loss": 2.121, - "step": 1279 - }, - { - "epoch": 0.88, - "learning_rate": 0.00044519827998088863, - "loss": 2.0868, - "step": 1280 - }, - { - "epoch": 0.88, - "learning_rate": 0.000445054945054945, - "loss": 2.0621, - "step": 1281 - }, - { - "epoch": 0.88, - "learning_rate": 0.0004449116101290014, - "loss": 2.0657, - "step": 1282 - }, - { - "epoch": 0.88, - "learning_rate": 0.0004447682752030578, - "loss": 2.1032, - "step": 1283 - }, - { - "epoch": 0.88, - "learning_rate": 0.00044462494027711416, - "loss": 2.142, - "step": 1284 - }, - { - "epoch": 0.88, - "learning_rate": 0.0004444816053511705, - "loss": 2.1185, - "step": 1285 - }, - { - "epoch": 0.88, - "learning_rate": 0.00044433827042522693, - "loss": 2.119, - "step": 1286 - }, - { - "epoch": 0.88, - "learning_rate": 0.00044419493549928326, - "loss": 1.9116, - "step": 1287 - }, - { - "epoch": 0.88, - "learning_rate": 0.0004440516005733397, - "loss": 2.112, - "step": 1288 - }, - { - "epoch": 0.88, - "learning_rate": 0.00044390826564739603, - "loss": 2.0351, - "step": 1289 - }, - { - "epoch": 0.88, - "learning_rate": 0.00044376493072145247, - "loss": 2.0875, - "step": 1290 - }, - { - "epoch": 0.88, - "learning_rate": 0.0004436215957955088, - "loss": 2.1202, - "step": 1291 - }, - { - "epoch": 0.88, - "learning_rate": 0.0004434782608695651, - "loss": 2.1295, - "step": 1292 - }, - { - "epoch": 0.88, - "learning_rate": 0.00044333492594362156, - "loss": 2.0939, - "step": 1293 - }, - { - "epoch": 0.88, - "learning_rate": 0.0004431915910176779, - "loss": 1.9528, - "step": 1294 - }, - { - "epoch": 0.89, - "learning_rate": 0.00044304825609173433, - "loss": 2.155, - "step": 1295 - }, - { - "epoch": 0.89, - "learning_rate": 0.00044290492116579066, - "loss": 2.1111, - "step": 1296 - }, - { - "epoch": 0.89, - "learning_rate": 0.0004427615862398471, - "loss": 2.1194, - "step": 1297 - }, - { - "epoch": 0.89, - "learning_rate": 0.0004426182513139034, - "loss": 2.1306, - "step": 1298 - }, - { - "epoch": 0.89, - "learning_rate": 0.0004424749163879598, - "loss": 2.0579, - "step": 1299 - }, - { - "epoch": 0.89, - "learning_rate": 0.0004423315814620162, - "loss": 2.1123, - "step": 1300 - }, - { - "epoch": 0.89, - "learning_rate": 0.0004421882465360726, - "loss": 2.0886, - "step": 1301 - }, - { - "epoch": 0.89, - "learning_rate": 0.00044204491161012896, - "loss": 2.0761, - "step": 1302 - }, - { - "epoch": 0.89, - "learning_rate": 0.00044190157668418535, - "loss": 2.0334, - "step": 1303 - }, - { - "epoch": 0.89, - "learning_rate": 0.00044175824175824173, - "loss": 2.0314, - "step": 1304 - }, - { - "epoch": 0.89, - "learning_rate": 0.0004416149068322981, - "loss": 2.173, - "step": 1305 - }, - { - "epoch": 0.89, - "learning_rate": 0.00044147157190635444, - "loss": 2.0194, - "step": 1306 - }, - { - "epoch": 0.89, - "learning_rate": 0.0004413282369804109, - "loss": 2.1108, - "step": 1307 - }, - { - "epoch": 0.89, - "learning_rate": 0.0004411849020544672, - "loss": 2.0581, - "step": 1308 - }, - { - "epoch": 0.89, - "learning_rate": 0.00044104156712852365, - "loss": 1.9859, - "step": 1309 - }, - { - "epoch": 0.9, - "learning_rate": 0.00044089823220258, - "loss": 2.0621, - "step": 1310 - }, - { - "epoch": 0.9, - "learning_rate": 0.0004407548972766364, - "loss": 2.1089, - "step": 1311 - }, - { - "epoch": 0.9, - "learning_rate": 0.00044061156235069275, - "loss": 2.1205, - "step": 1312 - }, - { - "epoch": 0.9, - "learning_rate": 0.0004404682274247491, - "loss": 2.0944, - "step": 1313 - }, - { - "epoch": 0.9, - "learning_rate": 0.0004403248924988055, - "loss": 2.0718, - "step": 1314 - }, - { - "epoch": 0.9, - "learning_rate": 0.00044018155757286184, - "loss": 2.0605, - "step": 1315 - }, - { - "epoch": 0.9, - "learning_rate": 0.0004400382226469183, - "loss": 2.0731, - "step": 1316 - }, - { - "epoch": 0.9, - "learning_rate": 0.0004398948877209746, - "loss": 2.017, - "step": 1317 - }, - { - "epoch": 0.9, - "learning_rate": 0.00043975155279503105, - "loss": 2.0787, - "step": 1318 - }, - { - "epoch": 0.9, - "learning_rate": 0.0004396082178690874, - "loss": 2.1489, - "step": 1319 - }, - { - "epoch": 0.9, - "learning_rate": 0.00043946488294314376, - "loss": 2.0481, - "step": 1320 - }, - { - "epoch": 0.9, - "learning_rate": 0.00043932154801720015, - "loss": 2.1374, - "step": 1321 - }, - { - "epoch": 0.9, - "learning_rate": 0.00043917821309125653, - "loss": 2.1136, - "step": 1322 - }, - { - "epoch": 0.9, - "learning_rate": 0.0004390348781653129, - "loss": 2.0005, - "step": 1323 - }, - { - "epoch": 0.91, - "learning_rate": 0.0004388915432393693, - "loss": 1.9886, - "step": 1324 - }, - { - "epoch": 0.91, - "learning_rate": 0.0004387482083134257, - "loss": 2.1114, - "step": 1325 - }, - { - "epoch": 0.91, - "learning_rate": 0.00043860487338748206, - "loss": 2.0226, - "step": 1326 - }, - { - "epoch": 0.91, - "learning_rate": 0.0004384615384615384, - "loss": 2.1214, - "step": 1327 - }, - { - "epoch": 0.91, - "learning_rate": 0.00043831820353559483, - "loss": 2.2046, - "step": 1328 - }, - { - "epoch": 0.91, - "learning_rate": 0.00043817486860965116, - "loss": 2.094, - "step": 1329 - }, - { - "epoch": 0.91, - "learning_rate": 0.0004380315336837076, - "loss": 2.1184, - "step": 1330 - }, - { - "epoch": 0.91, - "learning_rate": 0.00043788819875776393, - "loss": 2.0185, - "step": 1331 - }, - { - "epoch": 0.91, - "learning_rate": 0.00043774486383182037, - "loss": 2.0475, - "step": 1332 - }, - { - "epoch": 0.91, - "learning_rate": 0.0004376015289058767, - "loss": 2.102, - "step": 1333 - }, - { - "epoch": 0.91, - "learning_rate": 0.000437458193979933, - "loss": 2.0615, - "step": 1334 - }, - { - "epoch": 0.91, - "learning_rate": 0.00043731485905398946, - "loss": 1.9326, - "step": 1335 - }, - { - "epoch": 0.91, - "learning_rate": 0.0004371715241280458, - "loss": 2.1058, - "step": 1336 - }, - { - "epoch": 0.91, - "learning_rate": 0.00043702818920210223, - "loss": 2.0959, - "step": 1337 - }, - { - "epoch": 0.91, - "learning_rate": 0.00043688485427615856, - "loss": 2.0412, - "step": 1338 - }, - { - "epoch": 0.92, - "learning_rate": 0.000436741519350215, - "loss": 2.104, - "step": 1339 - }, - { - "epoch": 0.92, - "learning_rate": 0.00043659818442427133, - "loss": 2.0625, - "step": 1340 - }, - { - "epoch": 0.92, - "learning_rate": 0.0004364548494983277, - "loss": 2.1427, - "step": 1341 - }, - { - "epoch": 0.92, - "learning_rate": 0.0004363115145723841, - "loss": 2.0692, - "step": 1342 - }, - { - "epoch": 0.92, - "learning_rate": 0.0004361681796464405, - "loss": 2.1571, - "step": 1343 - }, - { - "epoch": 0.92, - "learning_rate": 0.00043602484472049686, - "loss": 2.0097, - "step": 1344 - }, - { - "epoch": 0.92, - "learning_rate": 0.00043588150979455325, - "loss": 2.0033, - "step": 1345 - }, - { - "epoch": 0.92, - "learning_rate": 0.00043573817486860963, - "loss": 2.0994, - "step": 1346 - }, - { - "epoch": 0.92, - "learning_rate": 0.000435594839942666, - "loss": 2.0478, - "step": 1347 - }, - { - "epoch": 0.92, - "learning_rate": 0.00043545150501672234, - "loss": 2.006, - "step": 1348 - }, - { - "epoch": 0.92, - "learning_rate": 0.0004353081700907788, - "loss": 2.0641, - "step": 1349 - }, - { - "epoch": 0.92, - "learning_rate": 0.0004351648351648351, - "loss": 2.079, - "step": 1350 - }, - { - "epoch": 0.92, - "learning_rate": 0.00043502150023889155, - "loss": 2.1691, - "step": 1351 - }, - { - "epoch": 0.92, - "learning_rate": 0.0004348781653129479, - "loss": 2.0747, - "step": 1352 - }, - { - "epoch": 0.93, - "learning_rate": 0.0004347348303870042, - "loss": 2.1697, - "step": 1353 - }, - { - "epoch": 0.93, - "learning_rate": 0.00043459149546106065, - "loss": 2.0546, - "step": 1354 - }, - { - "epoch": 0.93, - "learning_rate": 0.000434448160535117, - "loss": 2.1145, - "step": 1355 - }, - { - "epoch": 0.93, - "learning_rate": 0.0004343048256091734, - "loss": 2.0477, - "step": 1356 - }, - { - "epoch": 0.93, - "learning_rate": 0.00043416149068322974, - "loss": 2.1252, - "step": 1357 - }, - { - "epoch": 0.93, - "learning_rate": 0.0004340181557572862, - "loss": 2.0674, - "step": 1358 - }, - { - "epoch": 0.93, - "learning_rate": 0.0004338748208313425, - "loss": 2.2159, - "step": 1359 - }, - { - "epoch": 0.93, - "learning_rate": 0.0004337314859053989, - "loss": 2.0525, - "step": 1360 - }, - { - "epoch": 0.93, - "learning_rate": 0.0004335881509794553, - "loss": 2.1488, - "step": 1361 - }, - { - "epoch": 0.93, - "learning_rate": 0.00043344481605351166, - "loss": 2.0317, - "step": 1362 - }, - { - "epoch": 0.93, - "learning_rate": 0.00043330148112756805, - "loss": 2.0714, - "step": 1363 - }, - { - "epoch": 0.93, - "learning_rate": 0.00043315814620162443, - "loss": 2.1052, - "step": 1364 - }, - { - "epoch": 0.93, - "learning_rate": 0.0004330148112756808, - "loss": 2.139, - "step": 1365 - }, - { - "epoch": 0.93, - "learning_rate": 0.0004328714763497372, - "loss": 2.1675, - "step": 1366 - }, - { - "epoch": 0.93, - "learning_rate": 0.00043272814142379353, - "loss": 2.1032, - "step": 1367 - }, - { - "epoch": 0.94, - "learning_rate": 0.00043258480649784997, - "loss": 2.0958, - "step": 1368 - }, - { - "epoch": 0.94, - "learning_rate": 0.0004324414715719063, - "loss": 2.1408, - "step": 1369 - }, - { - "epoch": 0.94, - "learning_rate": 0.00043229813664596273, - "loss": 2.0466, - "step": 1370 - }, - { - "epoch": 0.94, - "learning_rate": 0.00043215480172001906, - "loss": 2.097, - "step": 1371 - }, - { - "epoch": 0.94, - "learning_rate": 0.0004320114667940755, - "loss": 2.1923, - "step": 1372 - }, - { - "epoch": 0.94, - "learning_rate": 0.00043186813186813183, - "loss": 2.0855, - "step": 1373 - }, - { - "epoch": 0.94, - "learning_rate": 0.00043172479694218816, - "loss": 2.1068, - "step": 1374 - }, - { - "epoch": 0.94, - "learning_rate": 0.0004315814620162446, - "loss": 2.1737, - "step": 1375 - }, - { - "epoch": 0.94, - "learning_rate": 0.00043143812709030093, - "loss": 2.1328, - "step": 1376 - }, - { - "epoch": 0.94, - "learning_rate": 0.00043129479216435737, - "loss": 2.1188, - "step": 1377 - }, - { - "epoch": 0.94, - "learning_rate": 0.0004311514572384137, - "loss": 2.1673, - "step": 1378 - }, - { - "epoch": 0.94, - "learning_rate": 0.00043100812231247013, - "loss": 2.0582, - "step": 1379 - }, - { - "epoch": 0.94, - "learning_rate": 0.00043086478738652646, - "loss": 2.1352, - "step": 1380 - }, - { - "epoch": 0.94, - "learning_rate": 0.00043072145246058285, - "loss": 1.9892, - "step": 1381 - }, - { - "epoch": 0.94, - "learning_rate": 0.00043057811753463923, - "loss": 2.1073, - "step": 1382 - }, - { - "epoch": 0.95, - "learning_rate": 0.0004304347826086956, - "loss": 1.9898, - "step": 1383 - }, - { - "epoch": 0.95, - "learning_rate": 0.000430291447682752, - "loss": 2.0761, - "step": 1384 - }, - { - "epoch": 0.95, - "learning_rate": 0.0004301481127568084, - "loss": 2.0451, - "step": 1385 - }, - { - "epoch": 0.95, - "learning_rate": 0.00043000477783086477, - "loss": 2.1101, - "step": 1386 - }, - { - "epoch": 0.95, - "learning_rate": 0.00042986144290492115, - "loss": 2.089, - "step": 1387 - }, - { - "epoch": 0.95, - "learning_rate": 0.0004297181079789775, - "loss": 2.1503, - "step": 1388 - }, - { - "epoch": 0.95, - "learning_rate": 0.0004295747730530339, - "loss": 2.1124, - "step": 1389 - }, - { - "epoch": 0.95, - "learning_rate": 0.00042943143812709025, - "loss": 2.0814, - "step": 1390 - }, - { - "epoch": 0.95, - "learning_rate": 0.0004292881032011467, - "loss": 2.0763, - "step": 1391 - }, - { - "epoch": 0.95, - "learning_rate": 0.000429144768275203, - "loss": 2.0478, - "step": 1392 - }, - { - "epoch": 0.95, - "learning_rate": 0.00042900143334925945, - "loss": 2.0456, - "step": 1393 - }, - { - "epoch": 0.95, - "learning_rate": 0.0004288580984233158, - "loss": 1.9715, - "step": 1394 - }, - { - "epoch": 0.95, - "learning_rate": 0.0004287147634973721, - "loss": 2.1651, - "step": 1395 - }, - { - "epoch": 0.95, - "learning_rate": 0.00042857142857142855, - "loss": 2.0314, - "step": 1396 - }, - { - "epoch": 0.96, - "learning_rate": 0.0004284280936454849, - "loss": 2.1104, - "step": 1397 - }, - { - "epoch": 0.96, - "learning_rate": 0.0004282847587195413, - "loss": 2.068, - "step": 1398 - }, - { - "epoch": 0.96, - "learning_rate": 0.00042814142379359765, - "loss": 2.013, - "step": 1399 - }, - { - "epoch": 0.96, - "learning_rate": 0.0004279980888676541, - "loss": 2.058, - "step": 1400 - }, - { - "epoch": 0.96, - "eval_loss": 2.0060460567474365, - "eval_runtime": 1646.7958, - "eval_samples_per_second": 6.072, - "eval_steps_per_second": 6.072, - "step": 1400 - }, - { - "epoch": 0.96, - "learning_rate": 0.0004278547539417104, - "loss": 2.0666, - "step": 1401 - }, - { - "epoch": 0.96, - "learning_rate": 0.0004277114190157668, - "loss": 2.1469, - "step": 1402 - }, - { - "epoch": 0.96, - "learning_rate": 0.0004275680840898232, - "loss": 2.0233, - "step": 1403 - }, - { - "epoch": 0.96, - "learning_rate": 0.00042742474916387957, - "loss": 2.0452, - "step": 1404 - }, - { - "epoch": 0.96, - "learning_rate": 0.00042728141423793595, - "loss": 2.0146, - "step": 1405 - }, - { - "epoch": 0.96, - "learning_rate": 0.00042713807931199233, - "loss": 2.1285, - "step": 1406 - }, - { - "epoch": 0.96, - "learning_rate": 0.0004269947443860487, - "loss": 2.0872, - "step": 1407 - }, - { - "epoch": 0.96, - "learning_rate": 0.0004268514094601051, - "loss": 2.0824, - "step": 1408 - }, - { - "epoch": 0.96, - "learning_rate": 0.00042670807453416143, - "loss": 2.1307, - "step": 1409 - }, - { - "epoch": 0.96, - "learning_rate": 0.00042656473960821787, - "loss": 2.0136, - "step": 1410 - }, - { - "epoch": 0.96, - "learning_rate": 0.0004264214046822742, - "loss": 2.0709, - "step": 1411 - }, - { - "epoch": 0.97, - "learning_rate": 0.00042627806975633064, - "loss": 2.0025, - "step": 1412 - }, - { - "epoch": 0.97, - "learning_rate": 0.00042613473483038697, - "loss": 2.1293, - "step": 1413 - }, - { - "epoch": 0.97, - "learning_rate": 0.00042599139990444335, - "loss": 2.1285, - "step": 1414 - }, - { - "epoch": 0.97, - "learning_rate": 0.00042584806497849973, - "loss": 2.1601, - "step": 1415 - }, - { - "epoch": 0.97, - "learning_rate": 0.00042570473005255606, - "loss": 2.0589, - "step": 1416 - }, - { - "epoch": 0.97, - "learning_rate": 0.0004255613951266125, - "loss": 2.087, - "step": 1417 - }, - { - "epoch": 0.97, - "learning_rate": 0.00042541806020066883, - "loss": 2.1115, - "step": 1418 - }, - { - "epoch": 0.97, - "learning_rate": 0.00042527472527472527, - "loss": 2.0327, - "step": 1419 - }, - { - "epoch": 0.97, - "learning_rate": 0.0004251313903487816, - "loss": 2.0255, - "step": 1420 - }, - { - "epoch": 0.97, - "learning_rate": 0.00042498805542283804, - "loss": 2.0882, - "step": 1421 - }, - { - "epoch": 0.97, - "learning_rate": 0.00042484472049689436, - "loss": 2.1193, - "step": 1422 - }, - { - "epoch": 0.97, - "learning_rate": 0.00042470138557095075, - "loss": 2.0616, - "step": 1423 - }, - { - "epoch": 0.97, - "learning_rate": 0.00042455805064500713, - "loss": 2.088, - "step": 1424 - }, - { - "epoch": 0.97, - "learning_rate": 0.0004244147157190635, - "loss": 2.1032, - "step": 1425 - }, - { - "epoch": 0.97, - "learning_rate": 0.0004242713807931199, - "loss": 2.1068, - "step": 1426 - }, - { - "epoch": 0.98, - "learning_rate": 0.0004241280458671763, - "loss": 2.0329, - "step": 1427 - }, - { - "epoch": 0.98, - "learning_rate": 0.00042398471094123267, - "loss": 2.2018, - "step": 1428 - }, - { - "epoch": 0.98, - "learning_rate": 0.00042384137601528905, - "loss": 2.107, - "step": 1429 - }, - { - "epoch": 0.98, - "learning_rate": 0.0004236980410893454, - "loss": 2.0242, - "step": 1430 - }, - { - "epoch": 0.98, - "learning_rate": 0.00042355470616340176, - "loss": 2.1247, - "step": 1431 - }, - { - "epoch": 0.98, - "learning_rate": 0.00042341137123745815, - "loss": 2.0634, - "step": 1432 - }, - { - "epoch": 0.98, - "learning_rate": 0.00042326803631151453, - "loss": 2.0362, - "step": 1433 - }, - { - "epoch": 0.98, - "learning_rate": 0.0004231247013855709, - "loss": 2.1368, - "step": 1434 - }, - { - "epoch": 0.98, - "learning_rate": 0.0004229813664596273, - "loss": 1.9856, - "step": 1435 - }, - { - "epoch": 0.98, - "learning_rate": 0.0004228380315336837, - "loss": 2.0913, - "step": 1436 - }, - { - "epoch": 0.98, - "learning_rate": 0.00042269469660774, - "loss": 2.1541, - "step": 1437 - }, - { - "epoch": 0.98, - "learning_rate": 0.00042255136168179645, - "loss": 2.1063, - "step": 1438 - }, - { - "epoch": 0.98, - "learning_rate": 0.0004224080267558528, - "loss": 2.0437, - "step": 1439 - }, - { - "epoch": 0.98, - "learning_rate": 0.0004222646918299092, - "loss": 2.1481, - "step": 1440 - }, - { - "epoch": 0.99, - "learning_rate": 0.00042212135690396555, - "loss": 2.2177, - "step": 1441 - }, - { - "epoch": 0.99, - "learning_rate": 0.000421978021978022, - "loss": 2.0707, - "step": 1442 - }, - { - "epoch": 0.99, - "learning_rate": 0.0004218346870520783, - "loss": 2.1417, - "step": 1443 - }, - { - "epoch": 0.99, - "learning_rate": 0.0004216913521261347, - "loss": 2.0127, - "step": 1444 - }, - { - "epoch": 0.99, - "learning_rate": 0.0004215480172001911, - "loss": 2.0886, - "step": 1445 - }, - { - "epoch": 0.99, - "learning_rate": 0.00042140468227424747, - "loss": 2.0717, - "step": 1446 - }, - { - "epoch": 0.99, - "learning_rate": 0.00042126134734830385, - "loss": 2.1458, - "step": 1447 - }, - { - "epoch": 0.99, - "learning_rate": 0.0004211180124223602, - "loss": 2.1917, - "step": 1448 - }, - { - "epoch": 0.99, - "learning_rate": 0.00042097467749641656, - "loss": 2.0842, - "step": 1449 - }, - { - "epoch": 0.99, - "learning_rate": 0.00042083134257047295, - "loss": 2.1187, - "step": 1450 - }, - { - "epoch": 0.99, - "learning_rate": 0.00042068800764452933, - "loss": 2.141, - "step": 1451 - }, - { - "epoch": 0.99, - "learning_rate": 0.0004205446727185857, - "loss": 2.079, - "step": 1452 - }, - { - "epoch": 0.99, - "learning_rate": 0.0004204013377926421, - "loss": 2.1057, - "step": 1453 - }, - { - "epoch": 0.99, - "learning_rate": 0.0004202580028666985, - "loss": 2.1162, - "step": 1454 - }, - { - "epoch": 0.99, - "learning_rate": 0.00042011466794075487, - "loss": 2.0668, - "step": 1455 - }, - { - "epoch": 1.0, - "learning_rate": 0.0004199713330148112, - "loss": 2.1383, - "step": 1456 - }, - { - "epoch": 1.0, - "learning_rate": 0.00041982799808886763, - "loss": 2.0792, - "step": 1457 - }, - { - "epoch": 1.0, - "learning_rate": 0.00041968466316292396, - "loss": 2.1121, - "step": 1458 - }, - { - "epoch": 1.0, - "learning_rate": 0.0004195413282369804, - "loss": 2.0756, - "step": 1459 - }, - { - "epoch": 1.0, - "learning_rate": 0.00041939799331103673, - "loss": 1.9645, - "step": 1460 - }, - { - "epoch": 1.0, - "learning_rate": 0.00041925465838509317, - "loss": 2.0766, - "step": 1461 - }, - { - "epoch": 1.0, - "learning_rate": 0.0004191113234591495, - "loss": 2.0359, - "step": 1462 - }, - { - "epoch": 1.0, - "learning_rate": 0.0004189679885332059, - "loss": 2.2025, - "step": 1463 - }, - { - "epoch": 1.0, - "learning_rate": 0.00041882465360726227, - "loss": 1.9701, - "step": 1464 - }, - { - "epoch": 1.0, - "learning_rate": 0.0004186813186813186, - "loss": 2.0596, - "step": 1465 - }, - { - "epoch": 1.0, - "learning_rate": 0.00041853798375537503, - "loss": 2.0145, - "step": 1466 - }, - { - "epoch": 1.0, - "learning_rate": 0.00041839464882943136, - "loss": 1.9526, - "step": 1467 - }, - { - "epoch": 1.0, - "learning_rate": 0.0004182513139034878, - "loss": 2.0803, - "step": 1468 - }, - { - "epoch": 1.0, - "learning_rate": 0.00041810797897754413, - "loss": 2.0068, - "step": 1469 - }, - { - "epoch": 1.01, - "learning_rate": 0.0004179646440516005, - "loss": 2.013, - "step": 1470 - }, - { - "epoch": 1.01, - "learning_rate": 0.0004178213091256569, - "loss": 2.0862, - "step": 1471 - }, - { - "epoch": 1.01, - "learning_rate": 0.0004176779741997133, - "loss": 1.8934, - "step": 1472 - }, - { - "epoch": 1.01, - "learning_rate": 0.00041753463927376967, - "loss": 2.0101, - "step": 1473 - }, - { - "epoch": 1.01, - "learning_rate": 0.00041739130434782605, - "loss": 2.0879, - "step": 1474 - }, - { - "epoch": 1.01, - "learning_rate": 0.00041724796942188243, - "loss": 2.0305, - "step": 1475 - }, - { - "epoch": 1.01, - "learning_rate": 0.0004171046344959388, - "loss": 1.9277, - "step": 1476 - }, - { - "epoch": 1.01, - "learning_rate": 0.00041696129956999515, - "loss": 2.0387, - "step": 1477 - }, - { - "epoch": 1.01, - "learning_rate": 0.0004168179646440516, - "loss": 2.0777, - "step": 1478 - }, - { - "epoch": 1.01, - "learning_rate": 0.0004166746297181079, - "loss": 2.0581, - "step": 1479 - }, - { - "epoch": 1.01, - "learning_rate": 0.00041653129479216435, - "loss": 1.9738, - "step": 1480 - }, - { - "epoch": 1.01, - "learning_rate": 0.0004163879598662207, - "loss": 2.0169, - "step": 1481 - }, - { - "epoch": 1.01, - "learning_rate": 0.0004162446249402771, - "loss": 1.9716, - "step": 1482 - }, - { - "epoch": 1.01, - "learning_rate": 0.00041610129001433345, - "loss": 2.0244, - "step": 1483 - }, - { - "epoch": 1.01, - "learning_rate": 0.0004159579550883898, - "loss": 2.0311, - "step": 1484 - }, - { - "epoch": 1.02, - "learning_rate": 0.0004158146201624462, - "loss": 2.0443, - "step": 1485 - }, - { - "epoch": 1.02, - "learning_rate": 0.00041567128523650255, - "loss": 1.9971, - "step": 1486 - }, - { - "epoch": 1.02, - "learning_rate": 0.000415527950310559, - "loss": 2.0571, - "step": 1487 - }, - { - "epoch": 1.02, - "learning_rate": 0.0004153846153846153, - "loss": 2.0578, - "step": 1488 - }, - { - "epoch": 1.02, - "learning_rate": 0.00041524128045867175, - "loss": 2.0293, - "step": 1489 - }, - { - "epoch": 1.02, - "learning_rate": 0.0004150979455327281, - "loss": 1.9722, - "step": 1490 - }, - { - "epoch": 1.02, - "learning_rate": 0.00041495461060678447, - "loss": 2.076, - "step": 1491 - }, - { - "epoch": 1.02, - "learning_rate": 0.00041481127568084085, - "loss": 2.0976, - "step": 1492 - }, - { - "epoch": 1.02, - "learning_rate": 0.00041466794075489723, - "loss": 2.0126, - "step": 1493 - }, - { - "epoch": 1.02, - "learning_rate": 0.0004145246058289536, - "loss": 1.9639, - "step": 1494 - }, - { - "epoch": 1.02, - "learning_rate": 0.00041438127090301, - "loss": 2.0662, - "step": 1495 - }, - { - "epoch": 1.02, - "learning_rate": 0.0004142379359770664, - "loss": 2.0507, - "step": 1496 - }, - { - "epoch": 1.02, - "learning_rate": 0.00041409460105112277, - "loss": 1.9897, - "step": 1497 - }, - { - "epoch": 1.02, - "learning_rate": 0.0004139512661251791, - "loss": 2.1243, - "step": 1498 - }, - { - "epoch": 1.02, - "learning_rate": 0.00041380793119923554, - "loss": 1.9646, - "step": 1499 - }, - { - "epoch": 1.03, - "learning_rate": 0.00041366459627329187, - "loss": 2.0035, - "step": 1500 - }, - { - "epoch": 1.03, - "learning_rate": 0.0004135212613473483, - "loss": 2.0319, - "step": 1501 - }, - { - "epoch": 1.03, - "learning_rate": 0.00041337792642140463, - "loss": 2.052, - "step": 1502 - }, - { - "epoch": 1.03, - "learning_rate": 0.00041323459149546107, - "loss": 2.0439, - "step": 1503 - }, - { - "epoch": 1.03, - "learning_rate": 0.0004130912565695174, - "loss": 2.0266, - "step": 1504 - }, - { - "epoch": 1.03, - "learning_rate": 0.00041294792164357373, - "loss": 2.0671, - "step": 1505 - }, - { - "epoch": 1.03, - "learning_rate": 0.00041280458671763017, - "loss": 1.9876, - "step": 1506 - }, - { - "epoch": 1.03, - "learning_rate": 0.0004126612517916865, - "loss": 2.0493, - "step": 1507 - }, - { - "epoch": 1.03, - "learning_rate": 0.00041251791686574294, - "loss": 2.1042, - "step": 1508 - }, - { - "epoch": 1.03, - "learning_rate": 0.00041237458193979927, - "loss": 1.9368, - "step": 1509 - }, - { - "epoch": 1.03, - "learning_rate": 0.0004122312470138557, - "loss": 2.0264, - "step": 1510 - }, - { - "epoch": 1.03, - "learning_rate": 0.00041208791208791203, - "loss": 2.0312, - "step": 1511 - }, - { - "epoch": 1.03, - "learning_rate": 0.0004119445771619684, - "loss": 2.0225, - "step": 1512 - }, - { - "epoch": 1.03, - "learning_rate": 0.0004118012422360248, - "loss": 2.0262, - "step": 1513 - }, - { - "epoch": 1.04, - "learning_rate": 0.0004116579073100812, - "loss": 2.0185, - "step": 1514 - }, - { - "epoch": 1.04, - "learning_rate": 0.00041151457238413757, - "loss": 1.9769, - "step": 1515 - }, - { - "epoch": 1.04, - "learning_rate": 0.00041137123745819395, - "loss": 2.0439, - "step": 1516 - }, - { - "epoch": 1.04, - "learning_rate": 0.00041122790253225034, - "loss": 2.0507, - "step": 1517 - }, - { - "epoch": 1.04, - "learning_rate": 0.0004110845676063067, - "loss": 2.0452, - "step": 1518 - }, - { - "epoch": 1.04, - "learning_rate": 0.00041094123268036305, - "loss": 2.1165, - "step": 1519 - }, - { - "epoch": 1.04, - "learning_rate": 0.0004107978977544195, - "loss": 1.9441, - "step": 1520 - }, - { - "epoch": 1.04, - "learning_rate": 0.0004106545628284758, - "loss": 1.9882, - "step": 1521 - }, - { - "epoch": 1.04, - "learning_rate": 0.00041051122790253225, - "loss": 2.0254, - "step": 1522 - }, - { - "epoch": 1.04, - "learning_rate": 0.0004103678929765886, - "loss": 1.978, - "step": 1523 - }, - { - "epoch": 1.04, - "learning_rate": 0.000410224558050645, - "loss": 1.9725, - "step": 1524 - }, - { - "epoch": 1.04, - "learning_rate": 0.00041008122312470135, - "loss": 1.9926, - "step": 1525 - }, - { - "epoch": 1.04, - "learning_rate": 0.0004099378881987577, - "loss": 1.9377, - "step": 1526 - }, - { - "epoch": 1.04, - "learning_rate": 0.0004097945532728141, - "loss": 1.9989, - "step": 1527 - }, - { - "epoch": 1.04, - "learning_rate": 0.00040965121834687045, - "loss": 1.999, - "step": 1528 - }, - { - "epoch": 1.05, - "learning_rate": 0.0004095078834209269, - "loss": 2.0974, - "step": 1529 - }, - { - "epoch": 1.05, - "learning_rate": 0.0004093645484949832, - "loss": 1.9753, - "step": 1530 - }, - { - "epoch": 1.05, - "learning_rate": 0.00040922121356903965, - "loss": 1.9776, - "step": 1531 - }, - { - "epoch": 1.05, - "learning_rate": 0.000409077878643096, - "loss": 2.004, - "step": 1532 - }, - { - "epoch": 1.05, - "learning_rate": 0.00040893454371715237, - "loss": 1.9964, - "step": 1533 - }, - { - "epoch": 1.05, - "learning_rate": 0.00040879120879120875, - "loss": 1.9928, - "step": 1534 - }, - { - "epoch": 1.05, - "learning_rate": 0.00040864787386526514, - "loss": 2.0106, - "step": 1535 - }, - { - "epoch": 1.05, - "learning_rate": 0.0004085045389393215, - "loss": 1.9374, - "step": 1536 - }, - { - "epoch": 1.05, - "learning_rate": 0.0004083612040133779, - "loss": 1.9205, - "step": 1537 - }, - { - "epoch": 1.05, - "learning_rate": 0.0004082178690874343, - "loss": 1.9928, - "step": 1538 - }, - { - "epoch": 1.05, - "learning_rate": 0.00040807453416149067, - "loss": 2.0717, - "step": 1539 - }, - { - "epoch": 1.05, - "learning_rate": 0.000407931199235547, - "loss": 2.0671, - "step": 1540 - }, - { - "epoch": 1.05, - "learning_rate": 0.00040778786430960344, - "loss": 1.955, - "step": 1541 - }, - { - "epoch": 1.05, - "learning_rate": 0.00040764452938365977, - "loss": 1.9376, - "step": 1542 - }, - { - "epoch": 1.05, - "learning_rate": 0.0004075011944577162, - "loss": 1.9598, - "step": 1543 - }, - { - "epoch": 1.06, - "learning_rate": 0.00040735785953177254, - "loss": 1.981, - "step": 1544 - }, - { - "epoch": 1.06, - "learning_rate": 0.000407214524605829, - "loss": 2.0193, - "step": 1545 - }, - { - "epoch": 1.06, - "learning_rate": 0.0004070711896798853, - "loss": 1.9919, - "step": 1546 - }, - { - "epoch": 1.06, - "learning_rate": 0.00040692785475394163, - "loss": 2.0287, - "step": 1547 - }, - { - "epoch": 1.06, - "learning_rate": 0.00040678451982799807, - "loss": 2.0366, - "step": 1548 - }, - { - "epoch": 1.06, - "learning_rate": 0.0004066411849020544, - "loss": 1.9564, - "step": 1549 - }, - { - "epoch": 1.06, - "learning_rate": 0.00040649784997611084, - "loss": 2.0538, - "step": 1550 - }, - { - "epoch": 1.06, - "learning_rate": 0.00040635451505016717, - "loss": 2.0049, - "step": 1551 - }, - { - "epoch": 1.06, - "learning_rate": 0.00040621118012422355, - "loss": 2.0463, - "step": 1552 - }, - { - "epoch": 1.06, - "learning_rate": 0.00040606784519827994, - "loss": 2.002, - "step": 1553 - }, - { - "epoch": 1.06, - "learning_rate": 0.0004059245102723363, - "loss": 2.0292, - "step": 1554 - }, - { - "epoch": 1.06, - "learning_rate": 0.0004057811753463927, - "loss": 2.0681, - "step": 1555 - }, - { - "epoch": 1.06, - "learning_rate": 0.0004056378404204491, - "loss": 2.0833, - "step": 1556 - }, - { - "epoch": 1.06, - "learning_rate": 0.00040549450549450547, - "loss": 2.0966, - "step": 1557 - }, - { - "epoch": 1.07, - "learning_rate": 0.00040535117056856185, - "loss": 1.9594, - "step": 1558 - }, - { - "epoch": 1.07, - "learning_rate": 0.0004052078356426182, - "loss": 1.9783, - "step": 1559 - }, - { - "epoch": 1.07, - "learning_rate": 0.0004050645007166746, - "loss": 2.0691, - "step": 1560 - }, - { - "epoch": 1.07, - "learning_rate": 0.00040492116579073095, - "loss": 2.0509, - "step": 1561 - }, - { - "epoch": 1.07, - "learning_rate": 0.0004047778308647874, - "loss": 2.0472, - "step": 1562 - }, - { - "epoch": 1.07, - "learning_rate": 0.0004046344959388437, - "loss": 1.9536, - "step": 1563 - }, - { - "epoch": 1.07, - "learning_rate": 0.00040449116101290016, - "loss": 2.0033, - "step": 1564 - }, - { - "epoch": 1.07, - "learning_rate": 0.0004043478260869565, - "loss": 2.0034, - "step": 1565 - }, - { - "epoch": 1.07, - "learning_rate": 0.0004042044911610128, - "loss": 1.9905, - "step": 1566 - }, - { - "epoch": 1.07, - "learning_rate": 0.00040406115623506925, - "loss": 2.0264, - "step": 1567 - }, - { - "epoch": 1.07, - "learning_rate": 0.0004039178213091256, - "loss": 1.982, - "step": 1568 - }, - { - "epoch": 1.07, - "learning_rate": 0.000403774486383182, - "loss": 2.0034, - "step": 1569 - }, - { - "epoch": 1.07, - "learning_rate": 0.00040363115145723835, - "loss": 1.994, - "step": 1570 - }, - { - "epoch": 1.07, - "learning_rate": 0.0004034878165312948, - "loss": 1.9319, - "step": 1571 - }, - { - "epoch": 1.07, - "learning_rate": 0.0004033444816053511, - "loss": 1.9808, - "step": 1572 - }, - { - "epoch": 1.08, - "learning_rate": 0.0004032011466794075, - "loss": 2.057, - "step": 1573 - }, - { - "epoch": 1.08, - "learning_rate": 0.0004030578117534639, - "loss": 1.9804, - "step": 1574 - }, - { - "epoch": 1.08, - "learning_rate": 0.00040291447682752027, - "loss": 1.9346, - "step": 1575 - }, - { - "epoch": 1.08, - "learning_rate": 0.00040277114190157665, - "loss": 1.9386, - "step": 1576 - }, - { - "epoch": 1.08, - "learning_rate": 0.00040262780697563304, - "loss": 2.0997, - "step": 1577 - }, - { - "epoch": 1.08, - "learning_rate": 0.0004024844720496894, - "loss": 1.9615, - "step": 1578 - }, - { - "epoch": 1.08, - "learning_rate": 0.0004023411371237458, - "loss": 2.0695, - "step": 1579 - }, - { - "epoch": 1.08, - "learning_rate": 0.00040219780219780213, - "loss": 2.0928, - "step": 1580 - }, - { - "epoch": 1.08, - "learning_rate": 0.00040205446727185857, - "loss": 1.9805, - "step": 1581 - }, - { - "epoch": 1.08, - "learning_rate": 0.0004019111323459149, - "loss": 1.9855, - "step": 1582 - }, - { - "epoch": 1.08, - "learning_rate": 0.00040176779741997134, - "loss": 2.0609, - "step": 1583 - }, - { - "epoch": 1.08, - "learning_rate": 0.00040162446249402767, - "loss": 2.0458, - "step": 1584 - }, - { - "epoch": 1.08, - "learning_rate": 0.0004014811275680841, - "loss": 1.9908, - "step": 1585 - }, - { - "epoch": 1.08, - "learning_rate": 0.00040133779264214044, - "loss": 2.1244, - "step": 1586 - }, - { - "epoch": 1.09, - "learning_rate": 0.00040119445771619677, - "loss": 2.0618, - "step": 1587 - }, - { - "epoch": 1.09, - "learning_rate": 0.0004010511227902532, - "loss": 1.9426, - "step": 1588 - }, - { - "epoch": 1.09, - "learning_rate": 0.00040090778786430953, - "loss": 2.0683, - "step": 1589 - }, - { - "epoch": 1.09, - "learning_rate": 0.00040076445293836597, - "loss": 1.9883, - "step": 1590 - }, - { - "epoch": 1.09, - "learning_rate": 0.0004006211180124223, - "loss": 2.081, - "step": 1591 - }, - { - "epoch": 1.09, - "learning_rate": 0.00040047778308647874, - "loss": 1.9766, - "step": 1592 - }, - { - "epoch": 1.09, - "learning_rate": 0.00040033444816053507, - "loss": 2.0358, - "step": 1593 - }, - { - "epoch": 1.09, - "learning_rate": 0.00040019111323459145, - "loss": 2.0776, - "step": 1594 - }, - { - "epoch": 1.09, - "learning_rate": 0.00040004777830864784, - "loss": 2.065, - "step": 1595 - }, - { - "epoch": 1.09, - "learning_rate": 0.0003999044433827042, - "loss": 2.0691, - "step": 1596 - }, - { - "epoch": 1.09, - "learning_rate": 0.0003997611084567606, - "loss": 2.1403, - "step": 1597 - }, - { - "epoch": 1.09, - "learning_rate": 0.000399617773530817, - "loss": 1.9704, - "step": 1598 - }, - { - "epoch": 1.09, - "learning_rate": 0.00039947443860487337, - "loss": 2.0766, - "step": 1599 - }, - { - "epoch": 1.09, - "learning_rate": 0.00039933110367892976, - "loss": 2.005, - "step": 1600 - }, - { - "epoch": 1.09, - "eval_loss": 2.000626564025879, - "eval_runtime": 1644.0479, - "eval_samples_per_second": 6.083, - "eval_steps_per_second": 6.083, - "step": 1600 - }, - { - "epoch": 1.09, - "learning_rate": 0.0003991877687529861, - "loss": 1.9763, - "step": 1601 - }, - { - "epoch": 1.1, - "learning_rate": 0.0003990444338270425, - "loss": 2.0162, - "step": 1602 - }, - { - "epoch": 1.1, - "learning_rate": 0.00039890109890109885, - "loss": 1.8628, - "step": 1603 - }, - { - "epoch": 1.1, - "learning_rate": 0.0003987577639751553, - "loss": 2.0199, - "step": 1604 - }, - { - "epoch": 1.1, - "learning_rate": 0.0003986144290492116, - "loss": 1.9488, - "step": 1605 - }, - { - "epoch": 1.1, - "learning_rate": 0.000398471094123268, - "loss": 2.0393, - "step": 1606 - }, - { - "epoch": 1.1, - "learning_rate": 0.0003983277591973244, - "loss": 1.9423, - "step": 1607 - }, - { - "epoch": 1.1, - "learning_rate": 0.0003981844242713807, - "loss": 2.1568, - "step": 1608 - }, - { - "epoch": 1.1, - "learning_rate": 0.00039804108934543716, - "loss": 2.0384, - "step": 1609 - }, - { - "epoch": 1.1, - "learning_rate": 0.0003978977544194935, - "loss": 1.9644, - "step": 1610 - }, - { - "epoch": 1.1, - "learning_rate": 0.0003977544194935499, - "loss": 2.0689, - "step": 1611 - }, - { - "epoch": 1.1, - "learning_rate": 0.00039761108456760625, - "loss": 2.0017, - "step": 1612 - }, - { - "epoch": 1.1, - "learning_rate": 0.0003974677496416627, - "loss": 1.9427, - "step": 1613 - }, - { - "epoch": 1.1, - "learning_rate": 0.000397324414715719, - "loss": 1.9709, - "step": 1614 - }, - { - "epoch": 1.1, - "learning_rate": 0.0003971810797897754, - "loss": 2.0033, - "step": 1615 - }, - { - "epoch": 1.1, - "learning_rate": 0.0003970377448638318, - "loss": 1.9937, - "step": 1616 - }, - { - "epoch": 1.11, - "learning_rate": 0.00039689440993788817, - "loss": 2.005, - "step": 1617 - }, - { - "epoch": 1.11, - "learning_rate": 0.00039675107501194456, - "loss": 2.0023, - "step": 1618 - }, - { - "epoch": 1.11, - "learning_rate": 0.00039660774008600094, - "loss": 2.0233, - "step": 1619 - }, - { - "epoch": 1.11, - "learning_rate": 0.0003964644051600573, - "loss": 1.962, - "step": 1620 - }, - { - "epoch": 1.11, - "learning_rate": 0.0003963210702341137, - "loss": 2.026, - "step": 1621 - }, - { - "epoch": 1.11, - "learning_rate": 0.00039617773530817004, - "loss": 2.0934, - "step": 1622 - }, - { - "epoch": 1.11, - "learning_rate": 0.0003960344003822264, - "loss": 2.0144, - "step": 1623 - }, - { - "epoch": 1.11, - "learning_rate": 0.0003958910654562828, - "loss": 1.9919, - "step": 1624 - }, - { - "epoch": 1.11, - "learning_rate": 0.0003957477305303392, - "loss": 1.9533, - "step": 1625 - }, - { - "epoch": 1.11, - "learning_rate": 0.00039560439560439557, - "loss": 1.9303, - "step": 1626 - }, - { - "epoch": 1.11, - "learning_rate": 0.00039546106067845196, - "loss": 1.9299, - "step": 1627 - }, - { - "epoch": 1.11, - "learning_rate": 0.00039531772575250834, - "loss": 2.0134, - "step": 1628 - }, - { - "epoch": 1.11, - "learning_rate": 0.00039517439082656467, - "loss": 1.9218, - "step": 1629 - }, - { - "epoch": 1.11, - "learning_rate": 0.0003950310559006211, - "loss": 2.0074, - "step": 1630 - }, - { - "epoch": 1.12, - "learning_rate": 0.00039488772097467744, - "loss": 2.1142, - "step": 1631 - }, - { - "epoch": 1.12, - "learning_rate": 0.0003947443860487339, - "loss": 2.0237, - "step": 1632 - }, - { - "epoch": 1.12, - "learning_rate": 0.0003946010511227902, - "loss": 2.0208, - "step": 1633 - }, - { - "epoch": 1.12, - "learning_rate": 0.00039445771619684664, - "loss": 1.9773, - "step": 1634 - }, - { - "epoch": 1.12, - "learning_rate": 0.00039431438127090297, - "loss": 1.9945, - "step": 1635 - }, - { - "epoch": 1.12, - "learning_rate": 0.00039417104634495936, - "loss": 1.9521, - "step": 1636 - }, - { - "epoch": 1.12, - "learning_rate": 0.00039402771141901574, - "loss": 2.0321, - "step": 1637 - }, - { - "epoch": 1.12, - "learning_rate": 0.0003938843764930721, - "loss": 2.0056, - "step": 1638 - }, - { - "epoch": 1.12, - "learning_rate": 0.0003937410415671285, - "loss": 2.008, - "step": 1639 - }, - { - "epoch": 1.12, - "learning_rate": 0.00039359770664118484, - "loss": 2.0351, - "step": 1640 - }, - { - "epoch": 1.12, - "learning_rate": 0.0003934543717152413, - "loss": 2.0366, - "step": 1641 - }, - { - "epoch": 1.12, - "learning_rate": 0.0003933110367892976, - "loss": 2.0498, - "step": 1642 - }, - { - "epoch": 1.12, - "learning_rate": 0.000393167701863354, - "loss": 2.0764, - "step": 1643 - }, - { - "epoch": 1.12, - "learning_rate": 0.00039302436693741037, - "loss": 1.9468, - "step": 1644 - }, - { - "epoch": 1.12, - "learning_rate": 0.00039288103201146675, - "loss": 2.0169, - "step": 1645 - }, - { - "epoch": 1.13, - "learning_rate": 0.00039273769708552314, - "loss": 1.9899, - "step": 1646 - }, - { - "epoch": 1.13, - "learning_rate": 0.0003925943621595795, - "loss": 2.0945, - "step": 1647 - }, - { - "epoch": 1.13, - "learning_rate": 0.00039245102723363585, - "loss": 2.055, - "step": 1648 - }, - { - "epoch": 1.13, - "learning_rate": 0.0003923076923076923, - "loss": 1.9816, - "step": 1649 - }, - { - "epoch": 1.13, - "learning_rate": 0.0003921643573817486, - "loss": 1.9407, - "step": 1650 - }, - { - "epoch": 1.13, - "learning_rate": 0.00039202102245580506, - "loss": 2.1146, - "step": 1651 - }, - { - "epoch": 1.13, - "learning_rate": 0.0003918776875298614, - "loss": 2.096, - "step": 1652 - }, - { - "epoch": 1.13, - "learning_rate": 0.0003917343526039178, - "loss": 2.0008, - "step": 1653 - }, - { - "epoch": 1.13, - "learning_rate": 0.00039159101767797415, - "loss": 2.0295, - "step": 1654 - }, - { - "epoch": 1.13, - "learning_rate": 0.00039144768275203054, - "loss": 1.9967, - "step": 1655 - }, - { - "epoch": 1.13, - "learning_rate": 0.0003913043478260869, - "loss": 2.0425, - "step": 1656 - }, - { - "epoch": 1.13, - "learning_rate": 0.00039116101290014325, - "loss": 2.062, - "step": 1657 - }, - { - "epoch": 1.13, - "learning_rate": 0.0003910176779741997, - "loss": 2.0158, - "step": 1658 - }, - { - "epoch": 1.13, - "learning_rate": 0.000390874343048256, - "loss": 1.9327, - "step": 1659 - }, - { - "epoch": 1.13, - "learning_rate": 0.00039073100812231246, - "loss": 2.0324, - "step": 1660 - }, - { - "epoch": 1.14, - "learning_rate": 0.0003905876731963688, - "loss": 1.9874, - "step": 1661 - }, - { - "epoch": 1.14, - "learning_rate": 0.00039044433827042517, - "loss": 1.9798, - "step": 1662 - }, - { - "epoch": 1.14, - "learning_rate": 0.00039030100334448155, - "loss": 1.8957, - "step": 1663 - }, - { - "epoch": 1.14, - "learning_rate": 0.00039015766841853794, - "loss": 2.0757, - "step": 1664 - }, - { - "epoch": 1.14, - "learning_rate": 0.0003900143334925943, - "loss": 2.0211, - "step": 1665 - }, - { - "epoch": 1.14, - "learning_rate": 0.0003898709985666507, - "loss": 2.0094, - "step": 1666 - }, - { - "epoch": 1.14, - "learning_rate": 0.0003897276636407071, - "loss": 2.0978, - "step": 1667 - }, - { - "epoch": 1.14, - "learning_rate": 0.0003895843287147635, - "loss": 2.0842, - "step": 1668 - }, - { - "epoch": 1.14, - "learning_rate": 0.0003894409937888198, - "loss": 2.1968, - "step": 1669 - }, - { - "epoch": 1.14, - "learning_rate": 0.00038929765886287624, - "loss": 2.0997, - "step": 1670 - }, - { - "epoch": 1.14, - "learning_rate": 0.00038915432393693257, - "loss": 1.9782, - "step": 1671 - }, - { - "epoch": 1.14, - "learning_rate": 0.000389010989010989, - "loss": 2.0218, - "step": 1672 - }, - { - "epoch": 1.14, - "learning_rate": 0.00038886765408504534, - "loss": 2.038, - "step": 1673 - }, - { - "epoch": 1.14, - "learning_rate": 0.0003887243191591018, - "loss": 2.049, - "step": 1674 - }, - { - "epoch": 1.15, - "learning_rate": 0.0003885809842331581, - "loss": 2.1634, - "step": 1675 - }, - { - "epoch": 1.15, - "learning_rate": 0.00038843764930721444, - "loss": 2.0358, - "step": 1676 - }, - { - "epoch": 1.15, - "learning_rate": 0.0003882943143812709, - "loss": 2.0982, - "step": 1677 - }, - { - "epoch": 1.15, - "learning_rate": 0.0003881509794553272, - "loss": 2.1026, - "step": 1678 - }, - { - "epoch": 1.15, - "learning_rate": 0.00038800764452938364, - "loss": 1.9847, - "step": 1679 - }, - { - "epoch": 1.15, - "learning_rate": 0.00038786430960343997, - "loss": 1.9671, - "step": 1680 - }, - { - "epoch": 1.15, - "learning_rate": 0.0003877209746774964, - "loss": 2.0934, - "step": 1681 - }, - { - "epoch": 1.15, - "learning_rate": 0.00038757763975155274, - "loss": 1.9865, - "step": 1682 - }, - { - "epoch": 1.15, - "learning_rate": 0.0003874343048256091, - "loss": 2.0684, - "step": 1683 - }, - { - "epoch": 1.15, - "learning_rate": 0.0003872909698996655, - "loss": 2.1101, - "step": 1684 - }, - { - "epoch": 1.15, - "learning_rate": 0.0003871476349737219, - "loss": 1.9562, - "step": 1685 - }, - { - "epoch": 1.15, - "learning_rate": 0.0003870043000477783, - "loss": 1.9814, - "step": 1686 - }, - { - "epoch": 1.15, - "learning_rate": 0.00038686096512183466, - "loss": 2.0065, - "step": 1687 - }, - { - "epoch": 1.15, - "learning_rate": 0.00038671763019589104, - "loss": 2.1368, - "step": 1688 - }, - { - "epoch": 1.15, - "learning_rate": 0.0003865742952699474, - "loss": 1.9981, - "step": 1689 - }, - { - "epoch": 1.16, - "learning_rate": 0.00038643096034400375, - "loss": 1.993, - "step": 1690 - }, - { - "epoch": 1.16, - "learning_rate": 0.0003862876254180602, - "loss": 2.0458, - "step": 1691 - }, - { - "epoch": 1.16, - "learning_rate": 0.0003861442904921165, - "loss": 1.9745, - "step": 1692 - }, - { - "epoch": 1.16, - "learning_rate": 0.00038600095556617296, - "loss": 1.932, - "step": 1693 - }, - { - "epoch": 1.16, - "learning_rate": 0.0003858576206402293, - "loss": 2.0137, - "step": 1694 - }, - { - "epoch": 1.16, - "learning_rate": 0.0003857142857142857, - "loss": 2.0512, - "step": 1695 - }, - { - "epoch": 1.16, - "learning_rate": 0.00038557095078834206, - "loss": 1.9725, - "step": 1696 - }, - { - "epoch": 1.16, - "learning_rate": 0.0003854276158623984, - "loss": 1.9678, - "step": 1697 - }, - { - "epoch": 1.16, - "learning_rate": 0.0003852842809364548, - "loss": 1.9604, - "step": 1698 - }, - { - "epoch": 1.16, - "learning_rate": 0.00038514094601051115, - "loss": 2.0289, - "step": 1699 - }, - { - "epoch": 1.16, - "learning_rate": 0.0003849976110845676, - "loss": 1.9483, - "step": 1700 - }, - { - "epoch": 1.16, - "learning_rate": 0.0003848542761586239, - "loss": 2.0639, - "step": 1701 - }, - { - "epoch": 1.16, - "learning_rate": 0.00038471094123268036, - "loss": 2.0865, - "step": 1702 - }, - { - "epoch": 1.16, - "learning_rate": 0.0003845676063067367, - "loss": 2.014, - "step": 1703 - }, - { - "epoch": 1.16, - "learning_rate": 0.00038442427138079307, - "loss": 2.0076, - "step": 1704 - }, - { - "epoch": 1.17, - "learning_rate": 0.00038428093645484946, - "loss": 2.042, - "step": 1705 - }, - { - "epoch": 1.17, - "learning_rate": 0.00038413760152890584, - "loss": 1.974, - "step": 1706 - }, - { - "epoch": 1.17, - "learning_rate": 0.0003839942666029622, - "loss": 2.0785, - "step": 1707 - }, - { - "epoch": 1.17, - "learning_rate": 0.0003838509316770186, - "loss": 1.971, - "step": 1708 - }, - { - "epoch": 1.17, - "learning_rate": 0.000383707596751075, - "loss": 2.0806, - "step": 1709 - }, - { - "epoch": 1.17, - "learning_rate": 0.0003835642618251314, - "loss": 2.0394, - "step": 1710 - }, - { - "epoch": 1.17, - "learning_rate": 0.0003834209268991877, - "loss": 2.0122, - "step": 1711 - }, - { - "epoch": 1.17, - "learning_rate": 0.00038327759197324414, - "loss": 1.9853, - "step": 1712 - }, - { - "epoch": 1.17, - "learning_rate": 0.00038313425704730047, - "loss": 1.9518, - "step": 1713 - }, - { - "epoch": 1.17, - "learning_rate": 0.0003829909221213569, - "loss": 2.0134, - "step": 1714 - }, - { - "epoch": 1.17, - "learning_rate": 0.00038284758719541324, - "loss": 2.0334, - "step": 1715 - }, - { - "epoch": 1.17, - "learning_rate": 0.0003827042522694697, - "loss": 2.0515, - "step": 1716 - }, - { - "epoch": 1.17, - "learning_rate": 0.000382560917343526, - "loss": 2.0012, - "step": 1717 - }, - { - "epoch": 1.17, - "learning_rate": 0.00038241758241758234, - "loss": 1.9745, - "step": 1718 - }, - { - "epoch": 1.18, - "learning_rate": 0.0003822742474916388, - "loss": 2.0406, - "step": 1719 - }, - { - "epoch": 1.18, - "learning_rate": 0.0003821309125656951, - "loss": 2.0851, - "step": 1720 - }, - { - "epoch": 1.18, - "learning_rate": 0.00038198757763975154, - "loss": 1.991, - "step": 1721 - }, - { - "epoch": 1.18, - "learning_rate": 0.00038184424271380787, - "loss": 1.9414, - "step": 1722 - }, - { - "epoch": 1.18, - "learning_rate": 0.0003817009077878643, - "loss": 1.9876, - "step": 1723 - }, - { - "epoch": 1.18, - "learning_rate": 0.00038155757286192064, - "loss": 1.9759, - "step": 1724 - }, - { - "epoch": 1.18, - "learning_rate": 0.000381414237935977, - "loss": 2.0176, - "step": 1725 - }, - { - "epoch": 1.18, - "learning_rate": 0.0003812709030100334, - "loss": 2.063, - "step": 1726 - }, - { - "epoch": 1.18, - "learning_rate": 0.0003811275680840898, - "loss": 2.0379, - "step": 1727 - }, - { - "epoch": 1.18, - "learning_rate": 0.0003809842331581462, - "loss": 2.0511, - "step": 1728 - }, - { - "epoch": 1.18, - "learning_rate": 0.00038084089823220256, - "loss": 2.0851, - "step": 1729 - }, - { - "epoch": 1.18, - "learning_rate": 0.00038069756330625894, - "loss": 2.0056, - "step": 1730 - }, - { - "epoch": 1.18, - "learning_rate": 0.0003805542283803153, - "loss": 2.0117, - "step": 1731 - }, - { - "epoch": 1.18, - "learning_rate": 0.00038041089345437166, - "loss": 1.924, - "step": 1732 - }, - { - "epoch": 1.18, - "learning_rate": 0.0003802675585284281, - "loss": 1.9935, - "step": 1733 - }, - { - "epoch": 1.19, - "learning_rate": 0.0003801242236024844, - "loss": 1.9831, - "step": 1734 - }, - { - "epoch": 1.19, - "learning_rate": 0.00037998088867654086, - "loss": 2.0066, - "step": 1735 - }, - { - "epoch": 1.19, - "learning_rate": 0.0003798375537505972, - "loss": 1.9368, - "step": 1736 - }, - { - "epoch": 1.19, - "learning_rate": 0.00037969421882465363, - "loss": 2.1333, - "step": 1737 - }, - { - "epoch": 1.19, - "learning_rate": 0.00037955088389870996, - "loss": 2.0094, - "step": 1738 - }, - { - "epoch": 1.19, - "learning_rate": 0.0003794075489727663, - "loss": 2.0104, - "step": 1739 - }, - { - "epoch": 1.19, - "learning_rate": 0.0003792642140468227, - "loss": 2.1044, - "step": 1740 - }, - { - "epoch": 1.19, - "learning_rate": 0.00037912087912087906, - "loss": 1.9823, - "step": 1741 - }, - { - "epoch": 1.19, - "learning_rate": 0.0003789775441949355, - "loss": 2.0191, - "step": 1742 - }, - { - "epoch": 1.19, - "learning_rate": 0.0003788342092689918, - "loss": 1.9903, - "step": 1743 - }, - { - "epoch": 1.19, - "learning_rate": 0.00037869087434304826, - "loss": 2.0378, - "step": 1744 - }, - { - "epoch": 1.19, - "learning_rate": 0.0003785475394171046, - "loss": 1.9787, - "step": 1745 - }, - { - "epoch": 1.19, - "learning_rate": 0.000378404204491161, - "loss": 2.016, - "step": 1746 - }, - { - "epoch": 1.19, - "learning_rate": 0.00037826086956521736, - "loss": 2.0677, - "step": 1747 - }, - { - "epoch": 1.2, - "learning_rate": 0.00037811753463927374, - "loss": 2.0095, - "step": 1748 - }, - { - "epoch": 1.2, - "learning_rate": 0.0003779741997133301, - "loss": 2.0386, - "step": 1749 - }, - { - "epoch": 1.2, - "learning_rate": 0.0003778308647873865, - "loss": 2.0146, - "step": 1750 - }, - { - "epoch": 1.2, - "learning_rate": 0.00037768752986144284, - "loss": 2.0429, - "step": 1751 - }, - { - "epoch": 1.2, - "learning_rate": 0.0003775441949354993, - "loss": 2.0113, - "step": 1752 - }, - { - "epoch": 1.2, - "learning_rate": 0.0003774008600095556, - "loss": 2.0442, - "step": 1753 - }, - { - "epoch": 1.2, - "learning_rate": 0.00037725752508361204, - "loss": 2.0165, - "step": 1754 - }, - { - "epoch": 1.2, - "learning_rate": 0.0003771141901576684, - "loss": 2.0509, - "step": 1755 - }, - { - "epoch": 1.2, - "learning_rate": 0.0003769708552317248, - "loss": 2.0619, - "step": 1756 - }, - { - "epoch": 1.2, - "learning_rate": 0.00037682752030578114, - "loss": 1.9896, - "step": 1757 - }, - { - "epoch": 1.2, - "learning_rate": 0.00037668418537983747, - "loss": 1.9639, - "step": 1758 - }, - { - "epoch": 1.2, - "learning_rate": 0.0003765408504538939, - "loss": 1.9989, - "step": 1759 - }, - { - "epoch": 1.2, - "learning_rate": 0.00037639751552795024, - "loss": 1.9954, - "step": 1760 - }, - { - "epoch": 1.2, - "learning_rate": 0.0003762541806020067, - "loss": 2.0313, - "step": 1761 - }, - { - "epoch": 1.2, - "learning_rate": 0.000376110845676063, - "loss": 1.9543, - "step": 1762 - }, - { - "epoch": 1.21, - "learning_rate": 0.00037596751075011944, - "loss": 1.982, - "step": 1763 - }, - { - "epoch": 1.21, - "learning_rate": 0.0003758241758241758, - "loss": 2.0651, - "step": 1764 - }, - { - "epoch": 1.21, - "learning_rate": 0.00037568084089823216, - "loss": 2.0458, - "step": 1765 - }, - { - "epoch": 1.21, - "learning_rate": 0.00037553750597228854, - "loss": 2.0584, - "step": 1766 - }, - { - "epoch": 1.21, - "learning_rate": 0.0003753941710463449, - "loss": 2.0646, - "step": 1767 - }, - { - "epoch": 1.21, - "learning_rate": 0.0003752508361204013, - "loss": 1.9832, - "step": 1768 - }, - { - "epoch": 1.21, - "learning_rate": 0.0003751075011944577, - "loss": 2.0284, - "step": 1769 - }, - { - "epoch": 1.21, - "learning_rate": 0.0003749641662685141, - "loss": 2.002, - "step": 1770 - }, - { - "epoch": 1.21, - "learning_rate": 0.00037482083134257046, - "loss": 2.0567, - "step": 1771 - }, - { - "epoch": 1.21, - "learning_rate": 0.0003746774964166268, - "loss": 1.9435, - "step": 1772 - }, - { - "epoch": 1.21, - "learning_rate": 0.00037453416149068323, - "loss": 2.0795, - "step": 1773 - }, - { - "epoch": 1.21, - "learning_rate": 0.00037439082656473956, - "loss": 1.9883, - "step": 1774 - }, - { - "epoch": 1.21, - "learning_rate": 0.000374247491638796, - "loss": 1.9813, - "step": 1775 - }, - { - "epoch": 1.21, - "learning_rate": 0.0003741041567128523, - "loss": 2.0155, - "step": 1776 - }, - { - "epoch": 1.21, - "learning_rate": 0.00037396082178690876, - "loss": 2.0065, - "step": 1777 - }, - { - "epoch": 1.22, - "learning_rate": 0.0003738174868609651, - "loss": 1.9658, - "step": 1778 - }, - { - "epoch": 1.22, - "learning_rate": 0.0003736741519350214, - "loss": 2.0513, - "step": 1779 - }, - { - "epoch": 1.22, - "learning_rate": 0.00037353081700907786, - "loss": 2.1119, - "step": 1780 - }, - { - "epoch": 1.22, - "learning_rate": 0.0003733874820831342, - "loss": 1.9894, - "step": 1781 - }, - { - "epoch": 1.22, - "learning_rate": 0.00037324414715719063, - "loss": 2.0833, - "step": 1782 - }, - { - "epoch": 1.22, - "learning_rate": 0.00037310081223124696, - "loss": 2.0896, - "step": 1783 - }, - { - "epoch": 1.22, - "learning_rate": 0.0003729574773053034, - "loss": 1.9927, - "step": 1784 - }, - { - "epoch": 1.22, - "learning_rate": 0.0003728141423793597, - "loss": 1.9954, - "step": 1785 - }, - { - "epoch": 1.22, - "learning_rate": 0.0003726708074534161, - "loss": 2.0618, - "step": 1786 - }, - { - "epoch": 1.22, - "learning_rate": 0.0003725274725274725, - "loss": 1.9595, - "step": 1787 - }, - { - "epoch": 1.22, - "learning_rate": 0.0003723841376015289, - "loss": 2.0487, - "step": 1788 - }, - { - "epoch": 1.22, - "learning_rate": 0.00037224080267558526, - "loss": 2.1212, - "step": 1789 - }, - { - "epoch": 1.22, - "learning_rate": 0.00037209746774964164, - "loss": 1.976, - "step": 1790 - }, - { - "epoch": 1.22, - "learning_rate": 0.00037195413282369803, - "loss": 1.8774, - "step": 1791 - }, - { - "epoch": 1.23, - "learning_rate": 0.0003718107978977544, - "loss": 2.0226, - "step": 1792 - }, - { - "epoch": 1.23, - "learning_rate": 0.00037166746297181074, - "loss": 1.9768, - "step": 1793 - }, - { - "epoch": 1.23, - "learning_rate": 0.0003715241280458672, - "loss": 2.0637, - "step": 1794 - }, - { - "epoch": 1.23, - "learning_rate": 0.0003713807931199235, - "loss": 2.0001, - "step": 1795 - }, - { - "epoch": 1.23, - "learning_rate": 0.00037123745819397995, - "loss": 2.0729, - "step": 1796 - }, - { - "epoch": 1.23, - "learning_rate": 0.0003710941232680363, - "loss": 1.9483, - "step": 1797 - }, - { - "epoch": 1.23, - "learning_rate": 0.00037095078834209266, - "loss": 2.0153, - "step": 1798 - }, - { - "epoch": 1.23, - "learning_rate": 0.00037080745341614904, - "loss": 2.0948, - "step": 1799 - }, - { - "epoch": 1.23, - "learning_rate": 0.0003706641184902054, - "loss": 2.0054, - "step": 1800 - }, - { - "epoch": 1.23, - "eval_loss": 1.9992964267730713, - "eval_runtime": 1649.7967, - "eval_samples_per_second": 6.061, - "eval_steps_per_second": 6.061, - "step": 1800 - }, - { - "epoch": 1.23, - "learning_rate": 0.0003705207835642618, - "loss": 1.9406, - "step": 1801 - }, - { - "epoch": 1.23, - "learning_rate": 0.00037037744863831814, - "loss": 2.164, - "step": 1802 - }, - { - "epoch": 1.23, - "learning_rate": 0.0003702341137123746, - "loss": 2.0001, - "step": 1803 - }, - { - "epoch": 1.23, - "learning_rate": 0.0003700907787864309, - "loss": 1.9451, - "step": 1804 - }, - { - "epoch": 1.23, - "learning_rate": 0.00036994744386048735, - "loss": 2.003, - "step": 1805 - }, - { - "epoch": 1.23, - "learning_rate": 0.0003698041089345437, - "loss": 2.0272, - "step": 1806 - }, - { - "epoch": 1.24, - "learning_rate": 0.00036966077400860006, - "loss": 1.9824, - "step": 1807 - }, - { - "epoch": 1.24, - "learning_rate": 0.00036951743908265644, - "loss": 2.0841, - "step": 1808 - }, - { - "epoch": 1.24, - "learning_rate": 0.00036937410415671283, - "loss": 2.0329, - "step": 1809 - }, - { - "epoch": 1.24, - "learning_rate": 0.0003692307692307692, - "loss": 2.0209, - "step": 1810 - }, - { - "epoch": 1.24, - "learning_rate": 0.0003690874343048256, - "loss": 1.8782, - "step": 1811 - }, - { - "epoch": 1.24, - "learning_rate": 0.000368944099378882, - "loss": 1.9467, - "step": 1812 - }, - { - "epoch": 1.24, - "learning_rate": 0.00036880076445293836, - "loss": 1.915, - "step": 1813 - }, - { - "epoch": 1.24, - "learning_rate": 0.0003686574295269947, - "loss": 2.019, - "step": 1814 - }, - { - "epoch": 1.24, - "learning_rate": 0.0003685140946010511, - "loss": 2.0315, - "step": 1815 - }, - { - "epoch": 1.24, - "learning_rate": 0.00036837075967510746, - "loss": 2.0391, - "step": 1816 - }, - { - "epoch": 1.24, - "learning_rate": 0.00036822742474916384, - "loss": 1.9748, - "step": 1817 - }, - { - "epoch": 1.24, - "learning_rate": 0.0003680840898232202, - "loss": 2.0068, - "step": 1818 - }, - { - "epoch": 1.24, - "learning_rate": 0.0003679407548972766, - "loss": 1.9701, - "step": 1819 - }, - { - "epoch": 1.24, - "learning_rate": 0.000367797419971333, - "loss": 1.9574, - "step": 1820 - }, - { - "epoch": 1.24, - "learning_rate": 0.0003676540850453893, - "loss": 2.0035, - "step": 1821 - }, - { - "epoch": 1.25, - "learning_rate": 0.00036751075011944576, - "loss": 1.9546, - "step": 1822 - }, - { - "epoch": 1.25, - "learning_rate": 0.0003673674151935021, - "loss": 1.9779, - "step": 1823 - }, - { - "epoch": 1.25, - "learning_rate": 0.00036722408026755853, - "loss": 2.1025, - "step": 1824 - }, - { - "epoch": 1.25, - "learning_rate": 0.00036708074534161486, - "loss": 2.028, - "step": 1825 - }, - { - "epoch": 1.25, - "learning_rate": 0.0003669374104156713, - "loss": 1.9185, - "step": 1826 - }, - { - "epoch": 1.25, - "learning_rate": 0.0003667940754897276, - "loss": 1.865, - "step": 1827 - }, - { - "epoch": 1.25, - "learning_rate": 0.000366650740563784, - "loss": 1.9609, - "step": 1828 - }, - { - "epoch": 1.25, - "learning_rate": 0.0003665074056378404, - "loss": 1.9569, - "step": 1829 - }, - { - "epoch": 1.25, - "learning_rate": 0.0003663640707118968, - "loss": 2.0257, - "step": 1830 - }, - { - "epoch": 1.25, - "learning_rate": 0.00036622073578595316, - "loss": 2.0884, - "step": 1831 - }, - { - "epoch": 1.25, - "learning_rate": 0.0003660774008600095, - "loss": 2.0377, - "step": 1832 - }, - { - "epoch": 1.25, - "learning_rate": 0.00036593406593406593, - "loss": 1.9751, - "step": 1833 - }, - { - "epoch": 1.25, - "learning_rate": 0.00036579073100812226, - "loss": 2.0617, - "step": 1834 - }, - { - "epoch": 1.25, - "learning_rate": 0.00036564739608217864, - "loss": 2.0659, - "step": 1835 - }, - { - "epoch": 1.26, - "learning_rate": 0.000365504061156235, - "loss": 1.9916, - "step": 1836 - }, - { - "epoch": 1.26, - "learning_rate": 0.0003653607262302914, - "loss": 2.0324, - "step": 1837 - }, - { - "epoch": 1.26, - "learning_rate": 0.0003652173913043478, - "loss": 2.0329, - "step": 1838 - }, - { - "epoch": 1.26, - "learning_rate": 0.0003650740563784042, - "loss": 1.9504, - "step": 1839 - }, - { - "epoch": 1.26, - "learning_rate": 0.00036493072145246056, - "loss": 1.8959, - "step": 1840 - }, - { - "epoch": 1.26, - "learning_rate": 0.00036478738652651695, - "loss": 2.0087, - "step": 1841 - }, - { - "epoch": 1.26, - "learning_rate": 0.0003646440516005733, - "loss": 2.1006, - "step": 1842 - }, - { - "epoch": 1.26, - "learning_rate": 0.0003645007166746297, - "loss": 2.0928, - "step": 1843 - }, - { - "epoch": 1.26, - "learning_rate": 0.00036435738174868604, - "loss": 2.0526, - "step": 1844 - }, - { - "epoch": 1.26, - "learning_rate": 0.0003642140468227425, - "loss": 1.9657, - "step": 1845 - }, - { - "epoch": 1.26, - "learning_rate": 0.0003640707118967988, - "loss": 1.9081, - "step": 1846 - }, - { - "epoch": 1.26, - "learning_rate": 0.00036392737697085514, - "loss": 2.007, - "step": 1847 - }, - { - "epoch": 1.26, - "learning_rate": 0.0003637840420449116, - "loss": 2.0273, - "step": 1848 - }, - { - "epoch": 1.26, - "learning_rate": 0.0003636407071189679, - "loss": 1.9917, - "step": 1849 - }, - { - "epoch": 1.26, - "learning_rate": 0.00036349737219302435, - "loss": 1.9588, - "step": 1850 - }, - { - "epoch": 1.27, - "learning_rate": 0.0003633540372670807, - "loss": 1.9464, - "step": 1851 - }, - { - "epoch": 1.27, - "learning_rate": 0.0003632107023411371, - "loss": 1.9609, - "step": 1852 - }, - { - "epoch": 1.27, - "learning_rate": 0.00036306736741519344, - "loss": 1.9257, - "step": 1853 - }, - { - "epoch": 1.27, - "learning_rate": 0.0003629240324892498, - "loss": 2.0676, - "step": 1854 - }, - { - "epoch": 1.27, - "learning_rate": 0.0003627806975633062, - "loss": 2.0322, - "step": 1855 - }, - { - "epoch": 1.27, - "learning_rate": 0.0003626373626373626, - "loss": 2.023, - "step": 1856 - }, - { - "epoch": 1.27, - "learning_rate": 0.000362494027711419, - "loss": 1.9282, - "step": 1857 - }, - { - "epoch": 1.27, - "learning_rate": 0.00036235069278547536, - "loss": 2.0074, - "step": 1858 - }, - { - "epoch": 1.27, - "learning_rate": 0.00036220735785953175, - "loss": 2.037, - "step": 1859 - }, - { - "epoch": 1.27, - "learning_rate": 0.00036206402293358813, - "loss": 1.9538, - "step": 1860 - }, - { - "epoch": 1.27, - "learning_rate": 0.00036192068800764446, - "loss": 1.852, - "step": 1861 - }, - { - "epoch": 1.27, - "learning_rate": 0.0003617773530817009, - "loss": 2.0184, - "step": 1862 - }, - { - "epoch": 1.27, - "learning_rate": 0.0003616340181557572, - "loss": 2.0041, - "step": 1863 - }, - { - "epoch": 1.27, - "learning_rate": 0.00036149068322981366, - "loss": 1.988, - "step": 1864 - }, - { - "epoch": 1.28, - "learning_rate": 0.00036134734830387, - "loss": 2.0504, - "step": 1865 - }, - { - "epoch": 1.28, - "learning_rate": 0.00036120401337792643, - "loss": 2.0778, - "step": 1866 - }, - { - "epoch": 1.28, - "learning_rate": 0.00036106067845198276, - "loss": 2.066, - "step": 1867 - }, - { - "epoch": 1.28, - "learning_rate": 0.0003609173435260391, - "loss": 2.0418, - "step": 1868 - }, - { - "epoch": 1.28, - "learning_rate": 0.00036077400860009553, - "loss": 2.0154, - "step": 1869 - }, - { - "epoch": 1.28, - "learning_rate": 0.00036063067367415186, - "loss": 1.9723, - "step": 1870 - }, - { - "epoch": 1.28, - "learning_rate": 0.0003604873387482083, - "loss": 1.9196, - "step": 1871 - }, - { - "epoch": 1.28, - "learning_rate": 0.0003603440038222646, - "loss": 2.0374, - "step": 1872 - }, - { - "epoch": 1.28, - "learning_rate": 0.00036020066889632106, - "loss": 2.1146, - "step": 1873 - }, - { - "epoch": 1.28, - "learning_rate": 0.0003600573339703774, - "loss": 2.0713, - "step": 1874 - }, - { - "epoch": 1.28, - "learning_rate": 0.0003599139990444338, - "loss": 2.0524, - "step": 1875 - }, - { - "epoch": 1.28, - "learning_rate": 0.00035977066411849016, - "loss": 1.9778, - "step": 1876 - }, - { - "epoch": 1.28, - "learning_rate": 0.00035962732919254654, - "loss": 2.0138, - "step": 1877 - }, - { - "epoch": 1.28, - "learning_rate": 0.00035948399426660293, - "loss": 1.9767, - "step": 1878 - }, - { - "epoch": 1.28, - "learning_rate": 0.0003593406593406593, - "loss": 1.9536, - "step": 1879 - }, - { - "epoch": 1.29, - "learning_rate": 0.0003591973244147157, - "loss": 2.1092, - "step": 1880 - }, - { - "epoch": 1.29, - "learning_rate": 0.0003590539894887721, - "loss": 1.9906, - "step": 1881 - }, - { - "epoch": 1.29, - "learning_rate": 0.0003589106545628284, - "loss": 2.0383, - "step": 1882 - }, - { - "epoch": 1.29, - "learning_rate": 0.00035876731963688485, - "loss": 1.9626, - "step": 1883 - }, - { - "epoch": 1.29, - "learning_rate": 0.0003586239847109412, - "loss": 2.0121, - "step": 1884 - }, - { - "epoch": 1.29, - "learning_rate": 0.0003584806497849976, - "loss": 2.0007, - "step": 1885 - }, - { - "epoch": 1.29, - "learning_rate": 0.00035833731485905394, - "loss": 2.0846, - "step": 1886 - }, - { - "epoch": 1.29, - "learning_rate": 0.0003581939799331104, - "loss": 2.1493, - "step": 1887 - }, - { - "epoch": 1.29, - "learning_rate": 0.0003580506450071667, - "loss": 1.9866, - "step": 1888 - }, - { - "epoch": 1.29, - "learning_rate": 0.00035790731008122304, - "loss": 1.9876, - "step": 1889 - }, - { - "epoch": 1.29, - "learning_rate": 0.0003577639751552795, - "loss": 2.0256, - "step": 1890 - }, - { - "epoch": 1.29, - "learning_rate": 0.0003576206402293358, - "loss": 2.0551, - "step": 1891 - }, - { - "epoch": 1.29, - "learning_rate": 0.00035747730530339225, - "loss": 1.9491, - "step": 1892 - }, - { - "epoch": 1.29, - "learning_rate": 0.0003573339703774486, - "loss": 1.976, - "step": 1893 - }, - { - "epoch": 1.29, - "learning_rate": 0.000357190635451505, - "loss": 2.0242, - "step": 1894 - }, - { - "epoch": 1.3, - "learning_rate": 0.00035704730052556134, - "loss": 1.9942, - "step": 1895 - }, - { - "epoch": 1.3, - "learning_rate": 0.00035690396559961773, - "loss": 1.9462, - "step": 1896 - }, - { - "epoch": 1.3, - "learning_rate": 0.0003567606306736741, - "loss": 1.9941, - "step": 1897 - }, - { - "epoch": 1.3, - "learning_rate": 0.0003566172957477305, - "loss": 1.987, - "step": 1898 - }, - { - "epoch": 1.3, - "learning_rate": 0.0003564739608217869, - "loss": 1.9921, - "step": 1899 - }, - { - "epoch": 1.3, - "learning_rate": 0.00035633062589584326, - "loss": 1.9913, - "step": 1900 - }, - { - "epoch": 1.3, - "learning_rate": 0.00035618729096989965, - "loss": 2.0761, - "step": 1901 - }, - { - "epoch": 1.3, - "learning_rate": 0.00035604395604395603, - "loss": 2.0225, - "step": 1902 - }, - { - "epoch": 1.3, - "learning_rate": 0.00035590062111801236, - "loss": 2.1646, - "step": 1903 - }, - { - "epoch": 1.3, - "learning_rate": 0.0003557572861920688, - "loss": 1.9713, - "step": 1904 - }, - { - "epoch": 1.3, - "learning_rate": 0.00035561395126612513, - "loss": 2.0381, - "step": 1905 - }, - { - "epoch": 1.3, - "learning_rate": 0.00035547061634018157, - "loss": 2.1288, - "step": 1906 - }, - { - "epoch": 1.3, - "learning_rate": 0.0003553272814142379, - "loss": 1.9958, - "step": 1907 - }, - { - "epoch": 1.3, - "learning_rate": 0.00035518394648829433, - "loss": 2.0927, - "step": 1908 - }, - { - "epoch": 1.31, - "learning_rate": 0.00035504061156235066, - "loss": 2.0493, - "step": 1909 - }, - { - "epoch": 1.31, - "learning_rate": 0.000354897276636407, - "loss": 1.9751, - "step": 1910 - }, - { - "epoch": 1.31, - "learning_rate": 0.00035475394171046343, - "loss": 1.9947, - "step": 1911 - }, - { - "epoch": 1.31, - "learning_rate": 0.00035461060678451976, - "loss": 1.9878, - "step": 1912 - }, - { - "epoch": 1.31, - "learning_rate": 0.0003544672718585762, - "loss": 2.0322, - "step": 1913 - }, - { - "epoch": 1.31, - "learning_rate": 0.00035432393693263253, - "loss": 1.9825, - "step": 1914 - }, - { - "epoch": 1.31, - "learning_rate": 0.00035418060200668897, - "loss": 2.0174, - "step": 1915 - }, - { - "epoch": 1.31, - "learning_rate": 0.0003540372670807453, - "loss": 1.9631, - "step": 1916 - }, - { - "epoch": 1.31, - "learning_rate": 0.0003538939321548017, - "loss": 1.9678, - "step": 1917 - }, - { - "epoch": 1.31, - "learning_rate": 0.00035375059722885806, - "loss": 2.0006, - "step": 1918 - }, - { - "epoch": 1.31, - "learning_rate": 0.00035360726230291445, - "loss": 1.9347, - "step": 1919 - }, - { - "epoch": 1.31, - "learning_rate": 0.00035346392737697083, - "loss": 2.0409, - "step": 1920 - }, - { - "epoch": 1.31, - "learning_rate": 0.0003533205924510272, - "loss": 2.0187, - "step": 1921 - }, - { - "epoch": 1.31, - "learning_rate": 0.0003531772575250836, - "loss": 1.9847, - "step": 1922 - }, - { - "epoch": 1.31, - "learning_rate": 0.00035303392259914, - "loss": 2.0057, - "step": 1923 - }, - { - "epoch": 1.32, - "learning_rate": 0.0003528905876731963, - "loss": 2.0196, - "step": 1924 - }, - { - "epoch": 1.32, - "learning_rate": 0.00035274725274725275, - "loss": 1.8575, - "step": 1925 - }, - { - "epoch": 1.32, - "learning_rate": 0.0003526039178213091, - "loss": 1.9818, - "step": 1926 - }, - { - "epoch": 1.32, - "learning_rate": 0.0003524605828953655, - "loss": 2.0204, - "step": 1927 - }, - { - "epoch": 1.32, - "learning_rate": 0.00035231724796942185, - "loss": 2.0446, - "step": 1928 - }, - { - "epoch": 1.32, - "learning_rate": 0.0003521739130434783, - "loss": 2.0144, - "step": 1929 - }, - { - "epoch": 1.32, - "learning_rate": 0.0003520305781175346, - "loss": 1.9512, - "step": 1930 - }, - { - "epoch": 1.32, - "learning_rate": 0.00035188724319159094, - "loss": 1.9937, - "step": 1931 - }, - { - "epoch": 1.32, - "learning_rate": 0.0003517439082656474, - "loss": 1.9763, - "step": 1932 - }, - { - "epoch": 1.32, - "learning_rate": 0.0003516005733397037, - "loss": 2.0291, - "step": 1933 - }, - { - "epoch": 1.32, - "learning_rate": 0.00035145723841376015, - "loss": 2.0598, - "step": 1934 - }, - { - "epoch": 1.32, - "learning_rate": 0.0003513139034878165, - "loss": 2.0737, - "step": 1935 - }, - { - "epoch": 1.32, - "learning_rate": 0.0003511705685618729, - "loss": 2.1272, - "step": 1936 - }, - { - "epoch": 1.32, - "learning_rate": 0.00035102723363592925, - "loss": 2.0258, - "step": 1937 - }, - { - "epoch": 1.32, - "learning_rate": 0.00035088389870998563, - "loss": 2.0811, - "step": 1938 - }, - { - "epoch": 1.33, - "learning_rate": 0.000350740563784042, - "loss": 1.9712, - "step": 1939 - }, - { - "epoch": 1.33, - "learning_rate": 0.0003505972288580984, - "loss": 1.9419, - "step": 1940 - }, - { - "epoch": 1.33, - "learning_rate": 0.0003504538939321548, - "loss": 2.0849, - "step": 1941 - }, - { - "epoch": 1.33, - "learning_rate": 0.00035031055900621116, - "loss": 2.0123, - "step": 1942 - }, - { - "epoch": 1.33, - "learning_rate": 0.00035016722408026755, - "loss": 1.9185, - "step": 1943 - }, - { - "epoch": 1.33, - "learning_rate": 0.00035002388915432393, - "loss": 2.0759, - "step": 1944 - }, - { - "epoch": 1.33, - "learning_rate": 0.00034988055422838026, - "loss": 2.0272, - "step": 1945 - }, - { - "epoch": 1.33, - "learning_rate": 0.0003497372193024367, - "loss": 2.0296, - "step": 1946 - }, - { - "epoch": 1.33, - "learning_rate": 0.00034959388437649303, - "loss": 2.095, - "step": 1947 - }, - { - "epoch": 1.33, - "learning_rate": 0.00034945054945054947, - "loss": 2.1172, - "step": 1948 - }, - { - "epoch": 1.33, - "learning_rate": 0.0003493072145246058, - "loss": 1.9911, - "step": 1949 - }, - { - "epoch": 1.33, - "learning_rate": 0.0003491638795986621, - "loss": 1.9346, - "step": 1950 - }, - { - "epoch": 1.33, - "learning_rate": 0.00034902054467271856, - "loss": 2.0437, - "step": 1951 - }, - { - "epoch": 1.33, - "learning_rate": 0.0003488772097467749, - "loss": 1.9926, - "step": 1952 - }, - { - "epoch": 1.34, - "learning_rate": 0.00034873387482083133, - "loss": 1.8601, - "step": 1953 - }, - { - "epoch": 1.34, - "learning_rate": 0.00034859053989488766, - "loss": 2.0288, - "step": 1954 - }, - { - "epoch": 1.34, - "learning_rate": 0.0003484472049689441, - "loss": 1.9644, - "step": 1955 - }, - { - "epoch": 1.34, - "learning_rate": 0.00034830387004300043, - "loss": 2.0219, - "step": 1956 - }, - { - "epoch": 1.34, - "learning_rate": 0.0003481605351170568, - "loss": 2.0288, - "step": 1957 - }, - { - "epoch": 1.34, - "learning_rate": 0.0003480172001911132, - "loss": 2.0549, - "step": 1958 - }, - { - "epoch": 1.34, - "learning_rate": 0.0003478738652651696, - "loss": 2.0246, - "step": 1959 - }, - { - "epoch": 1.34, - "learning_rate": 0.00034773053033922596, - "loss": 2.0496, - "step": 1960 - }, - { - "epoch": 1.34, - "learning_rate": 0.00034758719541328235, - "loss": 1.9183, - "step": 1961 - }, - { - "epoch": 1.34, - "learning_rate": 0.00034744386048733873, - "loss": 2.0135, - "step": 1962 - }, - { - "epoch": 1.34, - "learning_rate": 0.0003473005255613951, - "loss": 1.9164, - "step": 1963 - }, - { - "epoch": 1.34, - "learning_rate": 0.00034715719063545145, - "loss": 2.0452, - "step": 1964 - }, - { - "epoch": 1.34, - "learning_rate": 0.0003470138557095079, - "loss": 2.1023, - "step": 1965 - }, - { - "epoch": 1.34, - "learning_rate": 0.0003468705207835642, - "loss": 1.9568, - "step": 1966 - }, - { - "epoch": 1.34, - "learning_rate": 0.00034672718585762065, - "loss": 2.0042, - "step": 1967 - }, - { - "epoch": 1.35, - "learning_rate": 0.000346583850931677, - "loss": 1.9825, - "step": 1968 - }, - { - "epoch": 1.35, - "learning_rate": 0.0003464405160057334, - "loss": 2.0362, - "step": 1969 - }, - { - "epoch": 1.35, - "learning_rate": 0.00034629718107978975, - "loss": 1.9395, - "step": 1970 - }, - { - "epoch": 1.35, - "learning_rate": 0.0003461538461538461, - "loss": 1.9586, - "step": 1971 - }, - { - "epoch": 1.35, - "learning_rate": 0.0003460105112279025, - "loss": 2.0692, - "step": 1972 - }, - { - "epoch": 1.35, - "learning_rate": 0.00034586717630195885, - "loss": 2.0178, - "step": 1973 - }, - { - "epoch": 1.35, - "learning_rate": 0.0003457238413760153, - "loss": 1.9349, - "step": 1974 - }, - { - "epoch": 1.35, - "learning_rate": 0.0003455805064500716, - "loss": 2.0868, - "step": 1975 - }, - { - "epoch": 1.35, - "learning_rate": 0.00034543717152412805, - "loss": 1.9723, - "step": 1976 - }, - { - "epoch": 1.35, - "learning_rate": 0.0003452938365981844, - "loss": 2.0752, - "step": 1977 - }, - { - "epoch": 1.35, - "learning_rate": 0.00034515050167224076, - "loss": 1.9482, - "step": 1978 - }, - { - "epoch": 1.35, - "learning_rate": 0.00034500716674629715, - "loss": 1.9869, - "step": 1979 - }, - { - "epoch": 1.35, - "learning_rate": 0.00034486383182035353, - "loss": 2.0023, - "step": 1980 - }, - { - "epoch": 1.35, - "learning_rate": 0.0003447204968944099, - "loss": 2.0806, - "step": 1981 - }, - { - "epoch": 1.36, - "learning_rate": 0.0003445771619684663, - "loss": 2.0252, - "step": 1982 - }, - { - "epoch": 1.36, - "learning_rate": 0.0003444338270425227, - "loss": 1.9967, - "step": 1983 - }, - { - "epoch": 1.36, - "learning_rate": 0.00034429049211657907, - "loss": 1.9377, - "step": 1984 - }, - { - "epoch": 1.36, - "learning_rate": 0.0003441471571906354, - "loss": 1.9739, - "step": 1985 - }, - { - "epoch": 1.36, - "learning_rate": 0.00034400382226469183, - "loss": 2.0232, - "step": 1986 - }, - { - "epoch": 1.36, - "learning_rate": 0.00034386048733874816, - "loss": 1.9893, - "step": 1987 - }, - { - "epoch": 1.36, - "learning_rate": 0.0003437171524128046, - "loss": 2.1028, - "step": 1988 - }, - { - "epoch": 1.36, - "learning_rate": 0.00034357381748686093, - "loss": 2.0027, - "step": 1989 - }, - { - "epoch": 1.36, - "learning_rate": 0.0003434304825609173, - "loss": 1.9481, - "step": 1990 - }, - { - "epoch": 1.36, - "learning_rate": 0.0003432871476349737, - "loss": 2.0907, - "step": 1991 - }, - { - "epoch": 1.36, - "learning_rate": 0.00034314381270903003, - "loss": 2.0744, - "step": 1992 - }, - { - "epoch": 1.36, - "learning_rate": 0.00034300047778308647, - "loss": 2.0442, - "step": 1993 - }, - { - "epoch": 1.36, - "learning_rate": 0.0003428571428571428, - "loss": 2.0232, - "step": 1994 - }, - { - "epoch": 1.36, - "learning_rate": 0.00034271380793119923, - "loss": 2.0709, - "step": 1995 - }, - { - "epoch": 1.36, - "learning_rate": 0.00034257047300525556, - "loss": 1.9478, - "step": 1996 - }, - { - "epoch": 1.37, - "learning_rate": 0.000342427138079312, - "loss": 1.9676, - "step": 1997 - }, - { - "epoch": 1.37, - "learning_rate": 0.00034228380315336833, - "loss": 2.1258, - "step": 1998 - }, - { - "epoch": 1.37, - "learning_rate": 0.0003421404682274247, - "loss": 1.9892, - "step": 1999 - }, - { - "epoch": 1.37, - "learning_rate": 0.0003419971333014811, - "loss": 2.0512, - "step": 2000 - }, - { - "epoch": 1.37, - "eval_loss": 1.9946362972259521, - "eval_runtime": 1647.5347, - "eval_samples_per_second": 6.07, - "eval_steps_per_second": 6.07, - "step": 2000 - }, - { - "epoch": 1.37, - "learning_rate": 0.0003418537983755375, - "loss": 2.0172, - "step": 2001 - }, - { - "epoch": 1.37, - "learning_rate": 0.00034171046344959387, - "loss": 2.0405, - "step": 2002 - }, - { - "epoch": 1.37, - "learning_rate": 0.00034156712852365025, - "loss": 1.9842, - "step": 2003 - }, - { - "epoch": 1.37, - "learning_rate": 0.00034142379359770663, - "loss": 2.0294, - "step": 2004 - }, - { - "epoch": 1.37, - "learning_rate": 0.000341280458671763, - "loss": 1.9872, - "step": 2005 - }, - { - "epoch": 1.37, - "learning_rate": 0.00034113712374581935, - "loss": 1.9517, - "step": 2006 - }, - { - "epoch": 1.37, - "learning_rate": 0.00034099378881987573, - "loss": 2.0903, - "step": 2007 - }, - { - "epoch": 1.37, - "learning_rate": 0.0003408504538939321, - "loss": 2.0047, - "step": 2008 - }, - { - "epoch": 1.37, - "learning_rate": 0.0003407071189679885, - "loss": 1.9116, - "step": 2009 - }, - { - "epoch": 1.37, - "learning_rate": 0.0003405637840420449, - "loss": 1.9933, - "step": 2010 - }, - { - "epoch": 1.37, - "learning_rate": 0.00034042044911610127, - "loss": 2.0691, - "step": 2011 - }, - { - "epoch": 1.38, - "learning_rate": 0.00034027711419015765, - "loss": 2.0249, - "step": 2012 - }, - { - "epoch": 1.38, - "learning_rate": 0.000340133779264214, - "loss": 1.9914, - "step": 2013 - }, - { - "epoch": 1.38, - "learning_rate": 0.0003399904443382704, - "loss": 2.0592, - "step": 2014 - }, - { - "epoch": 1.38, - "learning_rate": 0.00033984710941232675, - "loss": 2.0658, - "step": 2015 - }, - { - "epoch": 1.38, - "learning_rate": 0.0003397037744863832, - "loss": 1.9771, - "step": 2016 - }, - { - "epoch": 1.38, - "learning_rate": 0.0003395604395604395, - "loss": 1.9868, - "step": 2017 - }, - { - "epoch": 1.38, - "learning_rate": 0.00033941710463449595, - "loss": 1.943, - "step": 2018 - }, - { - "epoch": 1.38, - "learning_rate": 0.0003392737697085523, - "loss": 2.0233, - "step": 2019 - }, - { - "epoch": 1.38, - "learning_rate": 0.00033913043478260867, - "loss": 1.9217, - "step": 2020 - }, - { - "epoch": 1.38, - "learning_rate": 0.00033898709985666505, - "loss": 2.05, - "step": 2021 - }, - { - "epoch": 1.38, - "learning_rate": 0.00033884376493072143, - "loss": 1.9538, - "step": 2022 - }, - { - "epoch": 1.38, - "learning_rate": 0.0003387004300047778, - "loss": 1.9666, - "step": 2023 - }, - { - "epoch": 1.38, - "learning_rate": 0.00033855709507883415, - "loss": 2.0349, - "step": 2024 - }, - { - "epoch": 1.38, - "learning_rate": 0.0003384137601528906, - "loss": 2.0789, - "step": 2025 - }, - { - "epoch": 1.39, - "learning_rate": 0.0003382704252269469, - "loss": 2.0193, - "step": 2026 - }, - { - "epoch": 1.39, - "learning_rate": 0.0003381270903010033, - "loss": 2.0933, - "step": 2027 - }, - { - "epoch": 1.39, - "learning_rate": 0.0003379837553750597, - "loss": 2.0623, - "step": 2028 - }, - { - "epoch": 1.39, - "learning_rate": 0.00033784042044911607, - "loss": 2.044, - "step": 2029 - }, - { - "epoch": 1.39, - "learning_rate": 0.00033769708552317245, - "loss": 2.0534, - "step": 2030 - }, - { - "epoch": 1.39, - "learning_rate": 0.00033755375059722883, - "loss": 1.9295, - "step": 2031 - }, - { - "epoch": 1.39, - "learning_rate": 0.0003374104156712852, - "loss": 2.141, - "step": 2032 - }, - { - "epoch": 1.39, - "learning_rate": 0.0003372670807453416, - "loss": 2.0085, - "step": 2033 - }, - { - "epoch": 1.39, - "learning_rate": 0.00033712374581939793, - "loss": 2.0188, - "step": 2034 - }, - { - "epoch": 1.39, - "learning_rate": 0.00033698041089345437, - "loss": 2.0537, - "step": 2035 - }, - { - "epoch": 1.39, - "learning_rate": 0.0003368370759675107, - "loss": 1.9598, - "step": 2036 - }, - { - "epoch": 1.39, - "learning_rate": 0.00033669374104156714, - "loss": 2.004, - "step": 2037 - }, - { - "epoch": 1.39, - "learning_rate": 0.00033655040611562347, - "loss": 2.0024, - "step": 2038 - }, - { - "epoch": 1.39, - "learning_rate": 0.0003364070711896799, - "loss": 2.0414, - "step": 2039 - }, - { - "epoch": 1.39, - "learning_rate": 0.00033626373626373623, - "loss": 2.0245, - "step": 2040 - }, - { - "epoch": 1.4, - "learning_rate": 0.00033612040133779256, - "loss": 1.9819, - "step": 2041 - }, - { - "epoch": 1.4, - "learning_rate": 0.000335977066411849, - "loss": 2.0282, - "step": 2042 - }, - { - "epoch": 1.4, - "learning_rate": 0.00033583373148590533, - "loss": 1.9962, - "step": 2043 - }, - { - "epoch": 1.4, - "learning_rate": 0.00033569039655996177, - "loss": 1.9422, - "step": 2044 - }, - { - "epoch": 1.4, - "learning_rate": 0.0003355470616340181, - "loss": 2.0411, - "step": 2045 - }, - { - "epoch": 1.4, - "learning_rate": 0.0003354037267080745, - "loss": 1.9571, - "step": 2046 - }, - { - "epoch": 1.4, - "learning_rate": 0.00033526039178213087, - "loss": 1.9993, - "step": 2047 - }, - { - "epoch": 1.4, - "learning_rate": 0.00033511705685618725, - "loss": 2.0761, - "step": 2048 - }, - { - "epoch": 1.4, - "learning_rate": 0.00033497372193024363, - "loss": 2.0052, - "step": 2049 - }, - { - "epoch": 1.4, - "learning_rate": 0.0003348303870043, - "loss": 2.0984, - "step": 2050 - }, - { - "epoch": 1.4, - "learning_rate": 0.0003346870520783564, - "loss": 2.0299, - "step": 2051 - }, - { - "epoch": 1.4, - "learning_rate": 0.0003345437171524128, - "loss": 1.9776, - "step": 2052 - }, - { - "epoch": 1.4, - "learning_rate": 0.0003344003822264691, - "loss": 2.0061, - "step": 2053 - }, - { - "epoch": 1.4, - "learning_rate": 0.00033425704730052555, - "loss": 2.0546, - "step": 2054 - }, - { - "epoch": 1.4, - "learning_rate": 0.0003341137123745819, - "loss": 2.0127, - "step": 2055 - }, - { - "epoch": 1.41, - "learning_rate": 0.0003339703774486383, - "loss": 2.0911, - "step": 2056 - }, - { - "epoch": 1.41, - "learning_rate": 0.00033382704252269465, - "loss": 1.9568, - "step": 2057 - }, - { - "epoch": 1.41, - "learning_rate": 0.0003336837075967511, - "loss": 2.0353, - "step": 2058 - }, - { - "epoch": 1.41, - "learning_rate": 0.0003335403726708074, - "loss": 1.9722, - "step": 2059 - }, - { - "epoch": 1.41, - "learning_rate": 0.00033339703774486375, - "loss": 2.0115, - "step": 2060 - }, - { - "epoch": 1.41, - "learning_rate": 0.0003332537028189202, - "loss": 1.9517, - "step": 2061 - }, - { - "epoch": 1.41, - "learning_rate": 0.0003331103678929765, - "loss": 1.9787, - "step": 2062 - }, - { - "epoch": 1.41, - "learning_rate": 0.00033296703296703295, - "loss": 2.0402, - "step": 2063 - }, - { - "epoch": 1.41, - "learning_rate": 0.0003328236980410893, - "loss": 2.0362, - "step": 2064 - }, - { - "epoch": 1.41, - "learning_rate": 0.0003326803631151457, - "loss": 2.0316, - "step": 2065 - }, - { - "epoch": 1.41, - "learning_rate": 0.00033253702818920205, - "loss": 1.9957, - "step": 2066 - }, - { - "epoch": 1.41, - "learning_rate": 0.00033239369326325843, - "loss": 2.076, - "step": 2067 - }, - { - "epoch": 1.41, - "learning_rate": 0.0003322503583373148, - "loss": 1.9697, - "step": 2068 - }, - { - "epoch": 1.41, - "learning_rate": 0.0003321070234113712, - "loss": 2.063, - "step": 2069 - }, - { - "epoch": 1.42, - "learning_rate": 0.0003319636884854276, - "loss": 2.0591, - "step": 2070 - }, - { - "epoch": 1.42, - "learning_rate": 0.00033182035355948397, - "loss": 2.031, - "step": 2071 - }, - { - "epoch": 1.42, - "learning_rate": 0.00033167701863354035, - "loss": 2.0116, - "step": 2072 - }, - { - "epoch": 1.42, - "learning_rate": 0.00033153368370759674, - "loss": 2.0301, - "step": 2073 - }, - { - "epoch": 1.42, - "learning_rate": 0.00033139034878165306, - "loss": 2.0387, - "step": 2074 - }, - { - "epoch": 1.42, - "learning_rate": 0.0003312470138557095, - "loss": 2.0549, - "step": 2075 - }, - { - "epoch": 1.42, - "learning_rate": 0.00033110367892976583, - "loss": 1.9861, - "step": 2076 - }, - { - "epoch": 1.42, - "learning_rate": 0.00033096034400382227, - "loss": 1.918, - "step": 2077 - }, - { - "epoch": 1.42, - "learning_rate": 0.0003308170090778786, - "loss": 1.9836, - "step": 2078 - }, - { - "epoch": 1.42, - "learning_rate": 0.00033067367415193504, - "loss": 2.0345, - "step": 2079 - }, - { - "epoch": 1.42, - "learning_rate": 0.00033053033922599137, - "loss": 1.9827, - "step": 2080 - }, - { - "epoch": 1.42, - "learning_rate": 0.0003303870043000477, - "loss": 1.9904, - "step": 2081 - }, - { - "epoch": 1.42, - "learning_rate": 0.00033024366937410413, - "loss": 1.9661, - "step": 2082 - }, - { - "epoch": 1.42, - "learning_rate": 0.00033010033444816046, - "loss": 2.0373, - "step": 2083 - }, - { - "epoch": 1.42, - "learning_rate": 0.0003299569995222169, - "loss": 1.872, - "step": 2084 - }, - { - "epoch": 1.43, - "learning_rate": 0.00032981366459627323, - "loss": 1.9871, - "step": 2085 - }, - { - "epoch": 1.43, - "learning_rate": 0.00032967032967032967, - "loss": 1.9051, - "step": 2086 - }, - { - "epoch": 1.43, - "learning_rate": 0.000329526994744386, - "loss": 2.1045, - "step": 2087 - }, - { - "epoch": 1.43, - "learning_rate": 0.0003293836598184424, - "loss": 1.9143, - "step": 2088 - }, - { - "epoch": 1.43, - "learning_rate": 0.00032924032489249877, - "loss": 1.8973, - "step": 2089 - }, - { - "epoch": 1.43, - "learning_rate": 0.00032909698996655515, - "loss": 1.9506, - "step": 2090 - }, - { - "epoch": 1.43, - "learning_rate": 0.00032895365504061153, - "loss": 1.9271, - "step": 2091 - }, - { - "epoch": 1.43, - "learning_rate": 0.0003288103201146679, - "loss": 1.9837, - "step": 2092 - }, - { - "epoch": 1.43, - "learning_rate": 0.0003286669851887243, - "loss": 2.1084, - "step": 2093 - }, - { - "epoch": 1.43, - "learning_rate": 0.0003285236502627807, - "loss": 1.9546, - "step": 2094 - }, - { - "epoch": 1.43, - "learning_rate": 0.000328380315336837, - "loss": 2.0735, - "step": 2095 - }, - { - "epoch": 1.43, - "learning_rate": 0.00032823698041089345, - "loss": 2.0231, - "step": 2096 - }, - { - "epoch": 1.43, - "learning_rate": 0.0003280936454849498, - "loss": 2.0705, - "step": 2097 - }, - { - "epoch": 1.43, - "learning_rate": 0.0003279503105590062, - "loss": 1.9939, - "step": 2098 - }, - { - "epoch": 1.44, - "learning_rate": 0.00032780697563306255, - "loss": 1.9815, - "step": 2099 - }, - { - "epoch": 1.44, - "learning_rate": 0.000327663640707119, - "loss": 1.9705, - "step": 2100 - }, - { - "epoch": 1.44, - "learning_rate": 0.0003275203057811753, - "loss": 2.0562, - "step": 2101 - }, - { - "epoch": 1.44, - "learning_rate": 0.00032737697085523165, - "loss": 2.014, - "step": 2102 - }, - { - "epoch": 1.44, - "learning_rate": 0.0003272336359292881, - "loss": 2.0447, - "step": 2103 - }, - { - "epoch": 1.44, - "learning_rate": 0.0003270903010033444, - "loss": 1.9627, - "step": 2104 - }, - { - "epoch": 1.44, - "learning_rate": 0.00032694696607740085, - "loss": 1.9587, - "step": 2105 - }, - { - "epoch": 1.44, - "learning_rate": 0.0003268036311514572, - "loss": 2.0637, - "step": 2106 - }, - { - "epoch": 1.44, - "learning_rate": 0.0003266602962255136, - "loss": 1.9581, - "step": 2107 - }, - { - "epoch": 1.44, - "learning_rate": 0.00032651696129956995, - "loss": 1.9609, - "step": 2108 - }, - { - "epoch": 1.44, - "learning_rate": 0.00032637362637362633, - "loss": 1.9956, - "step": 2109 - }, - { - "epoch": 1.44, - "learning_rate": 0.0003262302914476827, - "loss": 1.9885, - "step": 2110 - }, - { - "epoch": 1.44, - "learning_rate": 0.0003260869565217391, - "loss": 1.9506, - "step": 2111 - }, - { - "epoch": 1.44, - "learning_rate": 0.0003259436215957955, - "loss": 2.0175, - "step": 2112 - }, - { - "epoch": 1.44, - "learning_rate": 0.00032580028666985187, - "loss": 1.9666, - "step": 2113 - }, - { - "epoch": 1.45, - "learning_rate": 0.00032565695174390825, - "loss": 2.0387, - "step": 2114 - }, - { - "epoch": 1.45, - "learning_rate": 0.00032551361681796464, - "loss": 2.0288, - "step": 2115 - }, - { - "epoch": 1.45, - "learning_rate": 0.00032537028189202097, - "loss": 2.0741, - "step": 2116 - }, - { - "epoch": 1.45, - "learning_rate": 0.0003252269469660774, - "loss": 2.0846, - "step": 2117 - }, - { - "epoch": 1.45, - "learning_rate": 0.00032508361204013373, - "loss": 1.974, - "step": 2118 - }, - { - "epoch": 1.45, - "learning_rate": 0.00032494027711419017, - "loss": 1.9899, - "step": 2119 - }, - { - "epoch": 1.45, - "learning_rate": 0.0003247969421882465, - "loss": 2.1513, - "step": 2120 - }, - { - "epoch": 1.45, - "learning_rate": 0.00032465360726230294, - "loss": 1.9642, - "step": 2121 - }, - { - "epoch": 1.45, - "learning_rate": 0.00032451027233635927, - "loss": 1.9595, - "step": 2122 - }, - { - "epoch": 1.45, - "learning_rate": 0.0003243669374104156, - "loss": 2.0706, - "step": 2123 - }, - { - "epoch": 1.45, - "learning_rate": 0.00032422360248447204, - "loss": 2.0984, - "step": 2124 - }, - { - "epoch": 1.45, - "learning_rate": 0.00032408026755852837, - "loss": 2.1254, - "step": 2125 - }, - { - "epoch": 1.45, - "learning_rate": 0.0003239369326325848, - "loss": 1.9552, - "step": 2126 - }, - { - "epoch": 1.45, - "learning_rate": 0.00032379359770664113, - "loss": 1.9694, - "step": 2127 - }, - { - "epoch": 1.45, - "learning_rate": 0.00032365026278069757, - "loss": 2.0373, - "step": 2128 - }, - { - "epoch": 1.46, - "learning_rate": 0.0003235069278547539, - "loss": 2.0331, - "step": 2129 - }, - { - "epoch": 1.46, - "learning_rate": 0.0003233635929288103, - "loss": 2.0722, - "step": 2130 - }, - { - "epoch": 1.46, - "learning_rate": 0.00032322025800286667, - "loss": 2.0406, - "step": 2131 - }, - { - "epoch": 1.46, - "learning_rate": 0.00032307692307692305, - "loss": 1.8967, - "step": 2132 - }, - { - "epoch": 1.46, - "learning_rate": 0.00032293358815097944, - "loss": 1.9515, - "step": 2133 - }, - { - "epoch": 1.46, - "learning_rate": 0.0003227902532250358, - "loss": 2.0313, - "step": 2134 - }, - { - "epoch": 1.46, - "learning_rate": 0.0003226469182990922, - "loss": 2.0825, - "step": 2135 - }, - { - "epoch": 1.46, - "learning_rate": 0.0003225035833731486, - "loss": 2.0743, - "step": 2136 - }, - { - "epoch": 1.46, - "learning_rate": 0.0003223602484472049, - "loss": 2.0133, - "step": 2137 - }, - { - "epoch": 1.46, - "learning_rate": 0.00032221691352126136, - "loss": 1.9019, - "step": 2138 - }, - { - "epoch": 1.46, - "learning_rate": 0.0003220735785953177, - "loss": 1.9543, - "step": 2139 - }, - { - "epoch": 1.46, - "learning_rate": 0.0003219302436693741, - "loss": 1.999, - "step": 2140 - }, - { - "epoch": 1.46, - "learning_rate": 0.00032178690874343045, - "loss": 1.927, - "step": 2141 - }, - { - "epoch": 1.46, - "learning_rate": 0.0003216435738174869, - "loss": 2.069, - "step": 2142 - }, - { - "epoch": 1.47, - "learning_rate": 0.0003215002388915432, - "loss": 1.9393, - "step": 2143 - }, - { - "epoch": 1.47, - "learning_rate": 0.00032135690396559955, - "loss": 1.9443, - "step": 2144 - }, - { - "epoch": 1.47, - "learning_rate": 0.000321213569039656, - "loss": 1.9659, - "step": 2145 - }, - { - "epoch": 1.47, - "learning_rate": 0.0003210702341137123, - "loss": 2.0523, - "step": 2146 - }, - { - "epoch": 1.47, - "learning_rate": 0.00032092689918776876, - "loss": 2.0378, - "step": 2147 - }, - { - "epoch": 1.47, - "learning_rate": 0.0003207835642618251, - "loss": 2.0093, - "step": 2148 - }, - { - "epoch": 1.47, - "learning_rate": 0.00032064022933588147, - "loss": 2.0546, - "step": 2149 - }, - { - "epoch": 1.47, - "learning_rate": 0.00032049689440993785, - "loss": 2.0351, - "step": 2150 - }, - { - "epoch": 1.47, - "learning_rate": 0.00032035355948399424, - "loss": 1.9175, - "step": 2151 - }, - { - "epoch": 1.47, - "learning_rate": 0.0003202102245580506, - "loss": 1.9986, - "step": 2152 - }, - { - "epoch": 1.47, - "learning_rate": 0.000320066889632107, - "loss": 1.9202, - "step": 2153 - }, - { - "epoch": 1.47, - "learning_rate": 0.0003199235547061634, - "loss": 1.9855, - "step": 2154 - }, - { - "epoch": 1.47, - "learning_rate": 0.00031978021978021977, - "loss": 1.9847, - "step": 2155 - }, - { - "epoch": 1.47, - "learning_rate": 0.0003196368848542761, - "loss": 2.096, - "step": 2156 - }, - { - "epoch": 1.47, - "learning_rate": 0.00031949354992833254, - "loss": 2.0109, - "step": 2157 - }, - { - "epoch": 1.48, - "learning_rate": 0.00031935021500238887, - "loss": 1.8784, - "step": 2158 - }, - { - "epoch": 1.48, - "learning_rate": 0.0003192068800764453, - "loss": 1.902, - "step": 2159 - }, - { - "epoch": 1.48, - "learning_rate": 0.00031906354515050164, - "loss": 2.0265, - "step": 2160 - }, - { - "epoch": 1.48, - "learning_rate": 0.0003189202102245581, - "loss": 1.9539, - "step": 2161 - }, - { - "epoch": 1.48, - "learning_rate": 0.0003187768752986144, - "loss": 1.9743, - "step": 2162 - }, - { - "epoch": 1.48, - "learning_rate": 0.00031863354037267073, - "loss": 2.0785, - "step": 2163 - }, - { - "epoch": 1.48, - "learning_rate": 0.00031849020544672717, - "loss": 2.0334, - "step": 2164 - }, - { - "epoch": 1.48, - "learning_rate": 0.0003183468705207835, - "loss": 1.8959, - "step": 2165 - }, - { - "epoch": 1.48, - "learning_rate": 0.00031820353559483994, - "loss": 1.9354, - "step": 2166 - }, - { - "epoch": 1.48, - "learning_rate": 0.00031806020066889627, - "loss": 2.1023, - "step": 2167 - }, - { - "epoch": 1.48, - "learning_rate": 0.0003179168657429527, - "loss": 2.034, - "step": 2168 - }, - { - "epoch": 1.48, - "learning_rate": 0.00031777353081700904, - "loss": 1.9602, - "step": 2169 - }, - { - "epoch": 1.48, - "learning_rate": 0.0003176301958910654, - "loss": 1.9826, - "step": 2170 - }, - { - "epoch": 1.48, - "learning_rate": 0.0003174868609651218, - "loss": 1.9921, - "step": 2171 - }, - { - "epoch": 1.48, - "learning_rate": 0.0003173435260391782, - "loss": 1.9476, - "step": 2172 - }, - { - "epoch": 1.49, - "learning_rate": 0.00031720019111323457, - "loss": 2.0478, - "step": 2173 - }, - { - "epoch": 1.49, - "learning_rate": 0.00031705685618729095, - "loss": 2.0748, - "step": 2174 - }, - { - "epoch": 1.49, - "learning_rate": 0.00031691352126134734, - "loss": 1.9633, - "step": 2175 - }, - { - "epoch": 1.49, - "learning_rate": 0.0003167701863354037, - "loss": 2.001, - "step": 2176 - }, - { - "epoch": 1.49, - "learning_rate": 0.00031662685140946005, - "loss": 2.0642, - "step": 2177 - }, - { - "epoch": 1.49, - "learning_rate": 0.0003164835164835165, - "loss": 1.9987, - "step": 2178 - }, - { - "epoch": 1.49, - "learning_rate": 0.0003163401815575728, - "loss": 1.9435, - "step": 2179 - }, - { - "epoch": 1.49, - "learning_rate": 0.00031619684663162926, - "loss": 1.9399, - "step": 2180 - }, - { - "epoch": 1.49, - "learning_rate": 0.0003160535117056856, - "loss": 1.9618, - "step": 2181 - }, - { - "epoch": 1.49, - "learning_rate": 0.00031591017677974197, - "loss": 1.9743, - "step": 2182 - }, - { - "epoch": 1.49, - "learning_rate": 0.00031576684185379835, - "loss": 1.9511, - "step": 2183 - }, - { - "epoch": 1.49, - "learning_rate": 0.0003156235069278547, - "loss": 2.0058, - "step": 2184 - }, - { - "epoch": 1.49, - "learning_rate": 0.0003154801720019111, - "loss": 2.0174, - "step": 2185 - }, - { - "epoch": 1.49, - "learning_rate": 0.00031533683707596745, - "loss": 2.0545, - "step": 2186 - }, - { - "epoch": 1.5, - "learning_rate": 0.0003151935021500239, - "loss": 1.9771, - "step": 2187 - }, - { - "epoch": 1.5, - "learning_rate": 0.0003150501672240802, - "loss": 1.9741, - "step": 2188 - }, - { - "epoch": 1.5, - "learning_rate": 0.00031490683229813666, - "loss": 1.9993, - "step": 2189 - }, - { - "epoch": 1.5, - "learning_rate": 0.000314763497372193, - "loss": 1.9776, - "step": 2190 - }, - { - "epoch": 1.5, - "learning_rate": 0.00031462016244624937, - "loss": 2.0027, - "step": 2191 - }, - { - "epoch": 1.5, - "learning_rate": 0.00031447682752030575, - "loss": 2.0119, - "step": 2192 - }, - { - "epoch": 1.5, - "learning_rate": 0.00031433349259436214, - "loss": 1.9371, - "step": 2193 - }, - { - "epoch": 1.5, - "learning_rate": 0.0003141901576684185, - "loss": 2.0221, - "step": 2194 - }, - { - "epoch": 1.5, - "learning_rate": 0.0003140468227424749, - "loss": 2.0126, - "step": 2195 - }, - { - "epoch": 1.5, - "learning_rate": 0.0003139034878165313, - "loss": 2.066, - "step": 2196 - }, - { - "epoch": 1.5, - "learning_rate": 0.0003137601528905877, - "loss": 1.9748, - "step": 2197 - }, - { - "epoch": 1.5, - "learning_rate": 0.000313616817964644, - "loss": 1.9549, - "step": 2198 - }, - { - "epoch": 1.5, - "learning_rate": 0.0003134734830387004, - "loss": 2.0066, - "step": 2199 - }, - { - "epoch": 1.5, - "learning_rate": 0.00031333014811275677, - "loss": 2.0541, - "step": 2200 - }, - { - "epoch": 1.5, - "eval_loss": 1.9827316999435425, - "eval_runtime": 1651.9566, - "eval_samples_per_second": 6.053, - "eval_steps_per_second": 6.053, - "step": 2200 } ], - "max_steps": 4386, - "num_train_epochs": 3, - "total_flos": 2.69384075772468e+18, + "max_steps": 5848, + "num_train_epochs": 4, + "total_flos": 4.8975138397771776e+17, "trial_name": null, "trial_params": null }