{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "global_step": 170000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 0.0009970588235294119, "loss": 5.4687, "step": 500 }, { "epoch": 0.01, "learning_rate": 0.0009941176470588235, "loss": 2.9773, "step": 1000 }, { "epoch": 0.01, "learning_rate": 0.0009911764705882353, "loss": 2.803, "step": 1500 }, { "epoch": 0.01, "learning_rate": 0.0009882352941176472, "loss": 2.6892, "step": 2000 }, { "epoch": 0.01, "learning_rate": 0.0009852941176470588, "loss": 2.6263, "step": 2500 }, { "epoch": 0.02, "learning_rate": 0.0009823529411764707, "loss": 2.5926, "step": 3000 }, { "epoch": 0.02, "learning_rate": 0.0009794117647058823, "loss": 2.5264, "step": 3500 }, { "epoch": 0.02, "learning_rate": 0.0009764705882352941, "loss": 2.5018, "step": 4000 }, { "epoch": 0.03, "learning_rate": 0.0009735294117647059, "loss": 2.4978, "step": 4500 }, { "epoch": 0.03, "learning_rate": 0.0009705882352941176, "loss": 2.4435, "step": 5000 }, { "epoch": 0.03, "learning_rate": 0.0009676470588235295, "loss": 2.3768, "step": 5500 }, { "epoch": 0.04, "learning_rate": 0.0009647058823529412, "loss": 2.382, "step": 6000 }, { "epoch": 0.04, "learning_rate": 0.0009617647058823529, "loss": 2.4059, "step": 6500 }, { "epoch": 0.04, "learning_rate": 0.0009588235294117648, "loss": 2.3918, "step": 7000 }, { "epoch": 0.04, "learning_rate": 0.0009558823529411765, "loss": 2.3325, "step": 7500 }, { "epoch": 0.05, "learning_rate": 0.0009529411764705882, "loss": 2.3217, "step": 8000 }, { "epoch": 0.05, "learning_rate": 0.00095, "loss": 2.2997, "step": 8500 }, { "epoch": 0.05, "learning_rate": 0.0009470588235294117, "loss": 2.2969, "step": 9000 }, { "epoch": 0.06, "learning_rate": 0.0009441176470588235, "loss": 2.3028, "step": 9500 }, { "epoch": 0.06, "learning_rate": 0.0009411764705882353, "loss": 2.2885, "step": 10000 }, { "epoch": 0.06, "learning_rate": 0.0009382352941176471, "loss": 2.3032, "step": 10500 }, { "epoch": 0.06, "learning_rate": 0.0009352941176470589, "loss": 2.2727, "step": 11000 }, { "epoch": 0.07, "learning_rate": 0.0009323529411764706, "loss": 2.2315, "step": 11500 }, { "epoch": 0.07, "learning_rate": 0.0009294117647058824, "loss": 2.2389, "step": 12000 }, { "epoch": 0.07, "learning_rate": 0.0009264705882352942, "loss": 2.2585, "step": 12500 }, { "epoch": 0.08, "learning_rate": 0.000923529411764706, "loss": 2.2445, "step": 13000 }, { "epoch": 0.08, "learning_rate": 0.0009205882352941176, "loss": 2.1763, "step": 13500 }, { "epoch": 0.08, "learning_rate": 0.0009176470588235294, "loss": 2.2386, "step": 14000 }, { "epoch": 0.09, "learning_rate": 0.0009147058823529412, "loss": 2.2169, "step": 14500 }, { "epoch": 0.09, "learning_rate": 0.0009117647058823529, "loss": 2.2082, "step": 15000 }, { "epoch": 0.09, "learning_rate": 0.0009088235294117648, "loss": 2.2337, "step": 15500 }, { "epoch": 0.09, "learning_rate": 0.0009058823529411765, "loss": 2.1778, "step": 16000 }, { "epoch": 0.1, "learning_rate": 0.0009029411764705882, "loss": 2.2025, "step": 16500 }, { "epoch": 0.1, "learning_rate": 0.0009000000000000001, "loss": 2.1734, "step": 17000 }, { "epoch": 0.1, "learning_rate": 0.0008970588235294118, "loss": 2.1567, "step": 17500 }, { "epoch": 0.11, "learning_rate": 0.0008941176470588236, "loss": 2.1574, "step": 18000 }, { "epoch": 0.11, "learning_rate": 0.0008911764705882354, "loss": 2.1401, "step": 18500 }, { "epoch": 0.11, "learning_rate": 0.000888235294117647, "loss": 2.1853, "step": 19000 }, { "epoch": 0.11, "learning_rate": 0.0008852941176470588, "loss": 2.1417, "step": 19500 }, { "epoch": 0.12, "learning_rate": 0.0008823529411764706, "loss": 2.1332, "step": 20000 }, { "epoch": 0.12, "learning_rate": 0.0008794117647058824, "loss": 2.12, "step": 20500 }, { "epoch": 0.12, "learning_rate": 0.0008764705882352941, "loss": 2.1348, "step": 21000 }, { "epoch": 0.13, "learning_rate": 0.0008735294117647059, "loss": 2.1464, "step": 21500 }, { "epoch": 0.13, "learning_rate": 0.0008705882352941177, "loss": 2.1532, "step": 22000 }, { "epoch": 0.13, "learning_rate": 0.0008676470588235294, "loss": 2.1142, "step": 22500 }, { "epoch": 0.14, "learning_rate": 0.0008647058823529413, "loss": 2.0977, "step": 23000 }, { "epoch": 0.14, "learning_rate": 0.000861764705882353, "loss": 2.1114, "step": 23500 }, { "epoch": 0.14, "learning_rate": 0.0008588235294117646, "loss": 2.079, "step": 24000 }, { "epoch": 0.14, "learning_rate": 0.0008558823529411765, "loss": 2.1021, "step": 24500 }, { "epoch": 0.15, "learning_rate": 0.0008529411764705882, "loss": 2.1075, "step": 25000 }, { "epoch": 0.15, "learning_rate": 0.00085, "loss": 2.0557, "step": 25500 }, { "epoch": 0.15, "learning_rate": 0.0008470588235294118, "loss": 2.0445, "step": 26000 }, { "epoch": 0.16, "learning_rate": 0.0008441176470588235, "loss": 2.072, "step": 26500 }, { "epoch": 0.16, "learning_rate": 0.0008411764705882353, "loss": 2.0684, "step": 27000 }, { "epoch": 0.16, "learning_rate": 0.0008382352941176471, "loss": 2.06, "step": 27500 }, { "epoch": 0.16, "learning_rate": 0.0008352941176470589, "loss": 2.0768, "step": 28000 }, { "epoch": 0.17, "learning_rate": 0.0008323529411764706, "loss": 2.1055, "step": 28500 }, { "epoch": 0.17, "learning_rate": 0.0008294117647058824, "loss": 2.0504, "step": 29000 }, { "epoch": 0.17, "learning_rate": 0.0008264705882352941, "loss": 2.0279, "step": 29500 }, { "epoch": 0.18, "learning_rate": 0.0008235294117647058, "loss": 2.0852, "step": 30000 }, { "epoch": 0.18, "learning_rate": 0.0008205882352941177, "loss": 2.0934, "step": 30500 }, { "epoch": 0.18, "learning_rate": 0.0008176470588235294, "loss": 2.0453, "step": 31000 }, { "epoch": 0.19, "learning_rate": 0.0008147058823529411, "loss": 2.0469, "step": 31500 }, { "epoch": 0.19, "learning_rate": 0.000811764705882353, "loss": 2.6994, "step": 32000 }, { "epoch": 0.19, "learning_rate": 0.0008088235294117647, "loss": 2.0459, "step": 32500 }, { "epoch": 0.19, "learning_rate": 0.0008058823529411766, "loss": 2.0367, "step": 33000 }, { "epoch": 0.2, "learning_rate": 0.0008029411764705883, "loss": 2.0672, "step": 33500 }, { "epoch": 0.2, "learning_rate": 0.0008, "loss": 2.0765, "step": 34000 }, { "epoch": 0.2, "learning_rate": 0.0007970588235294119, "loss": 2.0736, "step": 34500 }, { "epoch": 0.21, "learning_rate": 0.0007941176470588235, "loss": 2.0491, "step": 35000 }, { "epoch": 0.21, "learning_rate": 0.0007911764705882353, "loss": 2.0626, "step": 35500 }, { "epoch": 0.21, "learning_rate": 0.0007882352941176471, "loss": 2.034, "step": 36000 }, { "epoch": 0.21, "learning_rate": 0.0007852941176470588, "loss": 2.0163, "step": 36500 }, { "epoch": 0.22, "learning_rate": 0.0007823529411764706, "loss": 2.0318, "step": 37000 }, { "epoch": 0.22, "learning_rate": 0.0007794117647058824, "loss": 2.0477, "step": 37500 }, { "epoch": 0.22, "learning_rate": 0.0007764705882352942, "loss": 2.0535, "step": 38000 }, { "epoch": 0.23, "learning_rate": 0.0007735294117647059, "loss": 1.9894, "step": 38500 }, { "epoch": 0.23, "learning_rate": 0.0007705882352941177, "loss": 2.0341, "step": 39000 }, { "epoch": 0.23, "learning_rate": 0.0007676470588235295, "loss": 1.9852, "step": 39500 }, { "epoch": 0.24, "learning_rate": 0.0007647058823529411, "loss": 1.9756, "step": 40000 }, { "epoch": 0.24, "learning_rate": 0.000761764705882353, "loss": 2.0002, "step": 40500 }, { "epoch": 0.24, "learning_rate": 0.0007588235294117647, "loss": 2.0056, "step": 41000 }, { "epoch": 0.24, "learning_rate": 0.0007558823529411764, "loss": 2.0109, "step": 41500 }, { "epoch": 0.25, "learning_rate": 0.0007529411764705883, "loss": 2.0476, "step": 42000 }, { "epoch": 0.25, "learning_rate": 0.00075, "loss": 2.0153, "step": 42500 }, { "epoch": 0.25, "learning_rate": 0.0007470588235294118, "loss": 1.983, "step": 43000 }, { "epoch": 0.26, "learning_rate": 0.0007441176470588236, "loss": 1.9831, "step": 43500 }, { "epoch": 0.26, "learning_rate": 0.0007411764705882353, "loss": 1.9843, "step": 44000 }, { "epoch": 0.26, "learning_rate": 0.0007382352941176471, "loss": 1.9926, "step": 44500 }, { "epoch": 0.26, "learning_rate": 0.0007352941176470589, "loss": 1.9853, "step": 45000 }, { "epoch": 0.27, "learning_rate": 0.0007323529411764706, "loss": 1.9806, "step": 45500 }, { "epoch": 0.27, "learning_rate": 0.0007294117647058823, "loss": 1.9876, "step": 46000 }, { "epoch": 0.27, "learning_rate": 0.0007264705882352941, "loss": 1.9705, "step": 46500 }, { "epoch": 0.28, "learning_rate": 0.0007235294117647059, "loss": 1.9724, "step": 47000 }, { "epoch": 0.28, "learning_rate": 0.0007205882352941176, "loss": 2.0018, "step": 47500 }, { "epoch": 0.28, "learning_rate": 0.0007176470588235295, "loss": 1.9356, "step": 48000 }, { "epoch": 0.29, "learning_rate": 0.0007147058823529412, "loss": 1.966, "step": 48500 }, { "epoch": 0.29, "learning_rate": 0.0007117647058823529, "loss": 1.9758, "step": 49000 }, { "epoch": 0.29, "learning_rate": 0.0007088235294117648, "loss": 1.9851, "step": 49500 }, { "epoch": 0.29, "learning_rate": 0.0007058823529411765, "loss": 1.9648, "step": 50000 }, { "epoch": 0.3, "learning_rate": 0.0007029411764705881, "loss": 1.9601, "step": 50500 }, { "epoch": 0.3, "learning_rate": 0.0007, "loss": 1.9646, "step": 51000 }, { "epoch": 0.3, "learning_rate": 0.0006970588235294117, "loss": 1.9551, "step": 51500 }, { "epoch": 0.31, "learning_rate": 0.0006941176470588235, "loss": 1.9455, "step": 52000 }, { "epoch": 0.31, "learning_rate": 0.0006911764705882353, "loss": 1.9527, "step": 52500 }, { "epoch": 0.31, "learning_rate": 0.000688235294117647, "loss": 1.9583, "step": 53000 }, { "epoch": 0.31, "learning_rate": 0.0006852941176470589, "loss": 1.9525, "step": 53500 }, { "epoch": 0.32, "learning_rate": 0.0006823529411764706, "loss": 1.9276, "step": 54000 }, { "epoch": 0.32, "learning_rate": 0.0006794117647058824, "loss": 1.9626, "step": 54500 }, { "epoch": 0.32, "learning_rate": 0.0006764705882352942, "loss": 1.8995, "step": 55000 }, { "epoch": 0.33, "learning_rate": 0.000673529411764706, "loss": 1.9457, "step": 55500 }, { "epoch": 0.33, "learning_rate": 0.0006705882352941176, "loss": 1.9706, "step": 56000 }, { "epoch": 0.33, "learning_rate": 0.0006676470588235294, "loss": 1.9671, "step": 56500 }, { "epoch": 0.34, "learning_rate": 0.0006647058823529412, "loss": 1.9613, "step": 57000 }, { "epoch": 0.34, "learning_rate": 0.0006617647058823529, "loss": 1.9511, "step": 57500 }, { "epoch": 0.34, "learning_rate": 0.0006588235294117648, "loss": 1.9437, "step": 58000 }, { "epoch": 0.34, "learning_rate": 0.0006558823529411765, "loss": 1.9381, "step": 58500 }, { "epoch": 0.35, "learning_rate": 0.0006529411764705882, "loss": 1.9205, "step": 59000 }, { "epoch": 0.35, "learning_rate": 0.0006500000000000001, "loss": 1.9415, "step": 59500 }, { "epoch": 0.35, "learning_rate": 0.0006470588235294118, "loss": 1.9307, "step": 60000 }, { "epoch": 0.36, "learning_rate": 0.0006441176470588236, "loss": 1.9616, "step": 60500 }, { "epoch": 0.36, "learning_rate": 0.0006411764705882354, "loss": 1.9267, "step": 61000 }, { "epoch": 0.36, "learning_rate": 0.000638235294117647, "loss": 1.911, "step": 61500 }, { "epoch": 0.36, "learning_rate": 0.0006352941176470588, "loss": 1.9316, "step": 62000 }, { "epoch": 0.37, "learning_rate": 0.0006323529411764706, "loss": 1.9174, "step": 62500 }, { "epoch": 0.37, "learning_rate": 0.0006294117647058824, "loss": 1.9185, "step": 63000 }, { "epoch": 0.37, "learning_rate": 0.0006264705882352941, "loss": 1.9067, "step": 63500 }, { "epoch": 0.38, "learning_rate": 0.0006235294117647059, "loss": 1.9071, "step": 64000 }, { "epoch": 0.38, "learning_rate": 0.0006205882352941177, "loss": 1.9217, "step": 64500 }, { "epoch": 0.38, "learning_rate": 0.0006176470588235294, "loss": 1.9262, "step": 65000 }, { "epoch": 0.39, "learning_rate": 0.0006147058823529413, "loss": 1.9182, "step": 65500 }, { "epoch": 0.39, "learning_rate": 0.000611764705882353, "loss": 1.9291, "step": 66000 }, { "epoch": 0.39, "learning_rate": 0.0006088235294117646, "loss": 1.9115, "step": 66500 }, { "epoch": 0.39, "learning_rate": 0.0006058823529411765, "loss": 1.8964, "step": 67000 }, { "epoch": 0.4, "learning_rate": 0.0006029411764705882, "loss": 1.9267, "step": 67500 }, { "epoch": 0.4, "learning_rate": 0.0006, "loss": 1.8993, "step": 68000 }, { "epoch": 0.4, "learning_rate": 0.0005970588235294118, "loss": 1.9361, "step": 68500 }, { "epoch": 0.41, "learning_rate": 0.0005941176470588235, "loss": 1.8742, "step": 69000 }, { "epoch": 0.41, "learning_rate": 0.0005911764705882353, "loss": 1.9027, "step": 69500 }, { "epoch": 0.41, "learning_rate": 0.0005882352941176471, "loss": 1.9098, "step": 70000 }, { "epoch": 0.41, "learning_rate": 0.0005852941176470589, "loss": 1.8969, "step": 70500 }, { "epoch": 0.42, "learning_rate": 0.0005823529411764706, "loss": 1.9119, "step": 71000 }, { "epoch": 0.42, "learning_rate": 0.0005794117647058824, "loss": 1.9021, "step": 71500 }, { "epoch": 0.42, "learning_rate": 0.0005764705882352941, "loss": 1.8577, "step": 72000 }, { "epoch": 0.43, "learning_rate": 0.0005735294117647058, "loss": 1.892, "step": 72500 }, { "epoch": 0.43, "learning_rate": 0.0005705882352941177, "loss": 1.8874, "step": 73000 }, { "epoch": 0.43, "learning_rate": 0.0005676470588235294, "loss": 1.8978, "step": 73500 }, { "epoch": 0.44, "learning_rate": 0.0005647058823529411, "loss": 1.8824, "step": 74000 }, { "epoch": 0.44, "learning_rate": 0.000561764705882353, "loss": 1.9075, "step": 74500 }, { "epoch": 0.44, "learning_rate": 0.0005588235294117647, "loss": 1.891, "step": 75000 }, { "epoch": 0.44, "learning_rate": 0.0005558823529411766, "loss": 1.8859, "step": 75500 }, { "epoch": 0.45, "learning_rate": 0.0005529411764705883, "loss": 1.8606, "step": 76000 }, { "epoch": 0.45, "learning_rate": 0.00055, "loss": 1.9076, "step": 76500 }, { "epoch": 0.45, "learning_rate": 0.0005470588235294119, "loss": 1.8524, "step": 77000 }, { "epoch": 0.46, "learning_rate": 0.0005441176470588235, "loss": 1.8389, "step": 77500 }, { "epoch": 0.46, "learning_rate": 0.0005411764705882352, "loss": 1.8674, "step": 78000 }, { "epoch": 0.46, "learning_rate": 0.0005382352941176471, "loss": 1.9254, "step": 78500 }, { "epoch": 0.46, "learning_rate": 0.0005352941176470588, "loss": 1.9068, "step": 79000 }, { "epoch": 0.47, "learning_rate": 0.0005323529411764706, "loss": 1.8746, "step": 79500 }, { "epoch": 0.47, "learning_rate": 0.0005294117647058824, "loss": 1.8625, "step": 80000 }, { "epoch": 0.47, "learning_rate": 0.0005264705882352942, "loss": 1.7992, "step": 80500 }, { "epoch": 0.48, "learning_rate": 0.0005235294117647059, "loss": 1.8703, "step": 81000 }, { "epoch": 0.48, "learning_rate": 0.0005205882352941177, "loss": 1.8678, "step": 81500 }, { "epoch": 0.48, "learning_rate": 0.0005176470588235295, "loss": 1.8611, "step": 82000 }, { "epoch": 0.49, "learning_rate": 0.0005147058823529411, "loss": 1.8053, "step": 82500 }, { "epoch": 0.49, "learning_rate": 0.000511764705882353, "loss": 1.8697, "step": 83000 }, { "epoch": 0.49, "learning_rate": 0.0005088235294117647, "loss": 1.8965, "step": 83500 }, { "epoch": 0.49, "learning_rate": 0.0005058823529411764, "loss": 1.8519, "step": 84000 }, { "epoch": 0.5, "learning_rate": 0.0005029411764705883, "loss": 1.8454, "step": 84500 }, { "epoch": 0.5, "learning_rate": 0.0005, "loss": 1.8719, "step": 85000 }, { "epoch": 0.5, "learning_rate": 0.0004970588235294117, "loss": 1.876, "step": 85500 }, { "epoch": 0.51, "learning_rate": 0.0004941176470588236, "loss": 1.8438, "step": 86000 }, { "epoch": 0.51, "learning_rate": 0.0004911764705882353, "loss": 1.8712, "step": 86500 }, { "epoch": 0.51, "learning_rate": 0.00048823529411764707, "loss": 1.8232, "step": 87000 }, { "epoch": 0.51, "learning_rate": 0.0004852941176470588, "loss": 1.8492, "step": 87500 }, { "epoch": 0.52, "learning_rate": 0.0004823529411764706, "loss": 1.8605, "step": 88000 }, { "epoch": 0.52, "learning_rate": 0.0004794117647058824, "loss": 1.8502, "step": 88500 }, { "epoch": 0.52, "learning_rate": 0.0004764705882352941, "loss": 1.8803, "step": 89000 }, { "epoch": 0.53, "learning_rate": 0.00047352941176470587, "loss": 1.8466, "step": 89500 }, { "epoch": 0.53, "learning_rate": 0.00047058823529411766, "loss": 1.8823, "step": 90000 }, { "epoch": 0.53, "learning_rate": 0.00046764705882352945, "loss": 1.8624, "step": 90500 }, { "epoch": 0.54, "learning_rate": 0.0004647058823529412, "loss": 1.8055, "step": 91000 }, { "epoch": 0.54, "learning_rate": 0.000461764705882353, "loss": 1.8456, "step": 91500 }, { "epoch": 0.54, "learning_rate": 0.0004588235294117647, "loss": 1.8181, "step": 92000 }, { "epoch": 0.54, "learning_rate": 0.00045588235294117646, "loss": 1.8324, "step": 92500 }, { "epoch": 0.55, "learning_rate": 0.00045294117647058825, "loss": 1.8439, "step": 93000 }, { "epoch": 0.55, "learning_rate": 0.00045000000000000004, "loss": 1.8487, "step": 93500 }, { "epoch": 0.55, "learning_rate": 0.0004470588235294118, "loss": 1.8355, "step": 94000 }, { "epoch": 0.56, "learning_rate": 0.0004441176470588235, "loss": 1.8536, "step": 94500 }, { "epoch": 0.56, "learning_rate": 0.0004411764705882353, "loss": 1.8101, "step": 95000 }, { "epoch": 0.56, "learning_rate": 0.00043823529411764705, "loss": 1.7965, "step": 95500 }, { "epoch": 0.56, "learning_rate": 0.00043529411764705884, "loss": 1.8239, "step": 96000 }, { "epoch": 0.57, "learning_rate": 0.00043235294117647063, "loss": 1.8155, "step": 96500 }, { "epoch": 0.57, "learning_rate": 0.0004294117647058823, "loss": 1.831, "step": 97000 }, { "epoch": 0.57, "learning_rate": 0.0004264705882352941, "loss": 1.8305, "step": 97500 }, { "epoch": 0.58, "learning_rate": 0.0004235294117647059, "loss": 1.824, "step": 98000 }, { "epoch": 0.58, "learning_rate": 0.00042058823529411764, "loss": 1.8162, "step": 98500 }, { "epoch": 0.58, "learning_rate": 0.00041764705882352943, "loss": 1.8092, "step": 99000 }, { "epoch": 0.59, "learning_rate": 0.0004147058823529412, "loss": 1.7905, "step": 99500 }, { "epoch": 0.59, "learning_rate": 0.0004117647058823529, "loss": 1.8432, "step": 100000 }, { "epoch": 0.59, "eval_gen_len": 85.6, "eval_loss": 1.6427801847457886, "eval_rouge1": 29.1215, "eval_rouge2": 10.0431, "eval_rougeL": 20.203, "eval_rougeLsum": 24.5992, "eval_runtime": 107.8904, "eval_samples_per_second": 0.927, "step": 100000 }, { "epoch": 0.59, "learning_rate": 0.0004088235294117647, "loss": 1.8176, "step": 100500 }, { "epoch": 0.59, "learning_rate": 0.0004058823529411765, "loss": 1.8058, "step": 101000 }, { "epoch": 0.6, "learning_rate": 0.0004029411764705883, "loss": 1.7967, "step": 101500 }, { "epoch": 0.6, "learning_rate": 0.0004, "loss": 1.7941, "step": 102000 }, { "epoch": 0.6, "learning_rate": 0.00039705882352941176, "loss": 1.8045, "step": 102500 }, { "epoch": 0.61, "learning_rate": 0.00039411764705882355, "loss": 1.7988, "step": 103000 }, { "epoch": 0.61, "learning_rate": 0.0003911764705882353, "loss": 1.8047, "step": 103500 }, { "epoch": 0.61, "learning_rate": 0.0003882352941176471, "loss": 1.817, "step": 104000 }, { "epoch": 0.61, "learning_rate": 0.00038529411764705887, "loss": 1.8081, "step": 104500 }, { "epoch": 0.62, "learning_rate": 0.00038235294117647055, "loss": 1.7954, "step": 105000 }, { "epoch": 0.62, "learning_rate": 0.00037941176470588235, "loss": 1.8404, "step": 105500 }, { "epoch": 0.62, "learning_rate": 0.00037647058823529414, "loss": 1.815, "step": 106000 }, { "epoch": 0.63, "learning_rate": 0.0003735294117647059, "loss": 1.8023, "step": 106500 }, { "epoch": 0.63, "learning_rate": 0.00037058823529411767, "loss": 1.7957, "step": 107000 }, { "epoch": 0.63, "learning_rate": 0.00036764705882352946, "loss": 1.7989, "step": 107500 }, { "epoch": 0.64, "learning_rate": 0.00036470588235294114, "loss": 1.8369, "step": 108000 }, { "epoch": 0.64, "learning_rate": 0.00036176470588235294, "loss": 1.816, "step": 108500 }, { "epoch": 0.64, "learning_rate": 0.00035882352941176473, "loss": 1.7874, "step": 109000 }, { "epoch": 0.64, "learning_rate": 0.00035588235294117647, "loss": 1.7832, "step": 109500 }, { "epoch": 0.65, "learning_rate": 0.00035294117647058826, "loss": 1.8078, "step": 110000 }, { "epoch": 0.65, "learning_rate": 0.00035, "loss": 1.7959, "step": 110500 }, { "epoch": 0.65, "learning_rate": 0.00034705882352941173, "loss": 1.7853, "step": 111000 }, { "epoch": 0.66, "learning_rate": 0.0003441176470588235, "loss": 1.8219, "step": 111500 }, { "epoch": 0.66, "learning_rate": 0.0003411764705882353, "loss": 1.8028, "step": 112000 }, { "epoch": 0.66, "learning_rate": 0.0003382352941176471, "loss": 1.8283, "step": 112500 }, { "epoch": 0.66, "learning_rate": 0.0003352941176470588, "loss": 1.7884, "step": 113000 }, { "epoch": 0.67, "learning_rate": 0.0003323529411764706, "loss": 1.7835, "step": 113500 }, { "epoch": 0.67, "learning_rate": 0.0003294117647058824, "loss": 1.7767, "step": 114000 }, { "epoch": 0.67, "learning_rate": 0.0003264705882352941, "loss": 1.8169, "step": 114500 }, { "epoch": 0.68, "learning_rate": 0.0003235294117647059, "loss": 1.8163, "step": 115000 }, { "epoch": 0.68, "learning_rate": 0.0003205882352941177, "loss": 1.7781, "step": 115500 }, { "epoch": 0.68, "learning_rate": 0.0003176470588235294, "loss": 1.7734, "step": 116000 }, { "epoch": 0.69, "learning_rate": 0.0003147058823529412, "loss": 1.7818, "step": 116500 }, { "epoch": 0.69, "learning_rate": 0.00031176470588235297, "loss": 1.7562, "step": 117000 }, { "epoch": 0.69, "learning_rate": 0.0003088235294117647, "loss": 1.7839, "step": 117500 }, { "epoch": 0.69, "learning_rate": 0.0003058823529411765, "loss": 1.7727, "step": 118000 }, { "epoch": 0.7, "learning_rate": 0.00030294117647058824, "loss": 1.7838, "step": 118500 }, { "epoch": 0.7, "learning_rate": 0.0003, "loss": 1.7936, "step": 119000 }, { "epoch": 0.7, "learning_rate": 0.00029705882352941177, "loss": 1.7998, "step": 119500 }, { "epoch": 0.71, "learning_rate": 0.00029411764705882356, "loss": 1.773, "step": 120000 }, { "epoch": 0.71, "learning_rate": 0.0002911764705882353, "loss": 1.7997, "step": 120500 }, { "epoch": 0.71, "learning_rate": 0.00028823529411764703, "loss": 1.7989, "step": 121000 }, { "epoch": 0.71, "learning_rate": 0.0002852941176470588, "loss": 1.773, "step": 121500 }, { "epoch": 0.72, "learning_rate": 0.00028235294117647056, "loss": 1.7351, "step": 122000 }, { "epoch": 0.72, "learning_rate": 0.00027941176470588236, "loss": 1.7558, "step": 122500 }, { "epoch": 0.72, "learning_rate": 0.00027647058823529415, "loss": 1.7618, "step": 123000 }, { "epoch": 0.73, "learning_rate": 0.00027352941176470594, "loss": 1.7601, "step": 123500 }, { "epoch": 0.73, "learning_rate": 0.0002705882352941176, "loss": 1.8319, "step": 124000 }, { "epoch": 0.73, "learning_rate": 0.0002676470588235294, "loss": 1.7765, "step": 124500 }, { "epoch": 0.74, "learning_rate": 0.0002647058823529412, "loss": 1.7838, "step": 125000 }, { "epoch": 0.74, "learning_rate": 0.00026176470588235295, "loss": 1.7566, "step": 125500 }, { "epoch": 0.74, "learning_rate": 0.00025882352941176474, "loss": 1.7626, "step": 126000 }, { "epoch": 0.74, "learning_rate": 0.0002558823529411765, "loss": 1.7678, "step": 126500 }, { "epoch": 0.75, "learning_rate": 0.0002529411764705882, "loss": 1.794, "step": 127000 }, { "epoch": 0.75, "learning_rate": 0.00025, "loss": 1.747, "step": 127500 }, { "epoch": 0.75, "learning_rate": 0.0002470588235294118, "loss": 1.7565, "step": 128000 }, { "epoch": 0.76, "learning_rate": 0.00024411764705882354, "loss": 1.7932, "step": 128500 }, { "epoch": 0.76, "learning_rate": 0.0002411764705882353, "loss": 1.7623, "step": 129000 }, { "epoch": 0.76, "learning_rate": 0.00023823529411764704, "loss": 1.7712, "step": 129500 }, { "epoch": 0.76, "learning_rate": 0.00023529411764705883, "loss": 1.7684, "step": 130000 }, { "epoch": 0.77, "learning_rate": 0.0002323529411764706, "loss": 1.7317, "step": 130500 }, { "epoch": 0.77, "learning_rate": 0.00022941176470588236, "loss": 1.7679, "step": 131000 }, { "epoch": 0.77, "learning_rate": 0.00022647058823529412, "loss": 1.7735, "step": 131500 }, { "epoch": 0.78, "learning_rate": 0.0002235294117647059, "loss": 1.7688, "step": 132000 }, { "epoch": 0.78, "learning_rate": 0.00022058823529411765, "loss": 1.7494, "step": 132500 }, { "epoch": 0.78, "learning_rate": 0.00021764705882352942, "loss": 1.7739, "step": 133000 }, { "epoch": 0.79, "learning_rate": 0.00021470588235294116, "loss": 1.7537, "step": 133500 }, { "epoch": 0.79, "learning_rate": 0.00021176470588235295, "loss": 1.7243, "step": 134000 }, { "epoch": 0.79, "learning_rate": 0.00020882352941176471, "loss": 1.7828, "step": 134500 }, { "epoch": 0.79, "learning_rate": 0.00020588235294117645, "loss": 1.783, "step": 135000 }, { "epoch": 0.8, "learning_rate": 0.00020294117647058824, "loss": 1.714, "step": 135500 }, { "epoch": 0.8, "learning_rate": 0.0002, "loss": 1.7523, "step": 136000 }, { "epoch": 0.8, "learning_rate": 0.00019705882352941177, "loss": 1.7409, "step": 136500 }, { "epoch": 0.81, "learning_rate": 0.00019411764705882354, "loss": 1.7641, "step": 137000 }, { "epoch": 0.81, "learning_rate": 0.00019117647058823528, "loss": 1.7501, "step": 137500 }, { "epoch": 0.81, "learning_rate": 0.00018823529411764707, "loss": 1.7367, "step": 138000 }, { "epoch": 0.81, "learning_rate": 0.00018529411764705883, "loss": 1.7362, "step": 138500 }, { "epoch": 0.82, "learning_rate": 0.00018235294117647057, "loss": 1.7564, "step": 139000 }, { "epoch": 0.82, "learning_rate": 0.00017941176470588236, "loss": 1.7527, "step": 139500 }, { "epoch": 0.82, "learning_rate": 0.00017647058823529413, "loss": 1.7404, "step": 140000 }, { "epoch": 0.83, "learning_rate": 0.00017352941176470587, "loss": 1.7694, "step": 140500 }, { "epoch": 0.83, "learning_rate": 0.00017058823529411766, "loss": 1.7443, "step": 141000 }, { "epoch": 0.83, "learning_rate": 0.0001676470588235294, "loss": 1.7413, "step": 141500 }, { "epoch": 0.84, "learning_rate": 0.0001647058823529412, "loss": 1.7572, "step": 142000 }, { "epoch": 0.84, "learning_rate": 0.00016176470588235295, "loss": 1.7442, "step": 142500 }, { "epoch": 0.84, "learning_rate": 0.0001588235294117647, "loss": 1.7608, "step": 143000 }, { "epoch": 0.84, "learning_rate": 0.00015588235294117648, "loss": 1.7467, "step": 143500 }, { "epoch": 0.85, "learning_rate": 0.00015294117647058825, "loss": 1.7496, "step": 144000 }, { "epoch": 0.85, "learning_rate": 0.00015, "loss": 1.7396, "step": 144500 }, { "epoch": 0.85, "learning_rate": 0.00014705882352941178, "loss": 1.7341, "step": 145000 }, { "epoch": 0.86, "learning_rate": 0.00014411764705882352, "loss": 1.7358, "step": 145500 }, { "epoch": 0.86, "learning_rate": 0.00014117647058823528, "loss": 1.7721, "step": 146000 }, { "epoch": 0.86, "learning_rate": 0.00013823529411764707, "loss": 1.7224, "step": 146500 }, { "epoch": 0.86, "learning_rate": 0.0001352941176470588, "loss": 1.7137, "step": 147000 }, { "epoch": 0.87, "learning_rate": 0.0001323529411764706, "loss": 1.7555, "step": 147500 }, { "epoch": 0.87, "learning_rate": 0.00012941176470588237, "loss": 1.7432, "step": 148000 }, { "epoch": 0.87, "learning_rate": 0.0001264705882352941, "loss": 1.7319, "step": 148500 }, { "epoch": 0.88, "learning_rate": 0.0001235294117647059, "loss": 1.725, "step": 149000 }, { "epoch": 0.88, "learning_rate": 0.00012058823529411765, "loss": 1.7655, "step": 149500 }, { "epoch": 0.88, "learning_rate": 0.00011764705882352942, "loss": 1.7358, "step": 150000 }, { "epoch": 0.89, "learning_rate": 0.00011470588235294118, "loss": 1.7214, "step": 150500 }, { "epoch": 0.89, "learning_rate": 0.00011176470588235294, "loss": 1.7413, "step": 151000 }, { "epoch": 0.89, "learning_rate": 0.00010882352941176471, "loss": 1.7069, "step": 151500 }, { "epoch": 0.89, "learning_rate": 0.00010588235294117647, "loss": 1.7282, "step": 152000 }, { "epoch": 0.9, "learning_rate": 0.00010294117647058823, "loss": 1.7121, "step": 152500 }, { "epoch": 0.9, "learning_rate": 0.0001, "loss": 1.7227, "step": 153000 }, { "epoch": 0.9, "learning_rate": 9.705882352941177e-05, "loss": 1.7391, "step": 153500 }, { "epoch": 0.91, "learning_rate": 9.411764705882353e-05, "loss": 1.7422, "step": 154000 }, { "epoch": 0.91, "learning_rate": 9.117647058823529e-05, "loss": 1.7154, "step": 154500 }, { "epoch": 0.91, "learning_rate": 8.823529411764706e-05, "loss": 1.7419, "step": 155000 }, { "epoch": 0.91, "learning_rate": 8.529411764705883e-05, "loss": 1.7608, "step": 155500 }, { "epoch": 0.92, "learning_rate": 8.23529411764706e-05, "loss": 1.7154, "step": 156000 }, { "epoch": 0.92, "learning_rate": 7.941176470588235e-05, "loss": 1.7082, "step": 156500 }, { "epoch": 0.92, "learning_rate": 7.647058823529412e-05, "loss": 1.7347, "step": 157000 }, { "epoch": 0.93, "learning_rate": 7.352941176470589e-05, "loss": 1.7054, "step": 157500 }, { "epoch": 0.93, "learning_rate": 7.058823529411764e-05, "loss": 1.7326, "step": 158000 }, { "epoch": 0.93, "learning_rate": 6.76470588235294e-05, "loss": 1.7224, "step": 158500 }, { "epoch": 0.94, "learning_rate": 6.470588235294118e-05, "loss": 1.7362, "step": 159000 }, { "epoch": 0.94, "learning_rate": 6.176470588235295e-05, "loss": 1.7159, "step": 159500 }, { "epoch": 0.94, "learning_rate": 5.882352941176471e-05, "loss": 1.73, "step": 160000 }, { "epoch": 0.94, "learning_rate": 5.588235294117647e-05, "loss": 1.7523, "step": 160500 }, { "epoch": 0.95, "learning_rate": 5.294117647058824e-05, "loss": 1.7315, "step": 161000 }, { "epoch": 0.95, "learning_rate": 5e-05, "loss": 1.709, "step": 161500 }, { "epoch": 0.95, "learning_rate": 4.705882352941177e-05, "loss": 1.7341, "step": 162000 }, { "epoch": 0.96, "learning_rate": 4.411764705882353e-05, "loss": 1.7186, "step": 162500 }, { "epoch": 0.96, "learning_rate": 4.11764705882353e-05, "loss": 1.719, "step": 163000 }, { "epoch": 0.96, "learning_rate": 3.823529411764706e-05, "loss": 1.7115, "step": 163500 }, { "epoch": 0.96, "learning_rate": 3.529411764705882e-05, "loss": 1.7036, "step": 164000 }, { "epoch": 0.97, "learning_rate": 3.235294117647059e-05, "loss": 1.7147, "step": 164500 }, { "epoch": 0.97, "learning_rate": 2.9411764705882354e-05, "loss": 1.7347, "step": 165000 }, { "epoch": 0.97, "learning_rate": 2.647058823529412e-05, "loss": 1.7334, "step": 165500 }, { "epoch": 0.98, "learning_rate": 2.3529411764705884e-05, "loss": 1.7337, "step": 166000 }, { "epoch": 0.98, "learning_rate": 2.058823529411765e-05, "loss": 1.7266, "step": 166500 }, { "epoch": 0.98, "learning_rate": 1.764705882352941e-05, "loss": 1.6958, "step": 167000 }, { "epoch": 0.99, "learning_rate": 1.4705882352941177e-05, "loss": 1.7052, "step": 167500 }, { "epoch": 0.99, "learning_rate": 1.1764705882352942e-05, "loss": 1.6985, "step": 168000 }, { "epoch": 0.99, "learning_rate": 8.823529411764705e-06, "loss": 1.6896, "step": 168500 }, { "epoch": 0.99, "learning_rate": 5.882352941176471e-06, "loss": 1.7455, "step": 169000 }, { "epoch": 1.0, "learning_rate": 2.9411764705882355e-06, "loss": 1.7554, "step": 169500 }, { "epoch": 1.0, "learning_rate": 0.0, "loss": 1.6962, "step": 170000 }, { "epoch": 1.0, "step": 170000, "total_flos": 3421528306437104640, "train_runtime": 228303.6105, "train_samples_per_second": 0.745 } ], "max_steps": 170000, "num_train_epochs": 1, "total_flos": 3421528306437104640, "trial_name": null, "trial_params": null }