{ "best_metric": 0.2249804437160492, "best_model_checkpoint": "/opt/ml/model/checkpoint-3060", "epoch": 9.0, "global_step": 3060, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 1.4705882352941178e-07, "loss": 4.4596, "step": 10 }, { "epoch": 0.06, "learning_rate": 2.9411764705882356e-07, "loss": 4.2962, "step": 20 }, { "epoch": 0.09, "learning_rate": 4.4117647058823536e-07, "loss": 4.4439, "step": 30 }, { "epoch": 0.12, "learning_rate": 5.882352941176471e-07, "loss": 4.3771, "step": 40 }, { "epoch": 0.15, "learning_rate": 7.352941176470589e-07, "loss": 4.3934, "step": 50 }, { "epoch": 0.18, "learning_rate": 8.823529411764707e-07, "loss": 4.3209, "step": 60 }, { "epoch": 0.21, "learning_rate": 1.0294117647058825e-06, "loss": 4.2707, "step": 70 }, { "epoch": 0.24, "learning_rate": 1.1764705882352942e-06, "loss": 4.2453, "step": 80 }, { "epoch": 0.26, "learning_rate": 1.323529411764706e-06, "loss": 4.0978, "step": 90 }, { "epoch": 0.29, "learning_rate": 1.4705882352941177e-06, "loss": 4.0954, "step": 100 }, { "epoch": 0.32, "learning_rate": 1.6176470588235297e-06, "loss": 3.8289, "step": 110 }, { "epoch": 0.35, "learning_rate": 1.7647058823529414e-06, "loss": 4.1465, "step": 120 }, { "epoch": 0.38, "learning_rate": 1.9117647058823528e-06, "loss": 4.1067, "step": 130 }, { "epoch": 0.41, "learning_rate": 2.058823529411765e-06, "loss": 4.0671, "step": 140 }, { "epoch": 0.44, "learning_rate": 2.2058823529411767e-06, "loss": 3.7774, "step": 150 }, { "epoch": 0.47, "learning_rate": 2.3529411764705885e-06, "loss": 3.8341, "step": 160 }, { "epoch": 0.5, "learning_rate": 2.5e-06, "loss": 3.5665, "step": 170 }, { "epoch": 0.53, "learning_rate": 2.647058823529412e-06, "loss": 3.5655, "step": 180 }, { "epoch": 0.56, "learning_rate": 2.7941176470588237e-06, "loss": 3.505, "step": 190 }, { "epoch": 0.59, "learning_rate": 2.9411764705882355e-06, "loss": 3.6714, "step": 200 }, { "epoch": 0.62, "learning_rate": 3.0882352941176476e-06, "loss": 3.4006, "step": 210 }, { "epoch": 0.65, "learning_rate": 3.2352941176470594e-06, "loss": 3.1562, "step": 220 }, { "epoch": 0.68, "learning_rate": 3.382352941176471e-06, "loss": 3.322, "step": 230 }, { "epoch": 0.71, "learning_rate": 3.529411764705883e-06, "loss": 3.1499, "step": 240 }, { "epoch": 0.74, "learning_rate": 3.6764705882352946e-06, "loss": 3.0769, "step": 250 }, { "epoch": 0.76, "learning_rate": 3.8235294117647055e-06, "loss": 2.9092, "step": 260 }, { "epoch": 0.79, "learning_rate": 3.970588235294118e-06, "loss": 2.9317, "step": 270 }, { "epoch": 0.82, "learning_rate": 4.11764705882353e-06, "loss": 2.8851, "step": 280 }, { "epoch": 0.85, "learning_rate": 4.264705882352942e-06, "loss": 2.9637, "step": 290 }, { "epoch": 0.88, "learning_rate": 4.411764705882353e-06, "loss": 2.5192, "step": 300 }, { "epoch": 0.91, "learning_rate": 4.558823529411765e-06, "loss": 2.6035, "step": 310 }, { "epoch": 0.94, "learning_rate": 4.705882352941177e-06, "loss": 2.4931, "step": 320 }, { "epoch": 0.97, "learning_rate": 4.852941176470589e-06, "loss": 2.6514, "step": 330 }, { "epoch": 1.0, "learning_rate": 5e-06, "loss": 2.4065, "step": 340 }, { "epoch": 1.0, "eval_accuracy": 0.5992141453831041, "eval_f1": 0.5463480113288633, "eval_loss": 2.2797043323516846, "eval_precision": 0.6294796834703609, "eval_recall": 0.580237956829516, "eval_runtime": 161.1898, "eval_samples_per_second": 6.316, "eval_steps_per_second": 0.099, "step": 340 }, { "epoch": 1.03, "learning_rate": 4.983660130718955e-06, "loss": 2.1525, "step": 350 }, { "epoch": 1.06, "learning_rate": 4.967320261437909e-06, "loss": 1.8859, "step": 360 }, { "epoch": 1.09, "learning_rate": 4.9509803921568634e-06, "loss": 1.9121, "step": 370 }, { "epoch": 1.12, "learning_rate": 4.934640522875817e-06, "loss": 1.6673, "step": 380 }, { "epoch": 1.15, "learning_rate": 4.918300653594771e-06, "loss": 1.996, "step": 390 }, { "epoch": 1.18, "learning_rate": 4.901960784313726e-06, "loss": 1.8359, "step": 400 }, { "epoch": 1.21, "learning_rate": 4.88562091503268e-06, "loss": 1.5955, "step": 410 }, { "epoch": 1.24, "learning_rate": 4.869281045751634e-06, "loss": 1.6088, "step": 420 }, { "epoch": 1.26, "learning_rate": 4.852941176470589e-06, "loss": 1.7104, "step": 430 }, { "epoch": 1.29, "learning_rate": 4.836601307189543e-06, "loss": 1.7513, "step": 440 }, { "epoch": 1.32, "learning_rate": 4.820261437908497e-06, "loss": 1.5832, "step": 450 }, { "epoch": 1.35, "learning_rate": 4.803921568627452e-06, "loss": 1.4003, "step": 460 }, { "epoch": 1.38, "learning_rate": 4.7875816993464055e-06, "loss": 1.2787, "step": 470 }, { "epoch": 1.41, "learning_rate": 4.77124183006536e-06, "loss": 1.4729, "step": 480 }, { "epoch": 1.44, "learning_rate": 4.754901960784314e-06, "loss": 1.4468, "step": 490 }, { "epoch": 1.47, "learning_rate": 4.7385620915032685e-06, "loss": 1.3275, "step": 500 }, { "epoch": 1.5, "learning_rate": 4.722222222222222e-06, "loss": 1.4053, "step": 510 }, { "epoch": 1.53, "learning_rate": 4.705882352941177e-06, "loss": 1.1261, "step": 520 }, { "epoch": 1.56, "learning_rate": 4.689542483660131e-06, "loss": 1.346, "step": 530 }, { "epoch": 1.59, "learning_rate": 4.673202614379085e-06, "loss": 1.2904, "step": 540 }, { "epoch": 1.62, "learning_rate": 4.65686274509804e-06, "loss": 1.1763, "step": 550 }, { "epoch": 1.65, "learning_rate": 4.640522875816994e-06, "loss": 1.232, "step": 560 }, { "epoch": 1.68, "learning_rate": 4.624183006535948e-06, "loss": 1.0554, "step": 570 }, { "epoch": 1.71, "learning_rate": 4.607843137254902e-06, "loss": 1.0278, "step": 580 }, { "epoch": 1.74, "learning_rate": 4.591503267973857e-06, "loss": 1.0268, "step": 590 }, { "epoch": 1.76, "learning_rate": 4.5751633986928105e-06, "loss": 0.9385, "step": 600 }, { "epoch": 1.79, "learning_rate": 4.558823529411765e-06, "loss": 1.1249, "step": 610 }, { "epoch": 1.82, "learning_rate": 4.542483660130719e-06, "loss": 1.0529, "step": 620 }, { "epoch": 1.85, "learning_rate": 4.5261437908496736e-06, "loss": 1.0088, "step": 630 }, { "epoch": 1.88, "learning_rate": 4.509803921568628e-06, "loss": 0.981, "step": 640 }, { "epoch": 1.91, "learning_rate": 4.493464052287582e-06, "loss": 0.8764, "step": 650 }, { "epoch": 1.94, "learning_rate": 4.477124183006537e-06, "loss": 0.7669, "step": 660 }, { "epoch": 1.97, "learning_rate": 4.460784313725491e-06, "loss": 0.8074, "step": 670 }, { "epoch": 2.0, "learning_rate": 4.444444444444444e-06, "loss": 0.9048, "step": 680 }, { "epoch": 2.0, "eval_accuracy": 0.8359528487229863, "eval_f1": 0.8330712103623911, "eval_loss": 0.940582811832428, "eval_precision": 0.8510173659101683, "eval_recall": 0.8397313099233475, "eval_runtime": 158.421, "eval_samples_per_second": 6.426, "eval_steps_per_second": 0.101, "step": 680 }, { "epoch": 2.03, "learning_rate": 4.428104575163399e-06, "loss": 0.7005, "step": 690 }, { "epoch": 2.06, "learning_rate": 4.411764705882353e-06, "loss": 0.6216, "step": 700 }, { "epoch": 2.09, "learning_rate": 4.395424836601307e-06, "loss": 0.6758, "step": 710 }, { "epoch": 2.12, "learning_rate": 4.379084967320262e-06, "loss": 0.6859, "step": 720 }, { "epoch": 2.15, "learning_rate": 4.3627450980392164e-06, "loss": 0.454, "step": 730 }, { "epoch": 2.18, "learning_rate": 4.34640522875817e-06, "loss": 0.5674, "step": 740 }, { "epoch": 2.21, "learning_rate": 4.330065359477125e-06, "loss": 0.6264, "step": 750 }, { "epoch": 2.24, "learning_rate": 4.313725490196079e-06, "loss": 0.5886, "step": 760 }, { "epoch": 2.26, "learning_rate": 4.297385620915033e-06, "loss": 0.5141, "step": 770 }, { "epoch": 2.29, "learning_rate": 4.281045751633987e-06, "loss": 0.4116, "step": 780 }, { "epoch": 2.32, "learning_rate": 4.264705882352942e-06, "loss": 0.5151, "step": 790 }, { "epoch": 2.35, "learning_rate": 4.2483660130718954e-06, "loss": 0.6208, "step": 800 }, { "epoch": 2.38, "learning_rate": 4.23202614379085e-06, "loss": 0.4929, "step": 810 }, { "epoch": 2.41, "learning_rate": 4.215686274509805e-06, "loss": 0.6218, "step": 820 }, { "epoch": 2.44, "learning_rate": 4.1993464052287585e-06, "loss": 0.5226, "step": 830 }, { "epoch": 2.47, "learning_rate": 4.183006535947713e-06, "loss": 0.4797, "step": 840 }, { "epoch": 2.5, "learning_rate": 4.166666666666667e-06, "loss": 0.4065, "step": 850 }, { "epoch": 2.53, "learning_rate": 4.1503267973856215e-06, "loss": 0.5133, "step": 860 }, { "epoch": 2.56, "learning_rate": 4.133986928104575e-06, "loss": 0.4397, "step": 870 }, { "epoch": 2.59, "learning_rate": 4.11764705882353e-06, "loss": 0.4372, "step": 880 }, { "epoch": 2.62, "learning_rate": 4.101307189542484e-06, "loss": 0.5625, "step": 890 }, { "epoch": 2.65, "learning_rate": 4.084967320261438e-06, "loss": 0.6021, "step": 900 }, { "epoch": 2.68, "learning_rate": 4.068627450980392e-06, "loss": 0.5916, "step": 910 }, { "epoch": 2.71, "learning_rate": 4.052287581699347e-06, "loss": 0.3234, "step": 920 }, { "epoch": 2.74, "learning_rate": 4.035947712418301e-06, "loss": 0.5071, "step": 930 }, { "epoch": 2.76, "learning_rate": 4.019607843137255e-06, "loss": 0.6007, "step": 940 }, { "epoch": 2.79, "learning_rate": 4.00326797385621e-06, "loss": 0.5284, "step": 950 }, { "epoch": 2.82, "learning_rate": 3.986928104575164e-06, "loss": 0.4562, "step": 960 }, { "epoch": 2.85, "learning_rate": 3.970588235294118e-06, "loss": 0.2356, "step": 970 }, { "epoch": 2.88, "learning_rate": 3.954248366013072e-06, "loss": 0.3074, "step": 980 }, { "epoch": 2.91, "learning_rate": 3.9379084967320266e-06, "loss": 0.5702, "step": 990 }, { "epoch": 2.94, "learning_rate": 3.92156862745098e-06, "loss": 0.5959, "step": 1000 }, { "epoch": 2.97, "learning_rate": 3.905228758169935e-06, "loss": 0.5857, "step": 1010 }, { "epoch": 3.0, "learning_rate": 3.88888888888889e-06, "loss": 0.4807, "step": 1020 }, { "epoch": 3.0, "eval_accuracy": 0.8870333988212181, "eval_f1": 0.8862169026751461, "eval_loss": 0.5658098459243774, "eval_precision": 0.8973600463348972, "eval_recall": 0.8907758376754111, "eval_runtime": 161.7981, "eval_samples_per_second": 6.292, "eval_steps_per_second": 0.099, "step": 1020 }, { "epoch": 3.03, "learning_rate": 3.872549019607843e-06, "loss": 0.1907, "step": 1030 }, { "epoch": 3.06, "learning_rate": 3.856209150326798e-06, "loss": 0.2597, "step": 1040 }, { "epoch": 3.09, "learning_rate": 3.839869281045752e-06, "loss": 0.3115, "step": 1050 }, { "epoch": 3.12, "learning_rate": 3.8235294117647055e-06, "loss": 0.2734, "step": 1060 }, { "epoch": 3.15, "learning_rate": 3.80718954248366e-06, "loss": 0.4247, "step": 1070 }, { "epoch": 3.18, "learning_rate": 3.7908496732026144e-06, "loss": 0.1969, "step": 1080 }, { "epoch": 3.21, "learning_rate": 3.774509803921569e-06, "loss": 0.3282, "step": 1090 }, { "epoch": 3.24, "learning_rate": 3.758169934640523e-06, "loss": 0.2913, "step": 1100 }, { "epoch": 3.26, "learning_rate": 3.7418300653594774e-06, "loss": 0.1869, "step": 1110 }, { "epoch": 3.29, "learning_rate": 3.7254901960784316e-06, "loss": 0.3293, "step": 1120 }, { "epoch": 3.32, "learning_rate": 3.7091503267973862e-06, "loss": 0.2212, "step": 1130 }, { "epoch": 3.35, "learning_rate": 3.6928104575163404e-06, "loss": 0.1879, "step": 1140 }, { "epoch": 3.38, "learning_rate": 3.6764705882352946e-06, "loss": 0.2919, "step": 1150 }, { "epoch": 3.41, "learning_rate": 3.6601307189542484e-06, "loss": 0.2825, "step": 1160 }, { "epoch": 3.44, "learning_rate": 3.6437908496732026e-06, "loss": 0.2485, "step": 1170 }, { "epoch": 3.47, "learning_rate": 3.6274509803921573e-06, "loss": 0.1167, "step": 1180 }, { "epoch": 3.5, "learning_rate": 3.6111111111111115e-06, "loss": 0.2036, "step": 1190 }, { "epoch": 3.53, "learning_rate": 3.5947712418300657e-06, "loss": 0.2564, "step": 1200 }, { "epoch": 3.56, "learning_rate": 3.57843137254902e-06, "loss": 0.2122, "step": 1210 }, { "epoch": 3.59, "learning_rate": 3.562091503267974e-06, "loss": 0.2749, "step": 1220 }, { "epoch": 3.62, "learning_rate": 3.5457516339869287e-06, "loss": 0.4118, "step": 1230 }, { "epoch": 3.65, "learning_rate": 3.529411764705883e-06, "loss": 0.2769, "step": 1240 }, { "epoch": 3.68, "learning_rate": 3.5130718954248367e-06, "loss": 0.2003, "step": 1250 }, { "epoch": 3.71, "learning_rate": 3.496732026143791e-06, "loss": 0.279, "step": 1260 }, { "epoch": 3.74, "learning_rate": 3.480392156862745e-06, "loss": 0.1174, "step": 1270 }, { "epoch": 3.76, "learning_rate": 3.4640522875816997e-06, "loss": 0.2338, "step": 1280 }, { "epoch": 3.79, "learning_rate": 3.447712418300654e-06, "loss": 0.2891, "step": 1290 }, { "epoch": 3.82, "learning_rate": 3.431372549019608e-06, "loss": 0.2443, "step": 1300 }, { "epoch": 3.85, "learning_rate": 3.4150326797385623e-06, "loss": 0.2581, "step": 1310 }, { "epoch": 3.88, "learning_rate": 3.398692810457517e-06, "loss": 0.2144, "step": 1320 }, { "epoch": 3.91, "learning_rate": 3.382352941176471e-06, "loss": 0.1662, "step": 1330 }, { "epoch": 3.94, "learning_rate": 3.3660130718954253e-06, "loss": 0.2396, "step": 1340 }, { "epoch": 3.97, "learning_rate": 3.349673202614379e-06, "loss": 0.3285, "step": 1350 }, { "epoch": 4.0, "learning_rate": 3.3333333333333333e-06, "loss": 0.1307, "step": 1360 }, { "epoch": 4.0, "eval_accuracy": 0.9115913555992141, "eval_f1": 0.9103864126710947, "eval_loss": 0.41891810297966003, "eval_precision": 0.9193377642626458, "eval_recall": 0.9140736604791629, "eval_runtime": 158.7694, "eval_samples_per_second": 6.412, "eval_steps_per_second": 0.101, "step": 1360 }, { "epoch": 4.03, "learning_rate": 3.3169934640522875e-06, "loss": 0.1055, "step": 1370 }, { "epoch": 4.06, "learning_rate": 3.300653594771242e-06, "loss": 0.1316, "step": 1380 }, { "epoch": 4.09, "learning_rate": 3.2843137254901964e-06, "loss": 0.1568, "step": 1390 }, { "epoch": 4.12, "learning_rate": 3.2679738562091506e-06, "loss": 0.1172, "step": 1400 }, { "epoch": 4.15, "learning_rate": 3.2516339869281048e-06, "loss": 0.1561, "step": 1410 }, { "epoch": 4.18, "learning_rate": 3.2352941176470594e-06, "loss": 0.1353, "step": 1420 }, { "epoch": 4.21, "learning_rate": 3.2189542483660136e-06, "loss": 0.1428, "step": 1430 }, { "epoch": 4.24, "learning_rate": 3.2026143790849674e-06, "loss": 0.1704, "step": 1440 }, { "epoch": 4.26, "learning_rate": 3.1862745098039216e-06, "loss": 0.1453, "step": 1450 }, { "epoch": 4.29, "learning_rate": 3.1699346405228758e-06, "loss": 0.135, "step": 1460 }, { "epoch": 4.32, "learning_rate": 3.1535947712418304e-06, "loss": 0.1236, "step": 1470 }, { "epoch": 4.35, "learning_rate": 3.1372549019607846e-06, "loss": 0.2047, "step": 1480 }, { "epoch": 4.38, "learning_rate": 3.120915032679739e-06, "loss": 0.1028, "step": 1490 }, { "epoch": 4.41, "learning_rate": 3.104575163398693e-06, "loss": 0.1737, "step": 1500 }, { "epoch": 4.44, "learning_rate": 3.0882352941176476e-06, "loss": 0.1892, "step": 1510 }, { "epoch": 4.47, "learning_rate": 3.071895424836602e-06, "loss": 0.1097, "step": 1520 }, { "epoch": 4.5, "learning_rate": 3.055555555555556e-06, "loss": 0.1168, "step": 1530 }, { "epoch": 4.53, "learning_rate": 3.03921568627451e-06, "loss": 0.1019, "step": 1540 }, { "epoch": 4.56, "learning_rate": 3.022875816993464e-06, "loss": 0.0835, "step": 1550 }, { "epoch": 4.59, "learning_rate": 3.0065359477124182e-06, "loss": 0.1283, "step": 1560 }, { "epoch": 4.62, "learning_rate": 2.990196078431373e-06, "loss": 0.1213, "step": 1570 }, { "epoch": 4.65, "learning_rate": 2.973856209150327e-06, "loss": 0.1162, "step": 1580 }, { "epoch": 4.68, "learning_rate": 2.9575163398692813e-06, "loss": 0.0795, "step": 1590 }, { "epoch": 4.71, "learning_rate": 2.9411764705882355e-06, "loss": 0.1179, "step": 1600 }, { "epoch": 4.74, "learning_rate": 2.92483660130719e-06, "loss": 0.0731, "step": 1610 }, { "epoch": 4.76, "learning_rate": 2.9084967320261443e-06, "loss": 0.2145, "step": 1620 }, { "epoch": 4.79, "learning_rate": 2.8921568627450985e-06, "loss": 0.1445, "step": 1630 }, { "epoch": 4.82, "learning_rate": 2.8758169934640523e-06, "loss": 0.0867, "step": 1640 }, { "epoch": 4.85, "learning_rate": 2.8594771241830065e-06, "loss": 0.0668, "step": 1650 }, { "epoch": 4.88, "learning_rate": 2.843137254901961e-06, "loss": 0.1466, "step": 1660 }, { "epoch": 4.91, "learning_rate": 2.8267973856209153e-06, "loss": 0.1207, "step": 1670 }, { "epoch": 4.94, "learning_rate": 2.8104575163398695e-06, "loss": 0.0815, "step": 1680 }, { "epoch": 4.97, "learning_rate": 2.7941176470588237e-06, "loss": 0.0874, "step": 1690 }, { "epoch": 5.0, "learning_rate": 2.7777777777777783e-06, "loss": 0.1169, "step": 1700 }, { "epoch": 5.0, "eval_accuracy": 0.925343811394892, "eval_f1": 0.9263233067835679, "eval_loss": 0.3335131108760834, "eval_precision": 0.9302145251196247, "eval_recall": 0.9311557041318327, "eval_runtime": 160.0601, "eval_samples_per_second": 6.36, "eval_steps_per_second": 0.1, "step": 1700 }, { "epoch": 5.03, "learning_rate": 2.7614379084967325e-06, "loss": 0.0505, "step": 1710 }, { "epoch": 5.06, "learning_rate": 2.7450980392156867e-06, "loss": 0.0733, "step": 1720 }, { "epoch": 5.09, "learning_rate": 2.7287581699346405e-06, "loss": 0.0911, "step": 1730 }, { "epoch": 5.12, "learning_rate": 2.7124183006535947e-06, "loss": 0.0365, "step": 1740 }, { "epoch": 5.15, "learning_rate": 2.696078431372549e-06, "loss": 0.0712, "step": 1750 }, { "epoch": 5.18, "learning_rate": 2.6797385620915036e-06, "loss": 0.0747, "step": 1760 }, { "epoch": 5.21, "learning_rate": 2.6633986928104578e-06, "loss": 0.0877, "step": 1770 }, { "epoch": 5.24, "learning_rate": 2.647058823529412e-06, "loss": 0.0526, "step": 1780 }, { "epoch": 5.26, "learning_rate": 2.630718954248366e-06, "loss": 0.0697, "step": 1790 }, { "epoch": 5.29, "learning_rate": 2.6143790849673208e-06, "loss": 0.0822, "step": 1800 }, { "epoch": 5.32, "learning_rate": 2.598039215686275e-06, "loss": 0.1042, "step": 1810 }, { "epoch": 5.35, "learning_rate": 2.581699346405229e-06, "loss": 0.0703, "step": 1820 }, { "epoch": 5.38, "learning_rate": 2.565359477124183e-06, "loss": 0.046, "step": 1830 }, { "epoch": 5.41, "learning_rate": 2.549019607843137e-06, "loss": 0.0904, "step": 1840 }, { "epoch": 5.44, "learning_rate": 2.532679738562092e-06, "loss": 0.0404, "step": 1850 }, { "epoch": 5.47, "learning_rate": 2.516339869281046e-06, "loss": 0.042, "step": 1860 }, { "epoch": 5.5, "learning_rate": 2.5e-06, "loss": 0.0745, "step": 1870 }, { "epoch": 5.53, "learning_rate": 2.4836601307189544e-06, "loss": 0.0605, "step": 1880 }, { "epoch": 5.56, "learning_rate": 2.4673202614379086e-06, "loss": 0.0431, "step": 1890 }, { "epoch": 5.59, "learning_rate": 2.450980392156863e-06, "loss": 0.0383, "step": 1900 }, { "epoch": 5.62, "learning_rate": 2.434640522875817e-06, "loss": 0.0365, "step": 1910 }, { "epoch": 5.65, "learning_rate": 2.4183006535947716e-06, "loss": 0.078, "step": 1920 }, { "epoch": 5.68, "learning_rate": 2.401960784313726e-06, "loss": 0.0478, "step": 1930 }, { "epoch": 5.71, "learning_rate": 2.38562091503268e-06, "loss": 0.1199, "step": 1940 }, { "epoch": 5.74, "learning_rate": 2.3692810457516342e-06, "loss": 0.0487, "step": 1950 }, { "epoch": 5.76, "learning_rate": 2.3529411764705885e-06, "loss": 0.0704, "step": 1960 }, { "epoch": 5.79, "learning_rate": 2.3366013071895427e-06, "loss": 0.0686, "step": 1970 }, { "epoch": 5.82, "learning_rate": 2.320261437908497e-06, "loss": 0.0235, "step": 1980 }, { "epoch": 5.85, "learning_rate": 2.303921568627451e-06, "loss": 0.0729, "step": 1990 }, { "epoch": 5.88, "learning_rate": 2.2875816993464053e-06, "loss": 0.0844, "step": 2000 }, { "epoch": 5.91, "learning_rate": 2.2712418300653595e-06, "loss": 0.0491, "step": 2010 }, { "epoch": 5.94, "learning_rate": 2.254901960784314e-06, "loss": 0.111, "step": 2020 }, { "epoch": 5.97, "learning_rate": 2.2385620915032683e-06, "loss": 0.0728, "step": 2030 }, { "epoch": 6.0, "learning_rate": 2.222222222222222e-06, "loss": 0.0621, "step": 2040 }, { "epoch": 6.0, "eval_accuracy": 0.93713163064833, "eval_f1": 0.9356746512723884, "eval_loss": 0.2819439172744751, "eval_precision": 0.9410873053457958, "eval_recall": 0.9364388610680477, "eval_runtime": 157.7429, "eval_samples_per_second": 6.454, "eval_steps_per_second": 0.101, "step": 2040 }, { "epoch": 6.03, "learning_rate": 2.2058823529411767e-06, "loss": 0.0192, "step": 2050 }, { "epoch": 6.06, "learning_rate": 2.189542483660131e-06, "loss": 0.0247, "step": 2060 }, { "epoch": 6.09, "learning_rate": 2.173202614379085e-06, "loss": 0.1012, "step": 2070 }, { "epoch": 6.12, "learning_rate": 2.1568627450980393e-06, "loss": 0.0288, "step": 2080 }, { "epoch": 6.15, "learning_rate": 2.1405228758169935e-06, "loss": 0.0221, "step": 2090 }, { "epoch": 6.18, "learning_rate": 2.1241830065359477e-06, "loss": 0.0328, "step": 2100 }, { "epoch": 6.21, "learning_rate": 2.1078431372549023e-06, "loss": 0.0321, "step": 2110 }, { "epoch": 6.24, "learning_rate": 2.0915032679738565e-06, "loss": 0.0342, "step": 2120 }, { "epoch": 6.26, "learning_rate": 2.0751633986928107e-06, "loss": 0.0444, "step": 2130 }, { "epoch": 6.29, "learning_rate": 2.058823529411765e-06, "loss": 0.0257, "step": 2140 }, { "epoch": 6.32, "learning_rate": 2.042483660130719e-06, "loss": 0.0236, "step": 2150 }, { "epoch": 6.35, "learning_rate": 2.0261437908496734e-06, "loss": 0.0301, "step": 2160 }, { "epoch": 6.38, "learning_rate": 2.0098039215686276e-06, "loss": 0.0234, "step": 2170 }, { "epoch": 6.41, "learning_rate": 1.993464052287582e-06, "loss": 0.0431, "step": 2180 }, { "epoch": 6.44, "learning_rate": 1.977124183006536e-06, "loss": 0.0333, "step": 2190 }, { "epoch": 6.47, "learning_rate": 1.96078431372549e-06, "loss": 0.0195, "step": 2200 }, { "epoch": 6.5, "learning_rate": 1.944444444444445e-06, "loss": 0.0236, "step": 2210 }, { "epoch": 6.53, "learning_rate": 1.928104575163399e-06, "loss": 0.0633, "step": 2220 }, { "epoch": 6.56, "learning_rate": 1.9117647058823528e-06, "loss": 0.0239, "step": 2230 }, { "epoch": 6.59, "learning_rate": 1.8954248366013072e-06, "loss": 0.0257, "step": 2240 }, { "epoch": 6.62, "learning_rate": 1.8790849673202616e-06, "loss": 0.0247, "step": 2250 }, { "epoch": 6.65, "learning_rate": 1.8627450980392158e-06, "loss": 0.0224, "step": 2260 }, { "epoch": 6.68, "learning_rate": 1.8464052287581702e-06, "loss": 0.049, "step": 2270 }, { "epoch": 6.71, "learning_rate": 1.8300653594771242e-06, "loss": 0.06, "step": 2280 }, { "epoch": 6.74, "learning_rate": 1.8137254901960786e-06, "loss": 0.0273, "step": 2290 }, { "epoch": 6.76, "learning_rate": 1.7973856209150328e-06, "loss": 0.0399, "step": 2300 }, { "epoch": 6.79, "learning_rate": 1.781045751633987e-06, "loss": 0.0958, "step": 2310 }, { "epoch": 6.82, "learning_rate": 1.7647058823529414e-06, "loss": 0.034, "step": 2320 }, { "epoch": 6.85, "learning_rate": 1.7483660130718954e-06, "loss": 0.0299, "step": 2330 }, { "epoch": 6.88, "learning_rate": 1.7320261437908499e-06, "loss": 0.018, "step": 2340 }, { "epoch": 6.91, "learning_rate": 1.715686274509804e-06, "loss": 0.0149, "step": 2350 }, { "epoch": 6.94, "learning_rate": 1.6993464052287585e-06, "loss": 0.0648, "step": 2360 }, { "epoch": 6.97, "learning_rate": 1.6830065359477127e-06, "loss": 0.0553, "step": 2370 }, { "epoch": 7.0, "learning_rate": 1.6666666666666667e-06, "loss": 0.0287, "step": 2380 }, { "epoch": 7.0, "eval_accuracy": 0.9400785854616895, "eval_f1": 0.9397297660899341, "eval_loss": 0.25110480189323425, "eval_precision": 0.9427838093056656, "eval_recall": 0.9422498990022914, "eval_runtime": 163.249, "eval_samples_per_second": 6.236, "eval_steps_per_second": 0.098, "step": 2380 }, { "epoch": 7.03, "learning_rate": 1.650326797385621e-06, "loss": 0.0135, "step": 2390 }, { "epoch": 7.06, "learning_rate": 1.6339869281045753e-06, "loss": 0.0134, "step": 2400 }, { "epoch": 7.09, "learning_rate": 1.6176470588235297e-06, "loss": 0.0141, "step": 2410 }, { "epoch": 7.12, "learning_rate": 1.6013071895424837e-06, "loss": 0.0203, "step": 2420 }, { "epoch": 7.15, "learning_rate": 1.5849673202614379e-06, "loss": 0.0157, "step": 2430 }, { "epoch": 7.18, "learning_rate": 1.5686274509803923e-06, "loss": 0.0126, "step": 2440 }, { "epoch": 7.21, "learning_rate": 1.5522875816993465e-06, "loss": 0.0264, "step": 2450 }, { "epoch": 7.24, "learning_rate": 1.535947712418301e-06, "loss": 0.0347, "step": 2460 }, { "epoch": 7.26, "learning_rate": 1.519607843137255e-06, "loss": 0.0143, "step": 2470 }, { "epoch": 7.29, "learning_rate": 1.5032679738562091e-06, "loss": 0.0108, "step": 2480 }, { "epoch": 7.32, "learning_rate": 1.4869281045751635e-06, "loss": 0.0214, "step": 2490 }, { "epoch": 7.35, "learning_rate": 1.4705882352941177e-06, "loss": 0.0145, "step": 2500 }, { "epoch": 7.38, "learning_rate": 1.4542483660130721e-06, "loss": 0.0115, "step": 2510 }, { "epoch": 7.41, "learning_rate": 1.4379084967320261e-06, "loss": 0.0143, "step": 2520 }, { "epoch": 7.44, "learning_rate": 1.4215686274509805e-06, "loss": 0.0191, "step": 2530 }, { "epoch": 7.47, "learning_rate": 1.4052287581699348e-06, "loss": 0.0413, "step": 2540 }, { "epoch": 7.5, "learning_rate": 1.3888888888888892e-06, "loss": 0.0133, "step": 2550 }, { "epoch": 7.53, "learning_rate": 1.3725490196078434e-06, "loss": 0.0102, "step": 2560 }, { "epoch": 7.56, "learning_rate": 1.3562091503267974e-06, "loss": 0.0381, "step": 2570 }, { "epoch": 7.59, "learning_rate": 1.3398692810457518e-06, "loss": 0.0157, "step": 2580 }, { "epoch": 7.62, "learning_rate": 1.323529411764706e-06, "loss": 0.0163, "step": 2590 }, { "epoch": 7.65, "learning_rate": 1.3071895424836604e-06, "loss": 0.015, "step": 2600 }, { "epoch": 7.68, "learning_rate": 1.2908496732026146e-06, "loss": 0.0553, "step": 2610 }, { "epoch": 7.71, "learning_rate": 1.2745098039215686e-06, "loss": 0.018, "step": 2620 }, { "epoch": 7.74, "learning_rate": 1.258169934640523e-06, "loss": 0.0161, "step": 2630 }, { "epoch": 7.76, "learning_rate": 1.2418300653594772e-06, "loss": 0.0256, "step": 2640 }, { "epoch": 7.79, "learning_rate": 1.2254901960784314e-06, "loss": 0.0095, "step": 2650 }, { "epoch": 7.82, "learning_rate": 1.2091503267973858e-06, "loss": 0.0353, "step": 2660 }, { "epoch": 7.85, "learning_rate": 1.19281045751634e-06, "loss": 0.065, "step": 2670 }, { "epoch": 7.88, "learning_rate": 1.1764705882352942e-06, "loss": 0.0264, "step": 2680 }, { "epoch": 7.91, "learning_rate": 1.1601307189542484e-06, "loss": 0.0355, "step": 2690 }, { "epoch": 7.94, "learning_rate": 1.1437908496732026e-06, "loss": 0.0498, "step": 2700 }, { "epoch": 7.97, "learning_rate": 1.127450980392157e-06, "loss": 0.0101, "step": 2710 }, { "epoch": 8.0, "learning_rate": 1.111111111111111e-06, "loss": 0.0188, "step": 2720 }, { "epoch": 8.0, "eval_accuracy": 0.9400785854616895, "eval_f1": 0.9395279559278015, "eval_loss": 0.2341359555721283, "eval_precision": 0.9437460647329915, "eval_recall": 0.9404723331338164, "eval_runtime": 159.7168, "eval_samples_per_second": 6.374, "eval_steps_per_second": 0.1, "step": 2720 }, { "epoch": 8.03, "learning_rate": 1.0947712418300655e-06, "loss": 0.0077, "step": 2730 }, { "epoch": 8.06, "learning_rate": 1.0784313725490197e-06, "loss": 0.0117, "step": 2740 }, { "epoch": 8.09, "learning_rate": 1.0620915032679739e-06, "loss": 0.0102, "step": 2750 }, { "epoch": 8.12, "learning_rate": 1.0457516339869283e-06, "loss": 0.0256, "step": 2760 }, { "epoch": 8.15, "learning_rate": 1.0294117647058825e-06, "loss": 0.0111, "step": 2770 }, { "epoch": 8.18, "learning_rate": 1.0130718954248367e-06, "loss": 0.0144, "step": 2780 }, { "epoch": 8.21, "learning_rate": 9.96732026143791e-07, "loss": 0.0084, "step": 2790 }, { "epoch": 8.24, "learning_rate": 9.80392156862745e-07, "loss": 0.0061, "step": 2800 }, { "epoch": 8.26, "learning_rate": 9.640522875816995e-07, "loss": 0.054, "step": 2810 }, { "epoch": 8.29, "learning_rate": 9.477124183006536e-07, "loss": 0.0126, "step": 2820 }, { "epoch": 8.32, "learning_rate": 9.313725490196079e-07, "loss": 0.0068, "step": 2830 }, { "epoch": 8.35, "learning_rate": 9.150326797385621e-07, "loss": 0.0108, "step": 2840 }, { "epoch": 8.38, "learning_rate": 8.986928104575164e-07, "loss": 0.0074, "step": 2850 }, { "epoch": 8.41, "learning_rate": 8.823529411764707e-07, "loss": 0.0194, "step": 2860 }, { "epoch": 8.44, "learning_rate": 8.660130718954249e-07, "loss": 0.0112, "step": 2870 }, { "epoch": 8.47, "learning_rate": 8.496732026143792e-07, "loss": 0.0133, "step": 2880 }, { "epoch": 8.5, "learning_rate": 8.333333333333333e-07, "loss": 0.0138, "step": 2890 }, { "epoch": 8.53, "learning_rate": 8.169934640522876e-07, "loss": 0.0261, "step": 2900 }, { "epoch": 8.56, "learning_rate": 8.006535947712418e-07, "loss": 0.0151, "step": 2910 }, { "epoch": 8.59, "learning_rate": 7.843137254901962e-07, "loss": 0.0176, "step": 2920 }, { "epoch": 8.62, "learning_rate": 7.679738562091505e-07, "loss": 0.008, "step": 2930 }, { "epoch": 8.65, "learning_rate": 7.516339869281046e-07, "loss": 0.0091, "step": 2940 }, { "epoch": 8.68, "learning_rate": 7.352941176470589e-07, "loss": 0.0124, "step": 2950 }, { "epoch": 8.71, "learning_rate": 7.189542483660131e-07, "loss": 0.0313, "step": 2960 }, { "epoch": 8.74, "learning_rate": 7.026143790849674e-07, "loss": 0.0286, "step": 2970 }, { "epoch": 8.76, "learning_rate": 6.862745098039217e-07, "loss": 0.0098, "step": 2980 }, { "epoch": 8.79, "learning_rate": 6.699346405228759e-07, "loss": 0.0067, "step": 2990 }, { "epoch": 8.82, "learning_rate": 6.535947712418302e-07, "loss": 0.0081, "step": 3000 }, { "epoch": 8.85, "learning_rate": 6.372549019607843e-07, "loss": 0.0102, "step": 3010 }, { "epoch": 8.88, "learning_rate": 6.209150326797386e-07, "loss": 0.0256, "step": 3020 }, { "epoch": 8.91, "learning_rate": 6.045751633986929e-07, "loss": 0.0117, "step": 3030 }, { "epoch": 8.94, "learning_rate": 5.882352941176471e-07, "loss": 0.0079, "step": 3040 }, { "epoch": 8.97, "learning_rate": 5.718954248366013e-07, "loss": 0.017, "step": 3050 }, { "epoch": 9.0, "learning_rate": 5.555555555555555e-07, "loss": 0.0217, "step": 3060 }, { "epoch": 9.0, "eval_accuracy": 0.9420432220039293, "eval_f1": 0.942289956941725, "eval_loss": 0.2249804437160492, "eval_precision": 0.9452784201539948, "eval_recall": 0.9442615812771316, "eval_runtime": 162.0726, "eval_samples_per_second": 6.281, "eval_steps_per_second": 0.099, "step": 3060 } ], "max_steps": 3400, "num_train_epochs": 10, "total_flos": 1.449075129002754e+18, "trial_name": null, "trial_params": null }