openmathinstruct2-mix-sft / trainer_log.jsonl
skyai798's picture
Upload folder using huggingface_hub
71efe9c verified
{"current_steps": 30, "total_steps": 2304, "loss": 0.7578, "learning_rate": 6.493506493506493e-07, "epoch": 0.0390625, "percentage": 1.3, "elapsed_time": "0:01:54", "remaining_time": "2:24:47"}
{"current_steps": 60, "total_steps": 2304, "loss": 0.5677, "learning_rate": 1.2987012987012986e-06, "epoch": 0.078125, "percentage": 2.6, "elapsed_time": "0:03:39", "remaining_time": "2:17:07"}
{"current_steps": 90, "total_steps": 2304, "loss": 0.5243, "learning_rate": 1.9480519480519483e-06, "epoch": 0.1171875, "percentage": 3.91, "elapsed_time": "0:05:24", "remaining_time": "2:12:59"}
{"current_steps": 120, "total_steps": 2304, "loss": 0.5118, "learning_rate": 2.597402597402597e-06, "epoch": 0.15625, "percentage": 5.21, "elapsed_time": "0:07:06", "remaining_time": "2:09:26"}
{"current_steps": 150, "total_steps": 2304, "loss": 0.5146, "learning_rate": 3.246753246753247e-06, "epoch": 0.1953125, "percentage": 6.51, "elapsed_time": "0:08:50", "remaining_time": "2:06:56"}
{"current_steps": 180, "total_steps": 2304, "loss": 0.5121, "learning_rate": 3.896103896103897e-06, "epoch": 0.234375, "percentage": 7.81, "elapsed_time": "0:10:31", "remaining_time": "2:04:09"}
{"current_steps": 210, "total_steps": 2304, "loss": 0.5172, "learning_rate": 4.5454545454545455e-06, "epoch": 0.2734375, "percentage": 9.11, "elapsed_time": "0:12:13", "remaining_time": "2:01:49"}
{"current_steps": 240, "total_steps": 2304, "loss": 0.5053, "learning_rate": 4.999767464405452e-06, "epoch": 0.3125, "percentage": 10.42, "elapsed_time": "0:13:54", "remaining_time": "1:59:40"}
{"current_steps": 270, "total_steps": 2304, "loss": 0.5197, "learning_rate": 4.995634701567892e-06, "epoch": 0.3515625, "percentage": 11.72, "elapsed_time": "0:17:45", "remaining_time": "2:13:43"}
{"current_steps": 300, "total_steps": 2304, "loss": 0.5087, "learning_rate": 4.986344312601082e-06, "epoch": 0.390625, "percentage": 13.02, "elapsed_time": "0:19:28", "remaining_time": "2:10:06"}
{"current_steps": 330, "total_steps": 2304, "loss": 0.5126, "learning_rate": 4.971915497571788e-06, "epoch": 0.4296875, "percentage": 14.32, "elapsed_time": "0:21:10", "remaining_time": "2:06:37"}
{"current_steps": 360, "total_steps": 2304, "loss": 0.5144, "learning_rate": 4.9523780759216764e-06, "epoch": 0.46875, "percentage": 15.62, "elapsed_time": "0:22:53", "remaining_time": "2:03:38"}
{"current_steps": 390, "total_steps": 2304, "loss": 0.5083, "learning_rate": 4.927772424840702e-06, "epoch": 0.5078125, "percentage": 16.93, "elapsed_time": "0:24:38", "remaining_time": "2:00:57"}
{"current_steps": 420, "total_steps": 2304, "loss": 0.5117, "learning_rate": 4.898149395821218e-06, "epoch": 0.546875, "percentage": 18.23, "elapsed_time": "0:26:22", "remaining_time": "1:58:19"}
{"current_steps": 450, "total_steps": 2304, "loss": 0.5107, "learning_rate": 4.863570209565277e-06, "epoch": 0.5859375, "percentage": 19.53, "elapsed_time": "0:28:06", "remaining_time": "1:55:47"}
{"current_steps": 480, "total_steps": 2304, "loss": 0.5098, "learning_rate": 4.824106329462313e-06, "epoch": 0.625, "percentage": 20.83, "elapsed_time": "0:29:50", "remaining_time": "1:53:23"}
{"current_steps": 510, "total_steps": 2304, "loss": 0.5152, "learning_rate": 4.779839313898675e-06, "epoch": 0.6640625, "percentage": 22.14, "elapsed_time": "0:31:35", "remaining_time": "1:51:07"}
{"current_steps": 540, "total_steps": 2304, "loss": 0.5078, "learning_rate": 4.730860647704252e-06, "epoch": 0.703125, "percentage": 23.44, "elapsed_time": "0:35:29", "remaining_time": "1:55:57"}
{"current_steps": 570, "total_steps": 2304, "loss": 0.5078, "learning_rate": 4.677271553084515e-06, "epoch": 0.7421875, "percentage": 24.74, "elapsed_time": "0:37:12", "remaining_time": "1:53:11"}
{"current_steps": 600, "total_steps": 2304, "loss": 0.5073, "learning_rate": 4.6191827804287236e-06, "epoch": 0.78125, "percentage": 26.04, "elapsed_time": "0:38:56", "remaining_time": "1:50:36"}
{"current_steps": 630, "total_steps": 2304, "loss": 0.5194, "learning_rate": 4.556714379426634e-06, "epoch": 0.8203125, "percentage": 27.34, "elapsed_time": "0:40:40", "remaining_time": "1:48:05"}
{"current_steps": 660, "total_steps": 2304, "loss": 0.4997, "learning_rate": 4.489995450966714e-06, "epoch": 0.859375, "percentage": 28.65, "elapsed_time": "0:42:23", "remaining_time": "1:45:36"}
{"current_steps": 690, "total_steps": 2304, "loss": 0.5008, "learning_rate": 4.419163880328615e-06, "epoch": 0.8984375, "percentage": 29.95, "elapsed_time": "0:44:06", "remaining_time": "1:43:10"}
{"current_steps": 720, "total_steps": 2304, "loss": 0.5037, "learning_rate": 4.344366052221316e-06, "epoch": 0.9375, "percentage": 31.25, "elapsed_time": "0:46:18", "remaining_time": "1:41:53"}
{"current_steps": 750, "total_steps": 2304, "loss": 0.4977, "learning_rate": 4.265756548255823e-06, "epoch": 0.9765625, "percentage": 32.55, "elapsed_time": "0:48:52", "remaining_time": "1:41:16"}
{"current_steps": 780, "total_steps": 2304, "loss": 0.4782, "learning_rate": 4.183497827477687e-06, "epoch": 1.015625, "percentage": 33.85, "elapsed_time": "0:53:27", "remaining_time": "1:44:26"}
{"current_steps": 810, "total_steps": 2304, "loss": 0.439, "learning_rate": 4.097759890619539e-06, "epoch": 1.0546875, "percentage": 35.16, "elapsed_time": "0:55:58", "remaining_time": "1:43:14"}
{"current_steps": 840, "total_steps": 2304, "loss": 0.4441, "learning_rate": 4.00871992876753e-06, "epoch": 1.09375, "percentage": 36.46, "elapsed_time": "0:58:30", "remaining_time": "1:41:58"}
{"current_steps": 870, "total_steps": 2304, "loss": 0.4438, "learning_rate": 3.916561957167765e-06, "epoch": 1.1328125, "percentage": 37.76, "elapsed_time": "1:00:57", "remaining_time": "1:40:28"}
{"current_steps": 900, "total_steps": 2304, "loss": 0.44, "learning_rate": 3.82147643492952e-06, "epoch": 1.171875, "percentage": 39.06, "elapsed_time": "1:03:27", "remaining_time": "1:38:59"}
{"current_steps": 930, "total_steps": 2304, "loss": 0.4406, "learning_rate": 3.723659871411196e-06, "epoch": 1.2109375, "percentage": 40.36, "elapsed_time": "1:06:00", "remaining_time": "1:37:30"}
{"current_steps": 960, "total_steps": 2304, "loss": 0.4464, "learning_rate": 3.623314420102467e-06, "epoch": 1.25, "percentage": 41.67, "elapsed_time": "1:08:34", "remaining_time": "1:35:59"}
{"current_steps": 990, "total_steps": 2304, "loss": 0.4462, "learning_rate": 3.5206474608419385e-06, "epoch": 1.2890625, "percentage": 42.97, "elapsed_time": "1:11:05", "remaining_time": "1:34:21"}
{"current_steps": 1020, "total_steps": 2304, "loss": 0.4412, "learning_rate": 3.415871171233709e-06, "epoch": 1.328125, "percentage": 44.27, "elapsed_time": "1:13:34", "remaining_time": "1:32:36"}
{"current_steps": 1050, "total_steps": 2304, "loss": 0.4395, "learning_rate": 3.3092020881486085e-06, "epoch": 1.3671875, "percentage": 45.57, "elapsed_time": "1:18:18", "remaining_time": "1:33:31"}
{"current_steps": 1080, "total_steps": 2304, "loss": 0.4425, "learning_rate": 3.2008606602163023e-06, "epoch": 1.40625, "percentage": 46.88, "elapsed_time": "1:20:46", "remaining_time": "1:31:33"}
{"current_steps": 1110, "total_steps": 2304, "loss": 0.439, "learning_rate": 3.091070792233124e-06, "epoch": 1.4453125, "percentage": 48.18, "elapsed_time": "1:23:14", "remaining_time": "1:29:32"}
{"current_steps": 1140, "total_steps": 2304, "loss": 0.4354, "learning_rate": 2.9800593824272027e-06, "epoch": 1.484375, "percentage": 49.48, "elapsed_time": "1:25:45", "remaining_time": "1:27:34"}
{"current_steps": 1170, "total_steps": 2304, "loss": 0.4404, "learning_rate": 2.8680558535371688e-06, "epoch": 1.5234375, "percentage": 50.78, "elapsed_time": "1:28:15", "remaining_time": "1:25:32"}
{"current_steps": 1200, "total_steps": 2304, "loss": 0.4431, "learning_rate": 2.7552916786735744e-06, "epoch": 1.5625, "percentage": 52.08, "elapsed_time": "1:30:48", "remaining_time": "1:23:32"}
{"current_steps": 1230, "total_steps": 2304, "loss": 0.43, "learning_rate": 2.641999902942882e-06, "epoch": 1.6015625, "percentage": 53.39, "elapsed_time": "1:33:22", "remaining_time": "1:21:32"}
{"current_steps": 1260, "total_steps": 2304, "loss": 0.449, "learning_rate": 2.5284146618226807e-06, "epoch": 1.640625, "percentage": 54.69, "elapsed_time": "1:35:55", "remaining_time": "1:19:28"}
{"current_steps": 1290, "total_steps": 2304, "loss": 0.4387, "learning_rate": 2.414770697283471e-06, "epoch": 1.6796875, "percentage": 55.99, "elapsed_time": "1:40:48", "remaining_time": "1:19:14"}
{"current_steps": 1320, "total_steps": 2304, "loss": 0.444, "learning_rate": 2.3013028726570436e-06, "epoch": 1.71875, "percentage": 57.29, "elapsed_time": "1:43:15", "remaining_time": "1:16:58"}
{"current_steps": 1350, "total_steps": 2304, "loss": 0.4394, "learning_rate": 2.188245687254035e-06, "epoch": 1.7578125, "percentage": 58.59, "elapsed_time": "1:45:43", "remaining_time": "1:14:42"}
{"current_steps": 1380, "total_steps": 2304, "loss": 0.4473, "learning_rate": 2.075832791733802e-06, "epoch": 1.796875, "percentage": 59.9, "elapsed_time": "1:48:11", "remaining_time": "1:12:26"}
{"current_steps": 1410, "total_steps": 2304, "loss": 0.4404, "learning_rate": 1.9642965052281618e-06, "epoch": 1.8359375, "percentage": 61.2, "elapsed_time": "1:50:42", "remaining_time": "1:10:11"}
{"current_steps": 1440, "total_steps": 2304, "loss": 0.4446, "learning_rate": 1.8538673352169467e-06, "epoch": 1.875, "percentage": 62.5, "elapsed_time": "1:53:09", "remaining_time": "1:07:53"}
{"current_steps": 1470, "total_steps": 2304, "loss": 0.4236, "learning_rate": 1.744773501147627e-06, "epoch": 1.9140625, "percentage": 63.8, "elapsed_time": "1:55:41", "remaining_time": "1:05:37"}
{"current_steps": 1500, "total_steps": 2304, "loss": 0.4352, "learning_rate": 1.6372404627835182e-06, "epoch": 1.953125, "percentage": 65.1, "elapsed_time": "1:58:11", "remaining_time": "1:03:21"}
{"current_steps": 1530, "total_steps": 2304, "loss": 0.4344, "learning_rate": 1.5314904542553099e-06, "epoch": 1.9921875, "percentage": 66.41, "elapsed_time": "2:00:44", "remaining_time": "1:01:04"}
{"current_steps": 1560, "total_steps": 2304, "loss": 0.3877, "learning_rate": 1.4277420247788842e-06, "epoch": 2.03125, "percentage": 67.71, "elapsed_time": "2:05:28", "remaining_time": "0:59:50"}
{"current_steps": 1590, "total_steps": 2304, "loss": 0.3748, "learning_rate": 1.3262095869885907e-06, "epoch": 2.0703125, "percentage": 69.01, "elapsed_time": "2:08:00", "remaining_time": "0:57:29"}
{"current_steps": 1620, "total_steps": 2304, "loss": 0.3801, "learning_rate": 1.227102973819426e-06, "epoch": 2.109375, "percentage": 70.31, "elapsed_time": "2:10:31", "remaining_time": "0:55:06"}
{"current_steps": 1650, "total_steps": 2304, "loss": 0.3623, "learning_rate": 1.1306270048538966e-06, "epoch": 2.1484375, "percentage": 71.61, "elapsed_time": "2:13:02", "remaining_time": "0:52:43"}
{"current_steps": 1680, "total_steps": 2304, "loss": 0.3652, "learning_rate": 1.0369810630297658e-06, "epoch": 2.1875, "percentage": 72.92, "elapsed_time": "2:15:28", "remaining_time": "0:50:19"}
{"current_steps": 1710, "total_steps": 2304, "loss": 0.3724, "learning_rate": 9.463586825834939e-07, "epoch": 2.2265625, "percentage": 74.22, "elapsed_time": "2:18:00", "remaining_time": "0:47:56"}
{"current_steps": 1740, "total_steps": 2304, "loss": 0.3639, "learning_rate": 8.589471490809473e-07, "epoch": 2.265625, "percentage": 75.52, "elapsed_time": "2:20:31", "remaining_time": "0:45:33"}
{"current_steps": 1770, "total_steps": 2304, "loss": 0.3665, "learning_rate": 7.749271123619889e-07, "epoch": 2.3046875, "percentage": 76.82, "elapsed_time": "2:22:56", "remaining_time": "0:43:07"}
{"current_steps": 1800, "total_steps": 2304, "loss": 0.3624, "learning_rate": 6.944722131988394e-07, "epoch": 2.34375, "percentage": 78.12, "elapsed_time": "2:27:47", "remaining_time": "0:41:22"}
{"current_steps": 1830, "total_steps": 2304, "loss": 0.3629, "learning_rate": 6.177487244398009e-07, "epoch": 2.3828125, "percentage": 79.43, "elapsed_time": "2:30:19", "remaining_time": "0:38:56"}
{"current_steps": 1860, "total_steps": 2304, "loss": 0.3739, "learning_rate": 5.449152073799616e-07, "epoch": 2.421875, "percentage": 80.73, "elapsed_time": "2:32:48", "remaining_time": "0:36:28"}
{"current_steps": 1890, "total_steps": 2304, "loss": 0.3723, "learning_rate": 4.761221840690586e-07, "epoch": 2.4609375, "percentage": 82.03, "elapsed_time": "2:35:16", "remaining_time": "0:34:00"}
{"current_steps": 1920, "total_steps": 2304, "loss": 0.377, "learning_rate": 4.115118262337128e-07, "epoch": 2.5, "percentage": 83.33, "elapsed_time": "2:37:49", "remaining_time": "0:31:33"}
{"current_steps": 1950, "total_steps": 2304, "loss": 0.3676, "learning_rate": 3.512176614569418e-07, "epoch": 2.5390625, "percentage": 84.64, "elapsed_time": "2:40:22", "remaining_time": "0:29:06"}
{"current_steps": 1980, "total_steps": 2304, "loss": 0.3714, "learning_rate": 2.9536429722216207e-07, "epoch": 2.578125, "percentage": 85.94, "elapsed_time": "2:42:54", "remaining_time": "0:26:39"}
{"current_steps": 2010, "total_steps": 2304, "loss": 0.3733, "learning_rate": 2.440671633920075e-07, "epoch": 2.6171875, "percentage": 87.24, "elapsed_time": "2:45:23", "remaining_time": "0:24:11"}
{"current_steps": 2040, "total_steps": 2304, "loss": 0.3694, "learning_rate": 1.9743227365415092e-07, "epoch": 2.65625, "percentage": 88.54, "elapsed_time": "2:47:46", "remaining_time": "0:21:42"}
{"current_steps": 2070, "total_steps": 2304, "loss": 0.3747, "learning_rate": 1.5555600642715442e-07, "epoch": 2.6953125, "percentage": 89.84, "elapsed_time": "2:51:43", "remaining_time": "0:19:24"}
{"current_steps": 2100, "total_steps": 2304, "loss": 0.3611, "learning_rate": 1.1852490567913655e-07, "epoch": 2.734375, "percentage": 91.15, "elapsed_time": "2:53:28", "remaining_time": "0:16:51"}
{"current_steps": 2130, "total_steps": 2304, "loss": 0.3686, "learning_rate": 8.641550207089039e-08, "epoch": 2.7734375, "percentage": 92.45, "elapsed_time": "2:55:11", "remaining_time": "0:14:18"}
{"current_steps": 2160, "total_steps": 2304, "loss": 0.3716, "learning_rate": 5.92941547931028e-08, "epoch": 2.8125, "percentage": 93.75, "elapsed_time": "2:56:53", "remaining_time": "0:11:47"}
{"current_steps": 2190, "total_steps": 2304, "loss": 0.3624, "learning_rate": 3.7216914424527686e-08, "epoch": 2.8515625, "percentage": 95.05, "elapsed_time": "2:58:37", "remaining_time": "0:09:17"}
{"current_steps": 2220, "total_steps": 2304, "loss": 0.369, "learning_rate": 2.0229407094547736e-08, "epoch": 2.890625, "percentage": 96.35, "elapsed_time": "3:00:24", "remaining_time": "0:06:49"}
{"current_steps": 2250, "total_steps": 2304, "loss": 0.3668, "learning_rate": 8.366740189520716e-09, "epoch": 2.9296875, "percentage": 97.66, "elapsed_time": "3:02:07", "remaining_time": "0:04:22"}
{"current_steps": 2280, "total_steps": 2304, "loss": 0.3621, "learning_rate": 1.6534297977804925e-09, "epoch": 2.96875, "percentage": 98.96, "elapsed_time": "3:03:51", "remaining_time": "0:01:56"}
{"current_steps": 2304, "total_steps": 2304, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "3:07:27", "remaining_time": "0:00:00"}