Llama3-Energy / trainer_log.jsonl
Zihao-Li's picture
First commit
e32f7f9
raw
history blame
39.6 kB
{"current_steps": 10, "total_steps": 1590, "loss": 2.3241, "learning_rate": 6.289308176100629e-06, "epoch": 0.018867924528301886, "percentage": 0.63, "elapsed_time": "0:01:36", "remaining_time": "4:13:44", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 20, "total_steps": 1590, "loss": 2.2496, "learning_rate": 1.2578616352201259e-05, "epoch": 0.03773584905660377, "percentage": 1.26, "elapsed_time": "0:03:02", "remaining_time": "3:59:10", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 30, "total_steps": 1590, "loss": 2.2836, "learning_rate": 1.8867924528301888e-05, "epoch": 0.05660377358490566, "percentage": 1.89, "elapsed_time": "0:04:28", "remaining_time": "3:52:26", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 40, "total_steps": 1590, "loss": 2.2567, "learning_rate": 2.5157232704402517e-05, "epoch": 0.07547169811320754, "percentage": 2.52, "elapsed_time": "0:05:53", "remaining_time": "3:48:10", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 50, "total_steps": 1590, "loss": 2.2749, "learning_rate": 3.144654088050314e-05, "epoch": 0.09433962264150944, "percentage": 3.14, "elapsed_time": "0:07:18", "remaining_time": "3:45:00", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 60, "total_steps": 1590, "loss": 2.3334, "learning_rate": 3.7735849056603776e-05, "epoch": 0.11320754716981132, "percentage": 3.77, "elapsed_time": "0:08:43", "remaining_time": "3:42:23", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 70, "total_steps": 1590, "loss": 2.3378, "learning_rate": 4.402515723270441e-05, "epoch": 0.1320754716981132, "percentage": 4.4, "elapsed_time": "0:10:08", "remaining_time": "3:40:02", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 80, "total_steps": 1590, "loss": 2.3462, "learning_rate": 5.0314465408805034e-05, "epoch": 0.1509433962264151, "percentage": 5.03, "elapsed_time": "0:11:32", "remaining_time": "3:37:55", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 90, "total_steps": 1590, "loss": 2.3832, "learning_rate": 5.660377358490566e-05, "epoch": 0.16981132075471697, "percentage": 5.66, "elapsed_time": "0:12:57", "remaining_time": "3:36:04", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 100, "total_steps": 1590, "loss": 2.3658, "learning_rate": 6.289308176100629e-05, "epoch": 0.18867924528301888, "percentage": 6.29, "elapsed_time": "0:14:22", "remaining_time": "3:34:08", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 110, "total_steps": 1590, "loss": 2.3894, "learning_rate": 6.918238993710691e-05, "epoch": 0.20754716981132076, "percentage": 6.92, "elapsed_time": "0:15:47", "remaining_time": "3:32:26", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 120, "total_steps": 1590, "loss": 2.4635, "learning_rate": 7.547169811320755e-05, "epoch": 0.22641509433962265, "percentage": 7.55, "elapsed_time": "0:17:13", "remaining_time": "3:31:05", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 130, "total_steps": 1590, "loss": 2.4099, "learning_rate": 8.176100628930818e-05, "epoch": 0.24528301886792453, "percentage": 8.18, "elapsed_time": "0:18:40", "remaining_time": "3:29:46", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 140, "total_steps": 1590, "loss": 2.4141, "learning_rate": 8.805031446540882e-05, "epoch": 0.2641509433962264, "percentage": 8.81, "elapsed_time": "0:20:07", "remaining_time": "3:28:22", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 150, "total_steps": 1590, "loss": 2.4505, "learning_rate": 9.433962264150944e-05, "epoch": 0.2830188679245283, "percentage": 9.43, "elapsed_time": "0:21:34", "remaining_time": "3:27:03", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 160, "total_steps": 1590, "loss": 2.4853, "learning_rate": 9.999987950741765e-05, "epoch": 0.3018867924528302, "percentage": 10.06, "elapsed_time": "0:23:00", "remaining_time": "3:25:41", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 170, "total_steps": 1590, "loss": 2.529, "learning_rate": 9.9985421100216e-05, "epoch": 0.32075471698113206, "percentage": 10.69, "elapsed_time": "0:24:27", "remaining_time": "3:24:19", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 180, "total_steps": 1590, "loss": 2.5123, "learning_rate": 9.99468721610658e-05, "epoch": 0.33962264150943394, "percentage": 11.32, "elapsed_time": "0:25:54", "remaining_time": "3:22:55", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 190, "total_steps": 1590, "loss": 2.5137, "learning_rate": 9.988425126867315e-05, "epoch": 0.3584905660377358, "percentage": 11.95, "elapsed_time": "0:27:20", "remaining_time": "3:21:29", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 200, "total_steps": 1590, "loss": 2.4818, "learning_rate": 9.979758860325019e-05, "epoch": 0.37735849056603776, "percentage": 12.58, "elapsed_time": "0:28:47", "remaining_time": "3:20:04", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 210, "total_steps": 1590, "loss": 2.5084, "learning_rate": 9.968692593196944e-05, "epoch": 0.39622641509433965, "percentage": 13.21, "elapsed_time": "0:30:13", "remaining_time": "3:18:40", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 220, "total_steps": 1590, "loss": 2.4667, "learning_rate": 9.955231658883432e-05, "epoch": 0.41509433962264153, "percentage": 13.84, "elapsed_time": "0:31:40", "remaining_time": "3:17:14", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 230, "total_steps": 1590, "loss": 2.4815, "learning_rate": 9.93938254489746e-05, "epoch": 0.4339622641509434, "percentage": 14.47, "elapsed_time": "0:33:07", "remaining_time": "3:15:49", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 240, "total_steps": 1590, "loss": 2.465, "learning_rate": 9.921152889737984e-05, "epoch": 0.4528301886792453, "percentage": 15.09, "elapsed_time": "0:34:33", "remaining_time": "3:14:23", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 250, "total_steps": 1590, "loss": 2.4827, "learning_rate": 9.900551479208552e-05, "epoch": 0.4716981132075472, "percentage": 15.72, "elapsed_time": "0:35:59", "remaining_time": "3:12:57", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 260, "total_steps": 1590, "loss": 2.5077, "learning_rate": 9.877588242182975e-05, "epoch": 0.49056603773584906, "percentage": 16.35, "elapsed_time": "0:37:26", "remaining_time": "3:11:31", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 270, "total_steps": 1590, "loss": 2.5812, "learning_rate": 9.852274245820096e-05, "epoch": 0.5094339622641509, "percentage": 16.98, "elapsed_time": "0:38:52", "remaining_time": "3:10:05", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 280, "total_steps": 1590, "loss": 2.5047, "learning_rate": 9.824621690229965e-05, "epoch": 0.5283018867924528, "percentage": 17.61, "elapsed_time": "0:40:19", "remaining_time": "3:08:39", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 290, "total_steps": 1590, "loss": 2.4985, "learning_rate": 9.79464390259397e-05, "epoch": 0.5471698113207547, "percentage": 18.24, "elapsed_time": "0:41:46", "remaining_time": "3:07:14", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 300, "total_steps": 1590, "loss": 2.4943, "learning_rate": 9.762355330741796e-05, "epoch": 0.5660377358490566, "percentage": 18.87, "elapsed_time": "0:43:12", "remaining_time": "3:05:48", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 310, "total_steps": 1590, "loss": 2.4536, "learning_rate": 9.727771536188275e-05, "epoch": 0.5849056603773585, "percentage": 19.5, "elapsed_time": "0:44:39", "remaining_time": "3:04:23", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 320, "total_steps": 1590, "loss": 2.4837, "learning_rate": 9.690909186633492e-05, "epoch": 0.6037735849056604, "percentage": 20.13, "elapsed_time": "0:46:06", "remaining_time": "3:02:57", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 330, "total_steps": 1590, "loss": 2.5074, "learning_rate": 9.651786047929773e-05, "epoch": 0.6226415094339622, "percentage": 20.75, "elapsed_time": "0:47:32", "remaining_time": "3:01:31", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 340, "total_steps": 1590, "loss": 2.441, "learning_rate": 9.610420975519408e-05, "epoch": 0.6415094339622641, "percentage": 21.38, "elapsed_time": "0:48:59", "remaining_time": "3:00:05", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 350, "total_steps": 1590, "loss": 2.4885, "learning_rate": 9.566833905347245e-05, "epoch": 0.660377358490566, "percentage": 22.01, "elapsed_time": "0:50:26", "remaining_time": "2:58:41", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 360, "total_steps": 1590, "loss": 2.4342, "learning_rate": 9.521045844252552e-05, "epoch": 0.6792452830188679, "percentage": 22.64, "elapsed_time": "0:51:52", "remaining_time": "2:57:15", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 370, "total_steps": 1590, "loss": 2.4425, "learning_rate": 9.473078859844728e-05, "epoch": 0.6981132075471698, "percentage": 23.27, "elapsed_time": "0:53:19", "remaining_time": "2:55:48", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 380, "total_steps": 1590, "loss": 2.4567, "learning_rate": 9.422956069867807e-05, "epoch": 0.7169811320754716, "percentage": 23.9, "elapsed_time": "0:54:45", "remaining_time": "2:54:22", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 390, "total_steps": 1590, "loss": 2.4636, "learning_rate": 9.370701631058829e-05, "epoch": 0.7358490566037735, "percentage": 24.53, "elapsed_time": "0:56:12", "remaining_time": "2:52:56", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 400, "total_steps": 1590, "loss": 2.4707, "learning_rate": 9.316340727505468e-05, "epoch": 0.7547169811320755, "percentage": 25.16, "elapsed_time": "0:57:38", "remaining_time": "2:51:29", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 410, "total_steps": 1590, "loss": 2.4242, "learning_rate": 9.259899558508543e-05, "epoch": 0.7735849056603774, "percentage": 25.79, "elapsed_time": "0:59:05", "remaining_time": "2:50:02", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 420, "total_steps": 1590, "loss": 2.4754, "learning_rate": 9.201405325955221e-05, "epoch": 0.7924528301886793, "percentage": 26.42, "elapsed_time": "1:00:31", "remaining_time": "2:48:36", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 430, "total_steps": 1590, "loss": 2.4735, "learning_rate": 9.14088622120905e-05, "epoch": 0.8113207547169812, "percentage": 27.04, "elapsed_time": "1:01:58", "remaining_time": "2:47:10", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 440, "total_steps": 1590, "loss": 2.4511, "learning_rate": 9.078371411523084e-05, "epoch": 0.8301886792452831, "percentage": 27.67, "elapsed_time": "1:03:24", "remaining_time": "2:45:44", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 450, "total_steps": 1590, "loss": 2.4627, "learning_rate": 9.013891025982704e-05, "epoch": 0.8490566037735849, "percentage": 28.3, "elapsed_time": "1:04:58", "remaining_time": "2:44:36", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 460, "total_steps": 1590, "loss": 2.4804, "learning_rate": 8.947476140984856e-05, "epoch": 0.8679245283018868, "percentage": 28.93, "elapsed_time": "1:06:45", "remaining_time": "2:43:58", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 470, "total_steps": 1590, "loss": 2.4872, "learning_rate": 8.879158765260767e-05, "epoch": 0.8867924528301887, "percentage": 29.56, "elapsed_time": "1:08:28", "remaining_time": "2:43:09", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 480, "total_steps": 1590, "loss": 2.4847, "learning_rate": 8.808971824449275e-05, "epoch": 0.9056603773584906, "percentage": 30.19, "elapsed_time": "1:10:09", "remaining_time": "2:42:15", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 490, "total_steps": 1590, "loss": 2.4873, "learning_rate": 8.736949145228295e-05, "epoch": 0.9245283018867925, "percentage": 30.82, "elapsed_time": "1:11:51", "remaining_time": "2:41:18", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 500, "total_steps": 1590, "loss": 2.4738, "learning_rate": 8.66312543901201e-05, "epoch": 0.9433962264150944, "percentage": 31.45, "elapsed_time": "1:13:32", "remaining_time": "2:40:19", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 510, "total_steps": 1590, "loss": 2.4211, "learning_rate": 8.587536285221656e-05, "epoch": 0.9622641509433962, "percentage": 32.08, "elapsed_time": "1:15:13", "remaining_time": "2:39:18", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 520, "total_steps": 1590, "loss": 2.4183, "learning_rate": 8.510218114137992e-05, "epoch": 0.9811320754716981, "percentage": 32.7, "elapsed_time": "1:16:54", "remaining_time": "2:38:15", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 530, "total_steps": 1590, "loss": 2.459, "learning_rate": 8.43120818934367e-05, "epoch": 1.0, "percentage": 33.33, "elapsed_time": "1:18:35", "remaining_time": "2:37:11", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 540, "total_steps": 1590, "loss": 1.8838, "learning_rate": 8.350544589764016e-05, "epoch": 1.0188679245283019, "percentage": 33.96, "elapsed_time": "1:20:16", "remaining_time": "2:36:05", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 550, "total_steps": 1590, "loss": 1.8624, "learning_rate": 8.268266191314848e-05, "epoch": 1.0377358490566038, "percentage": 34.59, "elapsed_time": "1:21:57", "remaining_time": "2:34:58", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 560, "total_steps": 1590, "loss": 1.8182, "learning_rate": 8.184412648166183e-05, "epoch": 1.0566037735849056, "percentage": 35.22, "elapsed_time": "1:23:38", "remaining_time": "2:33:50", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 570, "total_steps": 1590, "loss": 1.8391, "learning_rate": 8.099024373630854e-05, "epoch": 1.0754716981132075, "percentage": 35.85, "elapsed_time": "1:25:19", "remaining_time": "2:32:41", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 580, "total_steps": 1590, "loss": 1.8545, "learning_rate": 8.01214252068728e-05, "epoch": 1.0943396226415094, "percentage": 36.48, "elapsed_time": "1:27:00", "remaining_time": "2:31:30", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 590, "total_steps": 1590, "loss": 1.8367, "learning_rate": 7.923808962145734e-05, "epoch": 1.1132075471698113, "percentage": 37.11, "elapsed_time": "1:28:41", "remaining_time": "2:30:19", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 600, "total_steps": 1590, "loss": 1.8149, "learning_rate": 7.83406627046769e-05, "epoch": 1.1320754716981132, "percentage": 37.74, "elapsed_time": "1:30:22", "remaining_time": "2:29:06", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 610, "total_steps": 1590, "loss": 1.8061, "learning_rate": 7.742957697247984e-05, "epoch": 1.150943396226415, "percentage": 38.36, "elapsed_time": "1:32:03", "remaining_time": "2:27:53", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 620, "total_steps": 1590, "loss": 1.8411, "learning_rate": 7.650527152369647e-05, "epoch": 1.169811320754717, "percentage": 38.99, "elapsed_time": "1:33:44", "remaining_time": "2:26:38", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 630, "total_steps": 1590, "loss": 1.8264, "learning_rate": 7.556819182841497e-05, "epoch": 1.1886792452830188, "percentage": 39.62, "elapsed_time": "1:35:24", "remaining_time": "2:25:23", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 640, "total_steps": 1590, "loss": 1.8954, "learning_rate": 7.461878951328653e-05, "epoch": 1.2075471698113207, "percentage": 40.25, "elapsed_time": "1:37:05", "remaining_time": "2:24:07", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 650, "total_steps": 1590, "loss": 1.8346, "learning_rate": 7.365752214386321e-05, "epoch": 1.2264150943396226, "percentage": 40.88, "elapsed_time": "1:38:46", "remaining_time": "2:22:50", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 660, "total_steps": 1590, "loss": 1.8805, "learning_rate": 7.268485300407393e-05, "epoch": 1.2452830188679245, "percentage": 41.51, "elapsed_time": "1:40:27", "remaining_time": "2:21:33", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 670, "total_steps": 1590, "loss": 1.7728, "learning_rate": 7.17012508729441e-05, "epoch": 1.2641509433962264, "percentage": 42.14, "elapsed_time": "1:42:08", "remaining_time": "2:20:14", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 680, "total_steps": 1590, "loss": 1.8718, "learning_rate": 7.070718979866702e-05, "epoch": 1.2830188679245282, "percentage": 42.77, "elapsed_time": "1:43:48", "remaining_time": "2:18:55", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 690, "total_steps": 1590, "loss": 1.8535, "learning_rate": 6.970314887013584e-05, "epoch": 1.3018867924528301, "percentage": 43.4, "elapsed_time": "1:45:29", "remaining_time": "2:17:35", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 700, "total_steps": 1590, "loss": 1.8344, "learning_rate": 6.868961198604611e-05, "epoch": 1.320754716981132, "percentage": 44.03, "elapsed_time": "1:47:08", "remaining_time": "2:16:13", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 710, "total_steps": 1590, "loss": 1.8759, "learning_rate": 6.766706762168022e-05, "epoch": 1.3396226415094339, "percentage": 44.65, "elapsed_time": "1:48:46", "remaining_time": "2:14:49", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 720, "total_steps": 1590, "loss": 1.7973, "learning_rate": 6.663600859348616e-05, "epoch": 1.3584905660377358, "percentage": 45.28, "elapsed_time": "1:50:24", "remaining_time": "2:13:24", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 730, "total_steps": 1590, "loss": 1.8101, "learning_rate": 6.55969318215641e-05, "epoch": 1.3773584905660377, "percentage": 45.91, "elapsed_time": "1:52:02", "remaining_time": "2:11:59", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 740, "total_steps": 1590, "loss": 1.8574, "learning_rate": 6.455033809017512e-05, "epoch": 1.3962264150943398, "percentage": 46.54, "elapsed_time": "1:53:39", "remaining_time": "2:10:33", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 750, "total_steps": 1590, "loss": 1.8194, "learning_rate": 6.34967318063877e-05, "epoch": 1.4150943396226414, "percentage": 47.17, "elapsed_time": "1:55:17", "remaining_time": "2:09:07", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 760, "total_steps": 1590, "loss": 1.8557, "learning_rate": 6.24366207569781e-05, "epoch": 1.4339622641509435, "percentage": 47.8, "elapsed_time": "1:56:55", "remaining_time": "2:07:41", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 770, "total_steps": 1590, "loss": 1.8403, "learning_rate": 6.137051586370194e-05, "epoch": 1.4528301886792452, "percentage": 48.43, "elapsed_time": "1:58:32", "remaining_time": "2:06:14", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 780, "total_steps": 1590, "loss": 1.86, "learning_rate": 6.029893093705492e-05, "epoch": 1.4716981132075473, "percentage": 49.06, "elapsed_time": "2:00:10", "remaining_time": "2:04:47", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 790, "total_steps": 1590, "loss": 1.8223, "learning_rate": 5.9222382428641174e-05, "epoch": 1.490566037735849, "percentage": 49.69, "elapsed_time": "2:01:48", "remaining_time": "2:03:20", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 800, "total_steps": 1590, "loss": 1.7957, "learning_rate": 5.814138918226887e-05, "epoch": 1.509433962264151, "percentage": 50.31, "elapsed_time": "2:03:25", "remaining_time": "2:01:53", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 810, "total_steps": 1590, "loss": 1.8542, "learning_rate": 5.7056472183892806e-05, "epoch": 1.5283018867924527, "percentage": 50.94, "elapsed_time": "2:05:03", "remaining_time": "2:00:25", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 820, "total_steps": 1590, "loss": 1.8043, "learning_rate": 5.5968154310524614e-05, "epoch": 1.5471698113207548, "percentage": 51.57, "elapsed_time": "2:06:41", "remaining_time": "1:58:57", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 830, "total_steps": 1590, "loss": 1.7981, "learning_rate": 5.487696007823161e-05, "epoch": 1.5660377358490565, "percentage": 52.2, "elapsed_time": "2:08:19", "remaining_time": "1:57:29", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 840, "total_steps": 1590, "loss": 1.8313, "learning_rate": 5.378341538934566e-05, "epoch": 1.5849056603773586, "percentage": 52.83, "elapsed_time": "2:10:02", "remaining_time": "1:56:06", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 850, "total_steps": 1590, "loss": 1.8476, "learning_rate": 5.268804727900391e-05, "epoch": 1.6037735849056602, "percentage": 53.46, "elapsed_time": "2:11:42", "remaining_time": "1:54:40", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 860, "total_steps": 1590, "loss": 1.7863, "learning_rate": 5.159138366114358e-05, "epoch": 1.6226415094339623, "percentage": 54.09, "elapsed_time": "2:13:21", "remaining_time": "1:53:12", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 870, "total_steps": 1590, "loss": 1.8363, "learning_rate": 5.049395307407329e-05, "epoch": 1.641509433962264, "percentage": 54.72, "elapsed_time": "2:14:59", "remaining_time": "1:51:43", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 880, "total_steps": 1590, "loss": 1.8004, "learning_rate": 4.9396284425743326e-05, "epoch": 1.6603773584905661, "percentage": 55.35, "elapsed_time": "2:16:38", "remaining_time": "1:50:14", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 890, "total_steps": 1590, "loss": 1.818, "learning_rate": 4.829890673883792e-05, "epoch": 1.6792452830188678, "percentage": 55.97, "elapsed_time": "2:18:16", "remaining_time": "1:48:45", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 900, "total_steps": 1590, "loss": 1.7885, "learning_rate": 4.7202348895812035e-05, "epoch": 1.6981132075471699, "percentage": 56.6, "elapsed_time": "2:19:54", "remaining_time": "1:47:16", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 910, "total_steps": 1590, "loss": 1.7906, "learning_rate": 4.610713938399601e-05, "epoch": 1.7169811320754715, "percentage": 57.23, "elapsed_time": "2:21:33", "remaining_time": "1:45:46", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 920, "total_steps": 1590, "loss": 1.7858, "learning_rate": 4.5013806040890294e-05, "epoch": 1.7358490566037736, "percentage": 57.86, "elapsed_time": "2:23:12", "remaining_time": "1:44:17", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 930, "total_steps": 1590, "loss": 1.7796, "learning_rate": 4.392287579977374e-05, "epoch": 1.7547169811320755, "percentage": 58.49, "elapsed_time": "2:24:50", "remaining_time": "1:42:47", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 940, "total_steps": 1590, "loss": 1.7666, "learning_rate": 4.2834874435747305e-05, "epoch": 1.7735849056603774, "percentage": 59.12, "elapsed_time": "2:26:29", "remaining_time": "1:41:17", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 950, "total_steps": 1590, "loss": 1.7516, "learning_rate": 4.1750326312336254e-05, "epoch": 1.7924528301886793, "percentage": 59.75, "elapsed_time": "2:28:07", "remaining_time": "1:39:47", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 960, "total_steps": 1590, "loss": 1.7904, "learning_rate": 4.066975412877255e-05, "epoch": 1.8113207547169812, "percentage": 60.38, "elapsed_time": "2:29:45", "remaining_time": "1:38:16", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 970, "total_steps": 1590, "loss": 1.7605, "learning_rate": 3.959367866807926e-05, "epoch": 1.830188679245283, "percentage": 61.01, "elapsed_time": "2:31:24", "remaining_time": "1:36:46", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 980, "total_steps": 1590, "loss": 1.8169, "learning_rate": 3.852261854607866e-05, "epoch": 1.849056603773585, "percentage": 61.64, "elapsed_time": "2:33:02", "remaining_time": "1:35:15", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 990, "total_steps": 1590, "loss": 1.7652, "learning_rate": 3.7457089961444636e-05, "epoch": 1.8679245283018868, "percentage": 62.26, "elapsed_time": "2:34:40", "remaining_time": "1:33:44", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1000, "total_steps": 1590, "loss": 1.75, "learning_rate": 3.6397606446920294e-05, "epoch": 1.8867924528301887, "percentage": 62.89, "elapsed_time": "2:36:19", "remaining_time": "1:32:13", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1000, "total_steps": 1590, "eval_loss": 2.2884254455566406, "epoch": 1.8867924528301887, "percentage": 62.89, "elapsed_time": "2:39:04", "remaining_time": "1:33:51", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1010, "total_steps": 1590, "loss": 1.7847, "learning_rate": 3.534467862182008e-05, "epoch": 1.9056603773584906, "percentage": 63.52, "elapsed_time": "2:41:45", "remaining_time": "1:32:53", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1020, "total_steps": 1590, "loss": 1.7737, "learning_rate": 3.4298813945936295e-05, "epoch": 1.9245283018867925, "percentage": 64.15, "elapsed_time": "2:43:27", "remaining_time": "1:31:20", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1030, "total_steps": 1590, "loss": 1.7281, "learning_rate": 3.3260516474968285e-05, "epoch": 1.9433962264150944, "percentage": 64.78, "elapsed_time": "2:45:08", "remaining_time": "1:29:46", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1040, "total_steps": 1590, "loss": 1.7924, "learning_rate": 3.223028661759211e-05, "epoch": 1.9622641509433962, "percentage": 65.41, "elapsed_time": "2:46:48", "remaining_time": "1:28:12", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1050, "total_steps": 1590, "loss": 1.7397, "learning_rate": 3.12086208942881e-05, "epoch": 1.9811320754716981, "percentage": 66.04, "elapsed_time": "2:48:28", "remaining_time": "1:26:38", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1060, "total_steps": 1590, "loss": 1.6932, "learning_rate": 3.019601169804216e-05, "epoch": 2.0, "percentage": 66.67, "elapsed_time": "2:50:09", "remaining_time": "1:25:04", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1070, "total_steps": 1590, "loss": 0.6881, "learning_rate": 2.919294705703647e-05, "epoch": 2.018867924528302, "percentage": 67.3, "elapsed_time": "2:51:49", "remaining_time": "1:23:30", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1080, "total_steps": 1590, "loss": 0.6078, "learning_rate": 2.819991039944363e-05, "epoch": 2.0377358490566038, "percentage": 67.92, "elapsed_time": "2:53:29", "remaining_time": "1:21:55", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1090, "total_steps": 1590, "loss": 0.6092, "learning_rate": 2.7217380320437978e-05, "epoch": 2.056603773584906, "percentage": 68.55, "elapsed_time": "2:55:09", "remaining_time": "1:20:20", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1100, "total_steps": 1590, "loss": 0.585, "learning_rate": 2.624583035153609e-05, "epoch": 2.0754716981132075, "percentage": 69.18, "elapsed_time": "2:56:49", "remaining_time": "1:18:45", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1110, "total_steps": 1590, "loss": 0.577, "learning_rate": 2.5285728732377613e-05, "epoch": 2.0943396226415096, "percentage": 69.81, "elapsed_time": "2:58:29", "remaining_time": "1:17:10", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1120, "total_steps": 1590, "loss": 0.551, "learning_rate": 2.4337538185056762e-05, "epoch": 2.1132075471698113, "percentage": 70.44, "elapsed_time": "3:00:08", "remaining_time": "1:15:35", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1130, "total_steps": 1590, "loss": 0.556, "learning_rate": 2.3401715691112746e-05, "epoch": 2.1320754716981134, "percentage": 71.07, "elapsed_time": "3:01:48", "remaining_time": "1:14:00", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1140, "total_steps": 1590, "loss": 0.5711, "learning_rate": 2.247871227128709e-05, "epoch": 2.150943396226415, "percentage": 71.7, "elapsed_time": "3:03:28", "remaining_time": "1:12:25", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1150, "total_steps": 1590, "loss": 0.5601, "learning_rate": 2.1568972768153556e-05, "epoch": 2.169811320754717, "percentage": 72.33, "elapsed_time": "3:05:07", "remaining_time": "1:10:50", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1160, "total_steps": 1590, "loss": 0.5609, "learning_rate": 2.067293563172581e-05, "epoch": 2.188679245283019, "percentage": 72.96, "elapsed_time": "3:06:47", "remaining_time": "1:09:14", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1170, "total_steps": 1590, "loss": 0.5417, "learning_rate": 1.9791032708145963e-05, "epoch": 2.207547169811321, "percentage": 73.58, "elapsed_time": "3:08:27", "remaining_time": "1:07:39", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1180, "total_steps": 1590, "loss": 0.5635, "learning_rate": 1.8923689031555697e-05, "epoch": 2.2264150943396226, "percentage": 74.21, "elapsed_time": "3:10:07", "remaining_time": "1:06:03", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1190, "total_steps": 1590, "loss": 0.5371, "learning_rate": 1.807132261925073e-05, "epoch": 2.2452830188679247, "percentage": 74.84, "elapsed_time": "3:11:47", "remaining_time": "1:04:28", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1200, "total_steps": 1590, "loss": 0.5459, "learning_rate": 1.7234344270216713e-05, "epoch": 2.2641509433962264, "percentage": 75.47, "elapsed_time": "3:13:27", "remaining_time": "1:02:52", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1210, "total_steps": 1590, "loss": 0.5608, "learning_rate": 1.6413157367144354e-05, "epoch": 2.2830188679245285, "percentage": 76.1, "elapsed_time": "3:15:12", "remaining_time": "1:01:18", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1220, "total_steps": 1590, "loss": 0.5613, "learning_rate": 1.5608157682018505e-05, "epoch": 2.30188679245283, "percentage": 76.73, "elapsed_time": "3:16:53", "remaining_time": "0:59:42", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1230, "total_steps": 1590, "loss": 0.537, "learning_rate": 1.4819733185375534e-05, "epoch": 2.3207547169811322, "percentage": 77.36, "elapsed_time": "3:18:33", "remaining_time": "0:58:06", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1240, "total_steps": 1590, "loss": 0.5425, "learning_rate": 1.4048263859320344e-05, "epoch": 2.339622641509434, "percentage": 77.99, "elapsed_time": "3:20:13", "remaining_time": "0:56:31", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1250, "total_steps": 1590, "loss": 0.5289, "learning_rate": 1.3294121514393637e-05, "epoch": 2.358490566037736, "percentage": 78.62, "elapsed_time": "3:21:53", "remaining_time": "0:54:54", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1260, "total_steps": 1590, "loss": 0.5155, "learning_rate": 1.2557669610377399e-05, "epoch": 2.3773584905660377, "percentage": 79.25, "elapsed_time": "3:23:34", "remaining_time": "0:53:18", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1270, "total_steps": 1590, "loss": 0.5214, "learning_rate": 1.1839263081124946e-05, "epoch": 2.3962264150943398, "percentage": 79.87, "elapsed_time": "3:25:14", "remaining_time": "0:51:42", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1280, "total_steps": 1590, "loss": 0.5326, "learning_rate": 1.113924816350026e-05, "epoch": 2.4150943396226414, "percentage": 80.5, "elapsed_time": "3:26:54", "remaining_time": "0:50:06", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1290, "total_steps": 1590, "loss": 0.5218, "learning_rate": 1.04579622305086e-05, "epoch": 2.4339622641509435, "percentage": 81.13, "elapsed_time": "3:28:34", "remaining_time": "0:48:30", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1300, "total_steps": 1590, "loss": 0.5341, "learning_rate": 9.795733628699333e-06, "epoch": 2.452830188679245, "percentage": 81.76, "elapsed_time": "3:30:14", "remaining_time": "0:46:54", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1310, "total_steps": 1590, "loss": 0.5102, "learning_rate": 9.152881519918787e-06, "epoch": 2.4716981132075473, "percentage": 82.39, "elapsed_time": "3:31:54", "remaining_time": "0:45:17", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1320, "total_steps": 1590, "loss": 0.5113, "learning_rate": 8.529715727489912e-06, "epoch": 2.490566037735849, "percentage": 83.02, "elapsed_time": "3:33:34", "remaining_time": "0:43:41", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1330, "total_steps": 1590, "loss": 0.51, "learning_rate": 7.926536586892591e-06, "epoch": 2.509433962264151, "percentage": 83.65, "elapsed_time": "3:35:14", "remaining_time": "0:42:04", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1340, "total_steps": 1590, "loss": 0.5075, "learning_rate": 7.3436348010165025e-06, "epoch": 2.5283018867924527, "percentage": 84.28, "elapsed_time": "3:36:54", "remaining_time": "0:40:28", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1350, "total_steps": 1590, "loss": 0.5111, "learning_rate": 6.781291300056647e-06, "epoch": 2.547169811320755, "percentage": 84.91, "elapsed_time": "3:38:34", "remaining_time": "0:38:51", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1360, "total_steps": 1590, "loss": 0.501, "learning_rate": 6.239777106118605e-06, "epoch": 2.5660377358490565, "percentage": 85.53, "elapsed_time": "3:40:14", "remaining_time": "0:37:14", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1370, "total_steps": 1590, "loss": 0.5065, "learning_rate": 5.719353202599209e-06, "epoch": 2.5849056603773586, "percentage": 86.16, "elapsed_time": "3:41:54", "remaining_time": "0:35:38", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1380, "total_steps": 1590, "loss": 0.5268, "learning_rate": 5.220270408405198e-06, "epoch": 2.6037735849056602, "percentage": 86.79, "elapsed_time": "3:43:35", "remaining_time": "0:34:01", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1390, "total_steps": 1590, "loss": 0.5225, "learning_rate": 4.7427692570708445e-06, "epoch": 2.6226415094339623, "percentage": 87.42, "elapsed_time": "3:45:16", "remaining_time": "0:32:24", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1400, "total_steps": 1590, "loss": 0.5094, "learning_rate": 4.287079880832478e-06, "epoch": 2.641509433962264, "percentage": 88.05, "elapsed_time": "3:47:02", "remaining_time": "0:30:48", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1410, "total_steps": 1590, "loss": 0.4991, "learning_rate": 3.853421899715992e-06, "epoch": 2.660377358490566, "percentage": 88.68, "elapsed_time": "3:48:43", "remaining_time": "0:29:11", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1420, "total_steps": 1590, "loss": 0.5011, "learning_rate": 3.44200431569075e-06, "epoch": 2.6792452830188678, "percentage": 89.31, "elapsed_time": "3:50:24", "remaining_time": "0:27:35", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1430, "total_steps": 1590, "loss": 0.4954, "learning_rate": 3.053025411940802e-06, "epoch": 2.69811320754717, "percentage": 89.94, "elapsed_time": "3:52:05", "remaining_time": "0:25:58", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1440, "total_steps": 1590, "loss": 0.5054, "learning_rate": 2.6866726573021026e-06, "epoch": 2.7169811320754715, "percentage": 90.57, "elapsed_time": "3:53:46", "remaining_time": "0:24:21", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1450, "total_steps": 1590, "loss": 0.5154, "learning_rate": 2.3431226159116637e-06, "epoch": 2.7358490566037736, "percentage": 91.19, "elapsed_time": "3:55:27", "remaining_time": "0:22:43", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1460, "total_steps": 1590, "loss": 0.5029, "learning_rate": 2.022540862112282e-06, "epoch": 2.7547169811320753, "percentage": 91.82, "elapsed_time": "3:57:07", "remaining_time": "0:21:06", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1470, "total_steps": 1590, "loss": 0.5147, "learning_rate": 1.725081900653791e-06, "epoch": 2.7735849056603774, "percentage": 92.45, "elapsed_time": "3:58:48", "remaining_time": "0:19:29", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1480, "total_steps": 1590, "loss": 0.4882, "learning_rate": 1.4508890922293018e-06, "epoch": 2.7924528301886795, "percentage": 93.08, "elapsed_time": "4:00:29", "remaining_time": "0:17:52", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1490, "total_steps": 1590, "loss": 0.4909, "learning_rate": 1.2000945843823551e-06, "epoch": 2.811320754716981, "percentage": 93.71, "elapsed_time": "4:02:09", "remaining_time": "0:16:15", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1500, "total_steps": 1590, "loss": 0.485, "learning_rate": 9.728192478182574e-07, "epoch": 2.830188679245283, "percentage": 94.34, "elapsed_time": "4:03:50", "remaining_time": "0:14:37", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1510, "total_steps": 1590, "loss": 0.4985, "learning_rate": 7.691726181503267e-07, "epoch": 2.849056603773585, "percentage": 94.97, "elapsed_time": "4:05:30", "remaining_time": "0:13:00", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1520, "total_steps": 1590, "loss": 0.4816, "learning_rate": 5.892528431090393e-07, "epoch": 2.867924528301887, "percentage": 95.6, "elapsed_time": "4:07:11", "remaining_time": "0:11:23", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1530, "total_steps": 1590, "loss": 0.4955, "learning_rate": 4.331466352396396e-07, "epoch": 2.8867924528301887, "percentage": 96.23, "elapsed_time": "4:08:52", "remaining_time": "0:09:45", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1540, "total_steps": 1590, "loss": 0.5018, "learning_rate": 3.009292301109412e-07, "epoch": 2.9056603773584904, "percentage": 96.86, "elapsed_time": "4:10:32", "remaining_time": "0:08:08", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1550, "total_steps": 1590, "loss": 0.5011, "learning_rate": 1.9266435005540483e-07, "epoch": 2.9245283018867925, "percentage": 97.48, "elapsed_time": "4:12:13", "remaining_time": "0:06:30", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1560, "total_steps": 1590, "loss": 0.5141, "learning_rate": 1.0840417345814313e-07, "epoch": 2.9433962264150946, "percentage": 98.11, "elapsed_time": "4:13:53", "remaining_time": "0:04:52", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1570, "total_steps": 1590, "loss": 0.4904, "learning_rate": 4.818930960945878e-08, "epoch": 2.9622641509433962, "percentage": 98.74, "elapsed_time": "4:15:34", "remaining_time": "0:03:15", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1580, "total_steps": 1590, "loss": 0.4746, "learning_rate": 1.2048779133150279e-08, "epoch": 2.981132075471698, "percentage": 99.37, "elapsed_time": "4:17:14", "remaining_time": "0:01:37", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1590, "total_steps": 1590, "loss": 0.5039, "learning_rate": 0.0, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "4:18:56", "remaining_time": "0:00:00", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 1590, "total_steps": 1590, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "4:19:44", "remaining_time": "0:00:00", "throughput": "0.00", "total_tokens": 0}