|
{"current_steps": 5, "total_steps": 1200, "loss": 1.8836, "lr": 8.333333333333334e-06, "epoch": 0.0014771048744460858, "percentage": 0.42, "elapsed_time": "0:02:44", "remaining_time": "10:55:34", "throughput": 317.71, "total_tokens": 52288} |
|
{"current_steps": 5, "total_steps": 6770, "loss": 1.8844, "lr": 1.4749262536873157e-06, "epoch": 0.0014771048744460858, "percentage": 0.07, "elapsed_time": "0:02:46", "remaining_time": "2 days, 14:44:41", "throughput": 313.2, "total_tokens": 52288} |
|
{"current_steps": 10, "total_steps": 6770, "loss": 1.9494, "lr": 2.9498525073746313e-06, "epoch": 0.0029542097488921715, "percentage": 0.15, "elapsed_time": "0:04:15", "remaining_time": "1 day, 23:59:44", "throughput": 406.79, "total_tokens": 103976} |
|
{"current_steps": 15, "total_steps": 6770, "loss": 1.995, "lr": 4.424778761061947e-06, "epoch": 0.004431314623338257, "percentage": 0.22, "elapsed_time": "0:05:43", "remaining_time": "1 day, 18:55:19", "throughput": 453.37, "total_tokens": 155560} |
|
{"current_steps": 20, "total_steps": 6770, "loss": 2.0327, "lr": 5.899705014749263e-06, "epoch": 0.005908419497784343, "percentage": 0.3, "elapsed_time": "0:07:11", "remaining_time": "1 day, 16:26:17", "throughput": 478.79, "total_tokens": 206520} |
|
{"current_steps": 25, "total_steps": 6770, "loss": 1.9153, "lr": 7.374631268436579e-06, "epoch": 0.007385524372230428, "percentage": 0.37, "elapsed_time": "0:08:38", "remaining_time": "1 day, 14:51:44", "throughput": 498.44, "total_tokens": 258464} |
|
{"current_steps": 30, "total_steps": 6770, "loss": 1.9723, "lr": 8.849557522123894e-06, "epoch": 0.008862629246676515, "percentage": 0.44, "elapsed_time": "0:10:06", "remaining_time": "1 day, 13:51:32", "throughput": 510.68, "total_tokens": 309800} |
|
{"current_steps": 35, "total_steps": 6770, "loss": 1.6646, "lr": 1.032448377581121e-05, "epoch": 0.0103397341211226, "percentage": 0.52, "elapsed_time": "0:11:34", "remaining_time": "1 day, 13:06:20", "throughput": 520.35, "total_tokens": 361216} |
|
{"current_steps": 40, "total_steps": 6770, "loss": 1.7057, "lr": 1.1799410029498525e-05, "epoch": 0.011816838995568686, "percentage": 0.59, "elapsed_time": "0:13:02", "remaining_time": "1 day, 12:34:38", "throughput": 527.29, "total_tokens": 412680} |
|
{"current_steps": 45, "total_steps": 6770, "loss": 1.4552, "lr": 1.3274336283185843e-05, "epoch": 0.013293943870014771, "percentage": 0.66, "elapsed_time": "0:14:30", "remaining_time": "1 day, 12:07:14", "throughput": 534.0, "total_tokens": 464640} |
|
{"current_steps": 50, "total_steps": 6770, "loss": 1.3918, "lr": 1.4749262536873157e-05, "epoch": 0.014771048744460856, "percentage": 0.74, "elapsed_time": "0:15:58", "remaining_time": "1 day, 11:47:49", "throughput": 538.4, "total_tokens": 516240} |
|
{"current_steps": 50, "total_steps": 6770, "eval_loss": 1.042170763015747, "epoch": 0.014771048744460856, "percentage": 0.74, "elapsed_time": "0:16:47", "remaining_time": "1 day, 13:37:05", "throughput": 512.33, "total_tokens": 516240} |
|
{"current_steps": 55, "total_steps": 6770, "loss": 1.2308, "lr": 1.6224188790560475e-05, "epoch": 0.01624815361890694, "percentage": 0.81, "elapsed_time": "0:18:22", "remaining_time": "1 day, 13:24:07", "throughput": 514.61, "total_tokens": 567536} |
|
{"current_steps": 60, "total_steps": 6770, "loss": 1.0922, "lr": 1.7699115044247787e-05, "epoch": 0.01772525849335303, "percentage": 0.89, "elapsed_time": "0:19:50", "remaining_time": "1 day, 12:58:38", "throughput": 520.35, "total_tokens": 619392} |
|
{"current_steps": 65, "total_steps": 6770, "loss": 0.9517, "lr": 1.9174041297935107e-05, "epoch": 0.019202363367799114, "percentage": 0.96, "elapsed_time": "0:21:16", "remaining_time": "1 day, 12:34:14", "throughput": 525.87, "total_tokens": 671168} |
|
{"current_steps": 70, "total_steps": 6770, "loss": 0.9277, "lr": 2.064896755162242e-05, "epoch": 0.0206794682422452, "percentage": 1.03, "elapsed_time": "0:22:43", "remaining_time": "1 day, 12:15:52", "throughput": 529.67, "total_tokens": 722464} |
|
{"current_steps": 75, "total_steps": 6770, "loss": 0.8741, "lr": 2.2123893805309738e-05, "epoch": 0.022156573116691284, "percentage": 1.11, "elapsed_time": "0:24:09", "remaining_time": "1 day, 11:57:07", "throughput": 533.91, "total_tokens": 774120} |
|
{"current_steps": 80, "total_steps": 6770, "loss": 0.8837, "lr": 2.359882005899705e-05, "epoch": 0.023633677991137372, "percentage": 1.18, "elapsed_time": "0:25:37", "remaining_time": "1 day, 11:42:56", "throughput": 537.19, "total_tokens": 825944} |
|
{"current_steps": 85, "total_steps": 6770, "loss": 0.8658, "lr": 2.5073746312684367e-05, "epoch": 0.025110782865583457, "percentage": 1.26, "elapsed_time": "0:27:03", "remaining_time": "1 day, 11:28:18", "throughput": 540.52, "total_tokens": 877632} |
|
{"current_steps": 90, "total_steps": 6770, "loss": 0.8626, "lr": 2.6548672566371686e-05, "epoch": 0.026587887740029542, "percentage": 1.33, "elapsed_time": "0:28:32", "remaining_time": "1 day, 11:18:12", "throughput": 542.34, "total_tokens": 928664} |
|
{"current_steps": 95, "total_steps": 6770, "loss": 0.828, "lr": 2.8023598820059e-05, "epoch": 0.028064992614475627, "percentage": 1.4, "elapsed_time": "0:29:59", "remaining_time": "1 day, 11:07:21", "throughput": 544.65, "total_tokens": 980120} |
|
{"current_steps": 100, "total_steps": 6770, "loss": 0.8208, "lr": 2.9498525073746314e-05, "epoch": 0.029542097488921712, "percentage": 1.48, "elapsed_time": "0:31:27", "remaining_time": "1 day, 10:58:11", "throughput": 546.08, "total_tokens": 1030696} |
|
{"current_steps": 100, "total_steps": 6770, "eval_loss": 0.8917127847671509, "epoch": 0.029542097488921712, "percentage": 1.48, "elapsed_time": "0:31:46", "remaining_time": "1 day, 11:19:33", "throughput": 540.58, "total_tokens": 1030696} |
|
{"current_steps": 105, "total_steps": 6770, "loss": 0.8858, "lr": 3.097345132743363e-05, "epoch": 0.0310192023633678, "percentage": 1.55, "elapsed_time": "0:33:20", "remaining_time": "1 day, 11:16:26", "throughput": 541.44, "total_tokens": 1083184} |
|
{"current_steps": 110, "total_steps": 6770, "loss": 0.8395, "lr": 3.244837758112095e-05, "epoch": 0.03249630723781388, "percentage": 1.62, "elapsed_time": "0:34:47", "remaining_time": "1 day, 11:06:02", "throughput": 543.93, "total_tokens": 1135216} |
|
{"current_steps": 115, "total_steps": 6770, "loss": 0.8729, "lr": 3.3923303834808265e-05, "epoch": 0.033973412112259974, "percentage": 1.7, "elapsed_time": "0:36:14", "remaining_time": "1 day, 10:57:04", "throughput": 546.2, "total_tokens": 1187592} |
|
{"current_steps": 120, "total_steps": 6770, "loss": 0.8534, "lr": 3.5398230088495574e-05, "epoch": 0.03545051698670606, "percentage": 1.77, "elapsed_time": "0:37:39", "remaining_time": "1 day, 10:47:09", "throughput": 548.52, "total_tokens": 1239544} |
|
{"current_steps": 125, "total_steps": 6770, "loss": 0.8621, "lr": 3.687315634218289e-05, "epoch": 0.03692762186115214, "percentage": 1.85, "elapsed_time": "0:39:07", "remaining_time": "1 day, 10:39:29", "throughput": 550.26, "total_tokens": 1291496} |
|
{"current_steps": 130, "total_steps": 6770, "loss": 0.8548, "lr": 3.834808259587021e-05, "epoch": 0.03840472673559823, "percentage": 1.92, "elapsed_time": "0:40:32", "remaining_time": "1 day, 10:30:49", "throughput": 552.78, "total_tokens": 1344704} |
|
{"current_steps": 135, "total_steps": 6770, "loss": 0.8555, "lr": 3.982300884955752e-05, "epoch": 0.03988183161004431, "percentage": 1.99, "elapsed_time": "0:41:59", "remaining_time": "1 day, 10:24:08", "throughput": 554.16, "total_tokens": 1396432} |
|
{"current_steps": 140, "total_steps": 6770, "loss": 0.8503, "lr": 4.129793510324484e-05, "epoch": 0.0413589364844904, "percentage": 2.07, "elapsed_time": "0:43:25", "remaining_time": "1 day, 10:16:23", "throughput": 555.89, "total_tokens": 1448304} |
|
{"current_steps": 145, "total_steps": 6770, "loss": 0.7974, "lr": 4.2772861356932154e-05, "epoch": 0.04283604135893648, "percentage": 2.14, "elapsed_time": "0:44:52", "remaining_time": "1 day, 10:10:28", "throughput": 557.24, "total_tokens": 1500480} |
|
{"current_steps": 150, "total_steps": 6770, "loss": 0.8125, "lr": 4.4247787610619477e-05, "epoch": 0.04431314623338257, "percentage": 2.22, "elapsed_time": "0:46:18", "remaining_time": "1 day, 10:03:34", "throughput": 558.18, "total_tokens": 1550792} |
|
{"current_steps": 150, "total_steps": 6770, "eval_loss": 0.9009397625923157, "epoch": 0.04431314623338257, "percentage": 2.22, "elapsed_time": "0:46:37", "remaining_time": "1 day, 10:17:38", "throughput": 554.37, "total_tokens": 1550792} |
|
{"current_steps": 155, "total_steps": 6770, "loss": 0.8444, "lr": 4.5722713864306786e-05, "epoch": 0.04579025110782865, "percentage": 2.29, "elapsed_time": "0:48:09", "remaining_time": "1 day, 10:15:29", "throughput": 554.6, "total_tokens": 1602680} |
|
{"current_steps": 160, "total_steps": 6770, "loss": 0.8832, "lr": 4.71976401179941e-05, "epoch": 0.047267355982274745, "percentage": 2.36, "elapsed_time": "0:49:35", "remaining_time": "1 day, 10:08:39", "throughput": 556.3, "total_tokens": 1655184} |
|
{"current_steps": 165, "total_steps": 6770, "loss": 0.8428, "lr": 4.867256637168142e-05, "epoch": 0.04874446085672083, "percentage": 2.44, "elapsed_time": "0:51:01", "remaining_time": "1 day, 10:02:52", "throughput": 557.66, "total_tokens": 1707544} |
|
{"current_steps": 170, "total_steps": 6770, "loss": 0.8235, "lr": 5.014749262536873e-05, "epoch": 0.050221565731166914, "percentage": 2.51, "elapsed_time": "0:52:28", "remaining_time": "1 day, 9:56:57", "throughput": 558.86, "total_tokens": 1759296} |
|
{"current_steps": 175, "total_steps": 6770, "loss": 0.8293, "lr": 5.162241887905604e-05, "epoch": 0.051698670605613, "percentage": 2.58, "elapsed_time": "0:53:54", "remaining_time": "1 day, 9:51:42", "throughput": 560.32, "total_tokens": 1812488} |
|
{"current_steps": 180, "total_steps": 6770, "loss": 0.8284, "lr": 5.309734513274337e-05, "epoch": 0.053175775480059084, "percentage": 2.66, "elapsed_time": "0:55:20", "remaining_time": "1 day, 9:46:19", "throughput": 561.43, "total_tokens": 1864408} |
|
{"current_steps": 185, "total_steps": 6770, "loss": 0.8268, "lr": 5.457227138643069e-05, "epoch": 0.05465288035450517, "percentage": 2.73, "elapsed_time": "0:56:47", "remaining_time": "1 day, 9:41:18", "throughput": 562.56, "total_tokens": 1916744} |
|
{"current_steps": 190, "total_steps": 6770, "loss": 0.8153, "lr": 5.6047197640118e-05, "epoch": 0.056129985228951254, "percentage": 2.81, "elapsed_time": "0:58:13", "remaining_time": "1 day, 9:36:32", "throughput": 563.34, "total_tokens": 1968128} |
|
{"current_steps": 195, "total_steps": 6770, "loss": 0.8123, "lr": 5.752212389380531e-05, "epoch": 0.05760709010339734, "percentage": 2.88, "elapsed_time": "0:59:40", "remaining_time": "1 day, 9:31:54", "throughput": 564.03, "total_tokens": 2019312} |
|
{"current_steps": 200, "total_steps": 6770, "loss": 0.7675, "lr": 5.899705014749263e-05, "epoch": 0.059084194977843424, "percentage": 2.95, "elapsed_time": "1:01:06", "remaining_time": "1 day, 9:27:19", "throughput": 564.91, "total_tokens": 2071176} |
|
{"current_steps": 200, "total_steps": 6770, "eval_loss": 0.9007444977760315, "epoch": 0.059084194977843424, "percentage": 2.95, "elapsed_time": "1:01:25", "remaining_time": "1 day, 9:37:46", "throughput": 561.99, "total_tokens": 2071176} |
|
{"current_steps": 205, "total_steps": 6770, "loss": 0.8075, "lr": 6.0471976401179945e-05, "epoch": 0.060561299852289516, "percentage": 3.03, "elapsed_time": "1:02:56", "remaining_time": "1 day, 9:35:48", "throughput": 561.95, "total_tokens": 2122328} |
|
{"current_steps": 210, "total_steps": 6770, "loss": 0.8207, "lr": 6.194690265486725e-05, "epoch": 0.0620384047267356, "percentage": 3.1, "elapsed_time": "1:04:24", "remaining_time": "1 day, 9:31:45", "throughput": 562.82, "total_tokens": 2174744} |
|
{"current_steps": 215, "total_steps": 6770, "loss": 0.7867, "lr": 6.342182890855458e-05, "epoch": 0.06351550960118169, "percentage": 3.18, "elapsed_time": "1:05:49", "remaining_time": "1 day, 9:26:57", "throughput": 563.89, "total_tokens": 2227136} |
|
{"current_steps": 220, "total_steps": 6770, "loss": 0.8256, "lr": 6.48967551622419e-05, "epoch": 0.06499261447562776, "percentage": 3.25, "elapsed_time": "1:07:16", "remaining_time": "1 day, 9:23:02", "throughput": 564.47, "total_tokens": 2278568} |
|
{"current_steps": 225, "total_steps": 6770, "loss": 0.7897, "lr": 6.637168141592921e-05, "epoch": 0.06646971935007386, "percentage": 3.32, "elapsed_time": "1:08:42", "remaining_time": "1 day, 9:18:28", "throughput": 565.29, "total_tokens": 2330224} |
|
{"current_steps": 230, "total_steps": 6770, "loss": 0.792, "lr": 6.784660766961653e-05, "epoch": 0.06794682422451995, "percentage": 3.4, "elapsed_time": "1:10:08", "remaining_time": "1 day, 9:14:40", "throughput": 565.78, "total_tokens": 2381344} |
|
{"current_steps": 235, "total_steps": 6770, "loss": 0.8309, "lr": 6.932153392330384e-05, "epoch": 0.06942392909896603, "percentage": 3.47, "elapsed_time": "1:11:34", "remaining_time": "1 day, 9:10:23", "throughput": 566.34, "total_tokens": 2432136} |
|
{"current_steps": 240, "total_steps": 6770, "loss": 0.7974, "lr": 7.079646017699115e-05, "epoch": 0.07090103397341212, "percentage": 3.55, "elapsed_time": "1:13:01", "remaining_time": "1 day, 9:06:53", "throughput": 566.83, "total_tokens": 2483568} |
|
{"current_steps": 245, "total_steps": 6770, "loss": 0.7739, "lr": 7.227138643067847e-05, "epoch": 0.0723781388478582, "percentage": 3.62, "elapsed_time": "1:14:27", "remaining_time": "1 day, 9:03:07", "throughput": 567.41, "total_tokens": 2535040} |
|
{"current_steps": 250, "total_steps": 6770, "loss": 0.7558, "lr": 7.374631268436578e-05, "epoch": 0.07385524372230429, "percentage": 3.69, "elapsed_time": "1:15:54", "remaining_time": "1 day, 8:59:44", "throughput": 568.06, "total_tokens": 2587272} |
|
{"current_steps": 250, "total_steps": 6770, "eval_loss": 0.810763955116272, "epoch": 0.07385524372230429, "percentage": 3.69, "elapsed_time": "1:16:13", "remaining_time": "1 day, 9:08:04", "throughput": 565.67, "total_tokens": 2587272} |
|
{"current_steps": 255, "total_steps": 6770, "loss": 0.7851, "lr": 7.522123893805309e-05, "epoch": 0.07533234859675036, "percentage": 3.77, "elapsed_time": "1:17:45", "remaining_time": "1 day, 9:06:30", "throughput": 565.6, "total_tokens": 2638632} |
|
{"current_steps": 260, "total_steps": 6770, "loss": 0.7211, "lr": 7.669616519174043e-05, "epoch": 0.07680945347119646, "percentage": 3.84, "elapsed_time": "1:19:12", "remaining_time": "1 day, 9:03:07", "throughput": 566.27, "total_tokens": 2691016} |
|
{"current_steps": 265, "total_steps": 6770, "loss": 0.8082, "lr": 7.817109144542774e-05, "epoch": 0.07828655834564253, "percentage": 3.91, "elapsed_time": "1:20:38", "remaining_time": "1 day, 8:59:25", "throughput": 566.92, "total_tokens": 2742912} |
|
{"current_steps": 270, "total_steps": 6770, "loss": 0.7485, "lr": 7.964601769911504e-05, "epoch": 0.07976366322008863, "percentage": 3.99, "elapsed_time": "1:22:04", "remaining_time": "1 day, 8:56:03", "throughput": 567.57, "total_tokens": 2795264} |
|
{"current_steps": 275, "total_steps": 6770, "loss": 0.7454, "lr": 8.112094395280237e-05, "epoch": 0.08124076809453472, "percentage": 4.06, "elapsed_time": "1:23:31", "remaining_time": "1 day, 8:52:33", "throughput": 568.0, "total_tokens": 2846344} |
|
{"current_steps": 280, "total_steps": 6770, "loss": 0.7258, "lr": 8.259587020648968e-05, "epoch": 0.0827178729689808, "percentage": 4.14, "elapsed_time": "1:24:57", "remaining_time": "1 day, 8:49:18", "throughput": 568.55, "total_tokens": 2898304} |
|
{"current_steps": 285, "total_steps": 6770, "loss": 0.7863, "lr": 8.4070796460177e-05, "epoch": 0.08419497784342689, "percentage": 4.21, "elapsed_time": "1:26:24", "remaining_time": "1 day, 8:46:15", "throughput": 569.24, "total_tokens": 2951368} |
|
{"current_steps": 290, "total_steps": 6770, "loss": 0.7105, "lr": 8.554572271386431e-05, "epoch": 0.08567208271787297, "percentage": 4.28, "elapsed_time": "1:27:51", "remaining_time": "1 day, 8:43:00", "throughput": 569.77, "total_tokens": 3003288} |
|
{"current_steps": 295, "total_steps": 6770, "loss": 0.6885, "lr": 8.702064896755162e-05, "epoch": 0.08714918759231906, "percentage": 4.36, "elapsed_time": "1:29:17", "remaining_time": "1 day, 8:39:53", "throughput": 570.19, "total_tokens": 3054808} |
|
{"current_steps": 300, "total_steps": 6770, "loss": 0.78, "lr": 8.849557522123895e-05, "epoch": 0.08862629246676514, "percentage": 4.43, "elapsed_time": "1:30:43", "remaining_time": "1 day, 8:36:32", "throughput": 570.84, "total_tokens": 3107200} |
|
{"current_steps": 300, "total_steps": 6770, "eval_loss": 0.8194220662117004, "epoch": 0.08862629246676514, "percentage": 4.43, "elapsed_time": "1:31:02", "remaining_time": "1 day, 8:43:25", "throughput": 568.83, "total_tokens": 3107200} |
|
{"current_steps": 305, "total_steps": 6770, "loss": 0.7394, "lr": 8.997050147492626e-05, "epoch": 0.09010339734121123, "percentage": 4.51, "elapsed_time": "1:32:35", "remaining_time": "1 day, 8:42:43", "throughput": 568.54, "total_tokens": 3158648} |
|
{"current_steps": 310, "total_steps": 6770, "loss": 0.7371, "lr": 9.144542772861357e-05, "epoch": 0.0915805022156573, "percentage": 4.58, "elapsed_time": "1:34:01", "remaining_time": "1 day, 8:39:19", "throughput": 569.11, "total_tokens": 3210560} |
|
{"current_steps": 315, "total_steps": 6770, "loss": 0.7622, "lr": 9.29203539823009e-05, "epoch": 0.0930576070901034, "percentage": 4.65, "elapsed_time": "1:35:29", "remaining_time": "1 day, 8:36:39", "throughput": 569.67, "total_tokens": 3263664} |
|
{"current_steps": 320, "total_steps": 6770, "loss": 0.7214, "lr": 9.43952802359882e-05, "epoch": 0.09453471196454949, "percentage": 4.73, "elapsed_time": "1:36:54", "remaining_time": "1 day, 8:33:24", "throughput": 570.18, "total_tokens": 3315520} |
|
{"current_steps": 325, "total_steps": 6770, "loss": 0.7078, "lr": 9.587020648967551e-05, "epoch": 0.09601181683899557, "percentage": 4.8, "elapsed_time": "1:38:22", "remaining_time": "1 day, 8:30:46", "throughput": 570.64, "total_tokens": 3368088} |
|
{"current_steps": 330, "total_steps": 6770, "loss": 0.6852, "lr": 9.734513274336283e-05, "epoch": 0.09748892171344166, "percentage": 4.87, "elapsed_time": "1:39:48", "remaining_time": "1 day, 8:27:38", "throughput": 571.16, "total_tokens": 3420176} |
|
{"current_steps": 335, "total_steps": 6770, "loss": 0.7557, "lr": 9.882005899705014e-05, "epoch": 0.09896602658788774, "percentage": 4.95, "elapsed_time": "1:41:15", "remaining_time": "1 day, 8:25:11", "throughput": 571.3, "total_tokens": 3471184} |
|
{"current_steps": 340, "total_steps": 6770, "loss": 0.6709, "lr": 9.99999940340072e-05, "epoch": 0.10044313146233383, "percentage": 5.02, "elapsed_time": "1:42:41", "remaining_time": "1 day, 8:22:10", "throughput": 571.75, "total_tokens": 3523008} |
|
{"current_steps": 345, "total_steps": 6770, "loss": 0.7252, "lr": 9.999978522440803e-05, "epoch": 0.1019202363367799, "percentage": 5.1, "elapsed_time": "1:44:08", "remaining_time": "1 day, 8:19:27", "throughput": 571.96, "total_tokens": 3573880} |
|
{"current_steps": 350, "total_steps": 6770, "loss": 0.6602, "lr": 9.999927811659165e-05, "epoch": 0.103397341211226, "percentage": 5.17, "elapsed_time": "1:45:35", "remaining_time": "1 day, 8:16:42", "throughput": 572.33, "total_tokens": 3625752} |
|
{"current_steps": 350, "total_steps": 6770, "eval_loss": 0.7663387656211853, "epoch": 0.103397341211226, "percentage": 5.17, "elapsed_time": "1:45:54", "remaining_time": "1 day, 8:22:34", "throughput": 570.6, "total_tokens": 3625752} |
|
{"current_steps": 355, "total_steps": 6770, "loss": 0.7222, "lr": 9.999847271358347e-05, "epoch": 0.10487444608567208, "percentage": 5.24, "elapsed_time": "1:47:27", "remaining_time": "1 day, 8:21:42", "throughput": 570.33, "total_tokens": 3676984} |
|
{"current_steps": 360, "total_steps": 6770, "loss": 0.6639, "lr": 9.99973690201885e-05, "epoch": 0.10635155096011817, "percentage": 5.32, "elapsed_time": "1:48:53", "remaining_time": "1 day, 8:18:51", "throughput": 570.78, "total_tokens": 3729168} |
|
{"current_steps": 365, "total_steps": 6770, "loss": 0.6501, "lr": 9.999596704299139e-05, "epoch": 0.10782865583456426, "percentage": 5.39, "elapsed_time": "1:50:20", "remaining_time": "1 day, 8:16:15", "throughput": 571.06, "total_tokens": 3780672} |
|
{"current_steps": 370, "total_steps": 6770, "loss": 0.6871, "lr": 9.999426679035628e-05, "epoch": 0.10930576070901034, "percentage": 5.47, "elapsed_time": "1:51:46", "remaining_time": "1 day, 8:13:25", "throughput": 571.43, "total_tokens": 3832328} |
|
{"current_steps": 375, "total_steps": 6770, "loss": 0.6621, "lr": 9.99922682724269e-05, "epoch": 0.11078286558345643, "percentage": 5.54, "elapsed_time": "1:53:12", "remaining_time": "1 day, 8:10:41", "throughput": 571.64, "total_tokens": 3883112} |
|
{"current_steps": 380, "total_steps": 6770, "loss": 0.7156, "lr": 9.998997150112635e-05, "epoch": 0.11225997045790251, "percentage": 5.61, "elapsed_time": "1:54:39", "remaining_time": "1 day, 8:08:06", "throughput": 571.98, "total_tokens": 3934976} |
|
{"current_steps": 385, "total_steps": 6770, "loss": 0.6662, "lr": 9.998737649015718e-05, "epoch": 0.1137370753323486, "percentage": 5.69, "elapsed_time": "1:56:05", "remaining_time": "1 day, 8:05:13", "throughput": 572.3, "total_tokens": 3986192} |
|
{"current_steps": 390, "total_steps": 6770, "loss": 0.682, "lr": 9.998448325500118e-05, "epoch": 0.11521418020679468, "percentage": 5.76, "elapsed_time": "1:57:32", "remaining_time": "1 day, 8:02:48", "throughput": 572.54, "total_tokens": 4037760} |
|
{"current_steps": 395, "total_steps": 6770, "loss": 0.6137, "lr": 9.998129181291936e-05, "epoch": 0.11669128508124077, "percentage": 5.83, "elapsed_time": "1:58:57", "remaining_time": "1 day, 7:59:58", "throughput": 573.13, "total_tokens": 4090872} |
|
{"current_steps": 400, "total_steps": 6770, "loss": 0.6739, "lr": 9.997780218295185e-05, "epoch": 0.11816838995568685, "percentage": 5.91, "elapsed_time": "2:00:25", "remaining_time": "1 day, 7:57:39", "throughput": 573.36, "total_tokens": 4142592} |
|
{"current_steps": 400, "total_steps": 6770, "eval_loss": 0.7038857936859131, "epoch": 0.11816838995568685, "percentage": 5.91, "elapsed_time": "2:00:44", "remaining_time": "1 day, 8:02:42", "throughput": 571.85, "total_tokens": 4142592} |
|
{"current_steps": 405, "total_steps": 6770, "loss": 0.6209, "lr": 9.997401438591772e-05, "epoch": 0.11964549483013294, "percentage": 5.98, "elapsed_time": "2:02:16", "remaining_time": "1 day, 8:01:36", "throughput": 571.81, "total_tokens": 4194920} |
|
{"current_steps": 410, "total_steps": 6770, "loss": 0.6576, "lr": 9.996992844441495e-05, "epoch": 0.12112259970457903, "percentage": 6.06, "elapsed_time": "2:03:44", "remaining_time": "1 day, 7:59:30", "throughput": 572.03, "total_tokens": 4247048} |
|
{"current_steps": 415, "total_steps": 6770, "loss": 0.6851, "lr": 9.996554438282022e-05, "epoch": 0.12259970457902511, "percentage": 6.13, "elapsed_time": "2:05:11", "remaining_time": "1 day, 7:57:04", "throughput": 572.42, "total_tokens": 4299728} |
|
{"current_steps": 420, "total_steps": 6770, "loss": 0.6288, "lr": 9.996086222728879e-05, "epoch": 0.1240768094534712, "percentage": 6.2, "elapsed_time": "2:06:39", "remaining_time": "1 day, 7:55:03", "throughput": 572.52, "total_tokens": 4351088} |
|
{"current_steps": 425, "total_steps": 6770, "loss": 0.667, "lr": 9.995588200575439e-05, "epoch": 0.1255539143279173, "percentage": 6.28, "elapsed_time": "2:08:05", "remaining_time": "1 day, 7:52:16", "throughput": 572.92, "total_tokens": 4403016} |
|
{"current_steps": 430, "total_steps": 6770, "loss": 0.6747, "lr": 9.995060374792892e-05, "epoch": 0.12703101920236337, "percentage": 6.35, "elapsed_time": "2:09:31", "remaining_time": "1 day, 7:49:50", "throughput": 573.07, "total_tokens": 4453880} |
|
{"current_steps": 435, "total_steps": 6770, "loss": 0.6594, "lr": 9.994502748530244e-05, "epoch": 0.12850812407680945, "percentage": 6.43, "elapsed_time": "2:10:57", "remaining_time": "1 day, 7:47:14", "throughput": 573.4, "total_tokens": 4505616} |
|
{"current_steps": 440, "total_steps": 6770, "loss": 0.6727, "lr": 9.993915325114288e-05, "epoch": 0.12998522895125553, "percentage": 6.5, "elapsed_time": "2:12:24", "remaining_time": "1 day, 7:44:55", "throughput": 573.77, "total_tokens": 4558384} |
|
{"current_steps": 445, "total_steps": 6770, "loss": 0.6526, "lr": 9.993298108049582e-05, "epoch": 0.13146233382570163, "percentage": 6.57, "elapsed_time": "2:13:50", "remaining_time": "1 day, 7:42:20", "throughput": 574.21, "total_tokens": 4611184} |
|
{"current_steps": 450, "total_steps": 6770, "loss": 0.5661, "lr": 9.992651101018445e-05, "epoch": 0.1329394387001477, "percentage": 6.65, "elapsed_time": "2:15:17", "remaining_time": "1 day, 7:40:00", "throughput": 574.5, "total_tokens": 4663320} |
|
{"current_steps": 450, "total_steps": 6770, "eval_loss": 0.7132604718208313, "epoch": 0.1329394387001477, "percentage": 6.65, "elapsed_time": "2:15:36", "remaining_time": "1 day, 7:44:27", "throughput": 573.16, "total_tokens": 4663320} |
|
{"current_steps": 455, "total_steps": 6770, "loss": 0.5954, "lr": 9.991974307880907e-05, "epoch": 0.1344165435745938, "percentage": 6.72, "elapsed_time": "2:17:08", "remaining_time": "1 day, 7:43:19", "throughput": 572.96, "total_tokens": 4714448} |
|
{"current_steps": 460, "total_steps": 6770, "loss": 0.721, "lr": 9.991267732674711e-05, "epoch": 0.1358936484490399, "percentage": 6.79, "elapsed_time": "2:18:34", "remaining_time": "1 day, 7:40:59", "throughput": 573.32, "total_tokens": 4767136} |
|
{"current_steps": 465, "total_steps": 6770, "loss": 0.6578, "lr": 9.99053137961528e-05, "epoch": 0.13737075332348597, "percentage": 6.87, "elapsed_time": "2:20:00", "remaining_time": "1 day, 7:38:26", "throughput": 573.69, "total_tokens": 4819408} |
|
{"current_steps": 470, "total_steps": 6770, "loss": 0.6642, "lr": 9.989765253095686e-05, "epoch": 0.13884785819793205, "percentage": 6.94, "elapsed_time": "2:21:27", "remaining_time": "1 day, 7:36:03", "throughput": 574.06, "total_tokens": 4872120} |
|
{"current_steps": 475, "total_steps": 6770, "loss": 0.6462, "lr": 9.988969357686636e-05, "epoch": 0.14032496307237813, "percentage": 7.02, "elapsed_time": "2:22:53", "remaining_time": "1 day, 7:33:40", "throughput": 574.38, "total_tokens": 4924400} |
|
{"current_steps": 480, "total_steps": 6770, "loss": 0.6055, "lr": 9.988143698136429e-05, "epoch": 0.14180206794682423, "percentage": 7.09, "elapsed_time": "2:24:19", "remaining_time": "1 day, 7:31:09", "throughput": 574.72, "total_tokens": 4976504} |
|
{"current_steps": 485, "total_steps": 6770, "loss": 0.5928, "lr": 9.987288279370945e-05, "epoch": 0.1432791728212703, "percentage": 7.16, "elapsed_time": "2:25:46", "remaining_time": "1 day, 7:28:59", "throughput": 574.96, "total_tokens": 5028648} |
|
{"current_steps": 490, "total_steps": 6770, "loss": 0.5835, "lr": 9.986403106493604e-05, "epoch": 0.1447562776957164, "percentage": 7.24, "elapsed_time": "2:27:11", "remaining_time": "1 day, 7:26:26", "throughput": 575.27, "total_tokens": 5080488} |
|
{"current_steps": 495, "total_steps": 6770, "loss": 0.6641, "lr": 9.985488184785336e-05, "epoch": 0.14623338257016247, "percentage": 7.31, "elapsed_time": "2:28:38", "remaining_time": "1 day, 7:24:18", "throughput": 575.4, "total_tokens": 5131744} |
|
{"current_steps": 500, "total_steps": 6770, "loss": 0.6283, "lr": 9.984543519704557e-05, "epoch": 0.14771048744460857, "percentage": 7.39, "elapsed_time": "2:30:03", "remaining_time": "1 day, 7:21:48", "throughput": 575.71, "total_tokens": 5183664} |
|
{"current_steps": 500, "total_steps": 6770, "eval_loss": 0.6505001187324524, "epoch": 0.14771048744460857, "percentage": 7.39, "elapsed_time": "2:30:22", "remaining_time": "1 day, 7:25:46", "throughput": 574.51, "total_tokens": 5183664} |
|
{"current_steps": 505, "total_steps": 6770, "loss": 0.573, "lr": 9.983569116887128e-05, "epoch": 0.14918759231905465, "percentage": 7.46, "elapsed_time": "2:31:55", "remaining_time": "1 day, 7:24:45", "throughput": 574.29, "total_tokens": 5234920} |
|
{"current_steps": 510, "total_steps": 6770, "loss": 0.6261, "lr": 9.982564982146327e-05, "epoch": 0.15066469719350073, "percentage": 7.53, "elapsed_time": "2:33:20", "remaining_time": "1 day, 7:22:15", "throughput": 574.66, "total_tokens": 5287312} |
|
{"current_steps": 515, "total_steps": 6770, "loss": 0.6072, "lr": 9.981531121472811e-05, "epoch": 0.15214180206794684, "percentage": 7.61, "elapsed_time": "2:34:47", "remaining_time": "1 day, 7:20:02", "throughput": 574.99, "total_tokens": 5340240} |
|
{"current_steps": 520, "total_steps": 6770, "loss": 0.565, "lr": 9.980467541034584e-05, "epoch": 0.1536189069423929, "percentage": 7.68, "elapsed_time": "2:36:13", "remaining_time": "1 day, 7:17:38", "throughput": 575.32, "total_tokens": 5392600} |
|
{"current_steps": 525, "total_steps": 6770, "loss": 0.6188, "lr": 9.979374247176956e-05, "epoch": 0.155096011816839, "percentage": 7.75, "elapsed_time": "2:37:39", "remaining_time": "1 day, 7:15:27", "throughput": 575.61, "total_tokens": 5445168} |
|
{"current_steps": 530, "total_steps": 6770, "loss": 0.6069, "lr": 9.978251246422505e-05, "epoch": 0.15657311669128507, "percentage": 7.83, "elapsed_time": "2:39:05", "remaining_time": "1 day, 7:13:06", "throughput": 575.8, "total_tokens": 5496384} |
|
{"current_steps": 535, "total_steps": 6770, "loss": 0.5805, "lr": 9.977098545471046e-05, "epoch": 0.15805022156573117, "percentage": 7.9, "elapsed_time": "2:40:32", "remaining_time": "1 day, 7:10:58", "throughput": 576.0, "total_tokens": 5548264} |
|
{"current_steps": 540, "total_steps": 6770, "loss": 0.6383, "lr": 9.975916151199579e-05, "epoch": 0.15952732644017725, "percentage": 7.98, "elapsed_time": "2:41:58", "remaining_time": "1 day, 7:08:47", "throughput": 576.12, "total_tokens": 5599216} |
|
{"current_steps": 545, "total_steps": 6770, "loss": 0.5845, "lr": 9.974704070662254e-05, "epoch": 0.16100443131462333, "percentage": 8.05, "elapsed_time": "2:43:27", "remaining_time": "1 day, 7:06:58", "throughput": 576.19, "total_tokens": 5650816} |
|
{"current_steps": 550, "total_steps": 6770, "loss": 0.5957, "lr": 9.973462311090336e-05, "epoch": 0.16248153618906944, "percentage": 8.12, "elapsed_time": "2:44:53", "remaining_time": "1 day, 7:04:48", "throughput": 576.43, "total_tokens": 5703016} |
|
{"current_steps": 550, "total_steps": 6770, "eval_loss": 0.6883422136306763, "epoch": 0.16248153618906944, "percentage": 8.12, "elapsed_time": "2:45:12", "remaining_time": "1 day, 7:08:25", "throughput": 575.31, "total_tokens": 5703016} |
|
{"current_steps": 555, "total_steps": 6770, "loss": 0.6076, "lr": 9.972190879892147e-05, "epoch": 0.16395864106351551, "percentage": 8.2, "elapsed_time": "2:46:46", "remaining_time": "1 day, 7:07:29", "throughput": 575.07, "total_tokens": 5754192} |
|
{"current_steps": 560, "total_steps": 6770, "loss": 0.6136, "lr": 9.970889784653033e-05, "epoch": 0.1654357459379616, "percentage": 8.27, "elapsed_time": "2:48:12", "remaining_time": "1 day, 7:05:15", "throughput": 575.32, "total_tokens": 5806272} |
|
{"current_steps": 565, "total_steps": 6770, "loss": 0.5554, "lr": 9.969559033135318e-05, "epoch": 0.16691285081240767, "percentage": 8.35, "elapsed_time": "2:49:38", "remaining_time": "1 day, 7:03:03", "throughput": 575.59, "total_tokens": 5858632} |
|
{"current_steps": 570, "total_steps": 6770, "loss": 0.5847, "lr": 9.96819863327825e-05, "epoch": 0.16838995568685378, "percentage": 8.42, "elapsed_time": "2:51:05", "remaining_time": "1 day, 7:01:01", "throughput": 575.7, "total_tokens": 5909936} |
|
{"current_steps": 575, "total_steps": 6770, "loss": 0.6217, "lr": 9.966808593197959e-05, "epoch": 0.16986706056129985, "percentage": 8.49, "elapsed_time": "2:52:32", "remaining_time": "1 day, 6:58:52", "throughput": 575.87, "total_tokens": 5961464} |
|
{"current_steps": 580, "total_steps": 6770, "loss": 0.5569, "lr": 9.965388921187413e-05, "epoch": 0.17134416543574593, "percentage": 8.57, "elapsed_time": "2:53:58", "remaining_time": "1 day, 6:56:48", "throughput": 576.08, "total_tokens": 6013696} |
|
{"current_steps": 585, "total_steps": 6770, "loss": 0.5894, "lr": 9.963939625716361e-05, "epoch": 0.172821270310192, "percentage": 8.64, "elapsed_time": "2:55:24", "remaining_time": "1 day, 6:54:36", "throughput": 576.32, "total_tokens": 6065736} |
|
{"current_steps": 590, "total_steps": 6770, "loss": 0.5783, "lr": 9.962460715431284e-05, "epoch": 0.17429837518463812, "percentage": 8.71, "elapsed_time": "2:56:52", "remaining_time": "1 day, 6:52:40", "throughput": 576.53, "total_tokens": 6118400} |
|
{"current_steps": 595, "total_steps": 6770, "loss": 0.5657, "lr": 9.960952199155347e-05, "epoch": 0.1757754800590842, "percentage": 8.79, "elapsed_time": "2:58:18", "remaining_time": "1 day, 6:50:27", "throughput": 576.84, "total_tokens": 6171120} |
|
{"current_steps": 600, "total_steps": 6770, "loss": 0.6331, "lr": 9.959414085888342e-05, "epoch": 0.17725258493353027, "percentage": 8.86, "elapsed_time": "2:59:45", "remaining_time": "1 day, 6:48:30", "throughput": 576.95, "total_tokens": 6222736} |
|
{"current_steps": 600, "total_steps": 6770, "eval_loss": 0.5883122682571411, "epoch": 0.17725258493353027, "percentage": 8.86, "elapsed_time": "3:00:04", "remaining_time": "1 day, 6:51:46", "throughput": 575.94, "total_tokens": 6222736} |
|
{"current_steps": 605, "total_steps": 6770, "loss": 0.5678, "lr": 9.957846384806636e-05, "epoch": 0.17872968980797638, "percentage": 8.94, "elapsed_time": "3:01:35", "remaining_time": "1 day, 6:50:24", "throughput": 575.87, "total_tokens": 6274328} |
|
{"current_steps": 610, "total_steps": 6770, "loss": 0.5609, "lr": 9.956249105263121e-05, "epoch": 0.18020679468242246, "percentage": 9.01, "elapsed_time": "3:03:02", "remaining_time": "1 day, 6:48:22", "throughput": 576.12, "total_tokens": 6327088} |
|
{"current_steps": 615, "total_steps": 6770, "loss": 0.5177, "lr": 9.95462225678715e-05, "epoch": 0.18168389955686853, "percentage": 9.08, "elapsed_time": "3:04:28", "remaining_time": "1 day, 6:46:14", "throughput": 576.3, "total_tokens": 6378824} |
|
{"current_steps": 620, "total_steps": 6770, "loss": 0.5839, "lr": 9.952965849084483e-05, "epoch": 0.1831610044313146, "percentage": 9.16, "elapsed_time": "3:05:55", "remaining_time": "1 day, 6:44:15", "throughput": 576.49, "total_tokens": 6431024} |
|
{"current_steps": 625, "total_steps": 6770, "loss": 0.5069, "lr": 9.951279892037233e-05, "epoch": 0.18463810930576072, "percentage": 9.23, "elapsed_time": "3:07:21", "remaining_time": "1 day, 6:42:09", "throughput": 576.69, "total_tokens": 6483072} |
|
{"current_steps": 630, "total_steps": 6770, "loss": 0.495, "lr": 9.949564395703803e-05, "epoch": 0.1861152141802068, "percentage": 9.31, "elapsed_time": "3:08:48", "remaining_time": "1 day, 6:40:08", "throughput": 576.84, "total_tokens": 6534768} |
|
{"current_steps": 635, "total_steps": 6770, "loss": 0.6435, "lr": 9.947819370318825e-05, "epoch": 0.18759231905465287, "percentage": 9.38, "elapsed_time": "3:10:14", "remaining_time": "1 day, 6:38:02", "throughput": 577.01, "total_tokens": 6586416} |
|
{"current_steps": 640, "total_steps": 6770, "loss": 0.5014, "lr": 9.946044826293106e-05, "epoch": 0.18906942392909898, "percentage": 9.45, "elapsed_time": "3:11:41", "remaining_time": "1 day, 6:35:58", "throughput": 577.21, "total_tokens": 6638592} |
|
{"current_steps": 645, "total_steps": 6770, "loss": 0.529, "lr": 9.944240774213556e-05, "epoch": 0.19054652880354506, "percentage": 9.53, "elapsed_time": "3:13:07", "remaining_time": "1 day, 6:33:54", "throughput": 577.35, "total_tokens": 6689920} |
|
{"current_steps": 650, "total_steps": 6770, "loss": 0.5483, "lr": 9.942407224843132e-05, "epoch": 0.19202363367799113, "percentage": 9.6, "elapsed_time": "3:14:33", "remaining_time": "1 day, 6:31:54", "throughput": 577.62, "total_tokens": 6743120} |
|
{"current_steps": 650, "total_steps": 6770, "eval_loss": 0.6100574135780334, "epoch": 0.19202363367799113, "percentage": 9.6, "elapsed_time": "3:14:52", "remaining_time": "1 day, 6:34:53", "throughput": 576.68, "total_tokens": 6743120} |
|
{"current_steps": 655, "total_steps": 6770, "loss": 0.5499, "lr": 9.940544189120771e-05, "epoch": 0.1935007385524372, "percentage": 9.68, "elapsed_time": "3:16:24", "remaining_time": "1 day, 6:33:40", "throughput": 576.52, "total_tokens": 6794096} |
|
{"current_steps": 660, "total_steps": 6770, "loss": 0.5866, "lr": 9.938651678161326e-05, "epoch": 0.19497784342688332, "percentage": 9.75, "elapsed_time": "3:17:50", "remaining_time": "1 day, 6:31:36", "throughput": 576.72, "total_tokens": 6846200} |
|
{"current_steps": 665, "total_steps": 6770, "loss": 0.4958, "lr": 9.936729703255498e-05, "epoch": 0.1964549483013294, "percentage": 9.82, "elapsed_time": "3:19:17", "remaining_time": "1 day, 6:29:32", "throughput": 577.0, "total_tokens": 6899280} |
|
{"current_steps": 670, "total_steps": 6770, "loss": 0.4232, "lr": 9.93477827586977e-05, "epoch": 0.19793205317577547, "percentage": 9.9, "elapsed_time": "3:20:43", "remaining_time": "1 day, 6:27:25", "throughput": 577.15, "total_tokens": 6950608} |
|
{"current_steps": 675, "total_steps": 6770, "loss": 0.5407, "lr": 9.932797407646338e-05, "epoch": 0.19940915805022155, "percentage": 9.97, "elapsed_time": "3:22:09", "remaining_time": "1 day, 6:25:29", "throughput": 577.3, "total_tokens": 7002696} |
|
{"current_steps": 680, "total_steps": 6770, "loss": 0.4553, "lr": 9.93078711040304e-05, "epoch": 0.20088626292466766, "percentage": 10.04, "elapsed_time": "3:23:35", "remaining_time": "1 day, 6:23:21", "throughput": 577.55, "total_tokens": 7055160} |
|
{"current_steps": 685, "total_steps": 6770, "loss": 0.5565, "lr": 9.928747396133294e-05, "epoch": 0.20236336779911374, "percentage": 10.12, "elapsed_time": "3:25:02", "remaining_time": "1 day, 6:21:29", "throughput": 577.68, "total_tokens": 7107224} |
|
{"current_steps": 690, "total_steps": 6770, "loss": 0.5951, "lr": 9.926678277006011e-05, "epoch": 0.2038404726735598, "percentage": 10.19, "elapsed_time": "3:26:28", "remaining_time": "1 day, 6:19:24", "throughput": 577.82, "total_tokens": 7158376} |
|
{"current_steps": 695, "total_steps": 6770, "loss": 0.4764, "lr": 9.924579765365536e-05, "epoch": 0.20531757754800592, "percentage": 10.27, "elapsed_time": "3:27:55", "remaining_time": "1 day, 6:17:29", "throughput": 577.97, "total_tokens": 7210552} |
|
{"current_steps": 700, "total_steps": 6770, "loss": 0.477, "lr": 9.922451873731569e-05, "epoch": 0.206794682422452, "percentage": 10.34, "elapsed_time": "3:29:21", "remaining_time": "1 day, 6:15:27", "throughput": 578.17, "total_tokens": 7262832} |
|
{"current_steps": 700, "total_steps": 6770, "eval_loss": 0.5883837938308716, "epoch": 0.206794682422452, "percentage": 10.34, "elapsed_time": "3:29:40", "remaining_time": "1 day, 6:18:13", "throughput": 577.3, "total_tokens": 7262832} |
|
{"current_steps": 705, "total_steps": 6770, "loss": 0.5151, "lr": 9.92029461479909e-05, "epoch": 0.20827178729689808, "percentage": 10.41, "elapsed_time": "3:31:13", "remaining_time": "1 day, 6:17:07", "throughput": 577.15, "total_tokens": 7314520} |
|
{"current_steps": 710, "total_steps": 6770, "loss": 0.6158, "lr": 9.918108001438283e-05, "epoch": 0.20974889217134415, "percentage": 10.49, "elapsed_time": "3:32:39", "remaining_time": "1 day, 6:15:03", "throughput": 577.25, "total_tokens": 7365368} |
|
{"current_steps": 715, "total_steps": 6770, "loss": 0.5164, "lr": 9.915892046694464e-05, "epoch": 0.21122599704579026, "percentage": 10.56, "elapsed_time": "3:34:05", "remaining_time": "1 day, 6:13:05", "throughput": 577.41, "total_tokens": 7417296} |
|
{"current_steps": 720, "total_steps": 6770, "loss": 0.5823, "lr": 9.913646763787992e-05, "epoch": 0.21270310192023634, "percentage": 10.64, "elapsed_time": "3:35:32", "remaining_time": "1 day, 6:11:07", "throughput": 577.59, "total_tokens": 7469640} |
|
{"current_steps": 725, "total_steps": 6770, "loss": 0.5145, "lr": 9.911372166114208e-05, "epoch": 0.21418020679468242, "percentage": 10.71, "elapsed_time": "3:36:58", "remaining_time": "1 day, 6:09:09", "throughput": 577.75, "total_tokens": 7521520} |
|
{"current_steps": 730, "total_steps": 6770, "loss": 0.571, "lr": 9.909068267243336e-05, "epoch": 0.21565731166912852, "percentage": 10.78, "elapsed_time": "3:38:24", "remaining_time": "1 day, 6:07:08", "throughput": 577.95, "total_tokens": 7573880} |
|
{"current_steps": 735, "total_steps": 6770, "loss": 0.4638, "lr": 9.906735080920413e-05, "epoch": 0.2171344165435746, "percentage": 10.86, "elapsed_time": "3:39:51", "remaining_time": "1 day, 6:05:10", "throughput": 578.11, "total_tokens": 7625896} |
|
{"current_steps": 740, "total_steps": 6770, "loss": 0.5449, "lr": 9.904372621065206e-05, "epoch": 0.21861152141802068, "percentage": 10.93, "elapsed_time": "3:41:17", "remaining_time": "1 day, 6:03:13", "throughput": 578.16, "total_tokens": 7676528} |
|
{"current_steps": 745, "total_steps": 6770, "loss": 0.5505, "lr": 9.901980901772126e-05, "epoch": 0.22008862629246675, "percentage": 11.0, "elapsed_time": "3:42:43", "remaining_time": "1 day, 6:01:16", "throughput": 578.3, "total_tokens": 7728240} |
|
{"current_steps": 750, "total_steps": 6770, "loss": 0.514, "lr": 9.899559937310148e-05, "epoch": 0.22156573116691286, "percentage": 11.08, "elapsed_time": "3:44:09", "remaining_time": "1 day, 5:59:17", "throughput": 578.44, "total_tokens": 7779872} |
|
{"current_steps": 750, "total_steps": 6770, "eval_loss": 0.4665524661540985, "epoch": 0.22156573116691286, "percentage": 11.08, "elapsed_time": "3:44:29", "remaining_time": "1 day, 6:01:51", "throughput": 577.61, "total_tokens": 7779872} |
|
{"current_steps": 755, "total_steps": 6770, "loss": 0.5248, "lr": 9.897109742122721e-05, "epoch": 0.22304283604135894, "percentage": 11.15, "elapsed_time": "3:46:01", "remaining_time": "1 day, 6:00:43", "throughput": 577.52, "total_tokens": 7832168} |
|
{"current_steps": 760, "total_steps": 6770, "loss": 0.5017, "lr": 9.894630330827686e-05, "epoch": 0.22451994091580502, "percentage": 11.23, "elapsed_time": "3:47:29", "remaining_time": "1 day, 5:58:57", "throughput": 577.62, "total_tokens": 7884040} |
|
{"current_steps": 765, "total_steps": 6770, "loss": 0.4896, "lr": 9.892121718217182e-05, "epoch": 0.2259970457902511, "percentage": 11.3, "elapsed_time": "3:48:55", "remaining_time": "1 day, 5:57:00", "throughput": 577.73, "total_tokens": 7935528} |
|
{"current_steps": 770, "total_steps": 6770, "loss": 0.5125, "lr": 9.88958391925757e-05, "epoch": 0.2274741506646972, "percentage": 11.37, "elapsed_time": "3:50:23", "remaining_time": "1 day, 5:55:18", "throughput": 577.82, "total_tokens": 7987760} |
|
{"current_steps": 775, "total_steps": 6770, "loss": 0.5615, "lr": 9.887016949089333e-05, "epoch": 0.22895125553914328, "percentage": 11.45, "elapsed_time": "3:51:50", "remaining_time": "1 day, 5:53:23", "throughput": 577.94, "total_tokens": 8039400} |
|
{"current_steps": 780, "total_steps": 6770, "loss": 0.494, "lr": 9.884420823026989e-05, "epoch": 0.23042836041358936, "percentage": 11.52, "elapsed_time": "3:53:18", "remaining_time": "1 day, 5:51:38", "throughput": 578.11, "total_tokens": 8092440} |
|
{"current_steps": 785, "total_steps": 6770, "loss": 0.5122, "lr": 9.881795556558999e-05, "epoch": 0.23190546528803546, "percentage": 11.6, "elapsed_time": "3:54:45", "remaining_time": "1 day, 5:49:47", "throughput": 578.27, "total_tokens": 8145040} |
|
{"current_steps": 790, "total_steps": 6770, "loss": 0.4925, "lr": 9.879141165347678e-05, "epoch": 0.23338257016248154, "percentage": 11.67, "elapsed_time": "3:56:12", "remaining_time": "1 day, 5:48:03", "throughput": 578.35, "total_tokens": 8196904} |
|
{"current_steps": 795, "total_steps": 6770, "loss": 0.4752, "lr": 9.876457665229097e-05, "epoch": 0.23485967503692762, "percentage": 11.74, "elapsed_time": "3:57:39", "remaining_time": "1 day, 5:46:14", "throughput": 578.49, "total_tokens": 8249232} |
|
{"current_steps": 800, "total_steps": 6770, "loss": 0.4239, "lr": 9.87374507221299e-05, "epoch": 0.2363367799113737, "percentage": 11.82, "elapsed_time": "3:59:07", "remaining_time": "1 day, 5:44:30", "throughput": 578.62, "total_tokens": 8301976} |
|
{"current_steps": 800, "total_steps": 6770, "eval_loss": 0.48219749331474304, "epoch": 0.2363367799113737, "percentage": 11.82, "elapsed_time": "3:59:26", "remaining_time": "1 day, 5:46:52", "throughput": 577.85, "total_tokens": 8301976} |
|
{"current_steps": 805, "total_steps": 6770, "loss": 0.4482, "lr": 9.87100340248266e-05, "epoch": 0.2378138847858198, "percentage": 11.89, "elapsed_time": "4:00:58", "remaining_time": "1 day, 5:45:39", "throughput": 577.76, "total_tokens": 8353736} |
|
{"current_steps": 810, "total_steps": 6770, "loss": 0.4764, "lr": 9.868232672394881e-05, "epoch": 0.23929098966026588, "percentage": 11.96, "elapsed_time": "4:02:26", "remaining_time": "1 day, 5:43:56", "throughput": 577.87, "total_tokens": 8406216} |
|
{"current_steps": 815, "total_steps": 6770, "loss": 0.4476, "lr": 9.8654328984798e-05, "epoch": 0.24076809453471196, "percentage": 12.04, "elapsed_time": "4:03:53", "remaining_time": "1 day, 5:42:06", "throughput": 577.96, "total_tokens": 8457752} |
|
{"current_steps": 820, "total_steps": 6770, "loss": 0.4295, "lr": 9.862604097440844e-05, "epoch": 0.24224519940915806, "percentage": 12.11, "elapsed_time": "4:05:21", "remaining_time": "1 day, 5:40:22", "throughput": 578.09, "total_tokens": 8510440} |
|
{"current_steps": 825, "total_steps": 6770, "loss": 0.5384, "lr": 9.859746286154607e-05, "epoch": 0.24372230428360414, "percentage": 12.19, "elapsed_time": "4:06:49", "remaining_time": "1 day, 5:38:34", "throughput": 578.16, "total_tokens": 8562016} |
|
{"current_steps": 830, "total_steps": 6770, "loss": 0.5357, "lr": 9.856859481670764e-05, "epoch": 0.24519940915805022, "percentage": 12.26, "elapsed_time": "4:08:16", "remaining_time": "1 day, 5:36:46", "throughput": 578.28, "total_tokens": 8614184} |
|
{"current_steps": 835, "total_steps": 6770, "loss": 0.5309, "lr": 9.853943701211963e-05, "epoch": 0.2466765140324963, "percentage": 12.33, "elapsed_time": "4:09:43", "remaining_time": "1 day, 5:34:59", "throughput": 578.4, "total_tokens": 8666528} |
|
{"current_steps": 840, "total_steps": 6770, "loss": 0.4949, "lr": 9.850998962173719e-05, "epoch": 0.2481536189069424, "percentage": 12.41, "elapsed_time": "4:11:10", "remaining_time": "1 day, 5:33:13", "throughput": 578.47, "total_tokens": 8718048} |
|
{"current_steps": 845, "total_steps": 6770, "loss": 0.4681, "lr": 9.848025282124317e-05, "epoch": 0.24963072378138848, "percentage": 12.48, "elapsed_time": "4:12:38", "remaining_time": "1 day, 5:31:28", "throughput": 578.55, "total_tokens": 8769968} |
|
{"current_steps": 850, "total_steps": 6770, "loss": 0.4949, "lr": 9.845022678804701e-05, "epoch": 0.2511078286558346, "percentage": 12.56, "elapsed_time": "4:14:05", "remaining_time": "1 day, 5:29:41", "throughput": 578.71, "total_tokens": 8822832} |
|
{"current_steps": 850, "total_steps": 6770, "eval_loss": 0.6121839880943298, "epoch": 0.2511078286558346, "percentage": 12.56, "elapsed_time": "4:14:24", "remaining_time": "1 day, 5:31:54", "throughput": 577.98, "total_tokens": 8822832} |
|
{"current_steps": 855, "total_steps": 6770, "loss": 0.4636, "lr": 9.841991170128374e-05, "epoch": 0.25258493353028066, "percentage": 12.63, "elapsed_time": "4:15:58", "remaining_time": "1 day, 5:30:51", "throughput": 577.9, "total_tokens": 8875608} |
|
{"current_steps": 860, "total_steps": 6770, "loss": 0.4322, "lr": 9.838930774181285e-05, "epoch": 0.25406203840472674, "percentage": 12.7, "elapsed_time": "4:17:25", "remaining_time": "1 day, 5:29:00", "throughput": 578.02, "total_tokens": 8927600} |
|
{"current_steps": 865, "total_steps": 6770, "loss": 0.4302, "lr": 9.835841509221725e-05, "epoch": 0.2555391432791728, "percentage": 12.78, "elapsed_time": "4:18:53", "remaining_time": "1 day, 5:27:18", "throughput": 578.13, "total_tokens": 8980224} |
|
{"current_steps": 870, "total_steps": 6770, "loss": 0.5231, "lr": 9.83272339368022e-05, "epoch": 0.2570162481536189, "percentage": 12.85, "elapsed_time": "4:20:19", "remaining_time": "1 day, 5:25:27", "throughput": 578.25, "total_tokens": 9032112} |
|
{"current_steps": 875, "total_steps": 6770, "loss": 0.4414, "lr": 9.829576446159416e-05, "epoch": 0.258493353028065, "percentage": 12.92, "elapsed_time": "4:21:48", "remaining_time": "1 day, 5:23:48", "throughput": 578.32, "total_tokens": 9084480} |
|
{"current_steps": 880, "total_steps": 6770, "loss": 0.4469, "lr": 9.826400685433968e-05, "epoch": 0.25997045790251105, "percentage": 13.0, "elapsed_time": "4:23:14", "remaining_time": "1 day, 5:21:56", "throughput": 578.48, "total_tokens": 9136816} |
|
{"current_steps": 885, "total_steps": 6770, "loss": 0.3859, "lr": 9.823196130450434e-05, "epoch": 0.2614475627769572, "percentage": 13.07, "elapsed_time": "4:24:42", "remaining_time": "1 day, 5:20:14", "throughput": 578.61, "total_tokens": 9189808} |
|
{"current_steps": 890, "total_steps": 6770, "loss": 0.4794, "lr": 9.819962800327156e-05, "epoch": 0.26292466765140327, "percentage": 13.15, "elapsed_time": "4:26:09", "remaining_time": "1 day, 5:18:26", "throughput": 578.71, "total_tokens": 9241712} |
|
{"current_steps": 895, "total_steps": 6770, "loss": 0.4476, "lr": 9.81670071435415e-05, "epoch": 0.26440177252584934, "percentage": 13.22, "elapsed_time": "4:27:37", "remaining_time": "1 day, 5:16:42", "throughput": 578.77, "total_tokens": 9293328} |
|
{"current_steps": 900, "total_steps": 6770, "loss": 0.4852, "lr": 9.813409891992988e-05, "epoch": 0.2658788774002954, "percentage": 13.29, "elapsed_time": "4:29:04", "remaining_time": "1 day, 5:14:54", "throughput": 578.86, "total_tokens": 9345160} |
|
{"current_steps": 900, "total_steps": 6770, "eval_loss": 0.5605542063713074, "epoch": 0.2658788774002954, "percentage": 13.29, "elapsed_time": "4:29:23", "remaining_time": "1 day, 5:16:59", "throughput": 578.18, "total_tokens": 9345160} |
|
{"current_steps": 905, "total_steps": 6770, "loss": 0.4973, "lr": 9.810090352876685e-05, "epoch": 0.2673559822747415, "percentage": 13.37, "elapsed_time": "4:30:55", "remaining_time": "1 day, 5:15:48", "throughput": 578.04, "total_tokens": 9396608} |
|
{"current_steps": 910, "total_steps": 6770, "loss": 0.4845, "lr": 9.806742116809575e-05, "epoch": 0.2688330871491876, "percentage": 13.44, "elapsed_time": "4:32:23", "remaining_time": "1 day, 5:14:02", "throughput": 578.12, "total_tokens": 9448264} |
|
{"current_steps": 915, "total_steps": 6770, "loss": 0.4405, "lr": 9.803365203767201e-05, "epoch": 0.27031019202363366, "percentage": 13.52, "elapsed_time": "4:33:50", "remaining_time": "1 day, 5:12:19", "throughput": 578.26, "total_tokens": 9501288} |
|
{"current_steps": 920, "total_steps": 6770, "loss": 0.5228, "lr": 9.799959633896194e-05, "epoch": 0.2717872968980798, "percentage": 13.59, "elapsed_time": "4:35:18", "remaining_time": "1 day, 5:10:38", "throughput": 578.29, "total_tokens": 9552680} |
|
{"current_steps": 925, "total_steps": 6770, "loss": 0.4189, "lr": 9.79652542751415e-05, "epoch": 0.27326440177252587, "percentage": 13.66, "elapsed_time": "4:36:46", "remaining_time": "1 day, 5:08:57", "throughput": 578.34, "total_tokens": 9604432} |
|
{"current_steps": 930, "total_steps": 6770, "loss": 0.4449, "lr": 9.793062605109509e-05, "epoch": 0.27474150664697194, "percentage": 13.74, "elapsed_time": "4:38:14", "remaining_time": "1 day, 5:07:15", "throughput": 578.45, "total_tokens": 9656992} |
|
{"current_steps": 935, "total_steps": 6770, "loss": 0.4678, "lr": 9.789571187341433e-05, "epoch": 0.276218611521418, "percentage": 13.81, "elapsed_time": "4:39:42", "remaining_time": "1 day, 5:05:32", "throughput": 578.52, "total_tokens": 9709016} |
|
{"current_steps": 940, "total_steps": 6770, "loss": 0.5359, "lr": 9.786051195039689e-05, "epoch": 0.2776957163958641, "percentage": 13.88, "elapsed_time": "4:41:10", "remaining_time": "1 day, 5:03:51", "throughput": 578.53, "total_tokens": 9759936} |
|
{"current_steps": 945, "total_steps": 6770, "loss": 0.5507, "lr": 9.782502649204512e-05, "epoch": 0.2791728212703102, "percentage": 13.96, "elapsed_time": "4:42:37", "remaining_time": "1 day, 5:02:07", "throughput": 578.61, "total_tokens": 9811880} |
|
{"current_steps": 950, "total_steps": 6770, "loss": 0.4737, "lr": 9.778925571006495e-05, "epoch": 0.28064992614475626, "percentage": 14.03, "elapsed_time": "4:44:05", "remaining_time": "1 day, 5:00:27", "throughput": 578.63, "total_tokens": 9863168} |
|
{"current_steps": 950, "total_steps": 6770, "eval_loss": 0.479105681180954, "epoch": 0.28064992614475626, "percentage": 14.03, "elapsed_time": "4:44:24", "remaining_time": "1 day, 5:02:25", "throughput": 577.98, "total_tokens": 9863168} |
|
{"current_steps": 955, "total_steps": 6770, "loss": 0.4393, "lr": 9.775319981786445e-05, "epoch": 0.2821270310192024, "percentage": 14.11, "elapsed_time": "4:45:57", "remaining_time": "1 day, 5:01:09", "throughput": 577.87, "total_tokens": 9914672} |
|
{"current_steps": 960, "total_steps": 6770, "loss": 0.4355, "lr": 9.771685903055277e-05, "epoch": 0.28360413589364847, "percentage": 14.18, "elapsed_time": "4:47:25", "remaining_time": "1 day, 4:59:30", "throughput": 577.94, "total_tokens": 9966736} |
|
{"current_steps": 965, "total_steps": 6770, "loss": 0.4459, "lr": 9.768023356493864e-05, "epoch": 0.28508124076809455, "percentage": 14.25, "elapsed_time": "4:48:53", "remaining_time": "1 day, 4:57:48", "throughput": 577.97, "total_tokens": 10017984} |
|
{"current_steps": 970, "total_steps": 6770, "loss": 0.4774, "lr": 9.764332363952927e-05, "epoch": 0.2865583456425406, "percentage": 14.33, "elapsed_time": "4:50:21", "remaining_time": "1 day, 4:56:10", "throughput": 577.99, "total_tokens": 10069520} |
|
{"current_steps": 975, "total_steps": 6770, "loss": 0.413, "lr": 9.760612947452884e-05, "epoch": 0.2880354505169867, "percentage": 14.4, "elapsed_time": "4:51:48", "remaining_time": "1 day, 4:54:21", "throughput": 578.14, "total_tokens": 10122208} |
|
{"current_steps": 980, "total_steps": 6770, "loss": 0.5433, "lr": 9.756865129183741e-05, "epoch": 0.2895125553914328, "percentage": 14.48, "elapsed_time": "4:53:15", "remaining_time": "1 day, 4:52:39", "throughput": 578.19, "total_tokens": 10173760} |
|
{"current_steps": 985, "total_steps": 6770, "loss": 0.4096, "lr": 9.753088931504944e-05, "epoch": 0.29098966026587886, "percentage": 14.55, "elapsed_time": "4:54:42", "remaining_time": "1 day, 4:50:51", "throughput": 578.25, "total_tokens": 10224976} |
|
{"current_steps": 990, "total_steps": 6770, "loss": 0.3916, "lr": 9.749284376945248e-05, "epoch": 0.29246676514032494, "percentage": 14.62, "elapsed_time": "4:56:10", "remaining_time": "1 day, 4:49:08", "throughput": 578.33, "total_tokens": 10276928} |
|
{"current_steps": 995, "total_steps": 6770, "loss": 0.3899, "lr": 9.74545148820259e-05, "epoch": 0.29394387001477107, "percentage": 14.7, "elapsed_time": "4:57:35", "remaining_time": "1 day, 4:47:15", "throughput": 578.41, "total_tokens": 10328048} |
|
{"current_steps": 1000, "total_steps": 6770, "loss": 0.4005, "lr": 9.741590288143944e-05, "epoch": 0.29542097488921715, "percentage": 14.77, "elapsed_time": "4:59:02", "remaining_time": "1 day, 4:45:26", "throughput": 578.48, "total_tokens": 10379136} |
|
{"current_steps": 1000, "total_steps": 6770, "eval_loss": 0.5501028299331665, "epoch": 0.29542097488921715, "percentage": 14.77, "elapsed_time": "4:59:21", "remaining_time": "1 day, 4:47:16", "throughput": 577.86, "total_tokens": 10379136} |
|
{"current_steps": 1005, "total_steps": 6770, "loss": 0.4585, "lr": 9.737700799805191e-05, "epoch": 0.2968980797636632, "percentage": 14.84, "elapsed_time": "0:02:47", "remaining_time": "0:15:59", "throughput": 62352.42, "total_tokens": 10430680} |
|
{"current_steps": 1010, "total_steps": 6770, "loss": 0.4257, "lr": 9.73378304639098e-05, "epoch": 0.2983751846381093, "percentage": 14.92, "elapsed_time": "0:04:15", "remaining_time": "0:24:18", "throughput": 40994.74, "total_tokens": 10482472} |
|
{"current_steps": 1015, "total_steps": 6770, "loss": 0.4359, "lr": 9.729837051274591e-05, "epoch": 0.2998522895125554, "percentage": 14.99, "elapsed_time": "0:05:42", "remaining_time": "0:32:24", "throughput": 30720.21, "total_tokens": 10534392} |
|
{"current_steps": 1020, "total_steps": 6770, "loss": 0.4158, "lr": 9.725862837997786e-05, "epoch": 0.30132939438700146, "percentage": 15.07, "elapsed_time": "0:07:11", "remaining_time": "0:40:30", "throughput": 24554.28, "total_tokens": 10586104} |
|
{"current_steps": 1025, "total_steps": 6770, "loss": 0.4067, "lr": 9.721860430270685e-05, "epoch": 0.30280649926144754, "percentage": 15.14, "elapsed_time": "0:08:38", "remaining_time": "0:48:25", "throughput": 20517.46, "total_tokens": 10637560} |
|
{"current_steps": 1030, "total_steps": 6770, "loss": 0.4811, "lr": 9.717829851971612e-05, "epoch": 0.30428360413589367, "percentage": 15.21, "elapsed_time": "0:10:06", "remaining_time": "0:56:22", "throughput": 17611.31, "total_tokens": 10689552} |
|
{"current_steps": 1035, "total_steps": 6770, "loss": 0.4732, "lr": 9.713771127146955e-05, "epoch": 0.30576070901033975, "percentage": 15.29, "elapsed_time": "0:11:34", "remaining_time": "1:04:10", "throughput": 15458.65, "total_tokens": 10742208} |
|
{"current_steps": 1040, "total_steps": 6770, "loss": 0.4735, "lr": 9.70968428001103e-05, "epoch": 0.3072378138847858, "percentage": 15.36, "elapsed_time": "0:13:03", "remaining_time": "1:11:59", "throughput": 13768.31, "total_tokens": 10794008} |
|
{"current_steps": 1045, "total_steps": 6770, "loss": 0.4381, "lr": 9.705569334945921e-05, "epoch": 0.3087149187592319, "percentage": 15.44, "elapsed_time": "0:14:31", "remaining_time": "1:19:32", "throughput": 12450.04, "total_tokens": 10845736} |
|
{"current_steps": 1050, "total_steps": 6770, "loss": 0.3991, "lr": 9.701426316501352e-05, "epoch": 0.310192023633678, "percentage": 15.51, "elapsed_time": "0:16:00", "remaining_time": "1:27:10", "throughput": 11349.91, "total_tokens": 10897528} |
|
{"current_steps": 1050, "total_steps": 6770, "eval_loss": 0.4378110468387604, "epoch": 0.310192023633678, "percentage": 15.51, "elapsed_time": "0:16:47", "remaining_time": "1:31:27", "throughput": 10817.94, "total_tokens": 10897528} |
|
{"current_steps": 1055, "total_steps": 6770, "loss": 0.3724, "lr": 9.697255249394527e-05, "epoch": 0.31166912850812406, "percentage": 15.58, "elapsed_time": "0:18:22", "remaining_time": "1:39:31", "throughput": 9932.58, "total_tokens": 10949888} |
|
{"current_steps": 1060, "total_steps": 6770, "loss": 0.4483, "lr": 9.693056158509992e-05, "epoch": 0.31314623338257014, "percentage": 15.66, "elapsed_time": "0:19:50", "remaining_time": "1:46:52", "throughput": 9241.48, "total_tokens": 11001208} |
|
{"current_steps": 1065, "total_steps": 6770, "loss": 0.4133, "lr": 9.688829068899483e-05, "epoch": 0.31462333825701627, "percentage": 15.73, "elapsed_time": "0:21:17", "remaining_time": "1:54:01", "throughput": 8653.24, "total_tokens": 11052368} |
|
{"current_steps": 1070, "total_steps": 6770, "loss": 0.5406, "lr": 9.684574005781772e-05, "epoch": 0.31610044313146235, "percentage": 15.81, "elapsed_time": "0:22:46", "remaining_time": "2:01:17", "throughput": 8128.52, "total_tokens": 11104008} |
|
{"current_steps": 1075, "total_steps": 6770, "loss": 0.4148, "lr": 9.680290994542523e-05, "epoch": 0.3175775480059084, "percentage": 15.88, "elapsed_time": "0:24:12", "remaining_time": "2:08:13", "throughput": 7681.93, "total_tokens": 11155888} |
|
{"current_steps": 1080, "total_steps": 6770, "loss": 0.4169, "lr": 9.675980060734138e-05, "epoch": 0.3190546528803545, "percentage": 15.95, "elapsed_time": "0:25:39", "remaining_time": "2:15:11", "throughput": 7279.12, "total_tokens": 11207352} |
|
{"current_steps": 1085, "total_steps": 6770, "loss": 0.4706, "lr": 9.671641230075604e-05, "epoch": 0.3205317577548006, "percentage": 16.03, "elapsed_time": "0:27:05", "remaining_time": "2:21:58", "throughput": 6924.56, "total_tokens": 11257672} |
|
{"current_steps": 1090, "total_steps": 6770, "loss": 0.3736, "lr": 9.667274528452344e-05, "epoch": 0.32200886262924666, "percentage": 16.1, "elapsed_time": "0:28:33", "remaining_time": "2:28:48", "throughput": 6601.02, "total_tokens": 11309944} |
|
{"current_steps": 1095, "total_steps": 6770, "loss": 0.4413, "lr": 9.662879981916054e-05, "epoch": 0.32348596750369274, "percentage": 16.17, "elapsed_time": "0:29:59", "remaining_time": "2:35:26", "throughput": 6313.05, "total_tokens": 11361032} |
|
{"current_steps": 1100, "total_steps": 6770, "loss": 0.4624, "lr": 9.658457616684555e-05, "epoch": 0.3249630723781389, "percentage": 16.25, "elapsed_time": "0:31:26", "remaining_time": "2:42:04", "throughput": 6049.61, "total_tokens": 11413120} |
|
{"current_steps": 1100, "total_steps": 6770, "eval_loss": 0.5300672650337219, "epoch": 0.3249630723781389, "percentage": 16.25, "elapsed_time": "0:31:45", "remaining_time": "2:43:42", "throughput": 5989.26, "total_tokens": 11413120} |
|
{"current_steps": 1105, "total_steps": 6770, "loss": 0.4121, "lr": 9.654007459141634e-05, "epoch": 0.32644017725258495, "percentage": 16.32, "elapsed_time": "0:33:18", "remaining_time": "2:50:46", "throughput": 5736.49, "total_tokens": 11465064} |
|
{"current_steps": 1110, "total_steps": 6770, "loss": 0.4569, "lr": 9.649529535836887e-05, "epoch": 0.32791728212703103, "percentage": 16.4, "elapsed_time": "0:34:45", "remaining_time": "2:57:11", "throughput": 5523.23, "total_tokens": 11516304} |
|
{"current_steps": 1115, "total_steps": 6770, "loss": 0.4121, "lr": 9.645023873485557e-05, "epoch": 0.3293943870014771, "percentage": 16.47, "elapsed_time": "0:36:12", "remaining_time": "3:03:39", "throughput": 5324.36, "total_tokens": 11568568} |
|
{"current_steps": 1120, "total_steps": 6770, "loss": 0.4112, "lr": 9.640490498968383e-05, "epoch": 0.3308714918759232, "percentage": 16.54, "elapsed_time": "0:37:39", "remaining_time": "3:09:56", "throughput": 5143.72, "total_tokens": 11620672} |
|
{"current_steps": 1125, "total_steps": 6770, "loss": 0.3564, "lr": 9.63592943933143e-05, "epoch": 0.33234859675036926, "percentage": 16.62, "elapsed_time": "0:39:06", "remaining_time": "3:16:15", "throughput": 4973.87, "total_tokens": 11672864} |
|
{"current_steps": 1130, "total_steps": 6770, "loss": 0.3909, "lr": 9.631340721785934e-05, "epoch": 0.33382570162481534, "percentage": 16.69, "elapsed_time": "0:40:33", "remaining_time": "3:22:26", "throughput": 4817.77, "total_tokens": 11724128} |
|
{"current_steps": 1135, "total_steps": 6770, "loss": 0.4636, "lr": 9.62672437370814e-05, "epoch": 0.3353028064992615, "percentage": 16.77, "elapsed_time": "0:42:01", "remaining_time": "3:28:39", "throughput": 4670.11, "total_tokens": 11776416} |
|
{"current_steps": 1140, "total_steps": 6770, "loss": 0.4617, "lr": 9.622080422639133e-05, "epoch": 0.33677991137370755, "percentage": 16.84, "elapsed_time": "0:43:27", "remaining_time": "3:34:37", "throughput": 4536.3, "total_tokens": 11828256} |
|
{"current_steps": 1145, "total_steps": 6770, "loss": 0.3443, "lr": 9.617408896284678e-05, "epoch": 0.33825701624815363, "percentage": 16.91, "elapsed_time": "0:44:56", "remaining_time": "3:40:44", "throughput": 4407.24, "total_tokens": 11882048} |
|
{"current_steps": 1150, "total_steps": 6770, "loss": 0.4432, "lr": 9.612709822515054e-05, "epoch": 0.3397341211225997, "percentage": 16.99, "elapsed_time": "0:46:23", "remaining_time": "3:46:41", "throughput": 4287.75, "total_tokens": 11933632} |
|
{"current_steps": 1150, "total_steps": 6770, "eval_loss": 0.42494550347328186, "epoch": 0.3397341211225997, "percentage": 16.99, "elapsed_time": "0:46:42", "remaining_time": "3:48:17", "throughput": 4257.76, "total_tokens": 11933632} |
|
{"current_steps": 1155, "total_steps": 6770, "loss": 0.3716, "lr": 9.60798322936489e-05, "epoch": 0.3412112259970458, "percentage": 17.06, "elapsed_time": "0:48:16", "remaining_time": "3:54:38", "throughput": 4138.97, "total_tokens": 11986496} |
|
{"current_steps": 1160, "total_steps": 6770, "loss": 0.4234, "lr": 9.603229145032993e-05, "epoch": 0.34268833087149186, "percentage": 17.13, "elapsed_time": "0:49:42", "remaining_time": "4:00:26", "throughput": 4035.94, "total_tokens": 12039112} |
|
{"current_steps": 1165, "total_steps": 6770, "loss": 0.3973, "lr": 9.598447597882181e-05, "epoch": 0.34416543574593794, "percentage": 17.21, "elapsed_time": "0:51:10", "remaining_time": "4:06:13", "throughput": 3937.7, "total_tokens": 12091728} |
|
{"current_steps": 1170, "total_steps": 6770, "loss": 0.3494, "lr": 9.593638616439118e-05, "epoch": 0.345642540620384, "percentage": 17.28, "elapsed_time": "0:52:37", "remaining_time": "4:11:51", "throughput": 3846.44, "total_tokens": 12143896} |
|
{"current_steps": 1175, "total_steps": 6770, "loss": 0.4182, "lr": 9.588802229394137e-05, "epoch": 0.34711964549483015, "percentage": 17.36, "elapsed_time": "0:54:05", "remaining_time": "4:17:31", "throughput": 3758.15, "total_tokens": 12195336} |
|
{"current_steps": 1180, "total_steps": 6770, "loss": 0.462, "lr": 9.583938465601075e-05, "epoch": 0.34859675036927623, "percentage": 17.43, "elapsed_time": "0:55:31", "remaining_time": "4:23:01", "throughput": 3676.4, "total_tokens": 12247696} |
|
{"current_steps": 1185, "total_steps": 6770, "loss": 0.4451, "lr": 9.5790473540771e-05, "epoch": 0.3500738552437223, "percentage": 17.5, "elapsed_time": "0:56:59", "remaining_time": "4:28:35", "throughput": 3597.25, "total_tokens": 12300040} |
|
{"current_steps": 1190, "total_steps": 6770, "loss": 0.4789, "lr": 9.574128924002533e-05, "epoch": 0.3515509601181684, "percentage": 17.58, "elapsed_time": "0:58:25", "remaining_time": "4:33:58", "throughput": 3523.41, "total_tokens": 12351904} |
|
{"current_steps": 1195, "total_steps": 6770, "loss": 0.3898, "lr": 9.569183204720677e-05, "epoch": 0.35302806499261447, "percentage": 17.65, "elapsed_time": "0:59:53", "remaining_time": "4:39:23", "throughput": 3451.85, "total_tokens": 12403280} |
|
{"current_steps": 1200, "total_steps": 6770, "loss": 0.3296, "lr": 9.564210225737647e-05, "epoch": 0.35450516986706054, "percentage": 17.73, "elapsed_time": "1:01:22", "remaining_time": "4:44:50", "throughput": 3382.92, "total_tokens": 12456040} |
|
{"current_steps": 1200, "total_steps": 6770, "eval_loss": 0.2966395914554596, "epoch": 0.35450516986706054, "percentage": 17.73, "elapsed_time": "1:01:41", "remaining_time": "4:46:21", "throughput": 3365.07, "total_tokens": 12456040} |
|
{"current_steps": 1205, "total_steps": 6770, "loss": 0.3717, "lr": 9.559210016722184e-05, "epoch": 0.3559822747415066, "percentage": 17.8, "elapsed_time": "1:03:14", "remaining_time": "4:52:05", "throughput": 3296.0, "total_tokens": 12507640} |
|
{"current_steps": 1210, "total_steps": 6770, "loss": 0.541, "lr": 9.554182607505484e-05, "epoch": 0.35745937961595275, "percentage": 17.87, "elapsed_time": "1:04:42", "remaining_time": "4:57:19", "throughput": 3234.99, "total_tokens": 12559400} |
|
{"current_steps": 1215, "total_steps": 6770, "loss": 0.3965, "lr": 9.54912802808102e-05, "epoch": 0.35893648449039883, "percentage": 17.95, "elapsed_time": "1:06:10", "remaining_time": "5:02:32", "throughput": 3176.28, "total_tokens": 12610992} |
|
{"current_steps": 1220, "total_steps": 6770, "loss": 0.4834, "lr": 9.544046308604364e-05, "epoch": 0.3604135893648449, "percentage": 18.02, "elapsed_time": "1:07:37", "remaining_time": "5:07:40", "throughput": 3120.47, "total_tokens": 12662688} |
|
{"current_steps": 1225, "total_steps": 6770, "loss": 0.4538, "lr": 9.538937479393001e-05, "epoch": 0.361890694239291, "percentage": 18.09, "elapsed_time": "1:09:04", "remaining_time": "5:12:40", "throughput": 3067.55, "total_tokens": 12713600} |
|
{"current_steps": 1230, "total_steps": 6770, "loss": 0.4226, "lr": 9.533801570926157e-05, "epoch": 0.36336779911373707, "percentage": 18.17, "elapsed_time": "1:10:32", "remaining_time": "5:17:42", "throughput": 3016.41, "total_tokens": 12766360} |
|
{"current_steps": 1235, "total_steps": 6770, "loss": 0.4315, "lr": 9.52863861384461e-05, "epoch": 0.36484490398818314, "percentage": 18.24, "elapsed_time": "1:11:58", "remaining_time": "5:22:35", "throughput": 2967.88, "total_tokens": 12817248} |
|
{"current_steps": 1240, "total_steps": 6770, "loss": 0.3567, "lr": 9.523448638950508e-05, "epoch": 0.3663220088626292, "percentage": 18.32, "elapsed_time": "1:13:26", "remaining_time": "5:27:30", "throughput": 2920.53, "total_tokens": 12868496} |
|
{"current_steps": 1245, "total_steps": 6770, "loss": 0.3431, "lr": 9.518231677207192e-05, "epoch": 0.36779911373707536, "percentage": 18.39, "elapsed_time": "1:14:52", "remaining_time": "5:32:16", "throughput": 2875.92, "total_tokens": 12920168} |
|
{"current_steps": 1250, "total_steps": 6770, "loss": 0.335, "lr": 9.512987759739003e-05, "epoch": 0.36927621861152143, "percentage": 18.46, "elapsed_time": "1:16:20", "remaining_time": "5:37:06", "throughput": 2832.29, "total_tokens": 12972696} |
|
{"current_steps": 1250, "total_steps": 6770, "eval_loss": 0.31846168637275696, "epoch": 0.36927621861152143, "percentage": 18.46, "elapsed_time": "1:16:39", "remaining_time": "5:38:30", "throughput": 2820.63, "total_tokens": 12972696} |
|
{"current_steps": 1255, "total_steps": 6770, "loss": 0.3242, "lr": 9.507716917831099e-05, "epoch": 0.3707533234859675, "percentage": 18.54, "elapsed_time": "1:18:10", "remaining_time": "5:43:33", "throughput": 2776.73, "total_tokens": 13025280} |
|
{"current_steps": 1260, "total_steps": 6770, "loss": 0.4083, "lr": 9.50241918292927e-05, "epoch": 0.3722304283604136, "percentage": 18.61, "elapsed_time": "1:19:37", "remaining_time": "5:48:14", "throughput": 2736.74, "total_tokens": 13075992} |
|
{"current_steps": 1265, "total_steps": 6770, "loss": 0.4043, "lr": 9.49709458663975e-05, "epoch": 0.37370753323485967, "percentage": 18.69, "elapsed_time": "1:21:04", "remaining_time": "5:52:48", "throughput": 2699.0, "total_tokens": 13128592} |
|
{"current_steps": 1270, "total_steps": 6770, "loss": 0.3481, "lr": 9.491743160729026e-05, "epoch": 0.37518463810930575, "percentage": 18.76, "elapsed_time": "1:22:31", "remaining_time": "5:57:23", "throughput": 2662.2, "total_tokens": 13181824} |
|
{"current_steps": 1275, "total_steps": 6770, "loss": 0.4121, "lr": 9.486364937123651e-05, "epoch": 0.3766617429837518, "percentage": 18.83, "elapsed_time": "1:23:58", "remaining_time": "6:01:54", "throughput": 2626.51, "total_tokens": 13233624} |
|
{"current_steps": 1280, "total_steps": 6770, "loss": 0.487, "lr": 9.480959947910055e-05, "epoch": 0.37813884785819796, "percentage": 18.91, "elapsed_time": "1:25:26", "remaining_time": "6:06:25", "throughput": 2591.83, "total_tokens": 13285808} |
|
{"current_steps": 1285, "total_steps": 6770, "loss": 0.3798, "lr": 9.47552822533435e-05, "epoch": 0.37961595273264404, "percentage": 18.98, "elapsed_time": "1:26:54", "remaining_time": "6:10:57", "throughput": 2557.91, "total_tokens": 13337864} |
|
{"current_steps": 1290, "total_steps": 6770, "loss": 0.348, "lr": 9.470069801802135e-05, "epoch": 0.3810930576070901, "percentage": 19.05, "elapsed_time": "1:28:21", "remaining_time": "6:15:22", "throughput": 2525.65, "total_tokens": 13390544} |
|
{"current_steps": 1295, "total_steps": 6770, "loss": 0.41, "lr": 9.464584709878313e-05, "epoch": 0.3825701624815362, "percentage": 19.13, "elapsed_time": "1:29:50", "remaining_time": "6:19:48", "throughput": 2493.77, "total_tokens": 13441664} |
|
{"current_steps": 1300, "total_steps": 6770, "loss": 0.3594, "lr": 9.459072982286886e-05, "epoch": 0.38404726735598227, "percentage": 19.2, "elapsed_time": "1:31:16", "remaining_time": "6:24:03", "throughput": 2463.8, "total_tokens": 13493264} |
|
{"current_steps": 1300, "total_steps": 6770, "eval_loss": 0.4715976417064667, "epoch": 0.38404726735598227, "percentage": 19.2, "elapsed_time": "1:31:35", "remaining_time": "6:25:24", "throughput": 2455.24, "total_tokens": 13493264} |
|
{"current_steps": 1305, "total_steps": 6770, "loss": 0.402, "lr": 9.453534651910765e-05, "epoch": 0.38552437223042835, "percentage": 19.28, "elapsed_time": "1:33:08", "remaining_time": "6:30:03", "throughput": 2423.77, "total_tokens": 13545256} |
|
{"current_steps": 1310, "total_steps": 6770, "loss": 0.3075, "lr": 9.447969751791577e-05, "epoch": 0.3870014771048744, "percentage": 19.35, "elapsed_time": "1:34:35", "remaining_time": "6:34:15", "throughput": 2395.88, "total_tokens": 13597792} |
|
{"current_steps": 1315, "total_steps": 6770, "loss": 0.3702, "lr": 9.442378315129455e-05, "epoch": 0.38847858197932056, "percentage": 19.42, "elapsed_time": "1:36:04", "remaining_time": "6:38:31", "throughput": 2368.0, "total_tokens": 13649848} |
|
{"current_steps": 1320, "total_steps": 6770, "loss": 0.3603, "lr": 9.436760375282859e-05, "epoch": 0.38995568685376664, "percentage": 19.5, "elapsed_time": "1:37:31", "remaining_time": "6:42:38", "throughput": 2341.65, "total_tokens": 13701592} |
|
{"current_steps": 1325, "total_steps": 6770, "loss": 0.4072, "lr": 9.431115965768358e-05, "epoch": 0.3914327917282127, "percentage": 19.57, "elapsed_time": "1:38:59", "remaining_time": "6:46:49", "throughput": 2315.42, "total_tokens": 13753064} |
|
{"current_steps": 1330, "total_steps": 6770, "loss": 0.3279, "lr": 9.425445120260445e-05, "epoch": 0.3929098966026588, "percentage": 19.65, "elapsed_time": "1:40:27", "remaining_time": "6:50:52", "throughput": 2290.54, "total_tokens": 13805528} |
|
{"current_steps": 1335, "total_steps": 6770, "loss": 0.3754, "lr": 9.419747872591325e-05, "epoch": 0.39438700147710487, "percentage": 19.72, "elapsed_time": "1:41:57", "remaining_time": "6:55:03", "throughput": 2265.48, "total_tokens": 13858192} |
|
{"current_steps": 1340, "total_steps": 6770, "loss": 0.3754, "lr": 9.414024256750723e-05, "epoch": 0.39586410635155095, "percentage": 19.79, "elapsed_time": "1:43:25", "remaining_time": "6:59:04", "throughput": 2241.72, "total_tokens": 13910128} |
|
{"current_steps": 1345, "total_steps": 6770, "loss": 0.3235, "lr": 9.408274306885674e-05, "epoch": 0.397341211225997, "percentage": 19.87, "elapsed_time": "1:44:53", "remaining_time": "7:03:05", "throughput": 2218.47, "total_tokens": 13962536} |
|
{"current_steps": 1350, "total_steps": 6770, "loss": 0.3731, "lr": 9.402498057300317e-05, "epoch": 0.3988183161004431, "percentage": 19.94, "elapsed_time": "1:46:20", "remaining_time": "7:06:56", "throughput": 2196.51, "total_tokens": 14014736} |
|
{"current_steps": 1350, "total_steps": 6770, "eval_loss": 0.5565826892852783, "epoch": 0.3988183161004431, "percentage": 19.94, "elapsed_time": "1:46:39", "remaining_time": "7:08:13", "throughput": 2189.88, "total_tokens": 14014736} |
|
{"current_steps": 1355, "total_steps": 6770, "loss": 0.4115, "lr": 9.396695542455704e-05, "epoch": 0.40029542097488924, "percentage": 20.01, "elapsed_time": "1:48:13", "remaining_time": "7:12:28", "throughput": 2166.39, "total_tokens": 14066880} |
|
{"current_steps": 1360, "total_steps": 6770, "loss": 0.365, "lr": 9.390866796969577e-05, "epoch": 0.4017725258493353, "percentage": 20.09, "elapsed_time": "1:49:40", "remaining_time": "7:16:18", "throughput": 2145.35, "total_tokens": 14118320} |
|
{"current_steps": 1365, "total_steps": 6770, "loss": 0.3904, "lr": 9.385011855616177e-05, "epoch": 0.4032496307237814, "percentage": 20.16, "elapsed_time": "1:51:08", "remaining_time": "7:20:07", "throughput": 2124.65, "total_tokens": 14169208} |
|
{"current_steps": 1370, "total_steps": 6770, "loss": 0.5425, "lr": 9.379130753326021e-05, "epoch": 0.40472673559822747, "percentage": 20.24, "elapsed_time": "1:52:36", "remaining_time": "7:23:52", "throughput": 2104.69, "total_tokens": 14220632} |
|
{"current_steps": 1375, "total_steps": 6770, "loss": 0.3985, "lr": 9.373223525185709e-05, "epoch": 0.40620384047267355, "percentage": 20.31, "elapsed_time": "1:54:04", "remaining_time": "7:27:34", "throughput": 2085.35, "total_tokens": 14272640} |
|
{"current_steps": 1380, "total_steps": 6770, "loss": 0.3528, "lr": 9.367290206437702e-05, "epoch": 0.4076809453471196, "percentage": 20.38, "elapsed_time": "1:55:30", "remaining_time": "7:31:10", "throughput": 2066.81, "total_tokens": 14324960} |
|
{"current_steps": 1385, "total_steps": 6770, "loss": 0.3687, "lr": 9.361330832480124e-05, "epoch": 0.4091580502215657, "percentage": 20.46, "elapsed_time": "1:56:57", "remaining_time": "7:34:44", "throughput": 2048.73, "total_tokens": 14376792} |
|
{"current_steps": 1390, "total_steps": 6770, "loss": 0.3552, "lr": 9.355345438866538e-05, "epoch": 0.41063515509601184, "percentage": 20.53, "elapsed_time": "1:58:24", "remaining_time": "7:38:17", "throughput": 2030.91, "total_tokens": 14428192} |
|
{"current_steps": 1395, "total_steps": 6770, "loss": 0.3194, "lr": 9.349334061305743e-05, "epoch": 0.4121122599704579, "percentage": 20.61, "elapsed_time": "1:59:50", "remaining_time": "7:41:44", "throughput": 2013.91, "total_tokens": 14480568} |
|
{"current_steps": 1400, "total_steps": 6770, "loss": 0.388, "lr": 9.343296735661557e-05, "epoch": 0.413589364844904, "percentage": 20.68, "elapsed_time": "2:01:17", "remaining_time": "7:45:12", "throughput": 1997.0, "total_tokens": 14532288} |
|
{"current_steps": 1400, "total_steps": 6770, "eval_loss": 0.38656601309776306, "epoch": 0.413589364844904, "percentage": 20.68, "elapsed_time": "2:01:36", "remaining_time": "7:46:26", "throughput": 1991.75, "total_tokens": 14532288} |
|
{"current_steps": 1405, "total_steps": 6770, "loss": 0.3751, "lr": 9.337233497952604e-05, "epoch": 0.4150664697193501, "percentage": 20.75, "elapsed_time": "2:03:08", "remaining_time": "7:50:12", "throughput": 1973.89, "total_tokens": 14583680} |
|
{"current_steps": 1410, "total_steps": 6770, "loss": 0.3431, "lr": 9.331144384352099e-05, "epoch": 0.41654357459379615, "percentage": 20.83, "elapsed_time": "2:04:35", "remaining_time": "7:53:37", "throughput": 1957.83, "total_tokens": 14635712} |
|
{"current_steps": 1415, "total_steps": 6770, "loss": 0.3786, "lr": 9.325029431187635e-05, "epoch": 0.41802067946824223, "percentage": 20.9, "elapsed_time": "2:06:01", "remaining_time": "7:56:56", "throughput": 1942.35, "total_tokens": 14687048} |
|
{"current_steps": 1420, "total_steps": 6770, "loss": 0.3427, "lr": 9.318888674940958e-05, "epoch": 0.4194977843426883, "percentage": 20.97, "elapsed_time": "2:07:29", "remaining_time": "8:00:19", "throughput": 1926.88, "total_tokens": 14739336} |
|
{"current_steps": 1425, "total_steps": 6770, "loss": 0.3307, "lr": 9.31272215224776e-05, "epoch": 0.42097488921713444, "percentage": 21.05, "elapsed_time": "2:08:55", "remaining_time": "8:03:35", "throughput": 1912.17, "total_tokens": 14791656} |
|
{"current_steps": 1430, "total_steps": 6770, "loss": 0.3509, "lr": 9.306529899897451e-05, "epoch": 0.4224519940915805, "percentage": 21.12, "elapsed_time": "2:10:23", "remaining_time": "8:06:53", "throughput": 1897.37, "total_tokens": 14843288} |
|
{"current_steps": 1435, "total_steps": 6770, "loss": 0.4168, "lr": 9.300311954832952e-05, "epoch": 0.4239290989660266, "percentage": 21.2, "elapsed_time": "2:11:48", "remaining_time": "8:10:03", "throughput": 1883.32, "total_tokens": 14895040} |
|
{"current_steps": 1440, "total_steps": 6770, "loss": 0.3203, "lr": 9.294068354150455e-05, "epoch": 0.4254062038404727, "percentage": 21.27, "elapsed_time": "2:13:16", "remaining_time": "8:13:17", "throughput": 1869.3, "total_tokens": 14947448} |
|
{"current_steps": 1445, "total_steps": 6770, "loss": 0.3217, "lr": 9.287799135099225e-05, "epoch": 0.42688330871491875, "percentage": 21.34, "elapsed_time": "2:14:43", "remaining_time": "8:16:29", "throughput": 1855.54, "total_tokens": 14999480} |
|
{"current_steps": 1450, "total_steps": 6770, "loss": 0.3131, "lr": 9.281504335081354e-05, "epoch": 0.42836041358936483, "percentage": 21.42, "elapsed_time": "2:16:12", "remaining_time": "8:19:43", "throughput": 1841.73, "total_tokens": 15050992} |
|
{"current_steps": 1450, "total_steps": 6770, "eval_loss": 0.4740215837955475, "epoch": 0.42836041358936483, "percentage": 21.42, "elapsed_time": "2:16:31", "remaining_time": "8:20:54", "throughput": 1837.38, "total_tokens": 15050992} |
|
{"current_steps": 1455, "total_steps": 6770, "loss": 0.3253, "lr": 9.275183991651558e-05, "epoch": 0.4298375184638109, "percentage": 21.49, "elapsed_time": "2:18:06", "remaining_time": "8:24:30", "throughput": 1822.61, "total_tokens": 15103328} |
|
{"current_steps": 1460, "total_steps": 6770, "loss": 0.3999, "lr": 9.268838142516943e-05, "epoch": 0.43131462333825704, "percentage": 21.57, "elapsed_time": "2:19:34", "remaining_time": "8:27:37", "throughput": 1809.62, "total_tokens": 15154640} |
|
{"current_steps": 1465, "total_steps": 6770, "loss": 0.3529, "lr": 9.262466825536782e-05, "epoch": 0.4327917282127031, "percentage": 21.64, "elapsed_time": "2:21:02", "remaining_time": "8:30:44", "throughput": 1796.88, "total_tokens": 15206264} |
|
{"current_steps": 1470, "total_steps": 6770, "loss": 0.3363, "lr": 9.256070078722287e-05, "epoch": 0.4342688330871492, "percentage": 21.71, "elapsed_time": "2:22:31", "remaining_time": "8:33:50", "throughput": 1784.35, "total_tokens": 15258160} |
|
{"current_steps": 1475, "total_steps": 6770, "loss": 0.4133, "lr": 9.249647940236385e-05, "epoch": 0.4357459379615953, "percentage": 21.79, "elapsed_time": "2:23:59", "remaining_time": "8:36:53", "throughput": 1772.04, "total_tokens": 15309224} |
|
{"current_steps": 1480, "total_steps": 6770, "loss": 0.3306, "lr": 9.243200448393492e-05, "epoch": 0.43722304283604135, "percentage": 21.86, "elapsed_time": "2:25:26", "remaining_time": "8:39:51", "throughput": 1760.33, "total_tokens": 15361480} |
|
{"current_steps": 1485, "total_steps": 6770, "loss": 0.2993, "lr": 9.236727641659277e-05, "epoch": 0.43870014771048743, "percentage": 21.94, "elapsed_time": "2:26:53", "remaining_time": "8:42:47", "throughput": 1748.94, "total_tokens": 15414680} |
|
{"current_steps": 1490, "total_steps": 6770, "loss": 0.3324, "lr": 9.230229558650442e-05, "epoch": 0.4401772525849335, "percentage": 22.01, "elapsed_time": "2:28:21", "remaining_time": "8:45:43", "throughput": 1737.53, "total_tokens": 15466552} |
|
{"current_steps": 1495, "total_steps": 6770, "loss": 0.2615, "lr": 9.223706238134485e-05, "epoch": 0.44165435745937964, "percentage": 22.08, "elapsed_time": "2:29:49", "remaining_time": "8:48:38", "throughput": 1726.41, "total_tokens": 15519472} |
|
{"current_steps": 1500, "total_steps": 6770, "loss": 0.2928, "lr": 9.217157719029469e-05, "epoch": 0.4431314623338257, "percentage": 22.16, "elapsed_time": "2:31:16", "remaining_time": "8:51:27", "throughput": 1715.71, "total_tokens": 15572048} |
|
{"current_steps": 1500, "total_steps": 6770, "eval_loss": 0.40494996309280396, "epoch": 0.4431314623338257, "percentage": 22.16, "elapsed_time": "2:31:35", "remaining_time": "8:52:35", "throughput": 1712.08, "total_tokens": 15572048} |
|
{"current_steps": 1505, "total_steps": 6770, "loss": 0.3622, "lr": 9.210584040403793e-05, "epoch": 0.4446085672082718, "percentage": 22.23, "elapsed_time": "2:33:08", "remaining_time": "8:55:43", "throughput": 1700.37, "total_tokens": 15623400} |
|
{"current_steps": 1510, "total_steps": 6770, "loss": 0.3192, "lr": 9.20398524147596e-05, "epoch": 0.4460856720827179, "percentage": 22.3, "elapsed_time": "2:34:35", "remaining_time": "8:58:31", "throughput": 1690.08, "total_tokens": 15676712} |
|
{"current_steps": 1515, "total_steps": 6770, "loss": 0.3463, "lr": 9.197361361614339e-05, "epoch": 0.44756277695716395, "percentage": 22.38, "elapsed_time": "2:36:03", "remaining_time": "9:01:17", "throughput": 1679.92, "total_tokens": 15729304} |
|
{"current_steps": 1520, "total_steps": 6770, "loss": 0.3675, "lr": 9.190712440336928e-05, "epoch": 0.44903988183161003, "percentage": 22.45, "elapsed_time": "2:37:29", "remaining_time": "9:03:57", "throughput": 1669.98, "total_tokens": 15780144} |
|
{"current_steps": 1525, "total_steps": 6770, "loss": 0.3305, "lr": 9.184038517311126e-05, "epoch": 0.4505169867060561, "percentage": 22.53, "elapsed_time": "2:38:58", "remaining_time": "9:06:45", "throughput": 1659.85, "total_tokens": 15832032} |
|
{"current_steps": 1530, "total_steps": 6770, "loss": 0.3817, "lr": 9.177339632353492e-05, "epoch": 0.4519940915805022, "percentage": 22.6, "elapsed_time": "2:40:25", "remaining_time": "9:09:24", "throughput": 1650.3, "total_tokens": 15884152} |
|
{"current_steps": 1535, "total_steps": 6770, "loss": 0.3742, "lr": 9.170615825429502e-05, "epoch": 0.4534711964549483, "percentage": 22.67, "elapsed_time": "2:41:52", "remaining_time": "9:12:03", "throughput": 1640.73, "total_tokens": 15935688} |
|
{"current_steps": 1540, "total_steps": 6770, "loss": 0.34, "lr": 9.163867136653327e-05, "epoch": 0.4549483013293944, "percentage": 22.75, "elapsed_time": "2:43:20", "remaining_time": "9:14:42", "throughput": 1631.38, "total_tokens": 15987800} |
|
{"current_steps": 1545, "total_steps": 6770, "loss": 0.3389, "lr": 9.157093606287572e-05, "epoch": 0.4564254062038405, "percentage": 22.82, "elapsed_time": "2:44:46", "remaining_time": "9:17:15", "throughput": 1622.32, "total_tokens": 16039472} |
|
{"current_steps": 1550, "total_steps": 6770, "loss": 0.3588, "lr": 9.150295274743053e-05, "epoch": 0.45790251107828656, "percentage": 22.9, "elapsed_time": "2:46:13", "remaining_time": "9:19:49", "throughput": 1613.43, "total_tokens": 16091960} |
|
{"current_steps": 1550, "total_steps": 6770, "eval_loss": 0.28714123368263245, "epoch": 0.45790251107828656, "percentage": 22.9, "elapsed_time": "2:46:32", "remaining_time": "9:20:53", "throughput": 1610.35, "total_tokens": 16091960} |
|
{"current_steps": 1555, "total_steps": 6770, "loss": 0.3501, "lr": 9.143472182578547e-05, "epoch": 0.45937961595273263, "percentage": 22.97, "elapsed_time": "2:48:04", "remaining_time": "9:23:39", "throughput": 1600.89, "total_tokens": 16143672} |
|
{"current_steps": 1560, "total_steps": 6770, "loss": 0.2684, "lr": 9.136624370500554e-05, "epoch": 0.4608567208271787, "percentage": 23.04, "elapsed_time": "2:49:30", "remaining_time": "9:26:07", "throughput": 1592.42, "total_tokens": 16195776} |
|
{"current_steps": 1565, "total_steps": 6770, "loss": 0.3294, "lr": 9.129751879363052e-05, "epoch": 0.4623338257016248, "percentage": 23.12, "elapsed_time": "2:50:56", "remaining_time": "9:28:32", "throughput": 1584.12, "total_tokens": 16247752} |
|
{"current_steps": 1570, "total_steps": 6770, "loss": 0.2906, "lr": 9.122854750167254e-05, "epoch": 0.4638109305760709, "percentage": 23.19, "elapsed_time": "2:52:23", "remaining_time": "9:30:57", "throughput": 1576.01, "total_tokens": 16300680} |
|
{"current_steps": 1575, "total_steps": 6770, "loss": 0.3498, "lr": 9.115933024061365e-05, "epoch": 0.465288035450517, "percentage": 23.26, "elapsed_time": "2:53:49", "remaining_time": "9:33:20", "throughput": 1567.86, "total_tokens": 16352000} |
|
{"current_steps": 1580, "total_steps": 6770, "loss": 0.3262, "lr": 9.108986742340331e-05, "epoch": 0.4667651403249631, "percentage": 23.34, "elapsed_time": "2:55:17", "remaining_time": "9:35:46", "throughput": 1559.74, "total_tokens": 16403784} |
|
{"current_steps": 1585, "total_steps": 6770, "loss": 0.318, "lr": 9.102015946445601e-05, "epoch": 0.46824224519940916, "percentage": 23.41, "elapsed_time": "2:56:43", "remaining_time": "9:38:07", "throughput": 1551.84, "total_tokens": 16455080} |
|
{"current_steps": 1590, "total_steps": 6770, "loss": 0.3257, "lr": 9.095020677964874e-05, "epoch": 0.46971935007385524, "percentage": 23.49, "elapsed_time": "2:58:10", "remaining_time": "9:40:28", "throughput": 1544.13, "total_tokens": 16507712} |
|
{"current_steps": 1595, "total_steps": 6770, "loss": 0.3253, "lr": 9.08800097863185e-05, "epoch": 0.4711964549483013, "percentage": 23.56, "elapsed_time": "2:59:37", "remaining_time": "9:42:48", "throughput": 1536.43, "total_tokens": 16559392} |
|
{"current_steps": 1600, "total_steps": 6770, "loss": 0.3879, "lr": 9.080956890325985e-05, "epoch": 0.4726735598227474, "percentage": 23.63, "elapsed_time": "3:01:05", "remaining_time": "9:45:08", "throughput": 1528.71, "total_tokens": 16609960} |
|
{"current_steps": 1600, "total_steps": 6770, "eval_loss": 0.3135533034801483, "epoch": 0.4726735598227474, "percentage": 23.63, "elapsed_time": "3:01:24", "remaining_time": "9:46:10", "throughput": 1526.0, "total_tokens": 16609960} |
|
{"current_steps": 1605, "total_steps": 6770, "loss": 0.3644, "lr": 9.07388845507224e-05, "epoch": 0.4741506646971935, "percentage": 23.71, "elapsed_time": "3:02:56", "remaining_time": "9:48:41", "throughput": 1517.98, "total_tokens": 16661440} |
|
{"current_steps": 1610, "total_steps": 6770, "loss": 0.2733, "lr": 9.066795715040825e-05, "epoch": 0.4756277695716396, "percentage": 23.78, "elapsed_time": "3:04:23", "remaining_time": "9:50:58", "throughput": 1510.72, "total_tokens": 16714200} |
|
{"current_steps": 1615, "total_steps": 6770, "loss": 0.3063, "lr": 9.059678712546963e-05, "epoch": 0.4771048744460857, "percentage": 23.86, "elapsed_time": "3:05:49", "remaining_time": "9:53:08", "throughput": 1503.82, "total_tokens": 16766904} |
|
{"current_steps": 1620, "total_steps": 6770, "loss": 0.3769, "lr": 9.052537490050614e-05, "epoch": 0.47858197932053176, "percentage": 23.93, "elapsed_time": "3:07:16", "remaining_time": "9:55:21", "throughput": 1496.71, "total_tokens": 16818168} |
|
{"current_steps": 1625, "total_steps": 6770, "loss": 0.3089, "lr": 9.045372090156243e-05, "epoch": 0.48005908419497784, "percentage": 24.0, "elapsed_time": "3:08:41", "remaining_time": "9:57:27", "throughput": 1490.02, "total_tokens": 16869952} |
|
{"current_steps": 1630, "total_steps": 6770, "loss": 0.2953, "lr": 9.038182555612551e-05, "epoch": 0.4815361890694239, "percentage": 24.08, "elapsed_time": "3:10:08", "remaining_time": "9:59:34", "throughput": 1483.38, "total_tokens": 16922608} |
|
{"current_steps": 1635, "total_steps": 6770, "loss": 0.3286, "lr": 9.030968929312231e-05, "epoch": 0.48301329394387, "percentage": 24.15, "elapsed_time": "3:11:34", "remaining_time": "10:01:39", "throughput": 1476.82, "total_tokens": 16974824} |
|
{"current_steps": 1640, "total_steps": 6770, "loss": 0.3552, "lr": 9.023731254291705e-05, "epoch": 0.4844903988183161, "percentage": 24.22, "elapsed_time": "3:13:00", "remaining_time": "10:03:44", "throughput": 1470.22, "total_tokens": 17026088} |
|
{"current_steps": 1645, "total_steps": 6770, "loss": 0.326, "lr": 9.016469573730869e-05, "epoch": 0.4859675036927622, "percentage": 24.3, "elapsed_time": "3:14:27", "remaining_time": "10:05:49", "throughput": 1463.76, "total_tokens": 17077904} |
|
{"current_steps": 1650, "total_steps": 6770, "loss": 0.2698, "lr": 9.009183930952836e-05, "epoch": 0.4874446085672083, "percentage": 24.37, "elapsed_time": "3:15:53", "remaining_time": "10:07:50", "throughput": 1457.55, "total_tokens": 17130896} |
|
{"current_steps": 1650, "total_steps": 6770, "eval_loss": 0.40201568603515625, "epoch": 0.4874446085672083, "percentage": 24.37, "elapsed_time": "3:16:12", "remaining_time": "10:08:51", "throughput": 1455.14, "total_tokens": 17130896} |
|
{"current_steps": 1655, "total_steps": 6770, "loss": 0.2956, "lr": 9.00187436942368e-05, "epoch": 0.48892171344165436, "percentage": 24.45, "elapsed_time": "3:17:46", "remaining_time": "10:11:15", "throughput": 1448.0, "total_tokens": 17182896} |
|
{"current_steps": 1660, "total_steps": 6770, "loss": 0.3027, "lr": 8.994540932752167e-05, "epoch": 0.49039881831610044, "percentage": 24.52, "elapsed_time": "3:19:14", "remaining_time": "10:13:19", "throughput": 1441.79, "total_tokens": 17235552} |
|
{"current_steps": 1665, "total_steps": 6770, "loss": 0.3295, "lr": 8.987183664689511e-05, "epoch": 0.4918759231905465, "percentage": 24.59, "elapsed_time": "3:20:43", "remaining_time": "10:15:25", "throughput": 1435.39, "total_tokens": 17286816} |
|
{"current_steps": 1670, "total_steps": 6770, "loss": 0.3201, "lr": 8.9798026091291e-05, "epoch": 0.4933530280649926, "percentage": 24.67, "elapsed_time": "3:22:11", "remaining_time": "10:17:28", "throughput": 1429.24, "total_tokens": 17339072} |
|
{"current_steps": 1675, "total_steps": 6770, "loss": 0.3044, "lr": 8.972397810106235e-05, "epoch": 0.4948301329394387, "percentage": 24.74, "elapsed_time": "3:23:40", "remaining_time": "10:19:31", "throughput": 1423.15, "total_tokens": 17391288} |
|
{"current_steps": 1680, "total_steps": 6770, "loss": 0.2781, "lr": 8.964969311797871e-05, "epoch": 0.4963072378138848, "percentage": 24.82, "elapsed_time": "3:25:07", "remaining_time": "10:21:28", "throughput": 1417.32, "total_tokens": 17443456} |
|
{"current_steps": 1685, "total_steps": 6770, "loss": 0.423, "lr": 8.957517158522359e-05, "epoch": 0.4977843426883309, "percentage": 24.89, "elapsed_time": "3:26:34", "remaining_time": "10:23:23", "throughput": 1411.54, "total_tokens": 17494832} |
|
{"current_steps": 1690, "total_steps": 6770, "loss": 0.2747, "lr": 8.950041394739168e-05, "epoch": 0.49926144756277696, "percentage": 24.96, "elapsed_time": "3:28:01", "remaining_time": "10:25:19", "throughput": 1405.84, "total_tokens": 17547384} |
|
{"current_steps": 1695, "total_steps": 6770, "loss": 0.3162, "lr": 8.942542065048632e-05, "epoch": 0.5007385524372231, "percentage": 25.04, "elapsed_time": "3:29:30", "remaining_time": "10:27:15", "throughput": 1400.09, "total_tokens": 17599120} |
|
{"current_steps": 1700, "total_steps": 6770, "loss": 0.3904, "lr": 8.935019214191672e-05, "epoch": 0.5022156573116692, "percentage": 25.11, "elapsed_time": "3:30:56", "remaining_time": "10:29:06", "throughput": 1394.6, "total_tokens": 17650984} |
|
{"current_steps": 1700, "total_steps": 6770, "eval_loss": 0.3297054171562195, "epoch": 0.5022156573116692, "percentage": 25.11, "elapsed_time": "3:31:15", "remaining_time": "10:30:04", "throughput": 1392.48, "total_tokens": 17650984} |
|
{"current_steps": 1655, "total_steps": 6770, "loss": 0.2956, "lr": 9.00187436942368e-05, "epoch": 0.48892171344165436, "percentage": 24.45, "elapsed_time": "0:02:46", "remaining_time": "0:08:35", "throughput": 103093.65, "total_tokens": 17182896} |
|
{"current_steps": 1660, "total_steps": 6770, "loss": 0.3027, "lr": 8.994540932752167e-05, "epoch": 0.49039881831610044, "percentage": 24.52, "elapsed_time": "0:04:14", "remaining_time": "0:13:02", "throughput": 67768.5, "total_tokens": 17235552} |
|
{"current_steps": 1665, "total_steps": 6770, "loss": 0.3295, "lr": 8.987183664689511e-05, "epoch": 0.4918759231905465, "percentage": 24.59, "elapsed_time": "0:05:40", "remaining_time": "0:17:24", "throughput": 50751.01, "total_tokens": 17286816} |
|
{"current_steps": 1670, "total_steps": 6770, "loss": 0.3201, "lr": 8.9798026091291e-05, "epoch": 0.4933530280649926, "percentage": 24.67, "elapsed_time": "0:07:08", "remaining_time": "0:21:47", "throughput": 40490.2, "total_tokens": 17339072} |
|
{"current_steps": 1675, "total_steps": 6770, "loss": 0.3044, "lr": 8.972397810106235e-05, "epoch": 0.4948301329394387, "percentage": 24.74, "elapsed_time": "0:08:34", "remaining_time": "0:26:04", "throughput": 33807.84, "total_tokens": 17391288} |
|
{"current_steps": 1680, "total_steps": 6770, "loss": 0.2781, "lr": 8.964969311797871e-05, "epoch": 0.4963072378138848, "percentage": 24.82, "elapsed_time": "0:10:01", "remaining_time": "0:30:21", "throughput": 29020.42, "total_tokens": 17443456} |
|
{"current_steps": 1685, "total_steps": 6770, "loss": 0.423, "lr": 8.957517158522359e-05, "epoch": 0.4977843426883309, "percentage": 24.89, "elapsed_time": "0:11:26", "remaining_time": "0:34:31", "throughput": 25485.2, "total_tokens": 17494832} |
|
{"current_steps": 1690, "total_steps": 6770, "loss": 0.2747, "lr": 8.950041394739168e-05, "epoch": 0.49926144756277696, "percentage": 24.96, "elapsed_time": "0:12:53", "remaining_time": "0:38:45", "throughput": 22678.11, "total_tokens": 17547384} |
|
{"current_steps": 1695, "total_steps": 6770, "loss": 0.3162, "lr": 8.942542065048632e-05, "epoch": 0.5007385524372231, "percentage": 25.04, "elapsed_time": "0:14:20", "remaining_time": "0:42:55", "throughput": 20457.93, "total_tokens": 17599120} |
|
{"current_steps": 1700, "total_steps": 6770, "loss": 0.3904, "lr": 8.935019214191672e-05, "epoch": 0.5022156573116692, "percentage": 25.11, "elapsed_time": "0:15:49", "remaining_time": "0:47:11", "throughput": 18591.57, "total_tokens": 17650984} |
|
{"current_steps": 1700, "total_steps": 6770, "eval_loss": 0.3297054171562195, "epoch": 0.5022156573116692, "percentage": 25.11, "elapsed_time": "0:16:37", "remaining_time": "0:49:34", "throughput": 17697.21, "total_tokens": 17650984} |
|
{"current_steps": 1705, "total_steps": 6770, "loss": 0.3484, "lr": 8.927472887049545e-05, "epoch": 0.5036927621861153, "percentage": 25.18, "elapsed_time": "0:18:13", "remaining_time": "0:54:08", "throughput": 16191.1, "total_tokens": 17702864} |
|
{"current_steps": 1710, "total_steps": 6770, "loss": 0.3214, "lr": 8.919903128643563e-05, "epoch": 0.5051698670605613, "percentage": 25.26, "elapsed_time": "0:19:41", "remaining_time": "0:58:14", "throughput": 15033.64, "total_tokens": 17754792} |
|
{"current_steps": 1715, "total_steps": 6770, "loss": 0.3428, "lr": 8.912309984134825e-05, "epoch": 0.5066469719350074, "percentage": 25.33, "elapsed_time": "0:21:08", "remaining_time": "1:02:17", "throughput": 14040.8, "total_tokens": 17805608} |
|
{"current_steps": 1720, "total_steps": 6770, "loss": 0.3186, "lr": 8.90469349882396e-05, "epoch": 0.5081240768094535, "percentage": 25.41, "elapsed_time": "0:22:36", "remaining_time": "1:06:22", "throughput": 13163.53, "total_tokens": 17857304} |
|
{"current_steps": 1725, "total_steps": 6770, "loss": 0.2549, "lr": 8.897053718150838e-05, "epoch": 0.5096011816838996, "percentage": 25.48, "elapsed_time": "0:24:03", "remaining_time": "1:10:22", "throughput": 12403.33, "total_tokens": 17909528} |
|
{"current_steps": 1730, "total_steps": 6770, "loss": 0.3248, "lr": 8.889390687694317e-05, "epoch": 0.5110782865583456, "percentage": 25.55, "elapsed_time": "0:25:31", "remaining_time": "1:14:21", "throughput": 11727.68, "total_tokens": 17960976} |
|
{"current_steps": 1735, "total_steps": 6770, "loss": 0.3234, "lr": 8.88170445317196e-05, "epoch": 0.5125553914327917, "percentage": 25.63, "elapsed_time": "0:26:57", "remaining_time": "1:18:13", "throughput": 11138.06, "total_tokens": 18013008} |
|
{"current_steps": 1740, "total_steps": 6770, "loss": 0.2883, "lr": 8.873995060439764e-05, "epoch": 0.5140324963072378, "percentage": 25.7, "elapsed_time": "0:28:24", "remaining_time": "1:22:06", "throughput": 10599.75, "total_tokens": 18065200} |
|
{"current_steps": 1745, "total_steps": 6770, "loss": 0.2691, "lr": 8.86626255549189e-05, "epoch": 0.5155096011816839, "percentage": 25.78, "elapsed_time": "0:29:50", "remaining_time": "1:25:54", "throughput": 10120.67, "total_tokens": 18116880} |
|
{"current_steps": 1750, "total_steps": 6770, "loss": 0.3173, "lr": 8.858506984460383e-05, "epoch": 0.51698670605613, "percentage": 25.85, "elapsed_time": "0:31:17", "remaining_time": "1:29:44", "throughput": 9679.72, "total_tokens": 18169344} |
|
{"current_steps": 1750, "total_steps": 6770, "eval_loss": 0.44908422231674194, "epoch": 0.51698670605613, "percentage": 25.85, "elapsed_time": "0:31:36", "remaining_time": "1:30:40", "throughput": 9580.54, "total_tokens": 18169344} |
|
{"current_steps": 1755, "total_steps": 6770, "loss": 0.2966, "lr": 8.850728393614902e-05, "epoch": 0.518463810930576, "percentage": 25.92, "elapsed_time": "0:33:07", "remaining_time": "1:34:40", "throughput": 9166.33, "total_tokens": 18221144} |
|
{"current_steps": 1760, "total_steps": 6770, "loss": 0.3101, "lr": 8.842926829362446e-05, "epoch": 0.5199409158050221, "percentage": 26.0, "elapsed_time": "0:34:34", "remaining_time": "1:38:24", "throughput": 8808.93, "total_tokens": 18272752} |
|
{"current_steps": 1765, "total_steps": 6770, "loss": 0.2545, "lr": 8.835102338247064e-05, "epoch": 0.5214180206794683, "percentage": 26.07, "elapsed_time": "0:36:00", "remaining_time": "1:42:07", "throughput": 8481.26, "total_tokens": 18325888} |
|
{"current_steps": 1770, "total_steps": 6770, "loss": 0.3223, "lr": 8.827254966949593e-05, "epoch": 0.5228951255539144, "percentage": 26.14, "elapsed_time": "0:37:27", "remaining_time": "1:45:48", "throughput": 8177.92, "total_tokens": 18378016} |
|
{"current_steps": 1775, "total_steps": 6770, "loss": 0.2714, "lr": 8.819384762287373e-05, "epoch": 0.5243722304283605, "percentage": 26.22, "elapsed_time": "0:38:55", "remaining_time": "1:49:31", "throughput": 7892.96, "total_tokens": 18431240} |
|
{"current_steps": 1780, "total_steps": 6770, "loss": 0.3438, "lr": 8.811491771213964e-05, "epoch": 0.5258493353028065, "percentage": 26.29, "elapsed_time": "0:40:21", "remaining_time": "1:53:07", "throughput": 7633.32, "total_tokens": 18482832} |
|
{"current_steps": 1785, "total_steps": 6770, "loss": 0.2324, "lr": 8.803576040818873e-05, "epoch": 0.5273264401772526, "percentage": 26.37, "elapsed_time": "0:41:49", "remaining_time": "1:56:48", "throughput": 7385.4, "total_tokens": 18534992} |
|
{"current_steps": 1790, "total_steps": 6770, "loss": 0.2259, "lr": 8.795637618327269e-05, "epoch": 0.5288035450516987, "percentage": 26.44, "elapsed_time": "0:43:17", "remaining_time": "2:00:25", "throughput": 7156.95, "total_tokens": 18587752} |
|
{"current_steps": 1795, "total_steps": 6770, "loss": 0.2468, "lr": 8.7876765510997e-05, "epoch": 0.5302806499261448, "percentage": 26.51, "elapsed_time": "0:44:46", "remaining_time": "2:04:05", "throughput": 6939.35, "total_tokens": 18640440} |
|
{"current_steps": 1800, "total_steps": 6770, "loss": 0.3127, "lr": 8.779692886631812e-05, "epoch": 0.5317577548005908, "percentage": 26.59, "elapsed_time": "0:46:12", "remaining_time": "2:07:35", "throughput": 6741.35, "total_tokens": 18691928} |
|
{"current_steps": 1800, "total_steps": 6770, "eval_loss": 0.3499237596988678, "epoch": 0.5317577548005908, "percentage": 26.59, "elapsed_time": "0:46:32", "remaining_time": "2:08:29", "throughput": 6694.66, "total_tokens": 18691928} |
|
{"current_steps": 1805, "total_steps": 6770, "loss": 0.3145, "lr": 8.771686672554067e-05, "epoch": 0.5332348596750369, "percentage": 26.66, "elapsed_time": "0:48:05", "remaining_time": "2:12:16", "throughput": 6495.96, "total_tokens": 18743600} |
|
{"current_steps": 1810, "total_steps": 6770, "loss": 0.3207, "lr": 8.763657956631462e-05, "epoch": 0.534711964549483, "percentage": 26.74, "elapsed_time": "0:49:30", "remaining_time": "2:15:41", "throughput": 6326.14, "total_tokens": 18794920} |
|
{"current_steps": 1815, "total_steps": 6770, "loss": 0.2463, "lr": 8.75560678676323e-05, "epoch": 0.5361890694239291, "percentage": 26.81, "elapsed_time": "0:50:57", "remaining_time": "2:19:08", "throughput": 6163.54, "total_tokens": 18847600} |
|
{"current_steps": 1820, "total_steps": 6770, "loss": 0.3199, "lr": 8.747533210982575e-05, "epoch": 0.5376661742983752, "percentage": 26.88, "elapsed_time": "0:52:24", "remaining_time": "2:22:33", "throughput": 6009.55, "total_tokens": 18899936} |
|
{"current_steps": 1825, "total_steps": 6770, "loss": 0.282, "lr": 8.739437277456366e-05, "epoch": 0.5391432791728212, "percentage": 26.96, "elapsed_time": "0:53:53", "remaining_time": "2:26:00", "throughput": 5861.93, "total_tokens": 18952336} |
|
{"current_steps": 1830, "total_steps": 6770, "loss": 0.2846, "lr": 8.731319034484862e-05, "epoch": 0.5406203840472673, "percentage": 27.03, "elapsed_time": "0:55:19", "remaining_time": "2:29:20", "throughput": 5725.37, "total_tokens": 19004528} |
|
{"current_steps": 1835, "total_steps": 6770, "loss": 0.3537, "lr": 8.723178530501417e-05, "epoch": 0.5420974889217134, "percentage": 27.1, "elapsed_time": "0:56:46", "remaining_time": "2:32:41", "throughput": 5593.92, "total_tokens": 19056296} |
|
{"current_steps": 1840, "total_steps": 6770, "loss": 0.3289, "lr": 8.7150158140722e-05, "epoch": 0.5435745937961596, "percentage": 27.18, "elapsed_time": "0:58:12", "remaining_time": "2:35:58", "throughput": 5470.76, "total_tokens": 19108832} |
|
{"current_steps": 1845, "total_steps": 6770, "loss": 0.3094, "lr": 8.706830933895894e-05, "epoch": 0.5450516986706057, "percentage": 27.25, "elapsed_time": "0:59:40", "remaining_time": "2:39:16", "throughput": 5352.15, "total_tokens": 19160960} |
|
{"current_steps": 1850, "total_steps": 6770, "loss": 0.2828, "lr": 8.698623938803409e-05, "epoch": 0.5465288035450517, "percentage": 27.33, "elapsed_time": "1:01:07", "remaining_time": "2:42:32", "throughput": 5239.16, "total_tokens": 19212992} |
|
{"current_steps": 1850, "total_steps": 6770, "eval_loss": 0.3780718147754669, "epoch": 0.5465288035450517, "percentage": 27.33, "elapsed_time": "1:01:26", "remaining_time": "2:43:24", "throughput": 5211.69, "total_tokens": 19212992} |
|
{"current_steps": 1855, "total_steps": 6770, "loss": 0.2084, "lr": 8.6903948777576e-05, "epoch": 0.5480059084194978, "percentage": 27.4, "elapsed_time": "1:03:00", "remaining_time": "2:46:57", "throughput": 5096.02, "total_tokens": 19266312} |
|
{"current_steps": 1860, "total_steps": 6770, "loss": 0.3262, "lr": 8.68214379985296e-05, "epoch": 0.5494830132939439, "percentage": 27.47, "elapsed_time": "1:04:27", "remaining_time": "2:50:09", "throughput": 4994.96, "total_tokens": 19318888} |
|
{"current_steps": 1865, "total_steps": 6770, "loss": 0.3074, "lr": 8.673870754315336e-05, "epoch": 0.55096011816839, "percentage": 27.55, "elapsed_time": "1:05:55", "remaining_time": "2:53:22", "throughput": 4897.63, "total_tokens": 19371352} |
|
{"current_steps": 1870, "total_steps": 6770, "loss": 0.2795, "lr": 8.665575790501639e-05, "epoch": 0.552437223042836, "percentage": 27.62, "elapsed_time": "1:07:22", "remaining_time": "2:56:33", "throughput": 4804.54, "total_tokens": 19424160} |
|
{"current_steps": 1875, "total_steps": 6770, "loss": 0.2396, "lr": 8.657258957899535e-05, "epoch": 0.5539143279172821, "percentage": 27.7, "elapsed_time": "1:08:49", "remaining_time": "2:59:41", "throughput": 4716.35, "total_tokens": 19477720} |
|
{"current_steps": 1880, "total_steps": 6770, "loss": 0.302, "lr": 8.648920306127169e-05, "epoch": 0.5553914327917282, "percentage": 27.77, "elapsed_time": "1:10:17", "remaining_time": "3:02:48", "throughput": 4630.85, "total_tokens": 19528512} |
|
{"current_steps": 1885, "total_steps": 6770, "loss": 0.3457, "lr": 8.640559884932848e-05, "epoch": 0.5568685376661743, "percentage": 27.84, "elapsed_time": "1:11:43", "remaining_time": "3:05:52", "throughput": 4549.89, "total_tokens": 19579624} |
|
{"current_steps": 1890, "total_steps": 6770, "loss": 0.3194, "lr": 8.632177744194765e-05, "epoch": 0.5583456425406204, "percentage": 27.92, "elapsed_time": "1:13:11", "remaining_time": "3:08:58", "throughput": 4470.4, "total_tokens": 19631432} |
|
{"current_steps": 1895, "total_steps": 6770, "loss": 0.3135, "lr": 8.623773933920688e-05, "epoch": 0.5598227474150664, "percentage": 27.99, "elapsed_time": "1:14:36", "remaining_time": "3:11:56", "throughput": 4396.85, "total_tokens": 19682792} |
|
{"current_steps": 1900, "total_steps": 6770, "loss": 0.306, "lr": 8.615348504247663e-05, "epoch": 0.5612998522895125, "percentage": 28.06, "elapsed_time": "1:16:03", "remaining_time": "3:14:57", "throughput": 4324.57, "total_tokens": 19735976} |
|
{"current_steps": 1900, "total_steps": 6770, "eval_loss": 0.37662214040756226, "epoch": 0.5612998522895125, "percentage": 28.06, "elapsed_time": "1:16:23", "remaining_time": "3:15:47", "throughput": 4306.27, "total_tokens": 19735976} |
|
{"current_steps": 1905, "total_steps": 6770, "loss": 0.3128, "lr": 8.606901505441718e-05, "epoch": 0.5627769571639586, "percentage": 28.14, "elapsed_time": "1:17:55", "remaining_time": "3:18:59", "throughput": 4232.45, "total_tokens": 19787504} |
|
{"current_steps": 1910, "total_steps": 6770, "loss": 0.2711, "lr": 8.598432987897565e-05, "epoch": 0.5642540620384048, "percentage": 28.21, "elapsed_time": "1:19:22", "remaining_time": "3:21:59", "throughput": 4165.29, "total_tokens": 19839104} |
|
{"current_steps": 1915, "total_steps": 6770, "loss": 0.3735, "lr": 8.589943002138295e-05, "epoch": 0.5657311669128509, "percentage": 28.29, "elapsed_time": "1:20:49", "remaining_time": "3:24:54", "throughput": 4101.88, "total_tokens": 19891064} |
|
{"current_steps": 1920, "total_steps": 6770, "loss": 0.29, "lr": 8.581431598815077e-05, "epoch": 0.5672082717872969, "percentage": 28.36, "elapsed_time": "1:22:17", "remaining_time": "3:27:52", "throughput": 4039.04, "total_tokens": 19942368} |
|
{"current_steps": 1925, "total_steps": 6770, "loss": 0.2977, "lr": 8.572898828706857e-05, "epoch": 0.568685376661743, "percentage": 28.43, "elapsed_time": "1:23:43", "remaining_time": "3:30:43", "throughput": 3980.4, "total_tokens": 19994816} |
|
{"current_steps": 1930, "total_steps": 6770, "loss": 0.2483, "lr": 8.564344742720059e-05, "epoch": 0.5701624815361891, "percentage": 28.51, "elapsed_time": "1:25:11", "remaining_time": "3:33:37", "throughput": 3922.11, "total_tokens": 20046192} |
|
{"current_steps": 1935, "total_steps": 6770, "loss": 0.3126, "lr": 8.55576939188827e-05, "epoch": 0.5716395864106352, "percentage": 28.58, "elapsed_time": "1:26:37", "remaining_time": "3:36:27", "throughput": 3866.64, "total_tokens": 20097328} |
|
{"current_steps": 1940, "total_steps": 6770, "loss": 0.2688, "lr": 8.54717282737195e-05, "epoch": 0.5731166912850812, "percentage": 28.66, "elapsed_time": "1:28:05", "remaining_time": "3:39:19", "throughput": 3811.99, "total_tokens": 20149392} |
|
{"current_steps": 1945, "total_steps": 6770, "loss": 0.2491, "lr": 8.538555100458114e-05, "epoch": 0.5745937961595273, "percentage": 28.73, "elapsed_time": "1:29:33", "remaining_time": "3:42:10", "throughput": 3759.39, "total_tokens": 20201392} |
|
{"current_steps": 1950, "total_steps": 6770, "loss": 0.2992, "lr": 8.529916262560038e-05, "epoch": 0.5760709010339734, "percentage": 28.8, "elapsed_time": "1:31:01", "remaining_time": "3:45:00", "throughput": 3708.28, "total_tokens": 20253288} |
|
{"current_steps": 1950, "total_steps": 6770, "eval_loss": 0.3468088209629059, "epoch": 0.5760709010339734, "percentage": 28.8, "elapsed_time": "1:31:21", "remaining_time": "3:45:47", "throughput": 3695.16, "total_tokens": 20253288} |
|
{"current_steps": 1955, "total_steps": 6770, "loss": 0.2505, "lr": 8.521256365216941e-05, "epoch": 0.5775480059084195, "percentage": 28.88, "elapsed_time": "1:32:54", "remaining_time": "3:48:50", "throughput": 3642.42, "total_tokens": 20305536} |
|
{"current_steps": 1960, "total_steps": 6770, "loss": 0.2487, "lr": 8.512575460093683e-05, "epoch": 0.5790251107828656, "percentage": 28.95, "elapsed_time": "1:34:21", "remaining_time": "3:51:34", "throughput": 3595.67, "total_tokens": 20357912} |
|
{"current_steps": 1965, "total_steps": 6770, "loss": 0.3441, "lr": 8.503873598980456e-05, "epoch": 0.5805022156573116, "percentage": 29.03, "elapsed_time": "1:35:49", "remaining_time": "3:54:18", "throughput": 3549.92, "total_tokens": 20409624} |
|
{"current_steps": 1970, "total_steps": 6770, "loss": 0.2973, "lr": 8.495150833792478e-05, "epoch": 0.5819793205317577, "percentage": 29.1, "elapsed_time": "1:37:15", "remaining_time": "3:56:59", "throughput": 3506.11, "total_tokens": 20461080} |
|
{"current_steps": 1975, "total_steps": 6770, "loss": 0.316, "lr": 8.486407216569678e-05, "epoch": 0.5834564254062038, "percentage": 29.17, "elapsed_time": "1:38:42", "remaining_time": "3:59:39", "throughput": 3463.28, "total_tokens": 20512000} |
|
{"current_steps": 1980, "total_steps": 6770, "loss": 0.338, "lr": 8.477642799476387e-05, "epoch": 0.5849335302806499, "percentage": 29.25, "elapsed_time": "1:40:09", "remaining_time": "4:02:17", "throughput": 3422.09, "total_tokens": 20563824} |
|
{"current_steps": 1985, "total_steps": 6770, "loss": 0.2472, "lr": 8.468857634801033e-05, "epoch": 0.5864106351550961, "percentage": 29.32, "elapsed_time": "1:41:37", "remaining_time": "4:04:58", "throughput": 3381.1, "total_tokens": 20615944} |
|
{"current_steps": 1990, "total_steps": 6770, "loss": 0.3045, "lr": 8.460051774955818e-05, "epoch": 0.5878877400295421, "percentage": 29.39, "elapsed_time": "1:43:03", "remaining_time": "4:07:32", "throughput": 3342.24, "total_tokens": 20666720} |
|
{"current_steps": 1995, "total_steps": 6770, "loss": 0.2606, "lr": 8.451225272476412e-05, "epoch": 0.5893648449039882, "percentage": 29.47, "elapsed_time": "1:44:30", "remaining_time": "4:10:08", "throughput": 3304.05, "total_tokens": 20718504} |
|
{"current_steps": 2000, "total_steps": 6770, "loss": 0.2341, "lr": 8.442378180021644e-05, "epoch": 0.5908419497784343, "percentage": 29.54, "elapsed_time": "1:45:56", "remaining_time": "4:12:40", "throughput": 3267.68, "total_tokens": 20770728} |
|
{"current_steps": 2000, "total_steps": 6770, "eval_loss": 0.3366144299507141, "epoch": 0.5908419497784343, "percentage": 29.54, "elapsed_time": "1:46:15", "remaining_time": "4:13:25", "throughput": 3257.82, "total_tokens": 20770728} |
|
{"current_steps": 2005, "total_steps": 6770, "loss": 0.2844, "lr": 8.433510550373175e-05, "epoch": 0.5923190546528804, "percentage": 29.62, "elapsed_time": "1:47:49", "remaining_time": "4:16:15", "throughput": 3218.57, "total_tokens": 20823136} |
|
{"current_steps": 2010, "total_steps": 6770, "loss": 0.2786, "lr": 8.424622436435199e-05, "epoch": 0.5937961595273265, "percentage": 29.69, "elapsed_time": "1:49:16", "remaining_time": "4:18:45", "throughput": 3184.08, "total_tokens": 20875080} |
|
{"current_steps": 2015, "total_steps": 6770, "loss": 0.2647, "lr": 8.41571389123411e-05, "epoch": 0.5952732644017725, "percentage": 29.76, "elapsed_time": "1:50:44", "remaining_time": "4:21:19", "throughput": 3149.72, "total_tokens": 20927584} |
|
{"current_steps": 2020, "total_steps": 6770, "loss": 0.2673, "lr": 8.406784967918203e-05, "epoch": 0.5967503692762186, "percentage": 29.84, "elapsed_time": "1:52:10", "remaining_time": "4:23:47", "throughput": 3117.05, "total_tokens": 20980640} |
|
{"current_steps": 2025, "total_steps": 6770, "loss": 0.2973, "lr": 8.397835719757343e-05, "epoch": 0.5982274741506647, "percentage": 29.91, "elapsed_time": "1:53:39", "remaining_time": "4:26:19", "throughput": 3084.36, "total_tokens": 21033272} |
|
{"current_steps": 2030, "total_steps": 6770, "loss": 0.302, "lr": 8.388866200142656e-05, "epoch": 0.5997045790251108, "percentage": 29.99, "elapsed_time": "1:55:06", "remaining_time": "4:28:46", "throughput": 3053.01, "total_tokens": 21086032} |
|
{"current_steps": 2035, "total_steps": 6770, "loss": 0.2758, "lr": 8.379876462586203e-05, "epoch": 0.6011816838995568, "percentage": 30.06, "elapsed_time": "1:56:34", "remaining_time": "4:31:14", "throughput": 3022.08, "total_tokens": 21137800} |
|
{"current_steps": 2040, "total_steps": 6770, "loss": 0.2687, "lr": 8.370866560720671e-05, "epoch": 0.6026587887740029, "percentage": 30.13, "elapsed_time": "1:58:01", "remaining_time": "4:33:39", "throughput": 2992.14, "total_tokens": 21189264} |
|
{"current_steps": 2045, "total_steps": 6770, "loss": 0.295, "lr": 8.361836548299045e-05, "epoch": 0.604135893648449, "percentage": 30.21, "elapsed_time": "1:59:28", "remaining_time": "4:36:03", "throughput": 2962.91, "total_tokens": 21240184} |
|
{"current_steps": 2050, "total_steps": 6770, "loss": 0.2931, "lr": 8.352786479194288e-05, "epoch": 0.6056129985228951, "percentage": 30.28, "elapsed_time": "2:00:56", "remaining_time": "4:38:27", "throughput": 2934.23, "total_tokens": 21291664} |
|
{"current_steps": 2050, "total_steps": 6770, "eval_loss": 0.33863261342048645, "epoch": 0.6056129985228951, "percentage": 30.28, "elapsed_time": "2:01:15", "remaining_time": "4:39:11", "throughput": 2926.43, "total_tokens": 21291664} |
|
{"current_steps": 2055, "total_steps": 6770, "loss": 0.2338, "lr": 8.343716407399019e-05, "epoch": 0.6070901033973413, "percentage": 30.35, "elapsed_time": "2:02:48", "remaining_time": "4:41:46", "throughput": 2896.61, "total_tokens": 21344232} |
|
{"current_steps": 2060, "total_steps": 6770, "loss": 0.3026, "lr": 8.334626387025197e-05, "epoch": 0.6085672082717873, "percentage": 30.43, "elapsed_time": "2:04:16", "remaining_time": "4:44:09", "throughput": 2869.36, "total_tokens": 21396160} |
|
{"current_steps": 2065, "total_steps": 6770, "loss": 0.2898, "lr": 8.325516472303792e-05, "epoch": 0.6100443131462334, "percentage": 30.5, "elapsed_time": "2:05:45", "remaining_time": "4:46:31", "throughput": 2842.58, "total_tokens": 21448032} |
|
{"current_steps": 2070, "total_steps": 6770, "loss": 0.3265, "lr": 8.316386717584463e-05, "epoch": 0.6115214180206795, "percentage": 30.58, "elapsed_time": "2:07:13", "remaining_time": "4:48:52", "throughput": 2816.35, "total_tokens": 21499144} |
|
{"current_steps": 2075, "total_steps": 6770, "loss": 0.2513, "lr": 8.307237177335239e-05, "epoch": 0.6129985228951256, "percentage": 30.65, "elapsed_time": "2:08:40", "remaining_time": "4:51:09", "throughput": 2791.39, "total_tokens": 21551328} |
|
{"current_steps": 2080, "total_steps": 6770, "loss": 0.2864, "lr": 8.298067906142182e-05, "epoch": 0.6144756277695717, "percentage": 30.72, "elapsed_time": "2:10:08", "remaining_time": "4:53:26", "throughput": 2766.8, "total_tokens": 21603800} |
|
{"current_steps": 2085, "total_steps": 6770, "loss": 0.243, "lr": 8.288878958709072e-05, "epoch": 0.6159527326440177, "percentage": 30.8, "elapsed_time": "2:11:35", "remaining_time": "4:55:40", "throughput": 2742.95, "total_tokens": 21656480} |
|
{"current_steps": 2090, "total_steps": 6770, "loss": 0.2711, "lr": 8.279670389857079e-05, "epoch": 0.6174298375184638, "percentage": 30.87, "elapsed_time": "2:13:02", "remaining_time": "4:57:55", "throughput": 2719.43, "total_tokens": 21708824} |
|
{"current_steps": 2095, "total_steps": 6770, "loss": 0.2475, "lr": 8.27044225452443e-05, "epoch": 0.6189069423929099, "percentage": 30.95, "elapsed_time": "2:14:29", "remaining_time": "5:00:06", "throughput": 2696.8, "total_tokens": 21760744} |
|
{"current_steps": 2100, "total_steps": 6770, "loss": 0.1826, "lr": 8.26119460776609e-05, "epoch": 0.620384047267356, "percentage": 31.02, "elapsed_time": "2:15:56", "remaining_time": "5:02:19", "throughput": 2674.27, "total_tokens": 21813984} |
|
{"current_steps": 2100, "total_steps": 6770, "eval_loss": 0.5386325716972351, "epoch": 0.620384047267356, "percentage": 31.02, "elapsed_time": "2:16:16", "remaining_time": "5:03:02", "throughput": 2667.96, "total_tokens": 21813984} |
|
{"current_steps": 2105, "total_steps": 6770, "loss": 0.3018, "lr": 8.251927504753426e-05, "epoch": 0.621861152141802, "percentage": 31.09, "elapsed_time": "2:17:48", "remaining_time": "5:05:23", "throughput": 2644.46, "total_tokens": 21865304} |
|
{"current_steps": 2110, "total_steps": 6770, "loss": 0.2725, "lr": 8.24264100077388e-05, "epoch": 0.6233382570162481, "percentage": 31.17, "elapsed_time": "2:19:15", "remaining_time": "5:07:33", "throughput": 2623.29, "total_tokens": 21918568} |
|
{"current_steps": 2115, "total_steps": 6770, "loss": 0.3103, "lr": 8.233335151230646e-05, "epoch": 0.6248153618906942, "percentage": 31.24, "elapsed_time": "2:20:41", "remaining_time": "5:09:39", "throughput": 2602.66, "total_tokens": 21970352} |
|
{"current_steps": 2120, "total_steps": 6770, "loss": 0.2868, "lr": 8.224010011642326e-05, "epoch": 0.6262924667651403, "percentage": 31.31, "elapsed_time": "2:22:08", "remaining_time": "5:11:46", "throughput": 2582.09, "total_tokens": 22021312} |
|
{"current_steps": 2125, "total_steps": 6770, "loss": 0.335, "lr": 8.21466563764261e-05, "epoch": 0.6277695716395865, "percentage": 31.39, "elapsed_time": "2:23:33", "remaining_time": "5:13:49", "throughput": 2562.52, "total_tokens": 22073496} |
|
{"current_steps": 2130, "total_steps": 6770, "loss": 0.3764, "lr": 8.205302084979937e-05, "epoch": 0.6292466765140325, "percentage": 31.46, "elapsed_time": "2:25:01", "remaining_time": "5:15:54", "throughput": 2542.68, "total_tokens": 22124088} |
|
{"current_steps": 2135, "total_steps": 6770, "loss": 0.2681, "lr": 8.19591940951717e-05, "epoch": 0.6307237813884786, "percentage": 31.54, "elapsed_time": "2:26:27", "remaining_time": "5:17:56", "throughput": 2523.68, "total_tokens": 22175824} |
|
{"current_steps": 2140, "total_steps": 6770, "loss": 0.2619, "lr": 8.186517667231259e-05, "epoch": 0.6322008862629247, "percentage": 31.61, "elapsed_time": "2:27:53", "remaining_time": "5:19:57", "throughput": 2504.99, "total_tokens": 22227376} |
|
{"current_steps": 2145, "total_steps": 6770, "loss": 0.2516, "lr": 8.1770969142129e-05, "epoch": 0.6336779911373708, "percentage": 31.68, "elapsed_time": "2:29:19", "remaining_time": "5:21:59", "throughput": 2486.61, "total_tokens": 22279928} |
|
{"current_steps": 2150, "total_steps": 6770, "loss": 0.2387, "lr": 8.167657206666217e-05, "epoch": 0.6351550960118169, "percentage": 31.76, "elapsed_time": "2:30:47", "remaining_time": "5:24:00", "throughput": 2468.41, "total_tokens": 22332144} |
|
{"current_steps": 2150, "total_steps": 6770, "eval_loss": 0.25809118151664734, "epoch": 0.6351550960118169, "percentage": 31.76, "elapsed_time": "2:31:06", "remaining_time": "5:24:42", "throughput": 2463.15, "total_tokens": 22332144} |
|
{"current_steps": 2155, "total_steps": 6770, "loss": 0.2606, "lr": 8.158198600908405e-05, "epoch": 0.6366322008862629, "percentage": 31.83, "elapsed_time": "2:32:37", "remaining_time": "5:26:52", "throughput": 2444.2, "total_tokens": 22383912} |
|
{"current_steps": 2160, "total_steps": 6770, "loss": 0.2672, "lr": 8.148721153369411e-05, "epoch": 0.638109305760709, "percentage": 31.91, "elapsed_time": "2:34:03", "remaining_time": "5:28:48", "throughput": 2427.1, "total_tokens": 22435504} |
|
{"current_steps": 2165, "total_steps": 6770, "loss": 0.2771, "lr": 8.139224920591598e-05, "epoch": 0.6395864106351551, "percentage": 31.98, "elapsed_time": "2:35:31", "remaining_time": "5:30:48", "throughput": 2409.87, "total_tokens": 22487696} |
|
{"current_steps": 2170, "total_steps": 6770, "loss": 0.3018, "lr": 8.129709959229388e-05, "epoch": 0.6410635155096012, "percentage": 32.05, "elapsed_time": "2:36:59", "remaining_time": "5:32:47", "throughput": 2392.93, "total_tokens": 22539664} |
|
{"current_steps": 2175, "total_steps": 6770, "loss": 0.312, "lr": 8.120176326048949e-05, "epoch": 0.6425406203840472, "percentage": 32.13, "elapsed_time": "2:38:26", "remaining_time": "5:34:44", "throughput": 2376.46, "total_tokens": 22592240} |
|
{"current_steps": 2180, "total_steps": 6770, "loss": 0.2413, "lr": 8.110624077927842e-05, "epoch": 0.6440177252584933, "percentage": 32.2, "elapsed_time": "2:39:53", "remaining_time": "5:36:40", "throughput": 2360.19, "total_tokens": 22643648} |
|
{"current_steps": 2185, "total_steps": 6770, "loss": 0.2585, "lr": 8.101053271854682e-05, "epoch": 0.6454948301329394, "percentage": 32.27, "elapsed_time": "2:41:21", "remaining_time": "5:38:36", "throughput": 2344.07, "total_tokens": 22695208} |
|
{"current_steps": 2190, "total_steps": 6770, "loss": 0.2621, "lr": 8.091463964928801e-05, "epoch": 0.6469719350073855, "percentage": 32.35, "elapsed_time": "2:42:48", "remaining_time": "5:40:30", "throughput": 2328.48, "total_tokens": 22746896} |
|
{"current_steps": 2195, "total_steps": 6770, "loss": 0.324, "lr": 8.081856214359908e-05, "epoch": 0.6484490398818316, "percentage": 32.42, "elapsed_time": "2:44:16", "remaining_time": "5:42:24", "throughput": 2312.87, "total_tokens": 22797936} |
|
{"current_steps": 2200, "total_steps": 6770, "loss": 0.2662, "lr": 8.072230077467748e-05, "epoch": 0.6499261447562777, "percentage": 32.5, "elapsed_time": "2:45:43", "remaining_time": "5:44:14", "throughput": 2298.01, "total_tokens": 22849552} |
|
{"current_steps": 2200, "total_steps": 6770, "eval_loss": 0.48401138186454773, "epoch": 0.6499261447562777, "percentage": 32.5, "elapsed_time": "2:46:02", "remaining_time": "5:44:54", "throughput": 2293.58, "total_tokens": 22849552} |
|
{"current_steps": 2205, "total_steps": 6770, "loss": 0.3016, "lr": 8.062585611681758e-05, "epoch": 0.6514032496307238, "percentage": 32.57, "elapsed_time": "2:47:35", "remaining_time": "5:46:57", "throughput": 2277.4, "total_tokens": 22900184} |
|
{"current_steps": 2210, "total_steps": 6770, "loss": 0.2661, "lr": 8.052922874540722e-05, "epoch": 0.6528803545051699, "percentage": 32.64, "elapsed_time": "2:49:01", "remaining_time": "5:48:45", "throughput": 2263.19, "total_tokens": 22951816} |
|
{"current_steps": 2215, "total_steps": 6770, "loss": 0.2698, "lr": 8.043241923692436e-05, "epoch": 0.654357459379616, "percentage": 32.72, "elapsed_time": "2:50:28", "remaining_time": "5:50:34", "throughput": 2248.95, "total_tokens": 23003952} |
|
{"current_steps": 2220, "total_steps": 6770, "loss": 0.2683, "lr": 8.03354281689335e-05, "epoch": 0.6558345642540621, "percentage": 32.79, "elapsed_time": "2:51:55", "remaining_time": "5:52:22", "throughput": 2235.12, "total_tokens": 23056272} |
|
{"current_steps": 2225, "total_steps": 6770, "loss": 0.3237, "lr": 8.023825612008242e-05, "epoch": 0.6573116691285081, "percentage": 32.87, "elapsed_time": "2:53:23", "remaining_time": "5:54:10", "throughput": 2221.09, "total_tokens": 23106472} |
|
{"current_steps": 2230, "total_steps": 6770, "loss": 0.2906, "lr": 8.014090367009859e-05, "epoch": 0.6587887740029542, "percentage": 32.94, "elapsed_time": "2:54:50", "remaining_time": "5:55:57", "throughput": 2207.53, "total_tokens": 23158064} |
|
{"current_steps": 2235, "total_steps": 6770, "loss": 0.2067, "lr": 8.004337139978574e-05, "epoch": 0.6602658788774003, "percentage": 33.01, "elapsed_time": "2:56:17", "remaining_time": "5:57:43", "throughput": 2194.26, "total_tokens": 23210584} |
|
{"current_steps": 2240, "total_steps": 6770, "loss": 0.2648, "lr": 7.994565989102042e-05, "epoch": 0.6617429837518464, "percentage": 33.09, "elapsed_time": "2:57:45", "remaining_time": "5:59:28", "throughput": 2181.2, "total_tokens": 23262864} |
|
{"current_steps": 2245, "total_steps": 6770, "loss": 0.2496, "lr": 7.98477697267485e-05, "epoch": 0.6632200886262924, "percentage": 33.16, "elapsed_time": "2:59:11", "remaining_time": "6:01:10", "throughput": 2168.49, "total_tokens": 23314568} |
|
{"current_steps": 2250, "total_steps": 6770, "loss": 0.2332, "lr": 7.974970149098174e-05, "epoch": 0.6646971935007385, "percentage": 33.23, "elapsed_time": "3:00:38", "remaining_time": "6:02:53", "throughput": 2155.9, "total_tokens": 23366784} |
|
{"current_steps": 2250, "total_steps": 6770, "eval_loss": 0.4966147541999817, "epoch": 0.6646971935007385, "percentage": 33.23, "elapsed_time": "3:00:57", "remaining_time": "6:03:31", "throughput": 2152.09, "total_tokens": 23366784} |
|
{"current_steps": 2255, "total_steps": 6770, "loss": 0.3135, "lr": 7.965145576879423e-05, "epoch": 0.6661742983751846, "percentage": 33.31, "elapsed_time": "3:02:29", "remaining_time": "6:05:23", "throughput": 2138.79, "total_tokens": 23418504} |
|
{"current_steps": 2260, "total_steps": 6770, "loss": 0.3268, "lr": 7.955303314631898e-05, "epoch": 0.6676514032496307, "percentage": 33.38, "elapsed_time": "3:03:56", "remaining_time": "6:07:04", "throughput": 2126.5, "total_tokens": 23469840} |
|
{"current_steps": 2265, "total_steps": 6770, "loss": 0.2706, "lr": 7.945443421074436e-05, "epoch": 0.6691285081240768, "percentage": 33.46, "elapsed_time": "3:05:23", "remaining_time": "6:08:43", "throughput": 2114.63, "total_tokens": 23521416} |
|
{"current_steps": 2270, "total_steps": 6770, "loss": 0.2044, "lr": 7.935565955031064e-05, "epoch": 0.670605612998523, "percentage": 33.53, "elapsed_time": "3:06:50", "remaining_time": "6:10:22", "throughput": 2102.83, "total_tokens": 23573176} |
|
{"current_steps": 2275, "total_steps": 6770, "loss": 0.2724, "lr": 7.925670975430644e-05, "epoch": 0.672082717872969, "percentage": 33.6, "elapsed_time": "3:08:16", "remaining_time": "6:11:59", "throughput": 2091.44, "total_tokens": 23625080} |
|
{"current_steps": 2280, "total_steps": 6770, "loss": 0.2543, "lr": 7.915758541306523e-05, "epoch": 0.6735598227474151, "percentage": 33.68, "elapsed_time": "3:09:42", "remaining_time": "6:13:35", "throughput": 2080.16, "total_tokens": 23677096} |
|
{"current_steps": 2285, "total_steps": 6770, "loss": 0.2372, "lr": 7.90582871179619e-05, "epoch": 0.6750369276218612, "percentage": 33.75, "elapsed_time": "3:11:08", "remaining_time": "6:15:09", "throughput": 2069.13, "total_tokens": 23729168} |
|
{"current_steps": 2290, "total_steps": 6770, "loss": 0.2695, "lr": 7.895881546140902e-05, "epoch": 0.6765140324963073, "percentage": 33.83, "elapsed_time": "3:12:35", "remaining_time": "6:16:46", "throughput": 2057.97, "total_tokens": 23780568} |
|
{"current_steps": 2295, "total_steps": 6770, "loss": 0.3282, "lr": 7.885917103685353e-05, "epoch": 0.6779911373707533, "percentage": 33.9, "elapsed_time": "3:14:01", "remaining_time": "6:18:20", "throughput": 2047.05, "total_tokens": 23831360} |
|
{"current_steps": 2300, "total_steps": 6770, "loss": 0.2481, "lr": 7.875935443877305e-05, "epoch": 0.6794682422451994, "percentage": 33.97, "elapsed_time": "3:15:29", "remaining_time": "6:19:55", "throughput": 2036.18, "total_tokens": 23883032} |
|
{"current_steps": 2300, "total_steps": 6770, "eval_loss": 0.24180778861045837, "epoch": 0.6794682422451994, "percentage": 33.97, "elapsed_time": "3:15:48", "remaining_time": "6:20:32", "throughput": 2032.89, "total_tokens": 23883032} |
|
{"current_steps": 2305, "total_steps": 6770, "loss": 0.2474, "lr": 7.865936626267243e-05, "epoch": 0.6809453471196455, "percentage": 34.05, "elapsed_time": "3:17:19", "remaining_time": "6:22:13", "throughput": 2021.65, "total_tokens": 23934880} |
|
{"current_steps": 2310, "total_steps": 6770, "loss": 0.2613, "lr": 7.855920710508009e-05, "epoch": 0.6824224519940916, "percentage": 34.12, "elapsed_time": "3:18:46", "remaining_time": "6:23:47", "throughput": 2011.12, "total_tokens": 23986160} |
|
{"current_steps": 2315, "total_steps": 6770, "loss": 0.1957, "lr": 7.845887756354458e-05, "epoch": 0.6838995568685377, "percentage": 34.19, "elapsed_time": "3:20:13", "remaining_time": "6:25:18", "throughput": 2000.99, "total_tokens": 24038984} |
|
{"current_steps": 2320, "total_steps": 6770, "loss": 0.2709, "lr": 7.835837823663092e-05, "epoch": 0.6853766617429837, "percentage": 34.27, "elapsed_time": "3:21:40", "remaining_time": "6:26:50", "throughput": 1990.81, "total_tokens": 24090648} |
|
{"current_steps": 2325, "total_steps": 6770, "loss": 0.2873, "lr": 7.825770972391712e-05, "epoch": 0.6868537666174298, "percentage": 34.34, "elapsed_time": "3:23:08", "remaining_time": "6:28:22", "throughput": 1980.75, "total_tokens": 24142200} |
|
{"current_steps": 2330, "total_steps": 6770, "loss": 0.2378, "lr": 7.81568726259905e-05, "epoch": 0.6883308714918759, "percentage": 34.42, "elapsed_time": "3:24:36", "remaining_time": "6:29:53", "throughput": 1970.82, "total_tokens": 24194400} |
|
{"current_steps": 2335, "total_steps": 6770, "loss": 0.2615, "lr": 7.805586754444416e-05, "epoch": 0.689807976366322, "percentage": 34.49, "elapsed_time": "3:26:04", "remaining_time": "6:31:24", "throughput": 1960.89, "total_tokens": 24245328} |
|
{"current_steps": 2340, "total_steps": 6770, "loss": 0.2234, "lr": 7.795469508187343e-05, "epoch": 0.691285081240768, "percentage": 34.56, "elapsed_time": "3:27:31", "remaining_time": "6:32:53", "throughput": 1951.32, "total_tokens": 24297400} |
|
{"current_steps": 2345, "total_steps": 6770, "loss": 0.2833, "lr": 7.785335584187219e-05, "epoch": 0.6927621861152142, "percentage": 34.64, "elapsed_time": "3:28:59", "remaining_time": "6:34:22", "throughput": 1941.68, "total_tokens": 24348536} |
|
{"current_steps": 2350, "total_steps": 6770, "loss": 0.2313, "lr": 7.775185042902933e-05, "epoch": 0.6942392909896603, "percentage": 34.71, "elapsed_time": "3:30:27", "remaining_time": "6:35:50", "throughput": 1932.38, "total_tokens": 24401256} |
|
{"current_steps": 2350, "total_steps": 6770, "eval_loss": 0.1869634985923767, "epoch": 0.6942392909896603, "percentage": 34.71, "elapsed_time": "3:30:47", "remaining_time": "6:36:27", "throughput": 1929.4, "total_tokens": 24401256} |
|
{"current_steps": 2355, "total_steps": 6770, "loss": 0.2499, "lr": 7.765017944892514e-05, "epoch": 0.6957163958641064, "percentage": 34.79, "elapsed_time": "3:32:20", "remaining_time": "6:38:04", "throughput": 1919.35, "total_tokens": 24453384} |
|
{"current_steps": 2360, "total_steps": 6770, "loss": 0.2132, "lr": 7.754834350812765e-05, "epoch": 0.6971935007385525, "percentage": 34.86, "elapsed_time": "3:33:47", "remaining_time": "6:39:30", "throughput": 1910.39, "total_tokens": 24505960} |
|
{"current_steps": 2365, "total_steps": 6770, "loss": 0.2049, "lr": 7.744634321418906e-05, "epoch": 0.6986706056129985, "percentage": 34.93, "elapsed_time": "3:35:14", "remaining_time": "6:40:55", "throughput": 1901.6, "total_tokens": 24559008} |
|
{"current_steps": 2370, "total_steps": 6770, "loss": 0.2222, "lr": 7.734417917564211e-05, "epoch": 0.7001477104874446, "percentage": 35.01, "elapsed_time": "3:36:40", "remaining_time": "6:42:16", "throughput": 1893.03, "total_tokens": 24611128} |
|
{"current_steps": 2375, "total_steps": 6770, "loss": 0.2678, "lr": 7.724185200199643e-05, "epoch": 0.7016248153618907, "percentage": 35.08, "elapsed_time": "3:38:08", "remaining_time": "6:43:41", "throughput": 1884.23, "total_tokens": 24662336} |
|
{"current_steps": 2380, "total_steps": 6770, "loss": 0.2888, "lr": 7.713936230373491e-05, "epoch": 0.7031019202363368, "percentage": 35.16, "elapsed_time": "3:39:35", "remaining_time": "6:45:02", "throughput": 1875.8, "total_tokens": 24714032} |
|
{"current_steps": 2385, "total_steps": 6770, "loss": 0.2609, "lr": 7.703671069231007e-05, "epoch": 0.7045790251107829, "percentage": 35.23, "elapsed_time": "3:41:03", "remaining_time": "6:46:25", "throughput": 1867.18, "total_tokens": 24765296} |
|
{"current_steps": 2390, "total_steps": 6770, "loss": 0.2654, "lr": 7.693389778014037e-05, "epoch": 0.7060561299852289, "percentage": 35.3, "elapsed_time": "3:42:29", "remaining_time": "6:47:44", "throughput": 1859.03, "total_tokens": 24816744} |
|
{"current_steps": 2395, "total_steps": 6770, "loss": 0.2231, "lr": 7.683092418060664e-05, "epoch": 0.707533234859675, "percentage": 35.38, "elapsed_time": "3:43:57", "remaining_time": "6:49:06", "throughput": 1850.76, "total_tokens": 24869320} |
|
{"current_steps": 2400, "total_steps": 6770, "loss": 0.262, "lr": 7.672779050804834e-05, "epoch": 0.7090103397341211, "percentage": 35.45, "elapsed_time": "3:45:24", "remaining_time": "6:50:25", "throughput": 1842.77, "total_tokens": 24921872} |
|
{"current_steps": 2400, "total_steps": 6770, "eval_loss": 0.34713664650917053, "epoch": 0.7090103397341211, "percentage": 35.45, "elapsed_time": "3:45:43", "remaining_time": "6:50:59", "throughput": 1840.19, "total_tokens": 24921872} |
|
{"current_steps": 2405, "total_steps": 6770, "loss": 0.2704, "lr": 7.662449737775991e-05, "epoch": 0.7104874446085672, "percentage": 35.52, "elapsed_time": "3:47:15", "remaining_time": "6:52:28", "throughput": 1831.43, "total_tokens": 24973200} |
|
{"current_steps": 2410, "total_steps": 6770, "loss": 0.2792, "lr": 7.652104540598712e-05, "epoch": 0.7119645494830132, "percentage": 35.6, "elapsed_time": "3:48:42", "remaining_time": "6:53:46", "throughput": 1823.55, "total_tokens": 25024168} |
|
{"current_steps": 2415, "total_steps": 6770, "loss": 0.2798, "lr": 7.641743520992343e-05, "epoch": 0.7134416543574594, "percentage": 35.67, "elapsed_time": "3:50:10", "remaining_time": "6:55:05", "throughput": 1815.65, "total_tokens": 25075704} |
|
{"current_steps": 2420, "total_steps": 6770, "loss": 0.2411, "lr": 7.631366740770622e-05, "epoch": 0.7149187592319055, "percentage": 35.75, "elapsed_time": "3:51:38", "remaining_time": "6:56:22", "throughput": 1808.0, "total_tokens": 25128264} |
|
{"current_steps": 2425, "total_steps": 6770, "loss": 0.2204, "lr": 7.620974261841314e-05, "epoch": 0.7163958641063516, "percentage": 35.82, "elapsed_time": "3:53:05", "remaining_time": "6:57:38", "throughput": 1800.46, "total_tokens": 25180080} |
|
{"current_steps": 2430, "total_steps": 6770, "loss": 0.2295, "lr": 7.610566146205846e-05, "epoch": 0.7178729689807977, "percentage": 35.89, "elapsed_time": "3:54:34", "remaining_time": "6:58:56", "throughput": 1792.81, "total_tokens": 25232312} |
|
{"current_steps": 2435, "total_steps": 6770, "loss": 0.3177, "lr": 7.60014245595893e-05, "epoch": 0.7193500738552437, "percentage": 35.97, "elapsed_time": "3:56:01", "remaining_time": "7:00:11", "throughput": 1785.37, "total_tokens": 25283688} |
|
{"current_steps": 2440, "total_steps": 6770, "loss": 0.2606, "lr": 7.589703253288196e-05, "epoch": 0.7208271787296898, "percentage": 36.04, "elapsed_time": "3:57:29", "remaining_time": "7:01:26", "throughput": 1778.03, "total_tokens": 25335656} |
|
{"current_steps": 2445, "total_steps": 6770, "loss": 0.2406, "lr": 7.579248600473827e-05, "epoch": 0.7223042836041359, "percentage": 36.12, "elapsed_time": "3:58:56", "remaining_time": "7:02:40", "throughput": 1770.82, "total_tokens": 25387752} |
|
{"current_steps": 2450, "total_steps": 6770, "loss": 0.2412, "lr": 7.568778559888173e-05, "epoch": 0.723781388478582, "percentage": 36.19, "elapsed_time": "4:00:23", "remaining_time": "7:03:53", "throughput": 1763.74, "total_tokens": 25439896} |
|
{"current_steps": 2450, "total_steps": 6770, "eval_loss": 0.34561124444007874, "epoch": 0.723781388478582, "percentage": 36.19, "elapsed_time": "4:00:42", "remaining_time": "7:04:26", "throughput": 1761.41, "total_tokens": 25439896} |
|
{"current_steps": 2455, "total_steps": 6770, "loss": 0.2752, "lr": 7.558293193995394e-05, "epoch": 0.725258493353028, "percentage": 36.26, "elapsed_time": "4:02:14", "remaining_time": "7:05:46", "throughput": 1753.81, "total_tokens": 25491160} |
|
{"current_steps": 2460, "total_steps": 6770, "loss": 0.2399, "lr": 7.547792565351075e-05, "epoch": 0.7267355982274741, "percentage": 36.34, "elapsed_time": "4:03:43", "remaining_time": "7:07:00", "throughput": 1746.76, "total_tokens": 25543152} |
|
{"current_steps": 2465, "total_steps": 6770, "loss": 0.2351, "lr": 7.537276736601864e-05, "epoch": 0.7282127031019202, "percentage": 36.41, "elapsed_time": "4:05:09", "remaining_time": "7:08:08", "throughput": 1740.11, "total_tokens": 25595312} |
|
{"current_steps": 2470, "total_steps": 6770, "loss": 0.1837, "lr": 7.526745770485088e-05, "epoch": 0.7296898079763663, "percentage": 36.48, "elapsed_time": "4:06:35", "remaining_time": "7:09:17", "throughput": 1733.53, "total_tokens": 25648680} |
|
{"current_steps": 2475, "total_steps": 6770, "loss": 0.3093, "lr": 7.516199729828385e-05, "epoch": 0.7311669128508124, "percentage": 36.56, "elapsed_time": "4:08:01", "remaining_time": "7:10:24", "throughput": 1727.06, "total_tokens": 25701464} |
|
{"current_steps": 2480, "total_steps": 6770, "loss": 0.223, "lr": 7.505638677549327e-05, "epoch": 0.7326440177252584, "percentage": 36.63, "elapsed_time": "4:09:29", "remaining_time": "7:11:34", "throughput": 1720.41, "total_tokens": 25753528} |
|
{"current_steps": 2485, "total_steps": 6770, "loss": 0.2128, "lr": 7.495062676655049e-05, "epoch": 0.7341211225997046, "percentage": 36.71, "elapsed_time": "4:10:55", "remaining_time": "7:12:41", "throughput": 1714.01, "total_tokens": 25805768} |
|
{"current_steps": 2490, "total_steps": 6770, "loss": 0.2703, "lr": 7.484471790241865e-05, "epoch": 0.7355982274741507, "percentage": 36.78, "elapsed_time": "4:12:23", "remaining_time": "7:13:49", "throughput": 1707.5, "total_tokens": 25856672} |
|
{"current_steps": 2495, "total_steps": 6770, "loss": 0.2456, "lr": 7.473866081494896e-05, "epoch": 0.7370753323485968, "percentage": 36.85, "elapsed_time": "4:13:49", "remaining_time": "7:14:53", "throughput": 1701.25, "total_tokens": 25908544} |
|
{"current_steps": 2500, "total_steps": 6770, "loss": 0.2382, "lr": 7.463245613687695e-05, "epoch": 0.7385524372230429, "percentage": 36.93, "elapsed_time": "4:15:15", "remaining_time": "7:15:59", "throughput": 1695.04, "total_tokens": 25961056} |
|
{"current_steps": 2500, "total_steps": 6770, "eval_loss": 0.2542795240879059, "epoch": 0.7385524372230429, "percentage": 36.93, "elapsed_time": "4:15:35", "remaining_time": "7:16:32", "throughput": 1692.92, "total_tokens": 25961056} |
|
{"current_steps": 2505, "total_steps": 6770, "loss": 0.2843, "lr": 7.452610450181865e-05, "epoch": 0.740029542097489, "percentage": 37.0, "elapsed_time": "4:17:07", "remaining_time": "7:17:45", "throughput": 1686.15, "total_tokens": 26012232} |
|
{"current_steps": 2510, "total_steps": 6770, "loss": 0.2376, "lr": 7.441960654426687e-05, "epoch": 0.741506646971935, "percentage": 37.08, "elapsed_time": "4:18:33", "remaining_time": "7:18:49", "throughput": 1680.12, "total_tokens": 26064432} |
|
{"current_steps": 2515, "total_steps": 6770, "loss": 0.2464, "lr": 7.431296289958735e-05, "epoch": 0.7429837518463811, "percentage": 37.15, "elapsed_time": "4:20:00", "remaining_time": "7:19:54", "throughput": 1674.0, "total_tokens": 26115856} |
|
{"current_steps": 2520, "total_steps": 6770, "loss": 0.2793, "lr": 7.4206174204015e-05, "epoch": 0.7444608567208272, "percentage": 37.22, "elapsed_time": "4:21:27", "remaining_time": "7:20:56", "throughput": 1668.04, "total_tokens": 26167176} |
|
{"current_steps": 2525, "total_steps": 6770, "loss": 0.2141, "lr": 7.409924109465011e-05, "epoch": 0.7459379615952733, "percentage": 37.3, "elapsed_time": "4:22:53", "remaining_time": "7:21:58", "throughput": 1662.21, "total_tokens": 26219144} |
|
{"current_steps": 2530, "total_steps": 6770, "loss": 0.2137, "lr": 7.399216420945453e-05, "epoch": 0.7474150664697193, "percentage": 37.37, "elapsed_time": "4:24:19", "remaining_time": "7:22:58", "throughput": 1656.57, "total_tokens": 26271712} |
|
{"current_steps": 2535, "total_steps": 6770, "loss": 0.2177, "lr": 7.388494418724789e-05, "epoch": 0.7488921713441654, "percentage": 37.44, "elapsed_time": "4:25:44", "remaining_time": "7:23:57", "throughput": 1650.91, "total_tokens": 26323656} |
|
{"current_steps": 2540, "total_steps": 6770, "loss": 0.2762, "lr": 7.377758166770377e-05, "epoch": 0.7503692762186115, "percentage": 37.52, "elapsed_time": "4:27:09", "remaining_time": "7:24:55", "throughput": 1645.38, "total_tokens": 26375392} |
|
{"current_steps": 2545, "total_steps": 6770, "loss": 0.2794, "lr": 7.367007729134588e-05, "epoch": 0.7518463810930576, "percentage": 37.59, "elapsed_time": "4:28:37", "remaining_time": "7:25:56", "throughput": 1639.64, "total_tokens": 26426080} |
|
{"current_steps": 2550, "total_steps": 6770, "loss": 0.2364, "lr": 7.356243169954426e-05, "epoch": 0.7533234859675036, "percentage": 37.67, "elapsed_time": "4:30:02", "remaining_time": "7:26:53", "throughput": 1634.17, "total_tokens": 26477208} |
|
{"current_steps": 2550, "total_steps": 6770, "eval_loss": 0.38712552189826965, "epoch": 0.7533234859675036, "percentage": 37.67, "elapsed_time": "4:30:21", "remaining_time": "7:27:24", "throughput": 1632.25, "total_tokens": 26477208} |
|
{"current_steps": 2555, "total_steps": 6770, "loss": 0.2649, "lr": 7.34546455345114e-05, "epoch": 0.7548005908419497, "percentage": 37.74, "elapsed_time": "4:31:53", "remaining_time": "7:28:32", "throughput": 1626.17, "total_tokens": 26528824} |
|
{"current_steps": 2560, "total_steps": 6770, "loss": 0.1834, "lr": 7.334671943929853e-05, "epoch": 0.7562776957163959, "percentage": 37.81, "elapsed_time": "4:33:19", "remaining_time": "7:29:30", "throughput": 1620.84, "total_tokens": 26581512} |
|
{"current_steps": 2565, "total_steps": 6770, "loss": 0.2539, "lr": 7.323865405779162e-05, "epoch": 0.757754800590842, "percentage": 37.89, "elapsed_time": "4:34:47", "remaining_time": "7:30:29", "throughput": 1615.36, "total_tokens": 26633144} |
|
{"current_steps": 2570, "total_steps": 6770, "loss": 0.2592, "lr": 7.313045003470766e-05, "epoch": 0.7592319054652881, "percentage": 37.96, "elapsed_time": "4:36:13", "remaining_time": "7:31:25", "throughput": 1610.02, "total_tokens": 26684024} |
|
{"current_steps": 2575, "total_steps": 6770, "loss": 0.228, "lr": 7.302210801559075e-05, "epoch": 0.7607090103397341, "percentage": 38.04, "elapsed_time": "4:37:41", "remaining_time": "7:32:24", "throughput": 1604.65, "total_tokens": 26736512} |
|
{"current_steps": 2580, "total_steps": 6770, "loss": 0.232, "lr": 7.291362864680831e-05, "epoch": 0.7621861152141802, "percentage": 38.11, "elapsed_time": "4:39:08", "remaining_time": "7:33:20", "throughput": 1599.44, "total_tokens": 26788656} |
|
{"current_steps": 2585, "total_steps": 6770, "loss": 0.2411, "lr": 7.280501257554716e-05, "epoch": 0.7636632200886263, "percentage": 38.18, "elapsed_time": "4:40:36", "remaining_time": "7:34:17", "throughput": 1594.23, "total_tokens": 26840856} |
|
{"current_steps": 2590, "total_steps": 6770, "loss": 0.2214, "lr": 7.269626044980968e-05, "epoch": 0.7651403249630724, "percentage": 38.26, "elapsed_time": "4:42:03", "remaining_time": "7:35:13", "throughput": 1589.07, "total_tokens": 26892840} |
|
{"current_steps": 2595, "total_steps": 6770, "loss": 0.236, "lr": 7.258737291841e-05, "epoch": 0.7666174298375185, "percentage": 38.33, "elapsed_time": "4:43:31", "remaining_time": "7:36:08", "throughput": 1583.96, "total_tokens": 26945200} |
|
{"current_steps": 2600, "total_steps": 6770, "loss": 0.2082, "lr": 7.247835063097e-05, "epoch": 0.7680945347119645, "percentage": 38.4, "elapsed_time": "4:44:58", "remaining_time": "7:37:03", "throughput": 1578.97, "total_tokens": 26997904} |
|
{"current_steps": 2600, "total_steps": 6770, "eval_loss": 0.3406156003475189, "epoch": 0.7680945347119645, "percentage": 38.4, "elapsed_time": "4:45:17", "remaining_time": "7:37:34", "throughput": 1577.19, "total_tokens": 26997904} |
|
{"current_steps": 2605, "total_steps": 6770, "loss": 0.1983, "lr": 7.236919423791556e-05, "epoch": 0.7695716395864106, "percentage": 38.48, "elapsed_time": "4:46:49", "remaining_time": "7:38:34", "throughput": 1571.84, "total_tokens": 27050064} |
|
{"current_steps": 2610, "total_steps": 6770, "loss": 0.2442, "lr": 7.225990439047264e-05, "epoch": 0.7710487444608567, "percentage": 38.55, "elapsed_time": "4:48:16", "remaining_time": "7:39:27", "throughput": 1566.95, "total_tokens": 27102096} |
|
{"current_steps": 2615, "total_steps": 6770, "loss": 0.2095, "lr": 7.215048174066337e-05, "epoch": 0.7725258493353028, "percentage": 38.63, "elapsed_time": "4:49:42", "remaining_time": "7:40:18", "throughput": 1562.24, "total_tokens": 27155064} |
|
{"current_steps": 2620, "total_steps": 6770, "loss": 0.2332, "lr": 7.204092694130218e-05, "epoch": 0.7740029542097489, "percentage": 38.7, "elapsed_time": "4:51:08", "remaining_time": "7:41:09", "throughput": 1557.44, "total_tokens": 27206472} |
|
{"current_steps": 2625, "total_steps": 6770, "loss": 0.2078, "lr": 7.193124064599188e-05, "epoch": 0.7754800590841949, "percentage": 38.77, "elapsed_time": "4:52:34", "remaining_time": "7:41:59", "throughput": 1552.78, "total_tokens": 27258792} |
|
{"current_steps": 2630, "total_steps": 6770, "loss": 0.2311, "lr": 7.182142350911985e-05, "epoch": 0.7769571639586411, "percentage": 38.85, "elapsed_time": "4:54:01", "remaining_time": "7:42:50", "throughput": 1548.09, "total_tokens": 27310840} |
|
{"current_steps": 2635, "total_steps": 6770, "loss": 0.2639, "lr": 7.1711476185854e-05, "epoch": 0.7784342688330872, "percentage": 38.92, "elapsed_time": "4:55:26", "remaining_time": "7:43:38", "throughput": 1543.55, "total_tokens": 27362496} |
|
{"current_steps": 2640, "total_steps": 6770, "loss": 0.2475, "lr": 7.160139933213898e-05, "epoch": 0.7799113737075333, "percentage": 39.0, "elapsed_time": "4:56:54", "remaining_time": "7:44:28", "throughput": 1538.9, "total_tokens": 27414544} |
|
{"current_steps": 2645, "total_steps": 6770, "loss": 0.1917, "lr": 7.149119360469217e-05, "epoch": 0.7813884785819794, "percentage": 39.07, "elapsed_time": "4:58:20", "remaining_time": "7:45:16", "throughput": 1534.44, "total_tokens": 27467408} |
|
{"current_steps": 2650, "total_steps": 6770, "loss": 0.1736, "lr": 7.138085966099985e-05, "epoch": 0.7828655834564254, "percentage": 39.14, "elapsed_time": "4:59:48", "remaining_time": "7:46:06", "throughput": 1529.95, "total_tokens": 27521088} |
|
{"current_steps": 2650, "total_steps": 6770, "eval_loss": 0.269732803106308, "epoch": 0.7828655834564254, "percentage": 39.14, "elapsed_time": "5:00:07", "remaining_time": "7:46:36", "throughput": 1528.34, "total_tokens": 27521088} |
|
{"current_steps": 2655, "total_steps": 6770, "loss": 0.2047, "lr": 7.127039815931322e-05, "epoch": 0.7843426883308715, "percentage": 39.22, "elapsed_time": "5:01:38", "remaining_time": "7:47:30", "throughput": 1523.55, "total_tokens": 27573512} |
|
{"current_steps": 2660, "total_steps": 6770, "loss": 0.2309, "lr": 7.11598097586445e-05, "epoch": 0.7858197932053176, "percentage": 39.29, "elapsed_time": "5:03:05", "remaining_time": "7:48:18", "throughput": 1519.12, "total_tokens": 27625488} |
|
{"current_steps": 2665, "total_steps": 6770, "loss": 0.2188, "lr": 7.104909511876293e-05, "epoch": 0.7872968980797637, "percentage": 39.36, "elapsed_time": "5:04:31", "remaining_time": "7:49:04", "throughput": 1514.81, "total_tokens": 27677824} |
|
{"current_steps": 2670, "total_steps": 6770, "loss": 0.2127, "lr": 7.0938254900191e-05, "epoch": 0.7887740029542097, "percentage": 39.44, "elapsed_time": "5:05:58", "remaining_time": "7:49:51", "throughput": 1510.45, "total_tokens": 27730048} |
|
{"current_steps": 2675, "total_steps": 6770, "loss": 0.2534, "lr": 7.082728976420032e-05, "epoch": 0.7902511078286558, "percentage": 39.51, "elapsed_time": "5:07:24", "remaining_time": "7:50:35", "throughput": 1506.24, "total_tokens": 27781512} |
|
{"current_steps": 2680, "total_steps": 6770, "loss": 0.204, "lr": 7.071620037280779e-05, "epoch": 0.7917282127031019, "percentage": 39.59, "elapsed_time": "5:08:50", "remaining_time": "7:51:20", "throughput": 1502.02, "total_tokens": 27833808} |
|
{"current_steps": 2685, "total_steps": 6770, "loss": 0.2218, "lr": 7.060498738877159e-05, "epoch": 0.793205317577548, "percentage": 39.66, "elapsed_time": "5:10:17", "remaining_time": "7:52:04", "throughput": 1497.88, "total_tokens": 27886232} |
|
{"current_steps": 2690, "total_steps": 6770, "loss": 0.2157, "lr": 7.049365147558727e-05, "epoch": 0.794682422451994, "percentage": 39.73, "elapsed_time": "5:11:42", "remaining_time": "7:52:46", "throughput": 1493.84, "total_tokens": 27938696} |
|
{"current_steps": 2695, "total_steps": 6770, "loss": 0.2401, "lr": 7.038219329748376e-05, "epoch": 0.7961595273264401, "percentage": 39.81, "elapsed_time": "5:13:08", "remaining_time": "7:53:29", "throughput": 1489.76, "total_tokens": 27990816} |
|
{"current_steps": 2700, "total_steps": 6770, "loss": 0.2225, "lr": 7.027061351941948e-05, "epoch": 0.7976366322008862, "percentage": 39.88, "elapsed_time": "5:14:34", "remaining_time": "7:54:10", "throughput": 1485.79, "total_tokens": 28042992} |
|
{"current_steps": 2700, "total_steps": 6770, "eval_loss": 0.41549214720726013, "epoch": 0.7976366322008862, "percentage": 39.88, "elapsed_time": "5:14:53", "remaining_time": "7:54:39", "throughput": 1484.3, "total_tokens": 28042992} |
|
{"current_steps": 2705, "total_steps": 6770, "loss": 0.2279, "lr": 7.01589128070782e-05, "epoch": 0.7991137370753324, "percentage": 39.96, "elapsed_time": "5:16:24", "remaining_time": "7:55:28", "throughput": 1479.88, "total_tokens": 28094200} |
|
{"current_steps": 2710, "total_steps": 6770, "loss": 0.2307, "lr": 7.004709182686531e-05, "epoch": 0.8005908419497785, "percentage": 40.03, "elapsed_time": "5:17:50", "remaining_time": "7:56:09", "throughput": 1475.93, "total_tokens": 28146144} |
|
{"current_steps": 2715, "total_steps": 6770, "loss": 0.2025, "lr": 6.993515124590362e-05, "epoch": 0.8020679468242246, "percentage": 40.1, "elapsed_time": "5:19:16", "remaining_time": "7:56:51", "throughput": 1472.01, "total_tokens": 28198600} |
|
{"current_steps": 2720, "total_steps": 6770, "loss": 0.2318, "lr": 6.982309173202951e-05, "epoch": 0.8035450516986706, "percentage": 40.18, "elapsed_time": "5:20:42", "remaining_time": "7:57:32", "throughput": 1468.07, "total_tokens": 28249928} |
|
{"current_steps": 2725, "total_steps": 6770, "loss": 0.2074, "lr": 6.971091395378895e-05, "epoch": 0.8050221565731167, "percentage": 40.25, "elapsed_time": "5:22:09", "remaining_time": "7:58:13", "throughput": 1464.15, "total_tokens": 28301928} |
|
{"current_steps": 2730, "total_steps": 6770, "loss": 0.1935, "lr": 6.95986185804334e-05, "epoch": 0.8064992614475628, "percentage": 40.32, "elapsed_time": "5:23:36", "remaining_time": "7:58:53", "throughput": 1460.32, "total_tokens": 28354256} |
|
{"current_steps": 2735, "total_steps": 6770, "loss": 0.2457, "lr": 6.948620628191595e-05, "epoch": 0.8079763663220089, "percentage": 40.4, "elapsed_time": "5:25:04", "remaining_time": "7:59:35", "throughput": 1456.37, "total_tokens": 28405800} |
|
{"current_steps": 2740, "total_steps": 6770, "loss": 0.2021, "lr": 6.937367772888725e-05, "epoch": 0.8094534711964549, "percentage": 40.47, "elapsed_time": "5:26:31", "remaining_time": "8:00:14", "throughput": 1452.59, "total_tokens": 28457664} |
|
{"current_steps": 2745, "total_steps": 6770, "loss": 0.2323, "lr": 6.926103359269152e-05, "epoch": 0.810930576070901, "percentage": 40.55, "elapsed_time": "5:28:00", "remaining_time": "8:00:57", "throughput": 1448.66, "total_tokens": 28509944} |
|
{"current_steps": 2750, "total_steps": 6770, "loss": 0.2501, "lr": 6.914827454536254e-05, "epoch": 0.8124076809453471, "percentage": 40.62, "elapsed_time": "5:29:26", "remaining_time": "8:01:35", "throughput": 1444.92, "total_tokens": 28561248} |
|
{"current_steps": 2750, "total_steps": 6770, "eval_loss": 0.41148969531059265, "epoch": 0.8124076809453471, "percentage": 40.62, "elapsed_time": "5:29:45", "remaining_time": "8:02:03", "throughput": 1443.51, "total_tokens": 28561248} |
|
{"current_steps": 2755, "total_steps": 6770, "loss": 0.225, "lr": 6.903540125961965e-05, "epoch": 0.8138847858197932, "percentage": 40.69, "elapsed_time": "5:31:19", "remaining_time": "8:02:51", "throughput": 1439.31, "total_tokens": 28613120} |
|
{"current_steps": 2760, "total_steps": 6770, "loss": 0.2365, "lr": 6.892241440886377e-05, "epoch": 0.8153618906942393, "percentage": 40.77, "elapsed_time": "5:32:46", "remaining_time": "8:03:29", "throughput": 1435.65, "total_tokens": 28664864} |
|
{"current_steps": 2765, "total_steps": 6770, "loss": 0.2386, "lr": 6.880931466717327e-05, "epoch": 0.8168389955686853, "percentage": 40.84, "elapsed_time": "5:34:14", "remaining_time": "8:04:08", "throughput": 1431.93, "total_tokens": 28716896} |
|
{"current_steps": 2770, "total_steps": 6770, "loss": 0.2358, "lr": 6.86961027093001e-05, "epoch": 0.8183161004431314, "percentage": 40.92, "elapsed_time": "5:35:40", "remaining_time": "8:04:43", "throughput": 1428.45, "total_tokens": 28769528} |
|
{"current_steps": 2775, "total_steps": 6770, "loss": 0.2844, "lr": 6.858277921066568e-05, "epoch": 0.8197932053175776, "percentage": 40.99, "elapsed_time": "5:37:07", "remaining_time": "8:05:19", "throughput": 1424.89, "total_tokens": 28821304} |
|
{"current_steps": 2780, "total_steps": 6770, "loss": 0.1867, "lr": 6.846934484735686e-05, "epoch": 0.8212703101920237, "percentage": 41.06, "elapsed_time": "5:38:33", "remaining_time": "8:05:54", "throughput": 1421.36, "total_tokens": 28872712} |
|
{"current_steps": 2785, "total_steps": 6770, "loss": 0.2184, "lr": 6.83558002961219e-05, "epoch": 0.8227474150664698, "percentage": 41.14, "elapsed_time": "5:39:59", "remaining_time": "8:06:29", "throughput": 1417.87, "total_tokens": 28924272} |
|
{"current_steps": 2790, "total_steps": 6770, "loss": 0.1938, "lr": 6.824214623436644e-05, "epoch": 0.8242245199409158, "percentage": 41.21, "elapsed_time": "5:41:26", "remaining_time": "8:07:04", "throughput": 1414.4, "total_tokens": 28976352} |
|
{"current_steps": 2795, "total_steps": 6770, "loss": 0.2046, "lr": 6.812838334014951e-05, "epoch": 0.8257016248153619, "percentage": 41.29, "elapsed_time": "5:42:52", "remaining_time": "8:07:38", "throughput": 1411.01, "total_tokens": 29028344} |
|
{"current_steps": 2800, "total_steps": 6770, "loss": 0.2507, "lr": 6.801451229217938e-05, "epoch": 0.827178729689808, "percentage": 41.36, "elapsed_time": "5:44:20", "remaining_time": "8:08:13", "throughput": 1407.53, "total_tokens": 29079576} |
|
{"current_steps": 2800, "total_steps": 6770, "eval_loss": 0.32233569025993347, "epoch": 0.827178729689808, "percentage": 41.36, "elapsed_time": "5:44:39", "remaining_time": "8:08:40", "throughput": 1406.21, "total_tokens": 29079576} |
|
{"current_steps": 2805, "total_steps": 6770, "loss": 0.1752, "lr": 6.790053376980959e-05, "epoch": 0.8286558345642541, "percentage": 41.43, "elapsed_time": "5:46:11", "remaining_time": "8:09:21", "throughput": 1402.5, "total_tokens": 29131768} |
|
{"current_steps": 2810, "total_steps": 6770, "loss": 0.2502, "lr": 6.778644845303483e-05, "epoch": 0.8301329394387001, "percentage": 41.51, "elapsed_time": "5:47:38", "remaining_time": "8:09:55", "throughput": 1399.12, "total_tokens": 29183952} |
|
{"current_steps": 2815, "total_steps": 6770, "loss": 0.2092, "lr": 6.767225702248698e-05, "epoch": 0.8316100443131462, "percentage": 41.58, "elapsed_time": "5:49:05", "remaining_time": "8:10:28", "throughput": 1395.79, "total_tokens": 29236232} |
|
{"current_steps": 2820, "total_steps": 6770, "loss": 0.2492, "lr": 6.755796015943097e-05, "epoch": 0.8330871491875923, "percentage": 41.65, "elapsed_time": "5:50:33", "remaining_time": "8:11:01", "throughput": 1392.43, "total_tokens": 29287672} |
|
{"current_steps": 2825, "total_steps": 6770, "loss": 0.2377, "lr": 6.744355854576075e-05, "epoch": 0.8345642540620384, "percentage": 41.73, "elapsed_time": "5:52:01", "remaining_time": "8:11:35", "throughput": 1389.11, "total_tokens": 29339952} |
|
{"current_steps": 2830, "total_steps": 6770, "loss": 0.168, "lr": 6.732905286399516e-05, "epoch": 0.8360413589364845, "percentage": 41.8, "elapsed_time": "5:53:29", "remaining_time": "8:12:08", "throughput": 1385.78, "total_tokens": 29392128} |
|
{"current_steps": 2835, "total_steps": 6770, "loss": 0.1919, "lr": 6.721444379727398e-05, "epoch": 0.8375184638109305, "percentage": 41.88, "elapsed_time": "5:54:56", "remaining_time": "8:12:40", "throughput": 1382.56, "total_tokens": 29444168} |
|
{"current_steps": 2840, "total_steps": 6770, "loss": 0.2225, "lr": 6.709973202935374e-05, "epoch": 0.8389955686853766, "percentage": 41.95, "elapsed_time": "5:56:26", "remaining_time": "8:13:14", "throughput": 1379.2, "total_tokens": 29495592} |
|
{"current_steps": 2845, "total_steps": 6770, "loss": 0.1715, "lr": 6.698491824460371e-05, "epoch": 0.8404726735598228, "percentage": 42.02, "elapsed_time": "5:57:53", "remaining_time": "8:13:44", "throughput": 1376.04, "total_tokens": 29548008} |
|
{"current_steps": 2850, "total_steps": 6770, "loss": 0.1928, "lr": 6.687000312800178e-05, "epoch": 0.8419497784342689, "percentage": 42.1, "elapsed_time": "5:59:21", "remaining_time": "8:14:17", "throughput": 1372.81, "total_tokens": 29600536} |
|
{"current_steps": 2850, "total_steps": 6770, "eval_loss": 0.28275948762893677, "epoch": 0.8419497784342689, "percentage": 42.1, "elapsed_time": "5:59:41", "remaining_time": "8:14:44", "throughput": 1371.57, "total_tokens": 29600536} |
|
{"current_steps": 2855, "total_steps": 6770, "loss": 0.2163, "lr": 6.675498736513036e-05, "epoch": 0.843426883308715, "percentage": 42.17, "elapsed_time": "6:01:13", "remaining_time": "8:15:20", "throughput": 1368.13, "total_tokens": 29652440} |
|
{"current_steps": 2860, "total_steps": 6770, "loss": 0.2589, "lr": 6.663987164217236e-05, "epoch": 0.844903988183161, "percentage": 42.25, "elapsed_time": "6:02:41", "remaining_time": "8:15:50", "throughput": 1365.0, "total_tokens": 29704376} |
|
{"current_steps": 2865, "total_steps": 6770, "loss": 0.2325, "lr": 6.652465664590703e-05, "epoch": 0.8463810930576071, "percentage": 42.32, "elapsed_time": "6:04:07", "remaining_time": "8:16:18", "throughput": 1362.0, "total_tokens": 29756504} |
|
{"current_steps": 2870, "total_steps": 6770, "loss": 0.242, "lr": 6.640934306370586e-05, "epoch": 0.8478581979320532, "percentage": 42.39, "elapsed_time": "6:05:35", "remaining_time": "8:16:47", "throughput": 1358.88, "total_tokens": 29807328} |
|
{"current_steps": 2875, "total_steps": 6770, "loss": 0.2169, "lr": 6.629393158352854e-05, "epoch": 0.8493353028064993, "percentage": 42.47, "elapsed_time": "6:07:01", "remaining_time": "8:17:14", "throughput": 1355.89, "total_tokens": 29859208} |
|
{"current_steps": 2880, "total_steps": 6770, "loss": 0.2335, "lr": 6.61784228939188e-05, "epoch": 0.8508124076809453, "percentage": 42.54, "elapsed_time": "6:08:28", "remaining_time": "8:17:41", "throughput": 1352.93, "total_tokens": 29911128} |
|
{"current_steps": 2885, "total_steps": 6770, "loss": 0.1913, "lr": 6.606281768400032e-05, "epoch": 0.8522895125553914, "percentage": 42.61, "elapsed_time": "6:09:55", "remaining_time": "8:18:08", "throughput": 1349.95, "total_tokens": 29962384} |
|
{"current_steps": 2890, "total_steps": 6770, "loss": 0.2425, "lr": 6.594711664347264e-05, "epoch": 0.8537666174298375, "percentage": 42.69, "elapsed_time": "6:11:22", "remaining_time": "8:18:35", "throughput": 1346.98, "total_tokens": 30013664} |
|
{"current_steps": 2895, "total_steps": 6770, "loss": 0.2312, "lr": 6.5831320462607e-05, "epoch": 0.8552437223042836, "percentage": 42.76, "elapsed_time": "6:12:49", "remaining_time": "8:19:01", "throughput": 1344.06, "total_tokens": 30066016} |
|
{"current_steps": 2900, "total_steps": 6770, "loss": 0.2029, "lr": 6.571542983224223e-05, "epoch": 0.8567208271787297, "percentage": 42.84, "elapsed_time": "6:14:15", "remaining_time": "8:19:26", "throughput": 1341.23, "total_tokens": 30118072} |
|
{"current_steps": 2900, "total_steps": 6770, "eval_loss": 0.39434579014778137, "epoch": 0.8567208271787297, "percentage": 42.84, "elapsed_time": "6:14:34", "remaining_time": "8:19:52", "throughput": 1340.09, "total_tokens": 30118072} |
|
{"current_steps": 2905, "total_steps": 6770, "loss": 0.2241, "lr": 6.559944544378072e-05, "epoch": 0.8581979320531757, "percentage": 42.91, "elapsed_time": "6:16:07", "remaining_time": "8:20:24", "throughput": 1336.91, "total_tokens": 30170248} |
|
{"current_steps": 2910, "total_steps": 6770, "loss": 0.2298, "lr": 6.548336798918411e-05, "epoch": 0.8596750369276218, "percentage": 42.98, "elapsed_time": "6:17:33", "remaining_time": "8:20:49", "throughput": 1334.08, "total_tokens": 30222016} |
|
{"current_steps": 2915, "total_steps": 6770, "loss": 0.2396, "lr": 6.536719816096935e-05, "epoch": 0.8611521418020679, "percentage": 43.06, "elapsed_time": "6:19:01", "remaining_time": "8:21:15", "throughput": 1331.18, "total_tokens": 30273312} |
|
{"current_steps": 2920, "total_steps": 6770, "loss": 0.2324, "lr": 6.52509366522045e-05, "epoch": 0.8626292466765141, "percentage": 43.13, "elapsed_time": "6:20:29", "remaining_time": "8:21:39", "throughput": 1328.32, "total_tokens": 30324328} |
|
{"current_steps": 2925, "total_steps": 6770, "loss": 0.2263, "lr": 6.513458415650452e-05, "epoch": 0.8641063515509602, "percentage": 43.21, "elapsed_time": "6:21:57", "remaining_time": "8:22:05", "throughput": 1325.47, "total_tokens": 30376488} |
|
{"current_steps": 2930, "total_steps": 6770, "loss": 0.1734, "lr": 6.501814136802725e-05, "epoch": 0.8655834564254062, "percentage": 43.28, "elapsed_time": "6:23:24", "remaining_time": "8:22:28", "throughput": 1322.77, "total_tokens": 30429504} |
|
{"current_steps": 2935, "total_steps": 6770, "loss": 0.2235, "lr": 6.490160898146918e-05, "epoch": 0.8670605612998523, "percentage": 43.35, "elapsed_time": "6:24:52", "remaining_time": "8:22:53", "throughput": 1319.93, "total_tokens": 30480400} |
|
{"current_steps": 2940, "total_steps": 6770, "loss": 0.2297, "lr": 6.47849876920614e-05, "epoch": 0.8685376661742984, "percentage": 43.43, "elapsed_time": "6:26:19", "remaining_time": "8:23:15", "throughput": 1317.22, "total_tokens": 30531912} |
|
{"current_steps": 2945, "total_steps": 6770, "loss": 0.1764, "lr": 6.46682781955653e-05, "epoch": 0.8700147710487445, "percentage": 43.5, "elapsed_time": "6:27:46", "remaining_time": "8:23:39", "throughput": 1314.52, "total_tokens": 30584688} |
|
{"current_steps": 2950, "total_steps": 6770, "loss": 0.1692, "lr": 6.455148118826859e-05, "epoch": 0.8714918759231906, "percentage": 43.57, "elapsed_time": "6:29:12", "remaining_time": "8:23:59", "throughput": 1311.97, "total_tokens": 30637448} |
|
{"current_steps": 2950, "total_steps": 6770, "eval_loss": 0.20344533026218414, "epoch": 0.8714918759231906, "percentage": 43.57, "elapsed_time": "6:29:31", "remaining_time": "8:24:23", "throughput": 1310.91, "total_tokens": 30637448} |
|
{"current_steps": 2955, "total_steps": 6770, "loss": 0.152, "lr": 6.443459736698105e-05, "epoch": 0.8729689807976366, "percentage": 43.65, "elapsed_time": "6:31:03", "remaining_time": "8:24:51", "throughput": 1308.02, "total_tokens": 30690624} |
|
{"current_steps": 2960, "total_steps": 6770, "loss": 0.1945, "lr": 6.431762742903038e-05, "epoch": 0.8744460856720827, "percentage": 43.72, "elapsed_time": "6:32:28", "remaining_time": "8:25:10", "throughput": 1305.52, "total_tokens": 30742992} |
|
{"current_steps": 2965, "total_steps": 6770, "loss": 0.2177, "lr": 6.420057207225807e-05, "epoch": 0.8759231905465288, "percentage": 43.8, "elapsed_time": "6:33:54", "remaining_time": "8:25:29", "throughput": 1303.0, "total_tokens": 30795256} |
|
{"current_steps": 2970, "total_steps": 6770, "loss": 0.1579, "lr": 6.408343199501519e-05, "epoch": 0.8774002954209749, "percentage": 43.87, "elapsed_time": "6:35:19", "remaining_time": "8:25:48", "throughput": 1300.51, "total_tokens": 30847696} |
|
{"current_steps": 2975, "total_steps": 6770, "loss": 0.1943, "lr": 6.396620789615825e-05, "epoch": 0.8788774002954209, "percentage": 43.94, "elapsed_time": "6:36:45", "remaining_time": "8:26:07", "throughput": 1298.0, "total_tokens": 30899904} |
|
{"current_steps": 2980, "total_steps": 6770, "loss": 0.1749, "lr": 6.384890047504508e-05, "epoch": 0.880354505169867, "percentage": 44.02, "elapsed_time": "6:38:12", "remaining_time": "8:26:26", "throughput": 1295.5, "total_tokens": 30952168} |
|
{"current_steps": 2985, "total_steps": 6770, "loss": 0.2156, "lr": 6.373151043153056e-05, "epoch": 0.8818316100443131, "percentage": 44.09, "elapsed_time": "6:39:38", "remaining_time": "8:26:44", "throughput": 1293.0, "total_tokens": 31004192} |
|
{"current_steps": 2990, "total_steps": 6770, "loss": 0.2083, "lr": 6.361403846596252e-05, "epoch": 0.8833087149187593, "percentage": 44.17, "elapsed_time": "6:41:05", "remaining_time": "8:27:04", "throughput": 1290.49, "total_tokens": 31056712} |
|
{"current_steps": 2995, "total_steps": 6770, "loss": 0.1721, "lr": 6.349648527917752e-05, "epoch": 0.8847858197932054, "percentage": 44.24, "elapsed_time": "6:42:33", "remaining_time": "8:27:23", "throughput": 1287.99, "total_tokens": 31108944} |
|
{"current_steps": 3000, "total_steps": 6770, "loss": 0.234, "lr": 6.33788515724967e-05, "epoch": 0.8862629246676514, "percentage": 44.31, "elapsed_time": "6:44:00", "remaining_time": "8:27:42", "throughput": 1285.42, "total_tokens": 31159736} |
|
{"current_steps": 3000, "total_steps": 6770, "eval_loss": 0.25555509328842163, "epoch": 0.8862629246676514, "percentage": 44.31, "elapsed_time": "6:44:20", "remaining_time": "8:28:07", "throughput": 1284.39, "total_tokens": 31159736} |
|
{"current_steps": 3005, "total_steps": 6770, "loss": 0.2011, "lr": 6.326113804772157e-05, "epoch": 0.8877400295420975, "percentage": 44.39, "elapsed_time": "6:45:52", "remaining_time": "8:28:31", "throughput": 1281.64, "total_tokens": 31211632} |
|
{"current_steps": 3010, "total_steps": 6770, "loss": 0.2197, "lr": 6.314334540712983e-05, "epoch": 0.8892171344165436, "percentage": 44.46, "elapsed_time": "6:47:20", "remaining_time": "8:28:50", "throughput": 1279.2, "total_tokens": 31264376} |
|
{"current_steps": 3015, "total_steps": 6770, "loss": 0.1853, "lr": 6.302547435347122e-05, "epoch": 0.8906942392909897, "percentage": 44.53, "elapsed_time": "6:48:47", "remaining_time": "8:29:07", "throughput": 1276.8, "total_tokens": 31316584} |
|
{"current_steps": 3020, "total_steps": 6770, "loss": 0.2312, "lr": 6.290752558996325e-05, "epoch": 0.8921713441654358, "percentage": 44.61, "elapsed_time": "6:50:15", "remaining_time": "8:29:25", "throughput": 1274.33, "total_tokens": 31367768} |
|
{"current_steps": 3025, "total_steps": 6770, "loss": 0.2608, "lr": 6.278949982028704e-05, "epoch": 0.8936484490398818, "percentage": 44.68, "elapsed_time": "6:51:41", "remaining_time": "8:29:40", "throughput": 1271.97, "total_tokens": 31419664} |
|
{"current_steps": 3030, "total_steps": 6770, "loss": 0.225, "lr": 6.267139774858318e-05, "epoch": 0.8951255539143279, "percentage": 44.76, "elapsed_time": "6:53:08", "remaining_time": "8:29:57", "throughput": 1269.61, "total_tokens": 31471672} |
|
{"current_steps": 3035, "total_steps": 6770, "loss": 0.225, "lr": 6.255322007944743e-05, "epoch": 0.896602658788774, "percentage": 44.83, "elapsed_time": "6:54:34", "remaining_time": "8:30:11", "throughput": 1267.32, "total_tokens": 31523888} |
|
{"current_steps": 3040, "total_steps": 6770, "loss": 0.2291, "lr": 6.243496751792658e-05, "epoch": 0.8980797636632201, "percentage": 44.9, "elapsed_time": "6:56:02", "remaining_time": "8:30:27", "throughput": 1264.91, "total_tokens": 31574992} |
|
{"current_steps": 3045, "total_steps": 6770, "loss": 0.1926, "lr": 6.231664076951421e-05, "epoch": 0.8995568685376661, "percentage": 44.98, "elapsed_time": "6:57:28", "remaining_time": "8:30:41", "throughput": 1262.67, "total_tokens": 31627608} |
|
{"current_steps": 3070, "total_steps": 6770, "loss": 0.2158, "lr": 6.17239189471017e-05, "epoch": 0.9069423929098966, "percentage": 45.35, "elapsed_time": "7:05:09", "remaining_time": "8:32:24", "throughput": 1250.02, "total_tokens": 31887120} |
|
{"current_steps": 3075, "total_steps": 6770, "loss": 0.1947, "lr": 6.160516191719638e-05, "epoch": 0.9084194977843427, "percentage": 45.42, "elapsed_time": "7:06:36", "remaining_time": "8:32:37", "throughput": 1247.79, "total_tokens": 31938768} |
|
{"current_steps": 3080, "total_steps": 6770, "loss": 0.2058, "lr": 6.148633565101145e-05, "epoch": 0.9098966026587888, "percentage": 45.49, "elapsed_time": "7:08:03", "remaining_time": "8:32:49", "throughput": 1245.58, "total_tokens": 31990800} |
|
{"current_steps": 3085, "total_steps": 6770, "loss": 0.2366, "lr": 6.136744085746322e-05, "epoch": 0.9113737075332349, "percentage": 45.57, "elapsed_time": "7:09:30", "remaining_time": "8:33:02", "throughput": 1243.36, "total_tokens": 32042096} |
|
{"current_steps": 3090, "total_steps": 6770, "loss": 0.2749, "lr": 6.124847824587684e-05, "epoch": 0.912850812407681, "percentage": 45.64, "elapsed_time": "7:10:58", "remaining_time": "8:33:15", "throughput": 1241.12, "total_tokens": 32092864} |
|
{"current_steps": 3095, "total_steps": 6770, "loss": 0.2242, "lr": 6.112944852598205e-05, "epoch": 0.914327917282127, "percentage": 45.72, "elapsed_time": "7:12:25", "remaining_time": "8:33:27", "throughput": 1238.91, "total_tokens": 32144288} |
|
{"current_steps": 3100, "total_steps": 6770, "loss": 0.1999, "lr": 6.1010352407908966e-05, "epoch": 0.9158050221565731, "percentage": 45.79, "elapsed_time": "7:13:52", "remaining_time": "8:33:39", "throughput": 1236.75, "total_tokens": 32196176} |
|
{"current_steps": 3100, "total_steps": 6770, "eval_loss": 0.2710443437099457, "epoch": 0.9158050221565731, "percentage": 45.79, "elapsed_time": "7:14:11", "remaining_time": "8:34:02", "throughput": 1235.85, "total_tokens": 32196176} |
|
{"current_steps": 3105, "total_steps": 6770, "loss": 0.2205, "lr": 6.089119060218385e-05, "epoch": 0.9172821270310192, "percentage": 45.86, "elapsed_time": "7:15:43", "remaining_time": "8:34:19", "throughput": 1233.46, "total_tokens": 32247416} |
|
{"current_steps": 3110, "total_steps": 6770, "loss": 0.2161, "lr": 6.077196381972482e-05, "epoch": 0.9187592319054653, "percentage": 45.94, "elapsed_time": "7:17:09", "remaining_time": "8:34:28", "throughput": 1231.36, "total_tokens": 32298088} |
|
{"current_steps": 3115, "total_steps": 6770, "loss": 0.1846, "lr": 6.065267277183767e-05, "epoch": 0.9202363367799113, "percentage": 46.01, "elapsed_time": "7:18:37", "remaining_time": "8:34:39", "throughput": 1229.23, "total_tokens": 32349768} |
|
{"current_steps": 3120, "total_steps": 6770, "loss": 0.2246, "lr": 6.0533318170211584e-05, "epoch": 0.9217134416543574, "percentage": 46.09, "elapsed_time": "7:20:03", "remaining_time": "8:34:48", "throughput": 1227.16, "total_tokens": 32401136} |
|
{"current_steps": 3125, "total_steps": 6770, "loss": 0.1909, "lr": 6.041390072691495e-05, "epoch": 0.9231905465288035, "percentage": 46.16, "elapsed_time": "7:21:31", "remaining_time": "8:35:00", "throughput": 1225.03, "total_tokens": 32453424} |
|
{"current_steps": 3130, "total_steps": 6770, "loss": 0.1651, "lr": 6.0294421154391013e-05, "epoch": 0.9246676514032496, "percentage": 46.23, "elapsed_time": "7:22:58", "remaining_time": "8:35:09", "throughput": 1223.01, "total_tokens": 32506104} |
|
{"current_steps": 3135, "total_steps": 6770, "loss": 0.2352, "lr": 6.0174880165453714e-05, "epoch": 0.9261447562776958, "percentage": 46.31, "elapsed_time": "7:24:26", "remaining_time": "8:35:19", "throughput": 1220.91, "total_tokens": 32557496} |
|
{"current_steps": 3140, "total_steps": 6770, "loss": 0.1875, "lr": 6.005527847328338e-05, "epoch": 0.9276218611521418, "percentage": 46.38, "elapsed_time": "7:25:52", "remaining_time": "8:35:27", "throughput": 1218.92, "total_tokens": 32609696} |
|
{"current_steps": 3145, "total_steps": 6770, "loss": 0.1889, "lr": 5.993561679142253e-05, "epoch": 0.9290989660265879, "percentage": 46.45, "elapsed_time": "7:27:20", "remaining_time": "8:35:36", "throughput": 1216.9, "total_tokens": 32661992} |
|
{"current_steps": 3150, "total_steps": 6770, "loss": 0.2069, "lr": 5.981589583377154e-05, "epoch": 0.930576070901034, "percentage": 46.53, "elapsed_time": "7:28:47", "remaining_time": "8:35:45", "throughput": 1214.88, "total_tokens": 32713824} |
|
{"current_steps": 3150, "total_steps": 6770, "eval_loss": 0.20293839275836945, "epoch": 0.930576070901034, "percentage": 46.53, "elapsed_time": "7:29:06", "remaining_time": "8:36:07", "throughput": 1214.01, "total_tokens": 32713824} |
|
{"current_steps": 3155, "total_steps": 6770, "loss": 0.2035, "lr": 5.969611631458444e-05, "epoch": 0.9320531757754801, "percentage": 46.6, "elapsed_time": "7:30:40", "remaining_time": "8:36:22", "throughput": 1211.75, "total_tokens": 32765648} |
|
{"current_steps": 3160, "total_steps": 6770, "loss": 0.1655, "lr": 5.957627894846465e-05, "epoch": 0.9335302806499262, "percentage": 46.68, "elapsed_time": "7:32:07", "remaining_time": "8:36:30", "throughput": 1209.77, "total_tokens": 32818192} |
|
{"current_steps": 3165, "total_steps": 6770, "loss": 0.196, "lr": 5.9456384450360694e-05, "epoch": 0.9350073855243722, "percentage": 46.75, "elapsed_time": "7:33:34", "remaining_time": "8:36:38", "throughput": 1207.82, "total_tokens": 32870520} |
|
{"current_steps": 3170, "total_steps": 6770, "loss": 0.1774, "lr": 5.933643353556195e-05, "epoch": 0.9364844903988183, "percentage": 46.82, "elapsed_time": "7:35:02", "remaining_time": "8:36:46", "throughput": 1205.83, "total_tokens": 32922712} |
|
{"current_steps": 3175, "total_steps": 6770, "loss": 0.1731, "lr": 5.9216426919694356e-05, "epoch": 0.9379615952732644, "percentage": 46.9, "elapsed_time": "7:36:29", "remaining_time": "8:36:52", "throughput": 1203.96, "total_tokens": 32975768} |
|
{"current_steps": 3180, "total_steps": 6770, "loss": 0.2306, "lr": 5.9096365318716194e-05, "epoch": 0.9394387001477105, "percentage": 46.97, "elapsed_time": "7:37:57", "remaining_time": "8:36:59", "throughput": 1201.99, "total_tokens": 33027264} |
|
{"current_steps": 3185, "total_steps": 6770, "loss": 0.1972, "lr": 5.897624944891378e-05, "epoch": 0.9409158050221565, "percentage": 47.05, "elapsed_time": "7:39:23", "remaining_time": "8:37:05", "throughput": 1200.12, "total_tokens": 33079712} |
|
{"current_steps": 3190, "total_steps": 6770, "loss": 0.1612, "lr": 5.8856080026897144e-05, "epoch": 0.9423929098966026, "percentage": 47.12, "elapsed_time": "7:40:50", "remaining_time": "8:37:10", "throughput": 1198.24, "total_tokens": 33132048} |
|
{"current_steps": 3195, "total_steps": 6770, "loss": 0.2148, "lr": 5.8735857769595905e-05, "epoch": 0.9438700147710487, "percentage": 47.19, "elapsed_time": "7:42:15", "remaining_time": "8:37:13", "throughput": 1196.48, "total_tokens": 33184488} |
|
{"current_steps": 3200, "total_steps": 6770, "loss": 0.2135, "lr": 5.8615583394254814e-05, "epoch": 0.9453471196454948, "percentage": 47.27, "elapsed_time": "7:43:41", "remaining_time": "8:37:18", "throughput": 1194.59, "total_tokens": 33235872} |
|
{"current_steps": 3200, "total_steps": 6770, "eval_loss": 0.3564297556877136, "epoch": 0.9453471196454948, "percentage": 47.27, "elapsed_time": "7:44:01", "remaining_time": "8:37:40", "throughput": 1193.76, "total_tokens": 33235872} |
|
{"current_steps": 3205, "total_steps": 6770, "loss": 0.2059, "lr": 5.849525761842961e-05, "epoch": 0.946824224519941, "percentage": 47.34, "elapsed_time": "7:45:31", "remaining_time": "8:37:49", "throughput": 1191.75, "total_tokens": 33287792} |
|
{"current_steps": 3210, "total_steps": 6770, "loss": 0.1482, "lr": 5.837488115998264e-05, "epoch": 0.948301329394387, "percentage": 47.42, "elapsed_time": "7:46:59", "remaining_time": "8:37:54", "throughput": 1189.91, "total_tokens": 33340168} |
|
{"current_steps": 3215, "total_steps": 6770, "loss": 0.2017, "lr": 5.825445473707867e-05, "epoch": 0.9497784342688331, "percentage": 47.49, "elapsed_time": "7:48:24", "remaining_time": "8:37:56", "throughput": 1188.15, "total_tokens": 33391904} |
|
{"current_steps": 3220, "total_steps": 6770, "loss": 0.206, "lr": 5.813397906818051e-05, "epoch": 0.9512555391432792, "percentage": 47.56, "elapsed_time": "7:49:51", "remaining_time": "8:38:00", "throughput": 1186.29, "total_tokens": 33443080} |
|
{"current_steps": 3225, "total_steps": 6770, "loss": 0.1868, "lr": 5.801345487204482e-05, "epoch": 0.9527326440177253, "percentage": 47.64, "elapsed_time": "7:51:17", "remaining_time": "8:38:03", "throughput": 1184.53, "total_tokens": 33495320} |
|
{"current_steps": 3230, "total_steps": 6770, "loss": 0.2033, "lr": 5.78928828677177e-05, "epoch": 0.9542097488921714, "percentage": 47.71, "elapsed_time": "7:52:45", "remaining_time": "8:38:07", "throughput": 1182.67, "total_tokens": 33546688} |
|
{"current_steps": 3235, "total_steps": 6770, "loss": 0.2008, "lr": 5.777226377453057e-05, "epoch": 0.9556868537666174, "percentage": 47.78, "elapsed_time": "7:54:11", "remaining_time": "8:38:10", "throughput": 1180.87, "total_tokens": 33597928} |
|
{"current_steps": 3240, "total_steps": 6770, "loss": 0.2121, "lr": 5.76515983120957e-05, "epoch": 0.9571639586410635, "percentage": 47.86, "elapsed_time": "7:55:38", "remaining_time": "8:38:13", "throughput": 1179.09, "total_tokens": 33649752} |
|
{"current_steps": 3245, "total_steps": 6770, "loss": 0.2204, "lr": 5.7530887200302055e-05, "epoch": 0.9586410635155096, "percentage": 47.93, "elapsed_time": "7:57:05", "remaining_time": "8:38:15", "throughput": 1177.28, "total_tokens": 33700792} |
|
{"current_steps": 3250, "total_steps": 6770, "loss": 0.1964, "lr": 5.741013115931088e-05, "epoch": 0.9601181683899557, "percentage": 48.01, "elapsed_time": "7:58:33", "remaining_time": "8:38:18", "throughput": 1175.49, "total_tokens": 33752488} |
|
{"current_steps": 3250, "total_steps": 6770, "eval_loss": 0.308076411485672, "epoch": 0.9601181683899557, "percentage": 48.01, "elapsed_time": "7:58:52", "remaining_time": "8:38:39", "throughput": 1174.7, "total_tokens": 33752488} |
|
{"current_steps": 3255, "total_steps": 6770, "loss": 0.227, "lr": 5.728933090955151e-05, "epoch": 0.9615952732644018, "percentage": 48.08, "elapsed_time": "8:00:24", "remaining_time": "8:38:47", "throughput": 1172.74, "total_tokens": 33803968} |
|
{"current_steps": 3260, "total_steps": 6770, "loss": 0.2117, "lr": 5.7168487171717056e-05, "epoch": 0.9630723781388478, "percentage": 48.15, "elapsed_time": "8:01:51", "remaining_time": "8:38:48", "throughput": 1171.02, "total_tokens": 33856104} |
|
{"current_steps": 3265, "total_steps": 6770, "loss": 0.196, "lr": 5.704760066676003e-05, "epoch": 0.9645494830132939, "percentage": 48.23, "elapsed_time": "8:03:19", "remaining_time": "8:38:50", "throughput": 1169.27, "total_tokens": 33907912} |
|
{"current_steps": 3270, "total_steps": 6770, "loss": 0.1984, "lr": 5.69266721158881e-05, "epoch": 0.96602658788774, "percentage": 48.3, "elapsed_time": "8:04:45", "remaining_time": "8:38:51", "throughput": 1167.58, "total_tokens": 33960104} |
|
{"current_steps": 3275, "total_steps": 6770, "loss": 0.182, "lr": 5.6805702240559786e-05, "epoch": 0.9675036927621861, "percentage": 48.38, "elapsed_time": "8:06:12", "remaining_time": "8:38:52", "throughput": 1165.89, "total_tokens": 34012120} |
|
{"current_steps": 3280, "total_steps": 6770, "loss": 0.2219, "lr": 5.668469176248017e-05, "epoch": 0.9689807976366323, "percentage": 48.45, "elapsed_time": "8:07:38", "remaining_time": "8:38:51", "throughput": 1164.23, "total_tokens": 34063520} |
|
{"current_steps": 3285, "total_steps": 6770, "loss": 0.2421, "lr": 5.6563641403596536e-05, "epoch": 0.9704579025110783, "percentage": 48.52, "elapsed_time": "8:09:04", "remaining_time": "8:38:50", "throughput": 1162.61, "total_tokens": 34115800} |
|
{"current_steps": 3290, "total_steps": 6770, "loss": 0.1888, "lr": 5.644255188609411e-05, "epoch": 0.9719350073855244, "percentage": 48.6, "elapsed_time": "8:10:29", "remaining_time": "8:38:49", "throughput": 1161.01, "total_tokens": 34168208} |
|
{"current_steps": 3295, "total_steps": 6770, "loss": 0.2366, "lr": 5.632142393239174e-05, "epoch": 0.9734121122599705, "percentage": 48.67, "elapsed_time": "8:11:55", "remaining_time": "8:38:48", "throughput": 1159.32, "total_tokens": 34218456} |
|
{"current_steps": 3300, "total_steps": 6770, "loss": 0.2131, "lr": 5.6200258265137585e-05, "epoch": 0.9748892171344166, "percentage": 48.74, "elapsed_time": "8:13:21", "remaining_time": "8:38:46", "throughput": 1157.69, "total_tokens": 34269496} |
|
{"current_steps": 3300, "total_steps": 6770, "eval_loss": 0.3541204631328583, "epoch": 0.9748892171344166, "percentage": 48.74, "elapsed_time": "8:13:40", "remaining_time": "8:39:06", "throughput": 1156.94, "total_tokens": 34269496} |
|
{"current_steps": 3305, "total_steps": 6770, "loss": 0.1993, "lr": 5.607905560720481e-05, "epoch": 0.9763663220088626, "percentage": 48.82, "elapsed_time": "8:15:13", "remaining_time": "8:39:12", "throughput": 1155.07, "total_tokens": 34321480} |
|
{"current_steps": 3310, "total_steps": 6770, "loss": 0.1965, "lr": 5.595781668168725e-05, "epoch": 0.9778434268833087, "percentage": 48.89, "elapsed_time": "8:16:39", "remaining_time": "8:39:10", "throughput": 1153.46, "total_tokens": 34372752} |
|
{"current_steps": 3315, "total_steps": 6770, "loss": 0.2429, "lr": 5.5836542211895105e-05, "epoch": 0.9793205317577548, "percentage": 48.97, "elapsed_time": "8:18:07", "remaining_time": "8:39:09", "throughput": 1151.83, "total_tokens": 34424768} |
|
{"current_steps": 3320, "total_steps": 6770, "loss": 0.2214, "lr": 5.571523292135067e-05, "epoch": 0.9807976366322009, "percentage": 49.04, "elapsed_time": "8:19:33", "remaining_time": "8:39:07", "throughput": 1150.18, "total_tokens": 34475248} |
|
{"current_steps": 3325, "total_steps": 6770, "loss": 0.1624, "lr": 5.559388953378393e-05, "epoch": 0.982274741506647, "percentage": 49.11, "elapsed_time": "8:21:01", "remaining_time": "8:39:06", "throughput": 1148.58, "total_tokens": 34528200} |
|
{"current_steps": 3330, "total_steps": 6770, "loss": 0.2222, "lr": 5.547251277312833e-05, "epoch": 0.983751846381093, "percentage": 49.19, "elapsed_time": "8:22:30", "remaining_time": "8:39:06", "throughput": 1146.88, "total_tokens": 34579080} |
|
{"current_steps": 3335, "total_steps": 6770, "loss": 0.1895, "lr": 5.535110336351642e-05, "epoch": 0.9852289512555391, "percentage": 49.26, "elapsed_time": "8:23:59", "remaining_time": "8:39:06", "throughput": 1145.21, "total_tokens": 34630680} |
|
{"current_steps": 3340, "total_steps": 6770, "loss": 0.2164, "lr": 5.5229662029275505e-05, "epoch": 0.9867060561299852, "percentage": 49.34, "elapsed_time": "8:25:27", "remaining_time": "8:39:04", "throughput": 1143.6, "total_tokens": 34682208} |
|
{"current_steps": 3345, "total_steps": 6770, "loss": 0.223, "lr": 5.510818949492337e-05, "epoch": 0.9881831610044313, "percentage": 49.41, "elapsed_time": "8:26:53", "remaining_time": "8:39:01", "throughput": 1142.01, "total_tokens": 34732696} |
|
{"current_steps": 3350, "total_steps": 6770, "loss": 0.1779, "lr": 5.498668648516394e-05, "epoch": 0.9896602658788775, "percentage": 49.48, "elapsed_time": "8:28:20", "remaining_time": "8:38:58", "throughput": 1140.45, "total_tokens": 34784784} |
|
{"current_steps": 3350, "total_steps": 6770, "eval_loss": 0.22550027072429657, "epoch": 0.9896602658788775, "percentage": 49.48, "elapsed_time": "8:28:40", "remaining_time": "8:39:18", "throughput": 1139.72, "total_tokens": 34784784} |
|
{"current_steps": 3355, "total_steps": 6770, "loss": 0.2145, "lr": 5.4865153724882945e-05, "epoch": 0.9911373707533235, "percentage": 49.56, "elapsed_time": "8:30:10", "remaining_time": "8:39:18", "throughput": 1138.05, "total_tokens": 34836528} |
|
{"current_steps": 3360, "total_steps": 6770, "loss": 0.1839, "lr": 5.4743591939143624e-05, "epoch": 0.9926144756277696, "percentage": 49.63, "elapsed_time": "8:31:36", "remaining_time": "8:39:13", "throughput": 1136.57, "total_tokens": 34888888} |
|
{"current_steps": 3365, "total_steps": 6770, "loss": 0.1665, "lr": 5.462200185318236e-05, "epoch": 0.9940915805022157, "percentage": 49.7, "elapsed_time": "8:33:01", "remaining_time": "8:39:07", "throughput": 1135.13, "total_tokens": 34941112} |
|
{"current_steps": 3370, "total_steps": 6770, "loss": 0.1908, "lr": 5.4500384192404395e-05, "epoch": 0.9955686853766618, "percentage": 49.78, "elapsed_time": "8:34:27", "remaining_time": "8:39:02", "throughput": 1133.65, "total_tokens": 34992976} |
|
{"current_steps": 3375, "total_steps": 6770, "loss": 0.1577, "lr": 5.4378739682379475e-05, "epoch": 0.9970457902511078, "percentage": 49.85, "elapsed_time": "8:35:52", "remaining_time": "8:38:55", "throughput": 1132.25, "total_tokens": 35045680} |
|
{"current_steps": 3380, "total_steps": 6770, "loss": 0.2097, "lr": 5.425706904883753e-05, "epoch": 0.9985228951255539, "percentage": 49.93, "elapsed_time": "8:37:18", "remaining_time": "8:38:50", "throughput": 1130.8, "total_tokens": 35097928} |
|
{"current_steps": 3385, "total_steps": 6770, "loss": 0.1933, "lr": 5.4135373017664326e-05, "epoch": 1.0, "percentage": 50.0, "elapsed_time": "8:38:43", "remaining_time": "8:38:43", "throughput": 1129.39, "total_tokens": 35150544} |
|
{"current_steps": 3390, "total_steps": 6770, "loss": 0.1708, "lr": 5.401365231489718e-05, "epoch": 1.0014771048744462, "percentage": 50.07, "elapsed_time": "8:40:10", "remaining_time": "8:38:38", "throughput": 1127.94, "total_tokens": 35203152} |
|
{"current_steps": 3395, "total_steps": 6770, "loss": 0.1766, "lr": 5.389190766672056e-05, "epoch": 1.0029542097488922, "percentage": 50.15, "elapsed_time": "8:41:34", "remaining_time": "8:38:30", "throughput": 1126.54, "total_tokens": 35254992} |
|
{"current_steps": 3400, "total_steps": 6770, "loss": 0.2173, "lr": 5.3770139799461824e-05, "epoch": 1.0044313146233383, "percentage": 50.22, "elapsed_time": "8:43:01", "remaining_time": "8:38:24", "throughput": 1125.07, "total_tokens": 35305984} |
|
{"current_steps": 3400, "total_steps": 6770, "eval_loss": 0.40779221057891846, "epoch": 1.0044313146233383, "percentage": 50.22, "elapsed_time": "8:43:20", "remaining_time": "8:38:43", "throughput": 1124.38, "total_tokens": 35305984} |
|
{"current_steps": 3405, "total_steps": 6770, "loss": 0.2062, "lr": 5.364834943958688e-05, "epoch": 1.0059084194977843, "percentage": 50.3, "elapsed_time": "8:44:51", "remaining_time": "8:38:41", "throughput": 1122.78, "total_tokens": 35357880} |
|
{"current_steps": 3401, "total_steps": 3400, "epoch": 1.0047267355982274, "percentage": 100.03, "elapsed_time": "0:01:43", "remaining_time": "0:00:00", "throughput": 341090.05, "total_tokens": 35316128} |
|
|