File size: 40,072 Bytes
ae07701 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 |
{"current_steps": 5, "total_steps": 792, "loss": 4.2968, "learning_rate": 4.9995083170283816e-05, "epoch": 0.03770028275212064, "percentage": 0.63, "elapsed_time": "0:00:30", "remaining_time": "1:20:51", "throughput": 1683.05, "total_tokens": 51872}
{"current_steps": 10, "total_steps": 792, "loss": 3.9725, "learning_rate": 4.998033461515242e-05, "epoch": 0.07540056550424128, "percentage": 1.26, "elapsed_time": "0:01:03", "remaining_time": "1:22:23", "throughput": 1692.93, "total_tokens": 107024}
{"current_steps": 15, "total_steps": 792, "loss": 3.8314, "learning_rate": 4.9955760135896534e-05, "epoch": 0.11310084825636192, "percentage": 1.89, "elapsed_time": "0:01:35", "remaining_time": "1:22:16", "throughput": 1679.83, "total_tokens": 160080}
{"current_steps": 20, "total_steps": 792, "loss": 3.7, "learning_rate": 4.992136939879856e-05, "epoch": 0.15080113100848255, "percentage": 2.53, "elapsed_time": "0:02:04", "remaining_time": "1:20:15", "throughput": 1673.98, "total_tokens": 208816}
{"current_steps": 25, "total_steps": 792, "loss": 3.6029, "learning_rate": 4.9877175931330346e-05, "epoch": 0.1885014137606032, "percentage": 3.16, "elapsed_time": "0:02:37", "remaining_time": "1:20:42", "throughput": 1662.7, "total_tokens": 262448}
{"current_steps": 30, "total_steps": 792, "loss": 3.4859, "learning_rate": 4.982319711683221e-05, "epoch": 0.22620169651272384, "percentage": 3.79, "elapsed_time": "0:03:10", "remaining_time": "1:20:40", "throughput": 1655.81, "total_tokens": 315552}
{"current_steps": 35, "total_steps": 792, "loss": 3.4653, "learning_rate": 4.975945418767529e-05, "epoch": 0.2639019792648445, "percentage": 4.42, "elapsed_time": "0:03:40", "remaining_time": "1:19:29", "throughput": 1664.09, "total_tokens": 366992}
{"current_steps": 40, "total_steps": 792, "loss": 3.485, "learning_rate": 4.968597221690986e-05, "epoch": 0.3016022620169651, "percentage": 5.05, "elapsed_time": "0:04:11", "remaining_time": "1:18:41", "throughput": 1667.72, "total_tokens": 418816}
{"current_steps": 45, "total_steps": 792, "loss": 3.4304, "learning_rate": 4.96027801084029e-05, "epoch": 0.3393025447690858, "percentage": 5.68, "elapsed_time": "0:04:40", "remaining_time": "1:17:44", "throughput": 1672.47, "total_tokens": 469936}
{"current_steps": 50, "total_steps": 792, "loss": 3.4635, "learning_rate": 4.950991058546893e-05, "epoch": 0.3770028275212064, "percentage": 6.31, "elapsed_time": "0:05:14", "remaining_time": "1:17:44", "throughput": 1670.71, "total_tokens": 525120}
{"current_steps": 55, "total_steps": 792, "loss": 3.3626, "learning_rate": 4.940740017799833e-05, "epoch": 0.41470311027332707, "percentage": 6.94, "elapsed_time": "0:05:45", "remaining_time": "1:17:13", "throughput": 1668.68, "total_tokens": 576944}
{"current_steps": 60, "total_steps": 792, "loss": 3.3347, "learning_rate": 4.929528920808854e-05, "epoch": 0.4524033930254477, "percentage": 7.58, "elapsed_time": "0:06:16", "remaining_time": "1:16:30", "throughput": 1670.41, "total_tokens": 628544}
{"current_steps": 65, "total_steps": 792, "loss": 3.3347, "learning_rate": 4.917362177418342e-05, "epoch": 0.49010367577756836, "percentage": 8.21, "elapsed_time": "0:06:48", "remaining_time": "1:16:05", "throughput": 1669.21, "total_tokens": 681392}
{"current_steps": 70, "total_steps": 792, "loss": 3.3173, "learning_rate": 4.904244573372733e-05, "epoch": 0.527803958529689, "percentage": 8.84, "elapsed_time": "0:07:20", "remaining_time": "1:15:41", "throughput": 1668.96, "total_tokens": 734928}
{"current_steps": 75, "total_steps": 792, "loss": 3.3863, "learning_rate": 4.8901812684340564e-05, "epoch": 0.5655042412818096, "percentage": 9.47, "elapsed_time": "0:07:49", "remaining_time": "1:14:51", "throughput": 1672.63, "total_tokens": 785840}
{"current_steps": 80, "total_steps": 792, "loss": 3.2669, "learning_rate": 4.8751777943523634e-05, "epoch": 0.6032045240339302, "percentage": 10.1, "elapsed_time": "0:08:21", "remaining_time": "1:14:24", "throughput": 1670.95, "total_tokens": 838112}
{"current_steps": 85, "total_steps": 792, "loss": 3.249, "learning_rate": 4.8592400526898314e-05, "epoch": 0.6409048067860509, "percentage": 10.73, "elapsed_time": "0:08:52", "remaining_time": "1:13:46", "throughput": 1666.15, "total_tokens": 886784}
{"current_steps": 90, "total_steps": 792, "loss": 3.3248, "learning_rate": 4.842374312499405e-05, "epoch": 0.6786050895381716, "percentage": 11.36, "elapsed_time": "0:09:21", "remaining_time": "1:13:01", "throughput": 1667.85, "total_tokens": 936880}
{"current_steps": 95, "total_steps": 792, "loss": 3.1471, "learning_rate": 4.824587207858888e-05, "epoch": 0.7163053722902922, "percentage": 11.99, "elapsed_time": "0:09:54", "remaining_time": "1:12:40", "throughput": 1663.77, "total_tokens": 988864}
{"current_steps": 100, "total_steps": 792, "loss": 3.2406, "learning_rate": 4.805885735261454e-05, "epoch": 0.7540056550424128, "percentage": 12.63, "elapsed_time": "0:10:27", "remaining_time": "1:12:21", "throughput": 1664.76, "total_tokens": 1044400}
{"current_steps": 105, "total_steps": 792, "loss": 3.1418, "learning_rate": 4.786277250863599e-05, "epoch": 0.7917059377945335, "percentage": 13.26, "elapsed_time": "0:11:00", "remaining_time": "1:11:59", "throughput": 1662.38, "total_tokens": 1097376}
{"current_steps": 110, "total_steps": 792, "loss": 3.1452, "learning_rate": 4.765769467591625e-05, "epoch": 0.8294062205466541, "percentage": 13.89, "elapsed_time": "0:11:32", "remaining_time": "1:11:32", "throughput": 1665.96, "total_tokens": 1153456}
{"current_steps": 115, "total_steps": 792, "loss": 3.2019, "learning_rate": 4.744370452107789e-05, "epoch": 0.8671065032987747, "percentage": 14.52, "elapsed_time": "0:12:03", "remaining_time": "1:10:57", "throughput": 1666.89, "total_tokens": 1205536}
{"current_steps": 120, "total_steps": 792, "loss": 3.2528, "learning_rate": 4.722088621637309e-05, "epoch": 0.9048067860508954, "percentage": 15.15, "elapsed_time": "0:12:32", "remaining_time": "1:10:14", "throughput": 1669.19, "total_tokens": 1256080}
{"current_steps": 125, "total_steps": 792, "loss": 3.0824, "learning_rate": 4.698932740657479e-05, "epoch": 0.942507068803016, "percentage": 15.78, "elapsed_time": "0:13:05", "remaining_time": "1:09:50", "throughput": 1666.21, "total_tokens": 1308592}
{"current_steps": 130, "total_steps": 792, "loss": 3.2487, "learning_rate": 4.6749119174501975e-05, "epoch": 0.9802073515551367, "percentage": 16.41, "elapsed_time": "0:13:33", "remaining_time": "1:09:02", "throughput": 1666.61, "total_tokens": 1355792}
{"current_steps": 135, "total_steps": 792, "loss": 3.12, "learning_rate": 4.6500356005192514e-05, "epoch": 1.0179076343072573, "percentage": 17.05, "elapsed_time": "0:14:02", "remaining_time": "1:08:21", "throughput": 1664.06, "total_tokens": 1402560}
{"current_steps": 140, "total_steps": 792, "loss": 3.1617, "learning_rate": 4.6243135748737864e-05, "epoch": 1.055607917059378, "percentage": 17.68, "elapsed_time": "0:14:32", "remaining_time": "1:07:41", "throughput": 1666.41, "total_tokens": 1453152}
{"current_steps": 145, "total_steps": 792, "loss": 3.0912, "learning_rate": 4.597755958179406e-05, "epoch": 1.0933081998114986, "percentage": 18.31, "elapsed_time": "0:15:02", "remaining_time": "1:07:07", "throughput": 1668.13, "total_tokens": 1505744}
{"current_steps": 150, "total_steps": 792, "loss": 2.9448, "learning_rate": 4.570373196778427e-05, "epoch": 1.1310084825636193, "percentage": 18.94, "elapsed_time": "0:15:35", "remaining_time": "1:06:45", "throughput": 1665.99, "total_tokens": 1559264}
{"current_steps": 155, "total_steps": 792, "loss": 3.1383, "learning_rate": 4.5421760615808474e-05, "epoch": 1.1687087653157398, "percentage": 19.57, "elapsed_time": "0:16:03", "remaining_time": "1:06:00", "throughput": 1664.91, "total_tokens": 1604368}
{"current_steps": 160, "total_steps": 792, "loss": 3.0526, "learning_rate": 4.513175643827647e-05, "epoch": 1.2064090480678604, "percentage": 20.2, "elapsed_time": "0:16:35", "remaining_time": "1:05:30", "throughput": 1665.23, "total_tokens": 1657168}
{"current_steps": 165, "total_steps": 792, "loss": 3.0386, "learning_rate": 4.4833833507280884e-05, "epoch": 1.244109330819981, "percentage": 20.83, "elapsed_time": "0:17:03", "remaining_time": "1:04:48", "throughput": 1666.22, "total_tokens": 1704960}
{"current_steps": 170, "total_steps": 792, "loss": 2.9411, "learning_rate": 4.4528109009727336e-05, "epoch": 1.2818096135721018, "percentage": 21.46, "elapsed_time": "0:17:33", "remaining_time": "1:04:15", "throughput": 1666.69, "total_tokens": 1756400}
{"current_steps": 175, "total_steps": 792, "loss": 3.1312, "learning_rate": 4.42147032012394e-05, "epoch": 1.3195098963242224, "percentage": 22.1, "elapsed_time": "0:18:03", "remaining_time": "1:03:38", "throughput": 1668.26, "total_tokens": 1806976}
{"current_steps": 180, "total_steps": 792, "loss": 3.0785, "learning_rate": 4.389373935885646e-05, "epoch": 1.3572101790763431, "percentage": 22.73, "elapsed_time": "0:18:36", "remaining_time": "1:03:16", "throughput": 1667.01, "total_tokens": 1861264}
{"current_steps": 185, "total_steps": 792, "loss": 3.0567, "learning_rate": 4.356534373254316e-05, "epoch": 1.3949104618284638, "percentage": 23.36, "elapsed_time": "0:19:12", "remaining_time": "1:03:00", "throughput": 1664.67, "total_tokens": 1917872}
{"current_steps": 190, "total_steps": 792, "loss": 3.0526, "learning_rate": 4.322964549552943e-05, "epoch": 1.4326107445805842, "percentage": 23.99, "elapsed_time": "0:19:44", "remaining_time": "1:02:32", "throughput": 1665.11, "total_tokens": 1971888}
{"current_steps": 195, "total_steps": 792, "loss": 3.0147, "learning_rate": 4.288677669350066e-05, "epoch": 1.4703110273327051, "percentage": 24.62, "elapsed_time": "0:20:14", "remaining_time": "1:01:59", "throughput": 1664.36, "total_tokens": 2022112}
{"current_steps": 200, "total_steps": 792, "loss": 3.0681, "learning_rate": 4.2536872192658036e-05, "epoch": 1.5080113100848256, "percentage": 25.25, "elapsed_time": "0:20:45", "remaining_time": "1:01:27", "throughput": 1664.08, "total_tokens": 2073088}
{"current_steps": 205, "total_steps": 792, "loss": 2.9832, "learning_rate": 4.218006962666934e-05, "epoch": 1.5457115928369463, "percentage": 25.88, "elapsed_time": "0:21:18", "remaining_time": "1:00:59", "throughput": 1661.89, "total_tokens": 2124080}
{"current_steps": 210, "total_steps": 792, "loss": 2.9811, "learning_rate": 4.181650934253132e-05, "epoch": 1.583411875589067, "percentage": 26.52, "elapsed_time": "0:21:49", "remaining_time": "1:00:29", "throughput": 1663.59, "total_tokens": 2178784}
{"current_steps": 215, "total_steps": 792, "loss": 2.9752, "learning_rate": 4.144633434536467e-05, "epoch": 1.6211121583411876, "percentage": 27.15, "elapsed_time": "0:22:19", "remaining_time": "0:59:55", "throughput": 1663.52, "total_tokens": 2228928}
{"current_steps": 220, "total_steps": 792, "loss": 3.1081, "learning_rate": 4.1069690242163484e-05, "epoch": 1.6588124410933083, "percentage": 27.78, "elapsed_time": "0:22:50", "remaining_time": "0:59:23", "throughput": 1663.24, "total_tokens": 2279536}
{"current_steps": 225, "total_steps": 792, "loss": 2.9904, "learning_rate": 4.06867251845213e-05, "epoch": 1.6965127238454287, "percentage": 28.41, "elapsed_time": "0:23:27", "remaining_time": "0:59:07", "throughput": 1661.42, "total_tokens": 2338832}
{"current_steps": 230, "total_steps": 792, "loss": 3.0209, "learning_rate": 4.0297589810356165e-05, "epoch": 1.7342130065975496, "percentage": 29.04, "elapsed_time": "0:24:00", "remaining_time": "0:58:40", "throughput": 1660.62, "total_tokens": 2392784}
{"current_steps": 235, "total_steps": 792, "loss": 2.9835, "learning_rate": 3.9902437184657784e-05, "epoch": 1.77191328934967, "percentage": 29.67, "elapsed_time": "0:24:35", "remaining_time": "0:58:16", "throughput": 1660.05, "total_tokens": 2449088}
{"current_steps": 240, "total_steps": 792, "loss": 3.0256, "learning_rate": 3.9501422739279956e-05, "epoch": 1.8096135721017907, "percentage": 30.3, "elapsed_time": "0:25:08", "remaining_time": "0:57:49", "throughput": 1660.0, "total_tokens": 2503872}
{"current_steps": 245, "total_steps": 792, "loss": 2.9455, "learning_rate": 3.909470421180201e-05, "epoch": 1.8473138548539114, "percentage": 30.93, "elapsed_time": "0:25:39", "remaining_time": "0:57:18", "throughput": 1660.36, "total_tokens": 2556896}
{"current_steps": 250, "total_steps": 792, "loss": 2.9629, "learning_rate": 3.8682441583483314e-05, "epoch": 1.885014137606032, "percentage": 31.57, "elapsed_time": "0:26:09", "remaining_time": "0:56:42", "throughput": 1661.51, "total_tokens": 2607712}
{"current_steps": 255, "total_steps": 792, "loss": 3.0255, "learning_rate": 3.8264797016335205e-05, "epoch": 1.9227144203581528, "percentage": 32.2, "elapsed_time": "0:26:40", "remaining_time": "0:56:10", "throughput": 1662.83, "total_tokens": 2661696}
{"current_steps": 260, "total_steps": 792, "loss": 2.9352, "learning_rate": 3.7841934789335164e-05, "epoch": 1.9604147031102732, "percentage": 32.83, "elapsed_time": "0:27:11", "remaining_time": "0:55:37", "throughput": 1662.51, "total_tokens": 2711664}
{"current_steps": 265, "total_steps": 792, "loss": 3.0592, "learning_rate": 3.741402123380828e-05, "epoch": 1.998114985862394, "percentage": 33.46, "elapsed_time": "0:27:43", "remaining_time": "0:55:08", "throughput": 1663.2, "total_tokens": 2766656}
{"current_steps": 270, "total_steps": 792, "loss": 2.8064, "learning_rate": 3.6981224668001424e-05, "epoch": 2.0358152686145146, "percentage": 34.09, "elapsed_time": "0:28:14", "remaining_time": "0:54:35", "throughput": 1662.39, "total_tokens": 2816736}
{"current_steps": 275, "total_steps": 792, "loss": 2.6955, "learning_rate": 3.654371533087586e-05, "epoch": 2.0735155513666355, "percentage": 34.72, "elapsed_time": "0:28:48", "remaining_time": "0:54:09", "throughput": 1661.62, "total_tokens": 2871728}
{"current_steps": 280, "total_steps": 792, "loss": 2.8221, "learning_rate": 3.610166531514436e-05, "epoch": 2.111215834118756, "percentage": 35.35, "elapsed_time": "0:29:14", "remaining_time": "0:53:27", "throughput": 1661.75, "total_tokens": 2914880}
{"current_steps": 285, "total_steps": 792, "loss": 2.8134, "learning_rate": 3.565524849957921e-05, "epoch": 2.1489161168708764, "percentage": 35.98, "elapsed_time": "0:29:45", "remaining_time": "0:52:56", "throughput": 1661.03, "total_tokens": 2966144}
{"current_steps": 290, "total_steps": 792, "loss": 2.8455, "learning_rate": 3.520464048061758e-05, "epoch": 2.1866163996229973, "percentage": 36.62, "elapsed_time": "0:30:14", "remaining_time": "0:52:21", "throughput": 1662.83, "total_tokens": 3017856}
{"current_steps": 295, "total_steps": 792, "loss": 2.8067, "learning_rate": 3.47500185032913e-05, "epoch": 2.2243166823751177, "percentage": 37.25, "elapsed_time": "0:30:43", "remaining_time": "0:51:45", "throughput": 1664.85, "total_tokens": 3069232}
{"current_steps": 300, "total_steps": 792, "loss": 2.8081, "learning_rate": 3.4291561391508185e-05, "epoch": 2.2620169651272386, "percentage": 37.88, "elapsed_time": "0:31:15", "remaining_time": "0:51:16", "throughput": 1663.75, "total_tokens": 3120768}
{"current_steps": 305, "total_steps": 792, "loss": 2.7738, "learning_rate": 3.3829449477712324e-05, "epoch": 2.299717247879359, "percentage": 38.51, "elapsed_time": "0:31:51", "remaining_time": "0:50:52", "throughput": 1662.22, "total_tokens": 3177328}
{"current_steps": 310, "total_steps": 792, "loss": 2.8478, "learning_rate": 3.336386453195088e-05, "epoch": 2.3374175306314795, "percentage": 39.14, "elapsed_time": "0:32:24", "remaining_time": "0:50:23", "throughput": 1662.21, "total_tokens": 3232688}
{"current_steps": 315, "total_steps": 792, "loss": 2.7571, "learning_rate": 3.2894989690375626e-05, "epoch": 2.3751178133836004, "percentage": 39.77, "elapsed_time": "0:32:57", "remaining_time": "0:49:53", "throughput": 1663.1, "total_tokens": 3288048}
{"current_steps": 320, "total_steps": 792, "loss": 2.7997, "learning_rate": 3.2423009383206876e-05, "epoch": 2.412818096135721, "percentage": 40.4, "elapsed_time": "0:33:26", "remaining_time": "0:49:20", "throughput": 1663.32, "total_tokens": 3338128}
{"current_steps": 325, "total_steps": 792, "loss": 2.7538, "learning_rate": 3.194810926218861e-05, "epoch": 2.4505183788878417, "percentage": 41.04, "elapsed_time": "0:33:58", "remaining_time": "0:48:49", "throughput": 1663.19, "total_tokens": 3390848}
{"current_steps": 330, "total_steps": 792, "loss": 2.7098, "learning_rate": 3.147047612756302e-05, "epoch": 2.488218661639962, "percentage": 41.67, "elapsed_time": "0:34:30", "remaining_time": "0:48:18", "throughput": 1662.54, "total_tokens": 3441776}
{"current_steps": 335, "total_steps": 792, "loss": 2.8332, "learning_rate": 3.099029785459328e-05, "epoch": 2.525918944392083, "percentage": 42.3, "elapsed_time": "0:35:00", "remaining_time": "0:47:45", "throughput": 1661.91, "total_tokens": 3490976}
{"current_steps": 340, "total_steps": 792, "loss": 2.8147, "learning_rate": 3.0507763319663517e-05, "epoch": 2.5636192271442035, "percentage": 42.93, "elapsed_time": "0:35:32", "remaining_time": "0:47:14", "throughput": 1662.41, "total_tokens": 3544848}
{"current_steps": 345, "total_steps": 792, "loss": 2.7288, "learning_rate": 3.002306232598497e-05, "epoch": 2.6013195098963244, "percentage": 43.56, "elapsed_time": "0:36:04", "remaining_time": "0:46:44", "throughput": 1662.04, "total_tokens": 3597536}
{"current_steps": 350, "total_steps": 792, "loss": 2.6969, "learning_rate": 2.9536385528937567e-05, "epoch": 2.639019792648445, "percentage": 44.19, "elapsed_time": "0:36:36", "remaining_time": "0:46:14", "throughput": 1661.46, "total_tokens": 3649760}
{"current_steps": 355, "total_steps": 792, "loss": 2.7767, "learning_rate": 2.9047924361076345e-05, "epoch": 2.6767200754005653, "percentage": 44.82, "elapsed_time": "0:37:09", "remaining_time": "0:45:45", "throughput": 1661.1, "total_tokens": 3704160}
{"current_steps": 360, "total_steps": 792, "loss": 2.6182, "learning_rate": 2.8557870956832132e-05, "epoch": 2.7144203581526862, "percentage": 45.45, "elapsed_time": "0:37:37", "remaining_time": "0:45:09", "throughput": 1661.52, "total_tokens": 3751280}
{"current_steps": 365, "total_steps": 792, "loss": 2.7498, "learning_rate": 2.8066418076936167e-05, "epoch": 2.7521206409048067, "percentage": 46.09, "elapsed_time": "0:38:09", "remaining_time": "0:44:38", "throughput": 1660.54, "total_tokens": 3801904}
{"current_steps": 370, "total_steps": 792, "loss": 2.7674, "learning_rate": 2.7573759032598366e-05, "epoch": 2.7898209236569276, "percentage": 46.72, "elapsed_time": "0:38:43", "remaining_time": "0:44:09", "throughput": 1660.77, "total_tokens": 3858528}
{"current_steps": 375, "total_steps": 792, "loss": 2.7692, "learning_rate": 2.7080087609469062e-05, "epoch": 2.827521206409048, "percentage": 47.35, "elapsed_time": "0:39:14", "remaining_time": "0:43:38", "throughput": 1660.35, "total_tokens": 3909216}
{"current_steps": 380, "total_steps": 792, "loss": 2.6428, "learning_rate": 2.6585597991414114e-05, "epoch": 2.8652214891611685, "percentage": 47.98, "elapsed_time": "0:39:46", "remaining_time": "0:43:07", "throughput": 1660.47, "total_tokens": 3963040}
{"current_steps": 385, "total_steps": 792, "loss": 2.6906, "learning_rate": 2.6090484684133404e-05, "epoch": 2.9029217719132894, "percentage": 48.61, "elapsed_time": "0:40:13", "remaining_time": "0:42:31", "throughput": 1661.24, "total_tokens": 4009728}
{"current_steps": 390, "total_steps": 792, "loss": 2.7791, "learning_rate": 2.5594942438652688e-05, "epoch": 2.9406220546654103, "percentage": 49.24, "elapsed_time": "0:40:48", "remaining_time": "0:42:03", "throughput": 1661.96, "total_tokens": 4069024}
{"current_steps": 395, "total_steps": 792, "loss": 2.8693, "learning_rate": 2.509916617471903e-05, "epoch": 2.9783223374175307, "percentage": 49.87, "elapsed_time": "0:41:22", "remaining_time": "0:41:35", "throughput": 1660.97, "total_tokens": 4123440}
{"current_steps": 400, "total_steps": 792, "loss": 2.6128, "learning_rate": 2.46033509041298e-05, "epoch": 3.016022620169651, "percentage": 50.51, "elapsed_time": "0:41:54", "remaining_time": "0:41:03", "throughput": 1660.66, "total_tokens": 4175200}
{"current_steps": 405, "total_steps": 792, "loss": 2.5518, "learning_rate": 2.410769165402549e-05, "epoch": 3.053722902921772, "percentage": 51.14, "elapsed_time": "0:42:30", "remaining_time": "0:40:36", "throughput": 1659.27, "total_tokens": 4231536}
{"current_steps": 410, "total_steps": 792, "loss": 2.5435, "learning_rate": 2.3612383390176503e-05, "epoch": 3.0914231856738925, "percentage": 51.77, "elapsed_time": "0:43:00", "remaining_time": "0:40:04", "throughput": 1659.76, "total_tokens": 4283696}
{"current_steps": 415, "total_steps": 792, "loss": 2.5171, "learning_rate": 2.3117620940294048e-05, "epoch": 3.1291234684260134, "percentage": 52.4, "elapsed_time": "0:43:35", "remaining_time": "0:39:35", "throughput": 1659.58, "total_tokens": 4340368}
{"current_steps": 420, "total_steps": 792, "loss": 2.4978, "learning_rate": 2.2623598917395438e-05, "epoch": 3.166823751178134, "percentage": 53.03, "elapsed_time": "0:44:07", "remaining_time": "0:39:04", "throughput": 1659.87, "total_tokens": 4394352}
{"current_steps": 425, "total_steps": 792, "loss": 2.5332, "learning_rate": 2.213051164325366e-05, "epoch": 3.2045240339302543, "percentage": 53.66, "elapsed_time": "0:44:34", "remaining_time": "0:38:29", "throughput": 1660.23, "total_tokens": 4440992}
{"current_steps": 430, "total_steps": 792, "loss": 2.5037, "learning_rate": 2.1638553071961708e-05, "epoch": 3.242224316682375, "percentage": 54.29, "elapsed_time": "0:45:02", "remaining_time": "0:37:54", "throughput": 1659.78, "total_tokens": 4485120}
{"current_steps": 435, "total_steps": 792, "loss": 2.5358, "learning_rate": 2.1147916713641367e-05, "epoch": 3.2799245994344957, "percentage": 54.92, "elapsed_time": "0:45:33", "remaining_time": "0:37:23", "throughput": 1659.64, "total_tokens": 4537456}
{"current_steps": 440, "total_steps": 792, "loss": 2.5274, "learning_rate": 2.0658795558326743e-05, "epoch": 3.3176248821866166, "percentage": 55.56, "elapsed_time": "0:46:04", "remaining_time": "0:36:51", "throughput": 1659.68, "total_tokens": 4587680}
{"current_steps": 445, "total_steps": 792, "loss": 2.5192, "learning_rate": 2.017138200005236e-05, "epoch": 3.355325164938737, "percentage": 56.19, "elapsed_time": "0:46:38", "remaining_time": "0:36:21", "throughput": 1660.01, "total_tokens": 4644752}
{"current_steps": 450, "total_steps": 792, "loss": 2.5902, "learning_rate": 1.9685867761175584e-05, "epoch": 3.3930254476908575, "percentage": 56.82, "elapsed_time": "0:47:09", "remaining_time": "0:35:50", "throughput": 1659.86, "total_tokens": 4696928}
{"current_steps": 455, "total_steps": 792, "loss": 2.5584, "learning_rate": 1.9202443816963425e-05, "epoch": 3.4307257304429783, "percentage": 57.45, "elapsed_time": "0:47:41", "remaining_time": "0:35:19", "throughput": 1659.5, "total_tokens": 4748800}
{"current_steps": 460, "total_steps": 792, "loss": 2.4829, "learning_rate": 1.872130032047302e-05, "epoch": 3.468426013195099, "percentage": 58.08, "elapsed_time": "0:48:15", "remaining_time": "0:34:49", "throughput": 1660.09, "total_tokens": 4806000}
{"current_steps": 465, "total_steps": 792, "loss": 2.5967, "learning_rate": 1.824262652775568e-05, "epoch": 3.5061262959472197, "percentage": 58.71, "elapsed_time": "0:48:46", "remaining_time": "0:34:18", "throughput": 1661.09, "total_tokens": 4861440}
{"current_steps": 470, "total_steps": 792, "loss": 2.4773, "learning_rate": 1.7766610723413684e-05, "epoch": 3.54382657869934, "percentage": 59.34, "elapsed_time": "0:49:20", "remaining_time": "0:33:48", "throughput": 1660.22, "total_tokens": 4915152}
{"current_steps": 475, "total_steps": 792, "loss": 2.591, "learning_rate": 1.7293440146539196e-05, "epoch": 3.581526861451461, "percentage": 59.97, "elapsed_time": "0:49:50", "remaining_time": "0:33:15", "throughput": 1660.31, "total_tokens": 4965680}
{"current_steps": 480, "total_steps": 792, "loss": 2.5478, "learning_rate": 1.682330091706446e-05, "epoch": 3.6192271442035815, "percentage": 60.61, "elapsed_time": "0:50:27", "remaining_time": "0:32:47", "throughput": 1659.94, "total_tokens": 5024832}
{"current_steps": 485, "total_steps": 792, "loss": 2.5146, "learning_rate": 1.6356377962552238e-05, "epoch": 3.6569274269557024, "percentage": 61.24, "elapsed_time": "0:50:57", "remaining_time": "0:32:15", "throughput": 1659.97, "total_tokens": 5076112}
{"current_steps": 490, "total_steps": 792, "loss": 2.523, "learning_rate": 1.589285494545514e-05, "epoch": 3.694627709707823, "percentage": 61.87, "elapsed_time": "0:51:30", "remaining_time": "0:31:44", "throughput": 1659.63, "total_tokens": 5129328}
{"current_steps": 495, "total_steps": 792, "loss": 2.4704, "learning_rate": 1.5432914190872757e-05, "epoch": 3.7323279924599433, "percentage": 62.5, "elapsed_time": "0:52:01", "remaining_time": "0:31:13", "throughput": 1659.94, "total_tokens": 5181936}
{"current_steps": 500, "total_steps": 792, "loss": 2.4309, "learning_rate": 1.4976736614834664e-05, "epoch": 3.770028275212064, "percentage": 63.13, "elapsed_time": "0:52:32", "remaining_time": "0:30:41", "throughput": 1660.32, "total_tokens": 5234112}
{"current_steps": 505, "total_steps": 792, "loss": 2.5634, "learning_rate": 1.4524501653137787e-05, "epoch": 3.8077285579641846, "percentage": 63.76, "elapsed_time": "0:53:03", "remaining_time": "0:30:08", "throughput": 1659.51, "total_tokens": 5282256}
{"current_steps": 510, "total_steps": 792, "loss": 2.5133, "learning_rate": 1.4076387190766017e-05, "epoch": 3.8454288407163055, "percentage": 64.39, "elapsed_time": "0:53:34", "remaining_time": "0:29:37", "throughput": 1659.6, "total_tokens": 5334720}
{"current_steps": 515, "total_steps": 792, "loss": 2.3979, "learning_rate": 1.363256949191972e-05, "epoch": 3.883129123468426, "percentage": 65.03, "elapsed_time": "0:54:05", "remaining_time": "0:29:05", "throughput": 1658.86, "total_tokens": 5384608}
{"current_steps": 520, "total_steps": 792, "loss": 2.5216, "learning_rate": 1.3193223130682936e-05, "epoch": 3.9208294062205464, "percentage": 65.66, "elapsed_time": "0:54:36", "remaining_time": "0:28:33", "throughput": 1658.96, "total_tokens": 5434816}
{"current_steps": 525, "total_steps": 792, "loss": 2.4333, "learning_rate": 1.2758520922355226e-05, "epoch": 3.9585296889726673, "percentage": 66.29, "elapsed_time": "0:55:02", "remaining_time": "0:27:59", "throughput": 1659.2, "total_tokens": 5480304}
{"current_steps": 530, "total_steps": 792, "loss": 2.4818, "learning_rate": 1.2328633855475429e-05, "epoch": 3.9962299717247878, "percentage": 66.92, "elapsed_time": "0:55:34", "remaining_time": "0:27:28", "throughput": 1658.96, "total_tokens": 5532160}
{"current_steps": 535, "total_steps": 792, "loss": 2.2869, "learning_rate": 1.1903731024563966e-05, "epoch": 4.033930254476909, "percentage": 67.55, "elapsed_time": "0:56:05", "remaining_time": "0:26:56", "throughput": 1659.56, "total_tokens": 5585152}
{"current_steps": 540, "total_steps": 792, "loss": 2.2979, "learning_rate": 1.148397956361007e-05, "epoch": 4.071630537229029, "percentage": 68.18, "elapsed_time": "0:56:39", "remaining_time": "0:26:26", "throughput": 1659.75, "total_tokens": 5642016}
{"current_steps": 545, "total_steps": 792, "loss": 2.3257, "learning_rate": 1.106954458033026e-05, "epoch": 4.10933081998115, "percentage": 68.81, "elapsed_time": "0:57:13", "remaining_time": "0:25:56", "throughput": 1659.32, "total_tokens": 5698096}
{"current_steps": 550, "total_steps": 792, "loss": 2.4352, "learning_rate": 1.0660589091223855e-05, "epoch": 4.147031102733271, "percentage": 69.44, "elapsed_time": "0:57:42", "remaining_time": "0:25:23", "throughput": 1659.77, "total_tokens": 5747504}
{"current_steps": 555, "total_steps": 792, "loss": 2.4073, "learning_rate": 1.025727395745095e-05, "epoch": 4.184731385485391, "percentage": 70.08, "elapsed_time": "0:58:11", "remaining_time": "0:24:51", "throughput": 1659.95, "total_tokens": 5796192}
{"current_steps": 560, "total_steps": 792, "loss": 2.3018, "learning_rate": 9.859757821558337e-06, "epoch": 4.222431668237512, "percentage": 70.71, "elapsed_time": "0:58:44", "remaining_time": "0:24:20", "throughput": 1660.05, "total_tokens": 5850880}
{"current_steps": 565, "total_steps": 792, "loss": 2.2485, "learning_rate": 9.468197045077976e-06, "epoch": 4.260131950989632, "percentage": 71.34, "elapsed_time": "0:59:12", "remaining_time": "0:23:47", "throughput": 1660.08, "total_tokens": 5897680}
{"current_steps": 570, "total_steps": 792, "loss": 2.3062, "learning_rate": 9.082745647022797e-06, "epoch": 4.297832233741753, "percentage": 71.97, "elapsed_time": "0:59:47", "remaining_time": "0:23:17", "throughput": 1659.85, "total_tokens": 5954608}
{"current_steps": 575, "total_steps": 792, "loss": 2.3681, "learning_rate": 8.703555243303835e-06, "epoch": 4.335532516493874, "percentage": 72.6, "elapsed_time": "1:00:16", "remaining_time": "0:22:44", "throughput": 1659.97, "total_tokens": 6003136}
{"current_steps": 580, "total_steps": 792, "loss": 2.2854, "learning_rate": 8.330774987092712e-06, "epoch": 4.3732327992459945, "percentage": 73.23, "elapsed_time": "1:00:46", "remaining_time": "0:22:12", "throughput": 1660.17, "total_tokens": 6053360}
{"current_steps": 585, "total_steps": 792, "loss": 2.3556, "learning_rate": 7.96455151015272e-06, "epoch": 4.410933081998115, "percentage": 73.86, "elapsed_time": "1:01:17", "remaining_time": "0:21:41", "throughput": 1659.97, "total_tokens": 6104480}
{"current_steps": 590, "total_steps": 792, "loss": 2.3613, "learning_rate": 7.605028865161809e-06, "epoch": 4.448633364750235, "percentage": 74.49, "elapsed_time": "1:01:49", "remaining_time": "0:21:10", "throughput": 1659.84, "total_tokens": 6157456}
{"current_steps": 595, "total_steps": 792, "loss": 2.3387, "learning_rate": 7.25234846904993e-06, "epoch": 4.486333647502356, "percentage": 75.13, "elapsed_time": "1:02:20", "remaining_time": "0:20:38", "throughput": 1660.08, "total_tokens": 6209744}
{"current_steps": 600, "total_steps": 792, "loss": 2.3717, "learning_rate": 6.906649047373246e-06, "epoch": 4.524033930254477, "percentage": 75.76, "elapsed_time": "1:02:53", "remaining_time": "0:20:07", "throughput": 1660.23, "total_tokens": 6264528}
{"current_steps": 605, "total_steps": 792, "loss": 2.3322, "learning_rate": 6.568066579746901e-06, "epoch": 4.561734213006598, "percentage": 76.39, "elapsed_time": "1:03:22", "remaining_time": "0:19:35", "throughput": 1659.93, "total_tokens": 6311568}
{"current_steps": 610, "total_steps": 792, "loss": 2.4046, "learning_rate": 6.2367342463579475e-06, "epoch": 4.599434495758718, "percentage": 77.02, "elapsed_time": "1:03:53", "remaining_time": "0:19:03", "throughput": 1659.36, "total_tokens": 6361040}
{"current_steps": 615, "total_steps": 792, "loss": 2.2987, "learning_rate": 5.912782375579412e-06, "epoch": 4.6371347785108386, "percentage": 77.65, "elapsed_time": "1:04:24", "remaining_time": "0:18:32", "throughput": 1658.82, "total_tokens": 6411280}
{"current_steps": 620, "total_steps": 792, "loss": 2.3819, "learning_rate": 5.596338392706077e-06, "epoch": 4.674835061262959, "percentage": 78.28, "elapsed_time": "1:04:55", "remaining_time": "0:18:00", "throughput": 1659.17, "total_tokens": 6462816}
{"current_steps": 625, "total_steps": 792, "loss": 2.3011, "learning_rate": 5.2875267698322325e-06, "epoch": 4.71253534401508, "percentage": 78.91, "elapsed_time": "1:05:25", "remaining_time": "0:17:28", "throughput": 1659.45, "total_tokens": 6514640}
{"current_steps": 630, "total_steps": 792, "loss": 2.3686, "learning_rate": 4.986468976890993e-06, "epoch": 4.750235626767201, "percentage": 79.55, "elapsed_time": "1:05:56", "remaining_time": "0:16:57", "throughput": 1659.86, "total_tokens": 6567824}
{"current_steps": 635, "total_steps": 792, "loss": 2.3264, "learning_rate": 4.693283433874565e-06, "epoch": 4.787935909519321, "percentage": 80.18, "elapsed_time": "1:06:26", "remaining_time": "0:16:25", "throughput": 1660.06, "total_tokens": 6616992}
{"current_steps": 640, "total_steps": 792, "loss": 2.4168, "learning_rate": 4.408085464254183e-06, "epoch": 4.825636192271442, "percentage": 80.81, "elapsed_time": "1:06:55", "remaining_time": "0:15:53", "throughput": 1659.99, "total_tokens": 6665056}
{"current_steps": 645, "total_steps": 792, "loss": 2.3583, "learning_rate": 4.130987249617993e-06, "epoch": 4.863336475023563, "percentage": 81.44, "elapsed_time": "1:07:27", "remaining_time": "0:15:22", "throughput": 1660.3, "total_tokens": 6720512}
{"current_steps": 650, "total_steps": 792, "loss": 2.3542, "learning_rate": 3.8620977855448935e-06, "epoch": 4.9010367577756835, "percentage": 82.07, "elapsed_time": "1:08:02", "remaining_time": "0:14:51", "throughput": 1660.38, "total_tokens": 6777888}
{"current_steps": 655, "total_steps": 792, "loss": 2.3247, "learning_rate": 3.601522838731461e-06, "epoch": 4.938737040527804, "percentage": 82.7, "elapsed_time": "1:08:35", "remaining_time": "0:14:20", "throughput": 1660.38, "total_tokens": 6832688}
{"current_steps": 660, "total_steps": 792, "loss": 2.4144, "learning_rate": 3.3493649053890326e-06, "epoch": 4.976437323279924, "percentage": 83.33, "elapsed_time": "1:09:08", "remaining_time": "0:13:49", "throughput": 1659.89, "total_tokens": 6885296}
{"current_steps": 665, "total_steps": 792, "loss": 2.3183, "learning_rate": 3.1057231709272077e-06, "epoch": 5.014137606032045, "percentage": 83.96, "elapsed_time": "1:09:42", "remaining_time": "0:13:18", "throughput": 1659.16, "total_tokens": 6938896}
{"current_steps": 670, "total_steps": 792, "loss": 2.3131, "learning_rate": 2.8706934709395892e-06, "epoch": 5.051837888784166, "percentage": 84.6, "elapsed_time": "1:10:13", "remaining_time": "0:12:47", "throughput": 1658.96, "total_tokens": 6989504}
{"current_steps": 675, "total_steps": 792, "loss": 2.1683, "learning_rate": 2.6443682535072177e-06, "epoch": 5.089538171536287, "percentage": 85.23, "elapsed_time": "1:10:41", "remaining_time": "0:12:15", "throughput": 1659.18, "total_tokens": 7037856}
{"current_steps": 680, "total_steps": 792, "loss": 2.2366, "learning_rate": 2.4268365428344736e-06, "epoch": 5.127238454288407, "percentage": 85.86, "elapsed_time": "1:11:15", "remaining_time": "0:11:44", "throughput": 1658.81, "total_tokens": 7091952}
{"current_steps": 685, "total_steps": 792, "loss": 2.1666, "learning_rate": 2.21818390423168e-06, "epoch": 5.1649387370405275, "percentage": 86.49, "elapsed_time": "1:11:44", "remaining_time": "0:11:12", "throughput": 1658.46, "total_tokens": 7138112}
{"current_steps": 690, "total_steps": 792, "loss": 2.2275, "learning_rate": 2.0184924104583613e-06, "epoch": 5.202639019792649, "percentage": 87.12, "elapsed_time": "1:12:12", "remaining_time": "0:10:40", "throughput": 1658.23, "total_tokens": 7183936}
{"current_steps": 695, "total_steps": 792, "loss": 2.2126, "learning_rate": 1.8278406094401623e-06, "epoch": 5.240339302544769, "percentage": 87.75, "elapsed_time": "1:12:41", "remaining_time": "0:10:08", "throughput": 1658.12, "total_tokens": 7231504}
{"current_steps": 700, "total_steps": 792, "loss": 2.3274, "learning_rate": 1.6463034933723337e-06, "epoch": 5.27803958529689, "percentage": 88.38, "elapsed_time": "1:13:10", "remaining_time": "0:09:37", "throughput": 1658.18, "total_tokens": 7280928}
{"current_steps": 705, "total_steps": 792, "loss": 2.2847, "learning_rate": 1.4739524692218314e-06, "epoch": 5.31573986804901, "percentage": 89.02, "elapsed_time": "1:13:42", "remaining_time": "0:09:05", "throughput": 1657.38, "total_tokens": 7329424}
{"current_steps": 710, "total_steps": 792, "loss": 2.3183, "learning_rate": 1.3108553306396265e-06, "epoch": 5.353440150801131, "percentage": 89.65, "elapsed_time": "1:14:14", "remaining_time": "0:08:34", "throughput": 1657.61, "total_tokens": 7383920}
{"current_steps": 715, "total_steps": 792, "loss": 2.2681, "learning_rate": 1.1570762312943295e-06, "epoch": 5.391140433553252, "percentage": 90.28, "elapsed_time": "1:14:47", "remaining_time": "0:08:03", "throughput": 1657.72, "total_tokens": 7438480}
{"current_steps": 720, "total_steps": 792, "loss": 2.1764, "learning_rate": 1.0126756596375686e-06, "epoch": 5.4288407163053725, "percentage": 90.91, "elapsed_time": "1:15:17", "remaining_time": "0:07:31", "throughput": 1657.61, "total_tokens": 7488160}
{"current_steps": 725, "total_steps": 792, "loss": 2.2726, "learning_rate": 8.777104151110826e-07, "epoch": 5.466540999057493, "percentage": 91.54, "elapsed_time": "1:15:53", "remaining_time": "0:07:00", "throughput": 1657.82, "total_tokens": 7548144}
{"current_steps": 730, "total_steps": 792, "loss": 2.2841, "learning_rate": 7.522335858048707e-07, "epoch": 5.504241281809613, "percentage": 92.17, "elapsed_time": "1:16:27", "remaining_time": "0:06:29", "throughput": 1657.77, "total_tokens": 7605536}
{"current_steps": 735, "total_steps": 792, "loss": 2.2894, "learning_rate": 6.362945275751736e-07, "epoch": 5.541941564561734, "percentage": 92.8, "elapsed_time": "1:16:56", "remaining_time": "0:05:58", "throughput": 1658.23, "total_tokens": 7655408}
{"current_steps": 740, "total_steps": 792, "loss": 2.3057, "learning_rate": 5.299388446305343e-07, "epoch": 5.579641847313855, "percentage": 93.43, "elapsed_time": "1:17:26", "remaining_time": "0:05:26", "throughput": 1658.49, "total_tokens": 7706016}
{"current_steps": 745, "total_steps": 792, "loss": 2.2593, "learning_rate": 4.3320837159353813e-07, "epoch": 5.617342130065976, "percentage": 94.07, "elapsed_time": "1:18:00", "remaining_time": "0:04:55", "throughput": 1658.33, "total_tokens": 7761520}
{"current_steps": 750, "total_steps": 792, "loss": 2.2211, "learning_rate": 3.4614115704533767e-07, "epoch": 5.655042412818096, "percentage": 94.7, "elapsed_time": "1:18:34", "remaining_time": "0:04:24", "throughput": 1658.3, "total_tokens": 7818592}
{"current_steps": 755, "total_steps": 792, "loss": 2.3253, "learning_rate": 2.687714485593462e-07, "epoch": 5.6927426955702165, "percentage": 95.33, "elapsed_time": "1:19:04", "remaining_time": "0:03:52", "throughput": 1658.61, "total_tokens": 7869808}
{"current_steps": 760, "total_steps": 792, "loss": 2.354, "learning_rate": 2.011296792301165e-07, "epoch": 5.730442978322337, "percentage": 95.96, "elapsed_time": "1:19:39", "remaining_time": "0:03:21", "throughput": 1658.54, "total_tokens": 7926288}
{"current_steps": 765, "total_steps": 792, "loss": 2.2959, "learning_rate": 1.4324245570256633e-07, "epoch": 5.768143261074458, "percentage": 96.59, "elapsed_time": "1:20:09", "remaining_time": "0:02:49", "throughput": 1658.91, "total_tokens": 7978000}
{"current_steps": 770, "total_steps": 792, "loss": 2.305, "learning_rate": 9.513254770636137e-08, "epoch": 5.805843543826579, "percentage": 97.22, "elapsed_time": "1:20:38", "remaining_time": "0:02:18", "throughput": 1658.97, "total_tokens": 8026688}
{"current_steps": 775, "total_steps": 792, "loss": 2.321, "learning_rate": 5.681887909952388e-08, "epoch": 5.843543826578699, "percentage": 97.85, "elapsed_time": "1:21:10", "remaining_time": "0:01:46", "throughput": 1659.04, "total_tokens": 8080832}
{"current_steps": 780, "total_steps": 792, "loss": 2.2447, "learning_rate": 2.831652042480093e-08, "epoch": 5.88124410933082, "percentage": 98.48, "elapsed_time": "1:21:41", "remaining_time": "0:01:15", "throughput": 1659.16, "total_tokens": 8132832}
{"current_steps": 785, "total_steps": 792, "loss": 2.2446, "learning_rate": 9.636682981720158e-09, "epoch": 5.918944392082941, "percentage": 99.12, "elapsed_time": "1:22:12", "remaining_time": "0:00:43", "throughput": 1659.69, "total_tokens": 8186208}
{"current_steps": 790, "total_steps": 792, "loss": 2.2856, "learning_rate": 7.867144166728846e-10, "epoch": 5.956644674835061, "percentage": 99.75, "elapsed_time": "1:22:45", "remaining_time": "0:00:12", "throughput": 1659.68, "total_tokens": 8240448}
{"current_steps": 792, "total_steps": 792, "epoch": 5.971724787935909, "percentage": 100.0, "elapsed_time": "1:22:57", "remaining_time": "0:00:00", "throughput": 1659.55, "total_tokens": 8259920}
|