"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[2023-08-29 21:35:57,998] [INFO] [stage_1_and_2.py:1720:step] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 4294967296, reducing to 4294967296\n"
]
},
{
"data": {
"text/html": [
"\n",
" \n",
" \n",
"
\n",
" [7596/7596 1:49:59, Epoch 1/1]\n",
"
\n",
" \n",
" \n",
" \n",
" Step | \n",
" Training Loss | \n",
"
\n",
" \n",
" \n",
" \n",
" 50 | \n",
" 1.480100 | \n",
"
\n",
" \n",
" 100 | \n",
" 0.668900 | \n",
"
\n",
" \n",
" 150 | \n",
" 0.696300 | \n",
"
\n",
" \n",
" 200 | \n",
" 0.730000 | \n",
"
\n",
" \n",
" 250 | \n",
" 0.611000 | \n",
"
\n",
" \n",
" 300 | \n",
" 0.657100 | \n",
"
\n",
" \n",
" 350 | \n",
" 0.645100 | \n",
"
\n",
" \n",
" 400 | \n",
" 0.583100 | \n",
"
\n",
" \n",
" 450 | \n",
" 0.677300 | \n",
"
\n",
" \n",
" 500 | \n",
" 0.639500 | \n",
"
\n",
" \n",
" 550 | \n",
" 0.665100 | \n",
"
\n",
" \n",
" 600 | \n",
" 0.630700 | \n",
"
\n",
" \n",
" 650 | \n",
" 0.622500 | \n",
"
\n",
" \n",
" 700 | \n",
" 0.679500 | \n",
"
\n",
" \n",
" 750 | \n",
" 0.648400 | \n",
"
\n",
" \n",
" 800 | \n",
" 0.605200 | \n",
"
\n",
" \n",
" 850 | \n",
" 0.614400 | \n",
"
\n",
" \n",
" 900 | \n",
" 0.591200 | \n",
"
\n",
" \n",
" 950 | \n",
" 0.613400 | \n",
"
\n",
" \n",
" 1000 | \n",
" 0.563000 | \n",
"
\n",
" \n",
" 1050 | \n",
" 0.627500 | \n",
"
\n",
" \n",
" 1100 | \n",
" 0.638100 | \n",
"
\n",
" \n",
" 1150 | \n",
" 0.638200 | \n",
"
\n",
" \n",
" 1200 | \n",
" 0.564700 | \n",
"
\n",
" \n",
" 1250 | \n",
" 0.612700 | \n",
"
\n",
" \n",
" 1300 | \n",
" 0.589200 | \n",
"
\n",
" \n",
" 1350 | \n",
" 0.531500 | \n",
"
\n",
" \n",
" 1400 | \n",
" 0.638900 | \n",
"
\n",
" \n",
" 1450 | \n",
" 0.569100 | \n",
"
\n",
" \n",
" 1500 | \n",
" 0.536600 | \n",
"
\n",
" \n",
" 1550 | \n",
" 0.585900 | \n",
"
\n",
" \n",
" 1600 | \n",
" 0.581200 | \n",
"
\n",
" \n",
" 1650 | \n",
" 0.566900 | \n",
"
\n",
" \n",
" 1700 | \n",
" 0.529200 | \n",
"
\n",
" \n",
" 1750 | \n",
" 0.556700 | \n",
"
\n",
" \n",
" 1800 | \n",
" 0.589900 | \n",
"
\n",
" \n",
" 1850 | \n",
" 0.636000 | \n",
"
\n",
" \n",
" 1900 | \n",
" 0.526500 | \n",
"
\n",
" \n",
" 1950 | \n",
" 0.522500 | \n",
"
\n",
" \n",
" 2000 | \n",
" 0.551500 | \n",
"
\n",
" \n",
" 2050 | \n",
" 0.535100 | \n",
"
\n",
" \n",
" 2100 | \n",
" 0.509800 | \n",
"
\n",
" \n",
" 2150 | \n",
" 0.533200 | \n",
"
\n",
" \n",
" 2200 | \n",
" 0.524600 | \n",
"
\n",
" \n",
" 2250 | \n",
" 0.507000 | \n",
"
\n",
" \n",
" 2300 | \n",
" 0.507500 | \n",
"
\n",
" \n",
" 2350 | \n",
" 0.508700 | \n",
"
\n",
" \n",
" 2400 | \n",
" 0.502200 | \n",
"
\n",
" \n",
" 2450 | \n",
" 0.519300 | \n",
"
\n",
" \n",
" 2500 | \n",
" 0.483600 | \n",
"
\n",
" \n",
" 2550 | \n",
" 0.506600 | \n",
"
\n",
" \n",
" 2600 | \n",
" 0.476800 | \n",
"
\n",
" \n",
" 2650 | \n",
" 0.542600 | \n",
"
\n",
" \n",
" 2700 | \n",
" 0.545200 | \n",
"
\n",
" \n",
" 2750 | \n",
" 0.506200 | \n",
"
\n",
" \n",
" 2800 | \n",
" 0.505700 | \n",
"
\n",
" \n",
" 2850 | \n",
" 0.565400 | \n",
"
\n",
" \n",
" 2900 | \n",
" 0.552000 | \n",
"
\n",
" \n",
" 2950 | \n",
" 0.502800 | \n",
"
\n",
" \n",
" 3000 | \n",
" 0.509000 | \n",
"
\n",
" \n",
" 3050 | \n",
" 0.472700 | \n",
"
\n",
" \n",
" 3100 | \n",
" 0.474400 | \n",
"
\n",
" \n",
" 3150 | \n",
" 0.548200 | \n",
"
\n",
" \n",
" 3200 | \n",
" 0.533600 | \n",
"
\n",
" \n",
" 3250 | \n",
" 0.490700 | \n",
"
\n",
" \n",
" 3300 | \n",
" 0.523100 | \n",
"
\n",
" \n",
" 3350 | \n",
" 0.469000 | \n",
"
\n",
" \n",
" 3400 | \n",
" 0.547100 | \n",
"
\n",
" \n",
" 3450 | \n",
" 0.494700 | \n",
"
\n",
" \n",
" 3500 | \n",
" 0.578500 | \n",
"
\n",
" \n",
" 3550 | \n",
" 0.481800 | \n",
"
\n",
" \n",
" 3600 | \n",
" 0.515100 | \n",
"
\n",
" \n",
" 3650 | \n",
" 0.475500 | \n",
"
\n",
" \n",
" 3700 | \n",
" 0.446500 | \n",
"
\n",
" \n",
" 3750 | \n",
" 0.461500 | \n",
"
\n",
" \n",
" 3800 | \n",
" 0.514300 | \n",
"
\n",
" \n",
" 3850 | \n",
" 0.471800 | \n",
"
\n",
" \n",
" 3900 | \n",
" 0.414400 | \n",
"
\n",
" \n",
" 3950 | \n",
" 0.485800 | \n",
"
\n",
" \n",
" 4000 | \n",
" 0.508800 | \n",
"
\n",
" \n",
" 4050 | \n",
" 0.494700 | \n",
"
\n",
" \n",
" 4100 | \n",
" 0.489200 | \n",
"
\n",
" \n",
" 4150 | \n",
" 0.482400 | \n",
"
\n",
" \n",
" 4200 | \n",
" 0.585000 | \n",
"
\n",
" \n",
" 4250 | \n",
" 0.456700 | \n",
"
\n",
" \n",
" 4300 | \n",
" 0.475300 | \n",
"
\n",
" \n",
" 4350 | \n",
" 0.630800 | \n",
"
\n",
" \n",
" 4400 | \n",
" 0.486800 | \n",
"
\n",
" \n",
" 4450 | \n",
" 0.544200 | \n",
"
\n",
" \n",
" 4500 | \n",
" 0.539600 | \n",
"
\n",
" \n",
" 4550 | \n",
" 0.481100 | \n",
"
\n",
" \n",
" 4600 | \n",
" 0.775000 | \n",
"
\n",
" \n",
" 4650 | \n",
" 2.227600 | \n",
"
\n",
" \n",
" 4700 | \n",
" 1.397400 | \n",
"
\n",
" \n",
" 4750 | \n",
" 0.580000 | \n",
"
\n",
" \n",
" 4800 | \n",
" 0.461500 | \n",
"
\n",
" \n",
" 4850 | \n",
" 0.518500 | \n",
"
\n",
" \n",
" 4900 | \n",
" 0.528600 | \n",
"
\n",
" \n",
" 4950 | \n",
" 0.480700 | \n",
"
\n",
" \n",
" 5000 | \n",
" 0.432000 | \n",
"
\n",
" \n",
" 5050 | \n",
" 0.441700 | \n",
"
\n",
" \n",
" 5100 | \n",
" 0.490600 | \n",
"
\n",
" \n",
" 5150 | \n",
" 0.521600 | \n",
"
\n",
" \n",
" 5200 | \n",
" 0.491800 | \n",
"
\n",
" \n",
" 5250 | \n",
" 0.493000 | \n",
"
\n",
" \n",
" 5300 | \n",
" 0.418200 | \n",
"
\n",
" \n",
" 5350 | \n",
" 0.431700 | \n",
"
\n",
" \n",
" 5400 | \n",
" 0.444900 | \n",
"
\n",
" \n",
" 5450 | \n",
" 0.435600 | \n",
"
\n",
" \n",
" 5500 | \n",
" 0.477000 | \n",
"
\n",
" \n",
" 5550 | \n",
" 0.436500 | \n",
"
\n",
" \n",
" 5600 | \n",
" 0.406700 | \n",
"
\n",
" \n",
" 5650 | \n",
" 0.411100 | \n",
"
\n",
" \n",
" 5700 | \n",
" 0.500900 | \n",
"
\n",
" \n",
" 5750 | \n",
" 0.440300 | \n",
"
\n",
" \n",
" 5800 | \n",
" 0.414100 | \n",
"
\n",
" \n",
" 5850 | \n",
" 0.433500 | \n",
"
\n",
" \n",
" 5900 | \n",
" 0.481600 | \n",
"
\n",
" \n",
" 5950 | \n",
" 0.492800 | \n",
"
\n",
" \n",
" 6000 | \n",
" 0.417800 | \n",
"
\n",
" \n",
" 6050 | \n",
" 0.413900 | \n",
"
\n",
" \n",
" 6100 | \n",
" 0.456400 | \n",
"
\n",
" \n",
" 6150 | \n",
" 0.411200 | \n",
"
\n",
" \n",
" 6200 | \n",
" 0.407400 | \n",
"
\n",
" \n",
" 6250 | \n",
" 0.491800 | \n",
"
\n",
" \n",
" 6300 | \n",
" 0.433900 | \n",
"
\n",
" \n",
" 6350 | \n",
" 0.443000 | \n",
"
\n",
" \n",
" 6400 | \n",
" 0.417900 | \n",
"
\n",
" \n",
" 6450 | \n",
" 0.423900 | \n",
"
\n",
" \n",
" 6500 | \n",
" 0.450600 | \n",
"
\n",
" \n",
" 6550 | \n",
" 0.442500 | \n",
"
\n",
" \n",
" 6600 | \n",
" 0.399500 | \n",
"
\n",
" \n",
" 6650 | \n",
" 0.401200 | \n",
"
\n",
" \n",
" 6700 | \n",
" 0.439100 | \n",
"
\n",
" \n",
" 6750 | \n",
" 0.418000 | \n",
"
\n",
" \n",
" 6800 | \n",
" 0.386000 | \n",
"
\n",
" \n",
" 6850 | \n",
" 0.404000 | \n",
"
\n",
" \n",
" 6900 | \n",
" 0.414600 | \n",
"
\n",
" \n",
" 6950 | \n",
" 0.442100 | \n",
"
\n",
" \n",
" 7000 | \n",
" 0.381300 | \n",
"
\n",
" \n",
" 7050 | \n",
" 0.394700 | \n",
"
\n",
" \n",
" 7100 | \n",
" 0.428300 | \n",
"
\n",
" \n",
" 7150 | \n",
" 0.395900 | \n",
"
\n",
" \n",
" 7200 | \n",
" 0.409700 | \n",
"
\n",
" \n",
" 7250 | \n",
" 0.439700 | \n",
"
\n",
" \n",
" 7300 | \n",
" 0.419500 | \n",
"
\n",
" \n",
" 7350 | \n",
" 0.429400 | \n",
"
\n",
" \n",
" 7400 | \n",
" 0.425000 | \n",
"
\n",
" \n",
" 7450 | \n",
" 0.415900 | \n",
"
\n",
" \n",
" 7500 | \n",
" 0.430600 | \n",
"
\n",
" \n",
" 7550 | \n",
" 0.407200 | \n",
"
\n",
" \n",
"
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[2023-08-29 21:35:58,377] [INFO] [stage_1_and_2.py:1720:step] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 4294967296, reducing to 2147483648.0\n",
"[2023-08-29 21:35:58,755] [INFO] [stage_1_and_2.py:1720:step] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 2147483648.0, reducing to 1073741824.0\n",
"[2023-08-29 21:35:59,120] [INFO] [stage_1_and_2.py:1720:step] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 1073741824.0, reducing to 536870912.0\n",
"[2023-08-29 21:35:59,499] [INFO] [stage_1_and_2.py:1720:step] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 536870912.0, reducing to 268435456.0\n",
"[2023-08-29 21:35:59,866] [INFO] [stage_1_and_2.py:1720:step] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 268435456.0, reducing to 134217728.0\n",
"[2023-08-29 21:36:00,244] [INFO] [stage_1_and_2.py:1720:step] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 134217728.0, reducing to 67108864.0\n",
"[2023-08-29 21:36:00,623] [INFO] [stage_1_and_2.py:1720:step] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 67108864.0, reducing to 33554432.0\n",
"[2023-08-29 21:36:00,997] [INFO] [stage_1_and_2.py:1720:step] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 33554432.0, reducing to 16777216.0\n",
"[2023-08-29 21:36:01,370] [INFO] [stage_1_and_2.py:1720:step] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16777216.0, reducing to 8388608.0\n",
"[2023-08-29 21:36:01,372] [INFO] [logging.py:68:log_dist] [Rank 0] step=10, skipped=10, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:36:01,372] [INFO] [timer.py:198:stop] 0/10, RunningAvgSamplesPerSec=2.6916653383923004, CurrSamplesPerSec=2.693093108518521, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:36:01,756] [INFO] [stage_1_and_2.py:1720:step] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 8388608.0, reducing to 4194304.0\n",
"[2023-08-29 21:36:02,130] [INFO] [stage_1_and_2.py:1720:step] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 4194304.0, reducing to 2097152.0\n",
"[2023-08-29 21:36:02,507] [INFO] [stage_1_and_2.py:1720:step] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 2097152.0, reducing to 1048576.0\n",
"[2023-08-29 21:36:02,881] [INFO] [stage_1_and_2.py:1720:step] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 1048576.0, reducing to 524288.0\n",
"[2023-08-29 21:36:03,252] [INFO] [stage_1_and_2.py:1720:step] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 524288.0, reducing to 262144.0\n",
"[2023-08-29 21:36:03,628] [INFO] [stage_1_and_2.py:1720:step] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 262144.0, reducing to 131072.0\n",
"[2023-08-29 21:36:03,992] [INFO] [stage_1_and_2.py:1720:step] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 131072.0, reducing to 65536.0\n",
"[2023-08-29 21:36:04,371] [INFO] [stage_1_and_2.py:1720:step] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536.0, reducing to 32768.0\n",
"[2023-08-29 21:36:04,749] [INFO] [stage_1_and_2.py:1720:step] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768.0, reducing to 16384.0\n",
"[2023-08-29 21:36:05,127] [INFO] [stage_1_and_2.py:1720:step] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384.0, reducing to 8192.0\n",
"[2023-08-29 21:36:05,129] [INFO] [logging.py:68:log_dist] [Rank 0] step=20, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:36:05,130] [INFO] [timer.py:198:stop] 0/20, RunningAvgSamplesPerSec=2.6861949258676106, CurrSamplesPerSec=2.656568132486891, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:36:13,520] [INFO] [logging.py:68:log_dist] [Rank 0] step=30, skipped=20, lr=[2.9429595503388953e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:36:13,536] [INFO] [timer.py:198:stop] 0/30, RunningAvgSamplesPerSec=1.8578083594873875, CurrSamplesPerSec=1.2025343710963112, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:36:21,909] [INFO] [logging.py:68:log_dist] [Rank 0] step=40, skipped=20, lr=[3.8288786510166846e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:36:21,925] [INFO] [timer.py:198:stop] 0/40, RunningAvgSamplesPerSec=1.6222143925845791, CurrSamplesPerSec=1.200502606052453, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:36:30,313] [INFO] [logging.py:68:log_dist] [Rank 0] step=50, skipped=20, lr=[4.347108103585803e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:36:30,329] [INFO] [timer.py:198:stop] 0/50, RunningAvgSamplesPerSec=1.5097570969458491, CurrSamplesPerSec=1.204732526540827, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:36:38,689] [INFO] [logging.py:68:log_dist] [Rank 0] step=60, skipped=20, lr=[4.714797751694474e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:36:38,705] [INFO] [timer.py:198:stop] 0/60, RunningAvgSamplesPerSec=1.4451937868371407, CurrSamplesPerSec=1.1985924275252775, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:36:47,078] [INFO] [logging.py:68:log_dist] [Rank 0] step=70, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:36:47,094] [INFO] [timer.py:198:stop] 0/70, RunningAvgSamplesPerSec=1.402458889976063, CurrSamplesPerSec=1.1997831736455862, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:36:55,488] [INFO] [logging.py:68:log_dist] [Rank 0] step=80, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:36:55,504] [INFO] [timer.py:198:stop] 0/80, RunningAvgSamplesPerSec=1.371763250300358, CurrSamplesPerSec=1.1975920835592575, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:37:03,907] [INFO] [logging.py:68:log_dist] [Rank 0] step=90, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:37:03,923] [INFO] [timer.py:198:stop] 0/90, RunningAvgSamplesPerSec=1.3487748982458996, CurrSamplesPerSec=1.2048681879538574, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:37:15,566] [INFO] [logging.py:68:log_dist] [Rank 0] step=100, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:37:15,582] [INFO] [timer.py:198:stop] 0/100, RunningAvgSamplesPerSec=1.274906466660983, CurrSamplesPerSec=0.2437342211108861, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:37:23,991] [INFO] [logging.py:68:log_dist] [Rank 0] step=110, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:37:24,007] [INFO] [timer.py:198:stop] 0/110, RunningAvgSamplesPerSec=1.2667708231377706, CurrSamplesPerSec=1.2002463274094186, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:37:32,414] [INFO] [logging.py:68:log_dist] [Rank 0] step=120, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:37:32,430] [INFO] [timer.py:198:stop] 0/120, RunningAvgSamplesPerSec=1.2600880054053687, CurrSamplesPerSec=1.2026985061410886, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:37:40,798] [INFO] [logging.py:68:log_dist] [Rank 0] step=130, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:37:40,814] [INFO] [timer.py:198:stop] 0/130, RunningAvgSamplesPerSec=1.2549864089927067, CurrSamplesPerSec=1.1986770353955618, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:37:49,184] [INFO] [logging.py:68:log_dist] [Rank 0] step=140, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:37:49,200] [INFO] [timer.py:198:stop] 0/140, RunningAvgSamplesPerSec=1.2506235049475627, CurrSamplesPerSec=1.20171436741086, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:37:57,554] [INFO] [logging.py:68:log_dist] [Rank 0] step=150, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:37:57,570] [INFO] [timer.py:198:stop] 0/150, RunningAvgSamplesPerSec=1.2470538085453151, CurrSamplesPerSec=1.2073796234192793, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:38:05,946] [INFO] [logging.py:68:log_dist] [Rank 0] step=160, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:38:05,962] [INFO] [timer.py:198:stop] 0/160, RunningAvgSamplesPerSec=1.2437517654771124, CurrSamplesPerSec=1.194231318425221, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:38:14,348] [INFO] [logging.py:68:log_dist] [Rank 0] step=170, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:38:14,364] [INFO] [timer.py:198:stop] 0/170, RunningAvgSamplesPerSec=1.240746894890977, CurrSamplesPerSec=1.1880810737163054, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:38:22,735] [INFO] [logging.py:68:log_dist] [Rank 0] step=180, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:38:22,751] [INFO] [timer.py:198:stop] 0/180, RunningAvgSamplesPerSec=1.2382310295157077, CurrSamplesPerSec=1.1862122017715533, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:38:31,134] [INFO] [logging.py:68:log_dist] [Rank 0] step=190, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:38:31,150] [INFO] [timer.py:198:stop] 0/190, RunningAvgSamplesPerSec=1.2358864838117465, CurrSamplesPerSec=1.1957988706022153, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:38:43,038] [INFO] [logging.py:68:log_dist] [Rank 0] step=200, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:38:43,054] [INFO] [timer.py:198:stop] 0/200, RunningAvgSamplesPerSec=1.2074221815135844, CurrSamplesPerSec=0.23049353530079067, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:38:51,453] [INFO] [logging.py:68:log_dist] [Rank 0] step=210, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:38:51,468] [INFO] [timer.py:198:stop] 0/210, RunningAvgSamplesPerSec=1.206751776712127, CurrSamplesPerSec=1.188727912420286, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:38:59,836] [INFO] [logging.py:68:log_dist] [Rank 0] step=220, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:38:59,851] [INFO] [timer.py:198:stop] 0/220, RunningAvgSamplesPerSec=1.2063426448685801, CurrSamplesPerSec=1.1929860689009026, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:39:08,224] [INFO] [logging.py:68:log_dist] [Rank 0] step=230, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:39:08,239] [INFO] [timer.py:198:stop] 0/230, RunningAvgSamplesPerSec=1.2059388645900535, CurrSamplesPerSec=1.2033385778337804, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:39:16,610] [INFO] [logging.py:68:log_dist] [Rank 0] step=240, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:39:16,626] [INFO] [timer.py:198:stop] 0/240, RunningAvgSamplesPerSec=1.2055772326727383, CurrSamplesPerSec=1.202631950187005, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:39:24,991] [INFO] [logging.py:68:log_dist] [Rank 0] step=250, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:39:25,007] [INFO] [timer.py:198:stop] 0/250, RunningAvgSamplesPerSec=1.2052764323242422, CurrSamplesPerSec=1.2020542920029347, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:39:33,384] [INFO] [logging.py:68:log_dist] [Rank 0] step=260, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:39:33,399] [INFO] [timer.py:198:stop] 0/260, RunningAvgSamplesPerSec=1.2049335050852177, CurrSamplesPerSec=1.200485769378612, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:39:41,785] [INFO] [logging.py:68:log_dist] [Rank 0] step=270, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:39:41,801] [INFO] [timer.py:198:stop] 0/270, RunningAvgSamplesPerSec=1.2045575016224725, CurrSamplesPerSec=1.1826467486620762, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:39:50,176] [INFO] [logging.py:68:log_dist] [Rank 0] step=280, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:39:50,192] [INFO] [timer.py:198:stop] 0/280, RunningAvgSamplesPerSec=1.2042624159253803, CurrSamplesPerSec=1.1984287263746072, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:39:58,563] [INFO] [logging.py:68:log_dist] [Rank 0] step=290, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:39:58,578] [INFO] [timer.py:198:stop] 0/290, RunningAvgSamplesPerSec=1.2040119951957236, CurrSamplesPerSec=1.2040591708842046, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:40:09,979] [INFO] [logging.py:68:log_dist] [Rank 0] step=300, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:40:09,995] [INFO] [timer.py:198:stop] 0/300, RunningAvgSamplesPerSec=1.1892238755743643, CurrSamplesPerSec=0.25930506334028475, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:40:18,377] [INFO] [logging.py:68:log_dist] [Rank 0] step=310, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:40:18,393] [INFO] [timer.py:198:stop] 0/310, RunningAvgSamplesPerSec=1.189426826067851, CurrSamplesPerSec=1.200487143783264, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:40:26,768] [INFO] [logging.py:68:log_dist] [Rank 0] step=320, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:40:26,784] [INFO] [timer.py:198:stop] 0/320, RunningAvgSamplesPerSec=1.1896455200137965, CurrSamplesPerSec=1.1942272380813002, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:40:35,164] [INFO] [logging.py:68:log_dist] [Rank 0] step=330, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:40:35,180] [INFO] [timer.py:198:stop] 0/330, RunningAvgSamplesPerSec=1.189827230188488, CurrSamplesPerSec=1.187993580654499, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:40:43,553] [INFO] [logging.py:68:log_dist] [Rank 0] step=340, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:40:43,569] [INFO] [timer.py:198:stop] 0/340, RunningAvgSamplesPerSec=1.1900293650788403, CurrSamplesPerSec=1.1999541109467993, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:40:51,926] [INFO] [logging.py:68:log_dist] [Rank 0] step=350, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:40:51,942] [INFO] [timer.py:198:stop] 0/350, RunningAvgSamplesPerSec=1.1902865609893243, CurrSamplesPerSec=1.2034649473916774, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:41:00,311] [INFO] [logging.py:68:log_dist] [Rank 0] step=360, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:41:00,327] [INFO] [timer.py:198:stop] 0/360, RunningAvgSamplesPerSec=1.1904849077353659, CurrSamplesPerSec=1.2052286004097594, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:41:08,715] [INFO] [logging.py:68:log_dist] [Rank 0] step=370, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:41:08,731] [INFO] [timer.py:198:stop] 0/370, RunningAvgSamplesPerSec=1.1905972678189984, CurrSamplesPerSec=1.2014162737426504, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:41:17,116] [INFO] [logging.py:68:log_dist] [Rank 0] step=380, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:41:17,132] [INFO] [timer.py:198:stop] 0/380, RunningAvgSamplesPerSec=1.1907111962884611, CurrSamplesPerSec=1.1943568026133844, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:41:25,512] [INFO] [logging.py:68:log_dist] [Rank 0] step=390, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:41:25,527] [INFO] [timer.py:198:stop] 0/390, RunningAvgSamplesPerSec=1.190839119714931, CurrSamplesPerSec=1.1969270255608442, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:41:37,071] [INFO] [logging.py:68:log_dist] [Rank 0] step=400, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:41:37,087] [INFO] [timer.py:198:stop] 0/400, RunningAvgSamplesPerSec=1.1797900673444968, CurrSamplesPerSec=0.2486268423075546, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:41:45,468] [INFO] [logging.py:68:log_dist] [Rank 0] step=410, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:41:45,484] [INFO] [timer.py:198:stop] 0/410, RunningAvgSamplesPerSec=1.1801771147284261, CurrSamplesPerSec=1.20171781046496, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:41:53,849] [INFO] [logging.py:68:log_dist] [Rank 0] step=420, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:41:53,864] [INFO] [timer.py:198:stop] 0/420, RunningAvgSamplesPerSec=1.1805947925394826, CurrSamplesPerSec=1.2046657452819243, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:42:02,235] [INFO] [logging.py:68:log_dist] [Rank 0] step=430, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:42:02,251] [INFO] [timer.py:198:stop] 0/430, RunningAvgSamplesPerSec=1.1809778742125392, CurrSamplesPerSec=1.200163901777968, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:42:10,624] [INFO] [logging.py:68:log_dist] [Rank 0] step=440, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:42:10,639] [INFO] [timer.py:198:stop] 0/440, RunningAvgSamplesPerSec=1.1813351174772393, CurrSamplesPerSec=1.2005874838237285, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:42:19,016] [INFO] [logging.py:68:log_dist] [Rank 0] step=450, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:42:19,032] [INFO] [timer.py:198:stop] 0/450, RunningAvgSamplesPerSec=1.1816642882986486, CurrSamplesPerSec=1.2046176535208961, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:42:27,398] [INFO] [logging.py:68:log_dist] [Rank 0] step=460, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:42:27,414] [INFO] [timer.py:198:stop] 0/460, RunningAvgSamplesPerSec=1.1820248630336592, CurrSamplesPerSec=1.205195008199272, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:42:35,756] [INFO] [logging.py:68:log_dist] [Rank 0] step=470, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:42:35,772] [INFO] [timer.py:198:stop] 0/470, RunningAvgSamplesPerSec=1.1824370050681852, CurrSamplesPerSec=1.2086512873047504, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:42:44,138] [INFO] [logging.py:68:log_dist] [Rank 0] step=480, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:42:44,154] [INFO] [timer.py:198:stop] 0/480, RunningAvgSamplesPerSec=1.1827600469034059, CurrSamplesPerSec=1.2032653923037908, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:42:52,515] [INFO] [logging.py:68:log_dist] [Rank 0] step=490, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:42:52,531] [INFO] [timer.py:198:stop] 0/490, RunningAvgSamplesPerSec=1.1830839201090122, CurrSamplesPerSec=1.1922237625321521, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:43:04,131] [INFO] [logging.py:68:log_dist] [Rank 0] step=500, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:43:04,147] [INFO] [timer.py:198:stop] 0/500, RunningAvgSamplesPerSec=1.1743582284890552, CurrSamplesPerSec=0.24542386996028856, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:43:12,527] [INFO] [logging.py:68:log_dist] [Rank 0] step=510, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:43:12,543] [INFO] [timer.py:198:stop] 0/510, RunningAvgSamplesPerSec=1.1747831400537596, CurrSamplesPerSec=1.1969929515524107, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:43:20,882] [INFO] [logging.py:68:log_dist] [Rank 0] step=520, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:43:20,898] [INFO] [timer.py:198:stop] 0/520, RunningAvgSamplesPerSec=1.1752987116811267, CurrSamplesPerSec=1.205693196031102, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:43:29,255] [INFO] [logging.py:68:log_dist] [Rank 0] step=530, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:43:29,270] [INFO] [timer.py:198:stop] 0/530, RunningAvgSamplesPerSec=1.175747777823081, CurrSamplesPerSec=1.2039782943671449, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:43:37,612] [INFO] [logging.py:68:log_dist] [Rank 0] step=540, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:43:37,628] [INFO] [timer.py:198:stop] 0/540, RunningAvgSamplesPerSec=1.1762202305278602, CurrSamplesPerSec=1.187263180242021, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:43:45,997] [INFO] [logging.py:68:log_dist] [Rank 0] step=550, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:43:46,013] [INFO] [timer.py:198:stop] 0/550, RunningAvgSamplesPerSec=1.1766060225425583, CurrSamplesPerSec=1.185815463281259, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:43:54,381] [INFO] [logging.py:68:log_dist] [Rank 0] step=560, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:43:54,397] [INFO] [timer.py:198:stop] 0/560, RunningAvgSamplesPerSec=1.1769840857647764, CurrSamplesPerSec=1.2080427813154615, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:44:02,735] [INFO] [logging.py:68:log_dist] [Rank 0] step=570, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:44:02,751] [INFO] [timer.py:198:stop] 0/570, RunningAvgSamplesPerSec=1.177416520707941, CurrSamplesPerSec=1.1969758715829872, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:44:11,117] [INFO] [logging.py:68:log_dist] [Rank 0] step=580, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:44:11,132] [INFO] [timer.py:198:stop] 0/580, RunningAvgSamplesPerSec=1.1777704655158834, CurrSamplesPerSec=1.1972751832035096, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:44:19,478] [INFO] [logging.py:68:log_dist] [Rank 0] step=590, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:44:19,494] [INFO] [timer.py:198:stop] 0/590, RunningAvgSamplesPerSec=1.1781585609395926, CurrSamplesPerSec=1.2011451576367278, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:44:30,811] [INFO] [logging.py:68:log_dist] [Rank 0] step=600, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:44:30,827] [INFO] [timer.py:198:stop] 0/600, RunningAvgSamplesPerSec=1.1716729561023274, CurrSamplesPerSec=0.26364894111198484, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:44:39,197] [INFO] [logging.py:68:log_dist] [Rank 0] step=610, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:44:39,213] [INFO] [timer.py:198:stop] 0/610, RunningAvgSamplesPerSec=1.1720922807332417, CurrSamplesPerSec=1.205124020512632, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:44:47,571] [INFO] [logging.py:68:log_dist] [Rank 0] step=620, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:44:47,587] [INFO] [timer.py:198:stop] 0/620, RunningAvgSamplesPerSec=1.1725268615907658, CurrSamplesPerSec=1.2019592177342906, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:44:55,934] [INFO] [logging.py:68:log_dist] [Rank 0] step=630, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:44:55,950] [INFO] [timer.py:198:stop] 0/630, RunningAvgSamplesPerSec=1.1729687237935866, CurrSamplesPerSec=1.202084264183724, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:45:04,312] [INFO] [logging.py:68:log_dist] [Rank 0] step=640, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:45:04,328] [INFO] [timer.py:198:stop] 0/640, RunningAvgSamplesPerSec=1.1733628231283861, CurrSamplesPerSec=1.2056145256535256, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:45:12,665] [INFO] [logging.py:68:log_dist] [Rank 0] step=650, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:45:12,681] [INFO] [timer.py:198:stop] 0/650, RunningAvgSamplesPerSec=1.1737976529056626, CurrSamplesPerSec=1.1952464896097463, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:45:21,043] [INFO] [logging.py:68:log_dist] [Rank 0] step=660, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:45:21,059] [INFO] [timer.py:198:stop] 0/660, RunningAvgSamplesPerSec=1.1741688922082778, CurrSamplesPerSec=1.2064867116300169, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:45:29,422] [INFO] [logging.py:68:log_dist] [Rank 0] step=670, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:45:29,437] [INFO] [timer.py:198:stop] 0/670, RunningAvgSamplesPerSec=1.174530689075969, CurrSamplesPerSec=1.1869147744332071, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:45:37,784] [INFO] [logging.py:68:log_dist] [Rank 0] step=680, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:45:37,800] [INFO] [timer.py:198:stop] 0/680, RunningAvgSamplesPerSec=1.1749131241537893, CurrSamplesPerSec=1.2057444932603654, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:45:46,149] [INFO] [logging.py:68:log_dist] [Rank 0] step=690, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:45:46,165] [INFO] [timer.py:198:stop] 0/690, RunningAvgSamplesPerSec=1.1752792469226345, CurrSamplesPerSec=1.2014744351416973, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:45:57,573] [INFO] [logging.py:68:log_dist] [Rank 0] step=700, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:45:57,588] [INFO] [timer.py:198:stop] 0/700, RunningAvgSamplesPerSec=1.1696091805191486, CurrSamplesPerSec=0.2572721498547659, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:46:05,943] [INFO] [logging.py:68:log_dist] [Rank 0] step=710, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:46:05,959] [INFO] [timer.py:198:stop] 0/710, RunningAvgSamplesPerSec=1.170029027603389, CurrSamplesPerSec=1.204341632657632, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:46:14,316] [INFO] [logging.py:68:log_dist] [Rank 0] step=720, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:46:14,332] [INFO] [timer.py:198:stop] 0/720, RunningAvgSamplesPerSec=1.170428364154163, CurrSamplesPerSec=1.2081061097683883, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:46:22,684] [INFO] [logging.py:68:log_dist] [Rank 0] step=730, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:46:22,699] [INFO] [timer.py:198:stop] 0/730, RunningAvgSamplesPerSec=1.1708284157148905, CurrSamplesPerSec=1.1852813086998826, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:46:31,042] [INFO] [logging.py:68:log_dist] [Rank 0] step=740, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:46:31,058] [INFO] [timer.py:198:stop] 0/740, RunningAvgSamplesPerSec=1.1712348172165306, CurrSamplesPerSec=1.1891677336457342, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:46:39,425] [INFO] [logging.py:68:log_dist] [Rank 0] step=750, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:46:39,440] [INFO] [timer.py:198:stop] 0/750, RunningAvgSamplesPerSec=1.1715853854467204, CurrSamplesPerSec=1.2009315850747107, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:46:47,800] [INFO] [logging.py:68:log_dist] [Rank 0] step=760, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:46:47,815] [INFO] [timer.py:198:stop] 0/760, RunningAvgSamplesPerSec=1.1719441844632394, CurrSamplesPerSec=1.204128996103659, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:46:56,154] [INFO] [logging.py:68:log_dist] [Rank 0] step=770, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:46:56,169] [INFO] [timer.py:198:stop] 0/770, RunningAvgSamplesPerSec=1.172328409585184, CurrSamplesPerSec=1.1946194185353671, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:47:04,523] [INFO] [logging.py:68:log_dist] [Rank 0] step=780, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:47:04,539] [INFO] [timer.py:198:stop] 0/780, RunningAvgSamplesPerSec=1.1726749190684693, CurrSamplesPerSec=1.200005607631185, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:47:12,893] [INFO] [logging.py:68:log_dist] [Rank 0] step=790, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:47:12,908] [INFO] [timer.py:198:stop] 0/790, RunningAvgSamplesPerSec=1.1730130641924061, CurrSamplesPerSec=1.2045297835306132, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:47:24,240] [INFO] [logging.py:68:log_dist] [Rank 0] step=800, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:47:24,256] [INFO] [timer.py:198:stop] 0/800, RunningAvgSamplesPerSec=1.1682281028388157, CurrSamplesPerSec=0.2609677368575652, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:47:32,612] [INFO] [logging.py:68:log_dist] [Rank 0] step=810, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:47:32,628] [INFO] [timer.py:198:stop] 0/810, RunningAvgSamplesPerSec=1.1686078316071642, CurrSamplesPerSec=1.1993097463303468, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:47:40,967] [INFO] [logging.py:68:log_dist] [Rank 0] step=820, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:47:40,983] [INFO] [timer.py:198:stop] 0/820, RunningAvgSamplesPerSec=1.1690047232999423, CurrSamplesPerSec=1.2031045535096039, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:47:49,352] [INFO] [logging.py:68:log_dist] [Rank 0] step=830, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:47:49,368] [INFO] [timer.py:198:stop] 0/830, RunningAvgSamplesPerSec=1.1693429094197814, CurrSamplesPerSec=1.1934670807361514, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:47:57,720] [INFO] [logging.py:68:log_dist] [Rank 0] step=840, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:47:57,736] [INFO] [timer.py:198:stop] 0/840, RunningAvgSamplesPerSec=1.1697007373425765, CurrSamplesPerSec=1.2049052232906179, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:48:06,094] [INFO] [logging.py:68:log_dist] [Rank 0] step=850, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:48:06,109] [INFO] [timer.py:198:stop] 0/850, RunningAvgSamplesPerSec=1.1700430603352892, CurrSamplesPerSec=1.1948940641087986, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:48:14,455] [INFO] [logging.py:68:log_dist] [Rank 0] step=860, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:48:14,471] [INFO] [timer.py:198:stop] 0/860, RunningAvgSamplesPerSec=1.1703960912808753, CurrSamplesPerSec=1.2087498616695562, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:48:22,841] [INFO] [logging.py:68:log_dist] [Rank 0] step=870, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:48:22,856] [INFO] [timer.py:198:stop] 0/870, RunningAvgSamplesPerSec=1.17070300513158, CurrSamplesPerSec=1.1900621970973553, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:48:31,206] [INFO] [logging.py:68:log_dist] [Rank 0] step=880, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:48:31,221] [INFO] [timer.py:198:stop] 0/880, RunningAvgSamplesPerSec=1.1710354563514755, CurrSamplesPerSec=1.1991273364349047, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:48:39,558] [INFO] [logging.py:68:log_dist] [Rank 0] step=890, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:48:39,573] [INFO] [timer.py:198:stop] 0/890, RunningAvgSamplesPerSec=1.171380730193178, CurrSamplesPerSec=1.203541610712064, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:48:50,948] [INFO] [logging.py:68:log_dist] [Rank 0] step=900, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:48:50,964] [INFO] [timer.py:198:stop] 0/900, RunningAvgSamplesPerSec=1.167091442442119, CurrSamplesPerSec=0.25914903084731206, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:48:59,338] [INFO] [logging.py:68:log_dist] [Rank 0] step=910, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:48:59,354] [INFO] [timer.py:198:stop] 0/910, RunningAvgSamplesPerSec=1.1674166792580025, CurrSamplesPerSec=1.1915199698647407, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:49:07,717] [INFO] [logging.py:68:log_dist] [Rank 0] step=920, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:49:07,733] [INFO] [timer.py:198:stop] 0/920, RunningAvgSamplesPerSec=1.167748391663879, CurrSamplesPerSec=1.1876427283675117, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:49:16,076] [INFO] [logging.py:68:log_dist] [Rank 0] step=930, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:49:16,092] [INFO] [timer.py:198:stop] 0/930, RunningAvgSamplesPerSec=1.1681009150589934, CurrSamplesPerSec=1.205035730425981, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:49:24,428] [INFO] [logging.py:68:log_dist] [Rank 0] step=940, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:49:24,444] [INFO] [timer.py:198:stop] 0/940, RunningAvgSamplesPerSec=1.1684573352294179, CurrSamplesPerSec=1.196754559264446, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:49:32,812] [INFO] [logging.py:68:log_dist] [Rank 0] step=950, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:49:32,828] [INFO] [timer.py:198:stop] 0/950, RunningAvgSamplesPerSec=1.1687621524329455, CurrSamplesPerSec=1.1927635157981367, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:49:41,198] [INFO] [logging.py:68:log_dist] [Rank 0] step=960, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:49:41,214] [INFO] [timer.py:198:stop] 0/960, RunningAvgSamplesPerSec=1.1690573564496418, CurrSamplesPerSec=1.1935601369563633, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:49:49,559] [INFO] [logging.py:68:log_dist] [Rank 0] step=970, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:49:49,574] [INFO] [timer.py:198:stop] 0/970, RunningAvgSamplesPerSec=1.1693807539653251, CurrSamplesPerSec=1.2070460595421386, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:49:57,919] [INFO] [logging.py:68:log_dist] [Rank 0] step=980, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:49:57,934] [INFO] [timer.py:198:stop] 0/980, RunningAvgSamplesPerSec=1.1696999462797608, CurrSamplesPerSec=1.2069418586248075, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:50:06,282] [INFO] [logging.py:68:log_dist] [Rank 0] step=990, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:50:06,298] [INFO] [timer.py:198:stop] 0/990, RunningAvgSamplesPerSec=1.1700079591875303, CurrSamplesPerSec=1.2048526130250237, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:50:17,816] [INFO] [logging.py:68:log_dist] [Rank 0] step=1000, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:50:17,831] [INFO] [timer.py:198:stop] 0/1000, RunningAvgSamplesPerSec=1.1659761396906199, CurrSamplesPerSec=0.25018025511048886, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:50:26,204] [INFO] [logging.py:68:log_dist] [Rank 0] step=1010, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:50:26,219] [INFO] [timer.py:198:stop] 0/1010, RunningAvgSamplesPerSec=1.1662810916544029, CurrSamplesPerSec=1.2068460096690306, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:50:34,567] [INFO] [logging.py:68:log_dist] [Rank 0] step=1020, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:50:34,583] [INFO] [timer.py:198:stop] 0/1020, RunningAvgSamplesPerSec=1.166611080528496, CurrSamplesPerSec=1.1910970105745504, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:50:42,940] [INFO] [logging.py:68:log_dist] [Rank 0] step=1030, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:50:42,956] [INFO] [timer.py:198:stop] 0/1030, RunningAvgSamplesPerSec=1.1669223919867315, CurrSamplesPerSec=1.1981356828878404, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:50:51,311] [INFO] [logging.py:68:log_dist] [Rank 0] step=1040, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:50:51,326] [INFO] [timer.py:198:stop] 0/1040, RunningAvgSamplesPerSec=1.1672305215156724, CurrSamplesPerSec=1.1740571764151977, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:50:59,657] [INFO] [logging.py:68:log_dist] [Rank 0] step=1050, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:50:59,672] [INFO] [timer.py:198:stop] 0/1050, RunningAvgSamplesPerSec=1.167562995497898, CurrSamplesPerSec=1.204200903802978, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:51:08,031] [INFO] [logging.py:68:log_dist] [Rank 0] step=1060, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:51:08,047] [INFO] [timer.py:198:stop] 0/1060, RunningAvgSamplesPerSec=1.167851670326408, CurrSamplesPerSec=1.1924796775704973, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:51:16,372] [INFO] [logging.py:68:log_dist] [Rank 0] step=1070, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:51:16,388] [INFO] [timer.py:198:stop] 0/1070, RunningAvgSamplesPerSec=1.1681768584609944, CurrSamplesPerSec=1.2095585690753805, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:51:24,733] [INFO] [logging.py:68:log_dist] [Rank 0] step=1080, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:51:24,749] [INFO] [timer.py:198:stop] 0/1080, RunningAvgSamplesPerSec=1.168470406301103, CurrSamplesPerSec=1.203604813496385, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:51:33,082] [INFO] [logging.py:68:log_dist] [Rank 0] step=1090, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:51:33,097] [INFO] [timer.py:198:stop] 0/1090, RunningAvgSamplesPerSec=1.1687753404668955, CurrSamplesPerSec=1.2013732585789993, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:51:44,487] [INFO] [logging.py:68:log_dist] [Rank 0] step=1100, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:51:44,503] [INFO] [timer.py:198:stop] 0/1100, RunningAvgSamplesPerSec=1.1652813574781666, CurrSamplesPerSec=0.25881986326243045, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:51:52,876] [INFO] [logging.py:68:log_dist] [Rank 0] step=1110, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:51:52,892] [INFO] [timer.py:198:stop] 0/1110, RunningAvgSamplesPerSec=1.1655589152866384, CurrSamplesPerSec=1.194467345760703, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:52:01,261] [INFO] [logging.py:68:log_dist] [Rank 0] step=1120, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:52:01,277] [INFO] [timer.py:198:stop] 0/1120, RunningAvgSamplesPerSec=1.1658354060475684, CurrSamplesPerSec=1.192931101418814, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:52:09,612] [INFO] [logging.py:68:log_dist] [Rank 0] step=1130, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:52:09,628] [INFO] [timer.py:198:stop] 0/1130, RunningAvgSamplesPerSec=1.1661479183340202, CurrSamplesPerSec=1.2059153999818293, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:52:17,972] [INFO] [logging.py:68:log_dist] [Rank 0] step=1140, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:52:17,987] [INFO] [timer.py:198:stop] 0/1140, RunningAvgSamplesPerSec=1.1664454039013543, CurrSamplesPerSec=1.2061546814331783, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:52:26,344] [INFO] [logging.py:68:log_dist] [Rank 0] step=1150, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:52:26,360] [INFO] [timer.py:198:stop] 0/1150, RunningAvgSamplesPerSec=1.1667213837929866, CurrSamplesPerSec=1.196768218148699, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:52:34,700] [INFO] [logging.py:68:log_dist] [Rank 0] step=1160, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:52:34,716] [INFO] [timer.py:198:stop] 0/1160, RunningAvgSamplesPerSec=1.1670138505161456, CurrSamplesPerSec=1.1925196848154969, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:52:43,057] [INFO] [logging.py:68:log_dist] [Rank 0] step=1170, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:52:43,073] [INFO] [timer.py:198:stop] 0/1170, RunningAvgSamplesPerSec=1.1672995068584044, CurrSamplesPerSec=1.1973079935782813, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:52:51,416] [INFO] [logging.py:68:log_dist] [Rank 0] step=1180, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:52:51,432] [INFO] [timer.py:198:stop] 0/1180, RunningAvgSamplesPerSec=1.1675776549904708, CurrSamplesPerSec=1.200395065629147, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:52:59,792] [INFO] [logging.py:68:log_dist] [Rank 0] step=1190, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:52:59,808] [INFO] [timer.py:198:stop] 0/1190, RunningAvgSamplesPerSec=1.1678318434019916, CurrSamplesPerSec=1.201777034083697, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:53:11,167] [INFO] [logging.py:68:log_dist] [Rank 0] step=1200, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:53:11,183] [INFO] [timer.py:198:stop] 0/1200, RunningAvgSamplesPerSec=1.1646777067883043, CurrSamplesPerSec=0.2600788923203228, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:53:19,529] [INFO] [logging.py:68:log_dist] [Rank 0] step=1210, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:53:19,545] [INFO] [timer.py:198:stop] 0/1210, RunningAvgSamplesPerSec=1.1649675640802477, CurrSamplesPerSec=1.1918731170536896, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:53:27,878] [INFO] [logging.py:68:log_dist] [Rank 0] step=1220, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:53:27,894] [INFO] [timer.py:198:stop] 0/1220, RunningAvgSamplesPerSec=1.1652663007166217, CurrSamplesPerSec=1.1964131884582165, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:53:36,235] [INFO] [logging.py:68:log_dist] [Rank 0] step=1230, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:53:36,251] [INFO] [timer.py:198:stop] 0/1230, RunningAvgSamplesPerSec=1.1655519568574437, CurrSamplesPerSec=1.2033075074175634, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:53:44,601] [INFO] [logging.py:68:log_dist] [Rank 0] step=1240, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:53:44,617] [INFO] [timer.py:198:stop] 0/1240, RunningAvgSamplesPerSec=1.1658230470215611, CurrSamplesPerSec=1.2035081124742293, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:53:52,967] [INFO] [logging.py:68:log_dist] [Rank 0] step=1250, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:53:52,983] [INFO] [timer.py:198:stop] 0/1250, RunningAvgSamplesPerSec=1.1660894202665717, CurrSamplesPerSec=1.1948283691062682, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:54:01,335] [INFO] [logging.py:68:log_dist] [Rank 0] step=1260, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:54:01,351] [INFO] [timer.py:198:stop] 0/1260, RunningAvgSamplesPerSec=1.1663500617440548, CurrSamplesPerSec=1.2060402306320066, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:54:09,711] [INFO] [logging.py:68:log_dist] [Rank 0] step=1270, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:54:09,727] [INFO] [timer.py:198:stop] 0/1270, RunningAvgSamplesPerSec=1.1665979951951784, CurrSamplesPerSec=1.201331278749377, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:54:18,065] [INFO] [logging.py:68:log_dist] [Rank 0] step=1280, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:54:18,080] [INFO] [timer.py:198:stop] 0/1280, RunningAvgSamplesPerSec=1.1668650143116144, CurrSamplesPerSec=1.2041628746228292, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:54:26,426] [INFO] [logging.py:68:log_dist] [Rank 0] step=1290, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:54:26,441] [INFO] [timer.py:198:stop] 0/1290, RunningAvgSamplesPerSec=1.1671210913889996, CurrSamplesPerSec=1.205913666404743, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:54:37,792] [INFO] [logging.py:68:log_dist] [Rank 0] step=1300, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:54:37,808] [INFO] [timer.py:198:stop] 0/1300, RunningAvgSamplesPerSec=1.1642256989320505, CurrSamplesPerSec=0.2601210386821688, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:54:46,156] [INFO] [logging.py:68:log_dist] [Rank 0] step=1310, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:54:46,171] [INFO] [timer.py:198:stop] 0/1310, RunningAvgSamplesPerSec=1.1644949161728118, CurrSamplesPerSec=1.205348093035553, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:54:54,512] [INFO] [logging.py:68:log_dist] [Rank 0] step=1320, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:54:54,528] [INFO] [timer.py:198:stop] 0/1320, RunningAvgSamplesPerSec=1.1647669450494398, CurrSamplesPerSec=1.2035409200079197, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:55:02,880] [INFO] [logging.py:68:log_dist] [Rank 0] step=1330, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:55:02,896] [INFO] [timer.py:198:stop] 0/1330, RunningAvgSamplesPerSec=1.1650225562764591, CurrSamplesPerSec=1.2042655589221887, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:55:11,265] [INFO] [logging.py:68:log_dist] [Rank 0] step=1340, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:55:11,281] [INFO] [timer.py:198:stop] 0/1340, RunningAvgSamplesPerSec=1.1652579708256496, CurrSamplesPerSec=1.189558284414853, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:55:19,648] [INFO] [logging.py:68:log_dist] [Rank 0] step=1350, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:55:19,664] [INFO] [timer.py:198:stop] 0/1350, RunningAvgSamplesPerSec=1.1654914559738776, CurrSamplesPerSec=1.2002250330367041, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:55:28,014] [INFO] [logging.py:68:log_dist] [Rank 0] step=1360, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:55:28,029] [INFO] [timer.py:198:stop] 0/1360, RunningAvgSamplesPerSec=1.165740816279911, CurrSamplesPerSec=1.1917420591094179, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:55:36,362] [INFO] [logging.py:68:log_dist] [Rank 0] step=1370, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:55:36,377] [INFO] [timer.py:198:stop] 0/1370, RunningAvgSamplesPerSec=1.1660023827087205, CurrSamplesPerSec=1.2081266407469218, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:55:44,730] [INFO] [logging.py:68:log_dist] [Rank 0] step=1380, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:55:44,746] [INFO] [timer.py:198:stop] 0/1380, RunningAvgSamplesPerSec=1.166240405252059, CurrSamplesPerSec=1.2051759619015, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:55:53,099] [INFO] [logging.py:68:log_dist] [Rank 0] step=1390, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:55:53,115] [INFO] [timer.py:198:stop] 0/1390, RunningAvgSamplesPerSec=1.1664736649024308, CurrSamplesPerSec=1.208085927452699, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:56:04,678] [INFO] [logging.py:68:log_dist] [Rank 0] step=1400, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:56:04,694] [INFO] [timer.py:198:stop] 0/1400, RunningAvgSamplesPerSec=1.163587305386481, CurrSamplesPerSec=0.24813697777577948, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:56:13,053] [INFO] [logging.py:68:log_dist] [Rank 0] step=1410, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:56:13,069] [INFO] [timer.py:198:stop] 0/1410, RunningAvgSamplesPerSec=1.1638312180940797, CurrSamplesPerSec=1.2010381899026181, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:56:21,419] [INFO] [logging.py:68:log_dist] [Rank 0] step=1420, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:56:21,435] [INFO] [timer.py:198:stop] 0/1420, RunningAvgSamplesPerSec=1.1640793827110718, CurrSamplesPerSec=1.2022169176406383, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:56:29,805] [INFO] [logging.py:68:log_dist] [Rank 0] step=1430, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:56:29,820] [INFO] [timer.py:198:stop] 0/1430, RunningAvgSamplesPerSec=1.1643056408814036, CurrSamplesPerSec=1.1976885201851857, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:56:38,157] [INFO] [logging.py:68:log_dist] [Rank 0] step=1440, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:56:38,172] [INFO] [timer.py:198:stop] 0/1440, RunningAvgSamplesPerSec=1.1645595692941066, CurrSamplesPerSec=1.1995673404997091, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:56:46,560] [INFO] [logging.py:68:log_dist] [Rank 0] step=1450, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:56:46,576] [INFO] [timer.py:198:stop] 0/1450, RunningAvgSamplesPerSec=1.1647627106979368, CurrSamplesPerSec=1.1989586909486323, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:56:54,912] [INFO] [logging.py:68:log_dist] [Rank 0] step=1460, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:56:54,928] [INFO] [timer.py:198:stop] 0/1460, RunningAvgSamplesPerSec=1.1650111757889137, CurrSamplesPerSec=1.2074755571574733, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:57:03,280] [INFO] [logging.py:68:log_dist] [Rank 0] step=1470, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:57:03,296] [INFO] [timer.py:198:stop] 0/1470, RunningAvgSamplesPerSec=1.165241146315457, CurrSamplesPerSec=1.2007631192614325, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:57:11,670] [INFO] [logging.py:68:log_dist] [Rank 0] step=1480, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:57:11,686] [INFO] [timer.py:198:stop] 0/1480, RunningAvgSamplesPerSec=1.1654476543835521, CurrSamplesPerSec=1.1908562271673275, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:57:20,039] [INFO] [logging.py:68:log_dist] [Rank 0] step=1490, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:57:20,054] [INFO] [timer.py:198:stop] 0/1490, RunningAvgSamplesPerSec=1.1656711118220788, CurrSamplesPerSec=1.2021383556948109, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:57:31,423] [INFO] [logging.py:68:log_dist] [Rank 0] step=1500, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:57:31,439] [INFO] [timer.py:198:stop] 0/1500, RunningAvgSamplesPerSec=1.1631615185389523, CurrSamplesPerSec=0.2598776878956396, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:57:39,772] [INFO] [logging.py:68:log_dist] [Rank 0] step=1510, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:57:39,787] [INFO] [timer.py:198:stop] 0/1510, RunningAvgSamplesPerSec=1.1634154068758673, CurrSamplesPerSec=1.2039316397801392, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:57:48,118] [INFO] [logging.py:68:log_dist] [Rank 0] step=1520, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:57:48,134] [INFO] [timer.py:198:stop] 0/1520, RunningAvgSamplesPerSec=1.1636668387478766, CurrSamplesPerSec=1.2065651487888478, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:57:56,498] [INFO] [logging.py:68:log_dist] [Rank 0] step=1530, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:57:56,514] [INFO] [timer.py:198:stop] 0/1530, RunningAvgSamplesPerSec=1.1638852810067044, CurrSamplesPerSec=1.1895356807896538, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:58:04,845] [INFO] [logging.py:68:log_dist] [Rank 0] step=1540, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:58:04,861] [INFO] [timer.py:198:stop] 0/1540, RunningAvgSamplesPerSec=1.164130726856262, CurrSamplesPerSec=1.2068359394669188, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:58:13,201] [INFO] [logging.py:68:log_dist] [Rank 0] step=1550, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:58:13,217] [INFO] [timer.py:198:stop] 0/1550, RunningAvgSamplesPerSec=1.1643647601304554, CurrSamplesPerSec=1.2053422044381197, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:58:21,556] [INFO] [logging.py:68:log_dist] [Rank 0] step=1560, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:58:21,572] [INFO] [timer.py:198:stop] 0/1560, RunningAvgSamplesPerSec=1.1645980717205504, CurrSamplesPerSec=1.2019919408756825, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:58:29,915] [INFO] [logging.py:68:log_dist] [Rank 0] step=1570, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:58:29,930] [INFO] [timer.py:198:stop] 0/1570, RunningAvgSamplesPerSec=1.1648237938661645, CurrSamplesPerSec=1.2081489124279383, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:58:38,276] [INFO] [logging.py:68:log_dist] [Rank 0] step=1580, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:58:38,291] [INFO] [timer.py:198:stop] 0/1580, RunningAvgSamplesPerSec=1.165045206202618, CurrSamplesPerSec=1.2058783025201145, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:58:46,640] [INFO] [logging.py:68:log_dist] [Rank 0] step=1590, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:58:46,656] [INFO] [timer.py:198:stop] 0/1590, RunningAvgSamplesPerSec=1.1652605198992732, CurrSamplesPerSec=1.2067355938757285, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:58:58,135] [INFO] [logging.py:68:log_dist] [Rank 0] step=1600, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:58:58,151] [INFO] [timer.py:198:stop] 0/1600, RunningAvgSamplesPerSec=1.1628185305903276, CurrSamplesPerSec=0.2517262614703745, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:59:06,491] [INFO] [logging.py:68:log_dist] [Rank 0] step=1610, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:59:06,507] [INFO] [timer.py:198:stop] 0/1610, RunningAvgSamplesPerSec=1.163052095291884, CurrSamplesPerSec=1.195023092418108, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:59:14,844] [INFO] [logging.py:68:log_dist] [Rank 0] step=1620, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:59:14,860] [INFO] [timer.py:198:stop] 0/1620, RunningAvgSamplesPerSec=1.16328464510362, CurrSamplesPerSec=1.2050447319601876, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:59:23,213] [INFO] [logging.py:68:log_dist] [Rank 0] step=1630, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:59:23,229] [INFO] [timer.py:198:stop] 0/1630, RunningAvgSamplesPerSec=1.1635019085385048, CurrSamplesPerSec=1.1982072188323725, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:59:31,585] [INFO] [logging.py:68:log_dist] [Rank 0] step=1640, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:59:31,601] [INFO] [timer.py:198:stop] 0/1640, RunningAvgSamplesPerSec=1.1637135672463053, CurrSamplesPerSec=1.2024306029376857, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:59:39,930] [INFO] [logging.py:68:log_dist] [Rank 0] step=1650, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:59:39,946] [INFO] [timer.py:198:stop] 0/1650, RunningAvgSamplesPerSec=1.1639445527446717, CurrSamplesPerSec=1.2015305369741787, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:59:48,295] [INFO] [logging.py:68:log_dist] [Rank 0] step=1660, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:59:48,310] [INFO] [timer.py:198:stop] 0/1660, RunningAvgSamplesPerSec=1.1641596357801285, CurrSamplesPerSec=1.2093461786536748, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 21:59:56,678] [INFO] [logging.py:68:log_dist] [Rank 0] step=1670, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 21:59:56,694] [INFO] [timer.py:198:stop] 0/1670, RunningAvgSamplesPerSec=1.164355347959699, CurrSamplesPerSec=1.1924434021350412, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:00:05,046] [INFO] [logging.py:68:log_dist] [Rank 0] step=1680, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:00:05,062] [INFO] [timer.py:198:stop] 0/1680, RunningAvgSamplesPerSec=1.1645609171714237, CurrSamplesPerSec=1.1946075098738804, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:00:13,403] [INFO] [logging.py:68:log_dist] [Rank 0] step=1690, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:00:13,420] [INFO] [timer.py:198:stop] 0/1690, RunningAvgSamplesPerSec=1.1647714724888811, CurrSamplesPerSec=1.202573331872994, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:00:24,786] [INFO] [logging.py:68:log_dist] [Rank 0] step=1700, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:00:24,802] [INFO] [timer.py:198:stop] 0/1700, RunningAvgSamplesPerSec=1.1625682513900277, CurrSamplesPerSec=0.25877195877840603, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:00:33,177] [INFO] [logging.py:68:log_dist] [Rank 0] step=1710, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:00:33,193] [INFO] [timer.py:198:stop] 0/1710, RunningAvgSamplesPerSec=1.162762222699121, CurrSamplesPerSec=1.1930335757437502, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:00:41,533] [INFO] [logging.py:68:log_dist] [Rank 0] step=1720, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:00:41,548] [INFO] [timer.py:198:stop] 0/1720, RunningAvgSamplesPerSec=1.162980949473688, CurrSamplesPerSec=1.2035630229337606, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:00:49,912] [INFO] [logging.py:68:log_dist] [Rank 0] step=1730, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:00:49,928] [INFO] [timer.py:198:stop] 0/1730, RunningAvgSamplesPerSec=1.1631800827893253, CurrSamplesPerSec=1.1861337049653942, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:00:58,271] [INFO] [logging.py:68:log_dist] [Rank 0] step=1740, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:00:58,287] [INFO] [timer.py:198:stop] 0/1740, RunningAvgSamplesPerSec=1.1633915321920782, CurrSamplesPerSec=1.2035709663710374, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:01:06,643] [INFO] [logging.py:68:log_dist] [Rank 0] step=1750, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:01:06,659] [INFO] [timer.py:198:stop] 0/1750, RunningAvgSamplesPerSec=1.163590225994984, CurrSamplesPerSec=1.1981233617693234, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:01:15,033] [INFO] [logging.py:68:log_dist] [Rank 0] step=1760, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:01:15,048] [INFO] [timer.py:198:stop] 0/1760, RunningAvgSamplesPerSec=1.1637745260012058, CurrSamplesPerSec=1.1909857376023225, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:01:23,410] [INFO] [logging.py:68:log_dist] [Rank 0] step=1770, skipped=20, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:01:23,426] [INFO] [timer.py:198:stop] 0/1770, RunningAvgSamplesPerSec=1.1639653769909815, CurrSamplesPerSec=1.1960184653691635, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:01:27,975] [INFO] [stage_1_and_2.py:1720:step] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384.0, reducing to 16384.0\n",
"[2023-08-29 22:01:31,324] [INFO] [logging.py:68:log_dist] [Rank 0] step=1780, skipped=21, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:01:31,340] [INFO] [timer.py:198:stop] 0/1780, RunningAvgSamplesPerSec=1.1645070301248723, CurrSamplesPerSec=1.196450047124278, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:01:39,697] [INFO] [logging.py:68:log_dist] [Rank 0] step=1790, skipped=21, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:01:39,713] [INFO] [timer.py:198:stop] 0/1790, RunningAvgSamplesPerSec=1.1646949230405967, CurrSamplesPerSec=1.191542987465274, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:01:51,251] [INFO] [logging.py:68:log_dist] [Rank 0] step=1800, skipped=21, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:01:51,266] [INFO] [timer.py:198:stop] 0/1800, RunningAvgSamplesPerSec=1.1624855742164946, CurrSamplesPerSec=0.24883886578993614, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:01:59,634] [INFO] [logging.py:68:log_dist] [Rank 0] step=1810, skipped=21, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:01:59,650] [INFO] [timer.py:198:stop] 0/1810, RunningAvgSamplesPerSec=1.162675417705554, CurrSamplesPerSec=1.1946282651233135, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:02:08,024] [INFO] [logging.py:68:log_dist] [Rank 0] step=1820, skipped=21, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:02:08,040] [INFO] [timer.py:198:stop] 0/1820, RunningAvgSamplesPerSec=1.1628574966481522, CurrSamplesPerSec=1.2035827090355995, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:02:16,380] [INFO] [logging.py:68:log_dist] [Rank 0] step=1830, skipped=21, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:02:16,395] [INFO] [timer.py:198:stop] 0/1830, RunningAvgSamplesPerSec=1.1630634093342112, CurrSamplesPerSec=1.198741098692055, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:02:24,741] [INFO] [logging.py:68:log_dist] [Rank 0] step=1840, skipped=21, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:02:24,756] [INFO] [timer.py:198:stop] 0/1840, RunningAvgSamplesPerSec=1.1632629576300788, CurrSamplesPerSec=1.2072145557533536, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:02:33,098] [INFO] [logging.py:68:log_dist] [Rank 0] step=1850, skipped=21, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:02:33,114] [INFO] [timer.py:198:stop] 0/1850, RunningAvgSamplesPerSec=1.1634627681781216, CurrSamplesPerSec=1.1980090612505576, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:02:41,469] [INFO] [logging.py:68:log_dist] [Rank 0] step=1860, skipped=21, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:02:41,485] [INFO] [timer.py:198:stop] 0/1860, RunningAvgSamplesPerSec=1.1636513763207068, CurrSamplesPerSec=1.195050331506982, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:02:49,847] [INFO] [logging.py:68:log_dist] [Rank 0] step=1870, skipped=21, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:02:49,863] [INFO] [timer.py:198:stop] 0/1870, RunningAvgSamplesPerSec=1.1638317932896425, CurrSamplesPerSec=1.2009439640189594, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:02:58,224] [INFO] [logging.py:68:log_dist] [Rank 0] step=1880, skipped=21, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:02:58,240] [INFO] [timer.py:198:stop] 0/1880, RunningAvgSamplesPerSec=1.1640113194277257, CurrSamplesPerSec=1.1990707732102293, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:03:06,563] [INFO] [logging.py:68:log_dist] [Rank 0] step=1890, skipped=21, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:03:06,578] [INFO] [timer.py:198:stop] 0/1890, RunningAvgSamplesPerSec=1.1642163228917104, CurrSamplesPerSec=1.2034929180120335, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:03:17,985] [INFO] [logging.py:68:log_dist] [Rank 0] step=1900, skipped=21, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:03:18,001] [INFO] [timer.py:198:stop] 0/1900, RunningAvgSamplesPerSec=1.162220768055696, CurrSamplesPerSec=0.25688508073777144, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:03:26,362] [INFO] [logging.py:68:log_dist] [Rank 0] step=1910, skipped=21, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:03:26,378] [INFO] [timer.py:198:stop] 0/1910, RunningAvgSamplesPerSec=1.1624059965559692, CurrSamplesPerSec=1.1976522691352771, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:03:34,722] [INFO] [logging.py:68:log_dist] [Rank 0] step=1920, skipped=21, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:03:34,738] [INFO] [timer.py:198:stop] 0/1920, RunningAvgSamplesPerSec=1.162601175949291, CurrSamplesPerSec=1.2030627976783776, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:03:43,103] [INFO] [logging.py:68:log_dist] [Rank 0] step=1930, skipped=21, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:03:43,119] [INFO] [timer.py:198:stop] 0/1930, RunningAvgSamplesPerSec=1.1627791088799508, CurrSamplesPerSec=1.190687195873719, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:03:51,468] [INFO] [logging.py:68:log_dist] [Rank 0] step=1940, skipped=21, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:03:51,484] [INFO] [timer.py:198:stop] 0/1940, RunningAvgSamplesPerSec=1.1629659909094956, CurrSamplesPerSec=1.2073758002890698, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:03:59,840] [INFO] [logging.py:68:log_dist] [Rank 0] step=1950, skipped=21, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:03:59,855] [INFO] [timer.py:198:stop] 0/1950, RunningAvgSamplesPerSec=1.1631467619888265, CurrSamplesPerSec=1.2019323516449003, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:04:08,227] [INFO] [logging.py:68:log_dist] [Rank 0] step=1960, skipped=21, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:04:08,242] [INFO] [timer.py:198:stop] 0/1960, RunningAvgSamplesPerSec=1.1633159561398292, CurrSamplesPerSec=1.1997361572667449, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:04:16,620] [INFO] [logging.py:68:log_dist] [Rank 0] step=1970, skipped=21, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:04:16,636] [INFO] [timer.py:198:stop] 0/1970, RunningAvgSamplesPerSec=1.1634778285088418, CurrSamplesPerSec=1.198791806048744, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:04:25,003] [INFO] [logging.py:68:log_dist] [Rank 0] step=1980, skipped=21, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:04:25,019] [INFO] [timer.py:198:stop] 0/1980, RunningAvgSamplesPerSec=1.1636459373228516, CurrSamplesPerSec=1.2008958251136816, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:04:33,382] [INFO] [logging.py:68:log_dist] [Rank 0] step=1990, skipped=21, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:04:33,397] [INFO] [timer.py:198:stop] 0/1990, RunningAvgSamplesPerSec=1.1638154096687383, CurrSamplesPerSec=1.201435201390516, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:04:44,806] [INFO] [logging.py:68:log_dist] [Rank 0] step=2000, skipped=21, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:04:44,822] [INFO] [timer.py:198:stop] 0/2000, RunningAvgSamplesPerSec=1.1619215469281161, CurrSamplesPerSec=0.256704857264022, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:04:53,174] [INFO] [logging.py:68:log_dist] [Rank 0] step=2010, skipped=21, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:04:53,190] [INFO] [timer.py:198:stop] 0/2010, RunningAvgSamplesPerSec=1.1621050554891992, CurrSamplesPerSec=1.2058814227794443, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:05:01,535] [INFO] [logging.py:68:log_dist] [Rank 0] step=2020, skipped=21, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:05:01,550] [INFO] [timer.py:198:stop] 0/2020, RunningAvgSamplesPerSec=1.1622907804481553, CurrSamplesPerSec=1.2012583371711243, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:05:09,902] [INFO] [logging.py:68:log_dist] [Rank 0] step=2030, skipped=21, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:05:09,918] [INFO] [timer.py:198:stop] 0/2030, RunningAvgSamplesPerSec=1.1624704145723594, CurrSamplesPerSec=1.1998178377153736, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:05:18,273] [INFO] [logging.py:68:log_dist] [Rank 0] step=2040, skipped=21, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:05:18,289] [INFO] [timer.py:198:stop] 0/2040, RunningAvgSamplesPerSec=1.1626460675162018, CurrSamplesPerSec=1.1923918744696245, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:05:26,632] [INFO] [logging.py:68:log_dist] [Rank 0] step=2050, skipped=21, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:05:26,647] [INFO] [timer.py:198:stop] 0/2050, RunningAvgSamplesPerSec=1.162828501949115, CurrSamplesPerSec=1.2008528472092788, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:05:35,008] [INFO] [logging.py:68:log_dist] [Rank 0] step=2060, skipped=21, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:05:35,023] [INFO] [timer.py:198:stop] 0/2060, RunningAvgSamplesPerSec=1.1629980922987784, CurrSamplesPerSec=1.1930797288713755, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:05:43,404] [INFO] [logging.py:68:log_dist] [Rank 0] step=2070, skipped=21, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:05:43,420] [INFO] [timer.py:198:stop] 0/2070, RunningAvgSamplesPerSec=1.1631524664461472, CurrSamplesPerSec=1.2036400441589665, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:05:48,803] [INFO] [stage_1_and_2.py:1720:step] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384.0, reducing to 8192.0\n",
"[2023-08-29 22:05:51,304] [INFO] [logging.py:68:log_dist] [Rank 0] step=2080, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:05:51,319] [INFO] [timer.py:198:stop] 0/2080, RunningAvgSamplesPerSec=1.1636283452822573, CurrSamplesPerSec=1.2036434982565078, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:05:59,663] [INFO] [logging.py:68:log_dist] [Rank 0] step=2090, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:05:59,679] [INFO] [timer.py:198:stop] 0/2090, RunningAvgSamplesPerSec=1.1638023104655757, CurrSamplesPerSec=1.2002198812858533, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:06:11,356] [INFO] [logging.py:68:log_dist] [Rank 0] step=2100, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:06:11,372] [INFO] [timer.py:198:stop] 0/2100, RunningAvgSamplesPerSec=1.1618251911323756, CurrSamplesPerSec=0.24102627225811687, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:06:19,727] [INFO] [logging.py:68:log_dist] [Rank 0] step=2110, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:06:19,743] [INFO] [timer.py:198:stop] 0/2110, RunningAvgSamplesPerSec=1.1619986114226246, CurrSamplesPerSec=1.2020167427592625, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:06:28,092] [INFO] [logging.py:68:log_dist] [Rank 0] step=2120, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:06:28,108] [INFO] [timer.py:198:stop] 0/2120, RunningAvgSamplesPerSec=1.162173878064501, CurrSamplesPerSec=1.1985294075624753, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:06:36,469] [INFO] [logging.py:68:log_dist] [Rank 0] step=2130, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:06:36,484] [INFO] [timer.py:198:stop] 0/2130, RunningAvgSamplesPerSec=1.1623400117139147, CurrSamplesPerSec=1.1928096480329025, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:06:44,837] [INFO] [logging.py:68:log_dist] [Rank 0] step=2140, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:06:44,853] [INFO] [timer.py:198:stop] 0/2140, RunningAvgSamplesPerSec=1.162510441850738, CurrSamplesPerSec=1.1978790452619235, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:06:53,195] [INFO] [logging.py:68:log_dist] [Rank 0] step=2150, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:06:53,210] [INFO] [timer.py:198:stop] 0/2150, RunningAvgSamplesPerSec=1.1626892377054097, CurrSamplesPerSec=1.2063361131124177, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:07:01,567] [INFO] [logging.py:68:log_dist] [Rank 0] step=2160, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:07:01,583] [INFO] [timer.py:198:stop] 0/2160, RunningAvgSamplesPerSec=1.1628562908175413, CurrSamplesPerSec=1.2059427911613159, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:07:09,934] [INFO] [logging.py:68:log_dist] [Rank 0] step=2170, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:07:09,950] [INFO] [timer.py:198:stop] 0/2170, RunningAvgSamplesPerSec=1.1630225112423656, CurrSamplesPerSec=1.1914847681957417, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:07:18,309] [INFO] [logging.py:68:log_dist] [Rank 0] step=2180, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:07:18,325] [INFO] [timer.py:198:stop] 0/2180, RunningAvgSamplesPerSec=1.1631821845066208, CurrSamplesPerSec=1.2027736921931191, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:07:26,714] [INFO] [logging.py:68:log_dist] [Rank 0] step=2190, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:07:26,730] [INFO] [timer.py:198:stop] 0/2190, RunningAvgSamplesPerSec=1.163321637180159, CurrSamplesPerSec=1.1952267346629386, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:07:38,302] [INFO] [logging.py:68:log_dist] [Rank 0] step=2200, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:07:38,318] [INFO] [timer.py:198:stop] 0/2200, RunningAvgSamplesPerSec=1.1615051859501846, CurrSamplesPerSec=0.24813813749323274, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:07:46,704] [INFO] [logging.py:68:log_dist] [Rank 0] step=2210, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:07:46,720] [INFO] [timer.py:198:stop] 0/2210, RunningAvgSamplesPerSec=1.1616563166848133, CurrSamplesPerSec=1.2005479643399555, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:07:55,109] [INFO] [logging.py:68:log_dist] [Rank 0] step=2220, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:07:55,125] [INFO] [timer.py:198:stop] 0/2220, RunningAvgSamplesPerSec=1.1618009544040993, CurrSamplesPerSec=1.1984311233491693, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:08:03,511] [INFO] [logging.py:68:log_dist] [Rank 0] step=2230, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:08:03,527] [INFO] [timer.py:198:stop] 0/2230, RunningAvgSamplesPerSec=1.1619455252712958, CurrSamplesPerSec=1.1990923694480602, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:08:11,900] [INFO] [logging.py:68:log_dist] [Rank 0] step=2240, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:08:11,916] [INFO] [timer.py:198:stop] 0/2240, RunningAvgSamplesPerSec=1.1620971229745332, CurrSamplesPerSec=1.1933224309009984, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:08:20,296] [INFO] [logging.py:68:log_dist] [Rank 0] step=2250, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:08:20,312] [INFO] [timer.py:198:stop] 0/2250, RunningAvgSamplesPerSec=1.1622435329450058, CurrSamplesPerSec=1.1934229350499472, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:08:28,712] [INFO] [logging.py:68:log_dist] [Rank 0] step=2260, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:08:28,728] [INFO] [timer.py:198:stop] 0/2260, RunningAvgSamplesPerSec=1.1623768092986313, CurrSamplesPerSec=1.195514608363378, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:08:37,108] [INFO] [logging.py:68:log_dist] [Rank 0] step=2270, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:08:37,124] [INFO] [timer.py:198:stop] 0/2270, RunningAvgSamplesPerSec=1.1625200769054644, CurrSamplesPerSec=1.185627415463034, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:08:45,496] [INFO] [logging.py:68:log_dist] [Rank 0] step=2280, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:08:45,512] [INFO] [timer.py:198:stop] 0/2280, RunningAvgSamplesPerSec=1.1626669904275317, CurrSamplesPerSec=1.1992655102627843, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:08:53,893] [INFO] [logging.py:68:log_dist] [Rank 0] step=2290, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:08:53,909] [INFO] [timer.py:198:stop] 0/2290, RunningAvgSamplesPerSec=1.162807626792389, CurrSamplesPerSec=1.1915148925725765, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:09:05,356] [INFO] [logging.py:68:log_dist] [Rank 0] step=2300, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:09:05,372] [INFO] [timer.py:198:stop] 0/2300, RunningAvgSamplesPerSec=1.1611455670340856, CurrSamplesPerSec=0.2561972925561634, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:09:13,763] [INFO] [logging.py:68:log_dist] [Rank 0] step=2310, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:09:13,779] [INFO] [timer.py:198:stop] 0/2310, RunningAvgSamplesPerSec=1.1612859639842572, CurrSamplesPerSec=1.190151345973826, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:09:22,173] [INFO] [logging.py:68:log_dist] [Rank 0] step=2320, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:09:22,189] [INFO] [timer.py:198:stop] 0/2320, RunningAvgSamplesPerSec=1.161422813651431, CurrSamplesPerSec=1.188636618690112, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:09:30,560] [INFO] [logging.py:68:log_dist] [Rank 0] step=2330, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:09:30,576] [INFO] [timer.py:198:stop] 0/2330, RunningAvgSamplesPerSec=1.161571595835165, CurrSamplesPerSec=1.193294591529645, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:09:38,989] [INFO] [logging.py:68:log_dist] [Rank 0] step=2340, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:09:39,005] [INFO] [timer.py:198:stop] 0/2340, RunningAvgSamplesPerSec=1.1616948740579196, CurrSamplesPerSec=1.1890059224871952, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:09:47,382] [INFO] [logging.py:68:log_dist] [Rank 0] step=2350, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:09:47,398] [INFO] [timer.py:198:stop] 0/2350, RunningAvgSamplesPerSec=1.1618378656470407, CurrSamplesPerSec=1.2006593128512761, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:09:55,781] [INFO] [logging.py:68:log_dist] [Rank 0] step=2360, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:09:55,796] [INFO] [timer.py:198:stop] 0/2360, RunningAvgSamplesPerSec=1.1619775489343325, CurrSamplesPerSec=1.1956618352392505, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:10:04,181] [INFO] [logging.py:68:log_dist] [Rank 0] step=2370, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:10:04,197] [INFO] [timer.py:198:stop] 0/2370, RunningAvgSamplesPerSec=1.1621140152871154, CurrSamplesPerSec=1.2024771412590347, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:10:12,587] [INFO] [logging.py:68:log_dist] [Rank 0] step=2380, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:10:12,603] [INFO] [timer.py:198:stop] 0/2380, RunningAvgSamplesPerSec=1.1622469989894588, CurrSamplesPerSec=1.1895356807896538, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:10:20,983] [INFO] [logging.py:68:log_dist] [Rank 0] step=2390, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:10:20,999] [INFO] [timer.py:198:stop] 0/2390, RunningAvgSamplesPerSec=1.16238387412805, CurrSamplesPerSec=1.1938156067126626, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:10:32,607] [INFO] [logging.py:68:log_dist] [Rank 0] step=2400, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:10:32,623] [INFO] [timer.py:198:stop] 0/2400, RunningAvgSamplesPerSec=1.1607032173584833, CurrSamplesPerSec=0.24530885210679532, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:10:40,995] [INFO] [logging.py:68:log_dist] [Rank 0] step=2410, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:10:41,011] [INFO] [timer.py:198:stop] 0/2410, RunningAvgSamplesPerSec=1.1608507775483243, CurrSamplesPerSec=1.1994424741154825, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:10:49,367] [INFO] [logging.py:68:log_dist] [Rank 0] step=2420, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:10:49,382] [INFO] [timer.py:198:stop] 0/2420, RunningAvgSamplesPerSec=1.1610051038717573, CurrSamplesPerSec=1.2030962711308228, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:10:57,760] [INFO] [logging.py:68:log_dist] [Rank 0] step=2430, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:10:57,775] [INFO] [timer.py:198:stop] 0/2430, RunningAvgSamplesPerSec=1.1611465681223172, CurrSamplesPerSec=1.191854489490475, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:11:06,141] [INFO] [logging.py:68:log_dist] [Rank 0] step=2440, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:11:06,157] [INFO] [timer.py:198:stop] 0/2440, RunningAvgSamplesPerSec=1.1612930930775778, CurrSamplesPerSec=1.2022803259967254, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:11:14,513] [INFO] [logging.py:68:log_dist] [Rank 0] step=2450, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:11:14,529] [INFO] [timer.py:198:stop] 0/2450, RunningAvgSamplesPerSec=1.161443880361637, CurrSamplesPerSec=1.1905807307944067, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:11:22,888] [INFO] [logging.py:68:log_dist] [Rank 0] step=2460, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:11:22,903] [INFO] [timer.py:198:stop] 0/2460, RunningAvgSamplesPerSec=1.161592492864129, CurrSamplesPerSec=1.2056523000160684, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:11:31,275] [INFO] [logging.py:68:log_dist] [Rank 0] step=2470, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:11:31,290] [INFO] [timer.py:198:stop] 0/2470, RunningAvgSamplesPerSec=1.1617326630467193, CurrSamplesPerSec=1.18804439236387, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:11:39,642] [INFO] [logging.py:68:log_dist] [Rank 0] step=2480, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:11:39,657] [INFO] [timer.py:198:stop] 0/2480, RunningAvgSamplesPerSec=1.1618826708808123, CurrSamplesPerSec=1.1931299583913997, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:11:47,999] [INFO] [logging.py:68:log_dist] [Rank 0] step=2490, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:11:48,015] [INFO] [timer.py:198:stop] 0/2490, RunningAvgSamplesPerSec=1.1620362599168466, CurrSamplesPerSec=1.2003490318487269, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:11:59,439] [INFO] [logging.py:68:log_dist] [Rank 0] step=2500, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:11:59,455] [INFO] [timer.py:198:stop] 0/2500, RunningAvgSamplesPerSec=1.1605248031722977, CurrSamplesPerSec=0.25584932311287134, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:12:07,797] [INFO] [logging.py:68:log_dist] [Rank 0] step=2510, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:12:07,813] [INFO] [timer.py:198:stop] 0/2510, RunningAvgSamplesPerSec=1.160683144450647, CurrSamplesPerSec=1.2004228937310641, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:12:16,177] [INFO] [logging.py:68:log_dist] [Rank 0] step=2520, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:12:16,193] [INFO] [timer.py:198:stop] 0/2520, RunningAvgSamplesPerSec=1.1608278065128157, CurrSamplesPerSec=1.1927848854338363, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:12:24,559] [INFO] [logging.py:68:log_dist] [Rank 0] step=2530, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:12:24,574] [INFO] [timer.py:198:stop] 0/2530, RunningAvgSamplesPerSec=1.1609706399942537, CurrSamplesPerSec=1.2010058625683857, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:12:32,911] [INFO] [logging.py:68:log_dist] [Rank 0] step=2540, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:12:32,926] [INFO] [timer.py:198:stop] 0/2540, RunningAvgSamplesPerSec=1.1611276167899633, CurrSamplesPerSec=1.204001104592383, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:12:41,274] [INFO] [logging.py:68:log_dist] [Rank 0] step=2550, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:12:41,290] [INFO] [timer.py:198:stop] 0/2550, RunningAvgSamplesPerSec=1.1612774506368242, CurrSamplesPerSec=1.202233458306103, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:12:49,661] [INFO] [logging.py:68:log_dist] [Rank 0] step=2560, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:12:49,677] [INFO] [timer.py:198:stop] 0/2560, RunningAvgSamplesPerSec=1.1614142847504396, CurrSamplesPerSec=1.1949784910914456, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:12:58,028] [INFO] [logging.py:68:log_dist] [Rank 0] step=2570, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:12:58,044] [INFO] [timer.py:198:stop] 0/2570, RunningAvgSamplesPerSec=1.161560390486842, CurrSamplesPerSec=1.200048524911362, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:13:06,415] [INFO] [logging.py:68:log_dist] [Rank 0] step=2580, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:13:06,431] [INFO] [timer.py:198:stop] 0/2580, RunningAvgSamplesPerSec=1.1616946752587423, CurrSamplesPerSec=1.190808555484136, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:13:14,795] [INFO] [logging.py:68:log_dist] [Rank 0] step=2590, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:13:14,811] [INFO] [timer.py:198:stop] 0/2590, RunningAvgSamplesPerSec=1.1618318314615896, CurrSamplesPerSec=1.1907754242220348, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:13:26,240] [INFO] [logging.py:68:log_dist] [Rank 0] step=2600, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:13:26,255] [INFO] [timer.py:198:stop] 0/2600, RunningAvgSamplesPerSec=1.1603771394952613, CurrSamplesPerSec=0.2553694009263502, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:13:34,606] [INFO] [logging.py:68:log_dist] [Rank 0] step=2610, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:13:34,622] [INFO] [timer.py:198:stop] 0/2610, RunningAvgSamplesPerSec=1.1605253304241379, CurrSamplesPerSec=1.203856999961252, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:13:42,972] [INFO] [logging.py:68:log_dist] [Rank 0] step=2620, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:13:42,988] [INFO] [timer.py:198:stop] 0/2620, RunningAvgSamplesPerSec=1.1606720079610466, CurrSamplesPerSec=1.2042040153876588, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:13:51,329] [INFO] [logging.py:68:log_dist] [Rank 0] step=2630, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:13:51,345] [INFO] [timer.py:198:stop] 0/2630, RunningAvgSamplesPerSec=1.1608222143009739, CurrSamplesPerSec=1.1969659654240603, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:13:59,704] [INFO] [logging.py:68:log_dist] [Rank 0] step=2640, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:13:59,720] [INFO] [timer.py:198:stop] 0/2640, RunningAvgSamplesPerSec=1.1609625547651767, CurrSamplesPerSec=1.2000983128023812, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:14:08,069] [INFO] [logging.py:68:log_dist] [Rank 0] step=2650, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:14:08,085] [INFO] [timer.py:198:stop] 0/2650, RunningAvgSamplesPerSec=1.1611063784998163, CurrSamplesPerSec=1.1949924499273483, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:14:16,425] [INFO] [logging.py:68:log_dist] [Rank 0] step=2660, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:14:16,441] [INFO] [timer.py:198:stop] 0/2660, RunningAvgSamplesPerSec=1.1612546079852948, CurrSamplesPerSec=1.198947723779493, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:14:24,785] [INFO] [logging.py:68:log_dist] [Rank 0] step=2670, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:14:24,801] [INFO] [timer.py:198:stop] 0/2670, RunningAvgSamplesPerSec=1.1613988783321605, CurrSamplesPerSec=1.195394672528635, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:14:33,168] [INFO] [logging.py:68:log_dist] [Rank 0] step=2680, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:14:33,183] [INFO] [timer.py:198:stop] 0/2680, RunningAvgSamplesPerSec=1.1615309457668996, CurrSamplesPerSec=1.2031677103978111, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:14:41,540] [INFO] [logging.py:68:log_dist] [Rank 0] step=2690, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:14:41,556] [INFO] [timer.py:198:stop] 0/2690, RunningAvgSamplesPerSec=1.1616667862578856, CurrSamplesPerSec=1.2008683189004805, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:14:52,893] [INFO] [logging.py:68:log_dist] [Rank 0] step=2700, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:14:52,909] [INFO] [timer.py:198:stop] 0/2700, RunningAvgSamplesPerSec=1.1603128237702753, CurrSamplesPerSec=0.2631750540680752, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:15:01,249] [INFO] [logging.py:68:log_dist] [Rank 0] step=2710, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:15:01,264] [INFO] [timer.py:198:stop] 0/2710, RunningAvgSamplesPerSec=1.160460752085973, CurrSamplesPerSec=1.2077293684442094, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:15:09,611] [INFO] [logging.py:68:log_dist] [Rank 0] step=2720, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:15:09,626] [INFO] [timer.py:198:stop] 0/2720, RunningAvgSamplesPerSec=1.1606043961833492, CurrSamplesPerSec=1.2039091777500184, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:15:17,976] [INFO] [logging.py:68:log_dist] [Rank 0] step=2730, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:15:17,991] [INFO] [timer.py:198:stop] 0/2730, RunningAvgSamplesPerSec=1.1607452557056932, CurrSamplesPerSec=1.2053643735749482, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:15:26,343] [INFO] [logging.py:68:log_dist] [Rank 0] step=2740, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:15:26,359] [INFO] [timer.py:198:stop] 0/2740, RunningAvgSamplesPerSec=1.1608839145861811, CurrSamplesPerSec=1.199843236424074, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:15:34,714] [INFO] [logging.py:68:log_dist] [Rank 0] step=2750, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:15:34,729] [INFO] [timer.py:198:stop] 0/2750, RunningAvgSamplesPerSec=1.161020415988092, CurrSamplesPerSec=1.1843548853921797, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:15:43,063] [INFO] [logging.py:68:log_dist] [Rank 0] step=2760, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:15:43,078] [INFO] [timer.py:198:stop] 0/2760, RunningAvgSamplesPerSec=1.1611667497188312, CurrSamplesPerSec=1.206357971854838, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:15:51,420] [INFO] [logging.py:68:log_dist] [Rank 0] step=2770, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:15:51,436] [INFO] [timer.py:198:stop] 0/2770, RunningAvgSamplesPerSec=1.1613072330001464, CurrSamplesPerSec=1.2017735906902247, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:15:59,796] [INFO] [logging.py:68:log_dist] [Rank 0] step=2780, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:15:59,811] [INFO] [timer.py:198:stop] 0/2780, RunningAvgSamplesPerSec=1.1614383547722833, CurrSamplesPerSec=1.1897013481737393, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:16:08,184] [INFO] [logging.py:68:log_dist] [Rank 0] step=2790, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:16:08,200] [INFO] [timer.py:198:stop] 0/2790, RunningAvgSamplesPerSec=1.1615621504972071, CurrSamplesPerSec=1.2036607690415864, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:16:19,808] [INFO] [logging.py:68:log_dist] [Rank 0] step=2800, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:16:19,824] [INFO] [timer.py:198:stop] 0/2800, RunningAvgSamplesPerSec=1.1601264998565453, CurrSamplesPerSec=0.24470803266784652, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:16:28,167] [INFO] [logging.py:68:log_dist] [Rank 0] step=2810, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:16:28,182] [INFO] [timer.py:198:stop] 0/2810, RunningAvgSamplesPerSec=1.1602691287643567, CurrSamplesPerSec=1.2073511243038229, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:16:36,526] [INFO] [logging.py:68:log_dist] [Rank 0] step=2820, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:16:36,542] [INFO] [timer.py:198:stop] 0/2820, RunningAvgSamplesPerSec=1.16040944227304, CurrSamplesPerSec=1.201004830873609, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:16:44,905] [INFO] [logging.py:68:log_dist] [Rank 0] step=2830, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:16:44,920] [INFO] [timer.py:198:stop] 0/2830, RunningAvgSamplesPerSec=1.1605398709428472, CurrSamplesPerSec=1.203423511825347, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:16:53,268] [INFO] [logging.py:68:log_dist] [Rank 0] step=2840, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:16:53,283] [INFO] [timer.py:198:stop] 0/2840, RunningAvgSamplesPerSec=1.1606765105829475, CurrSamplesPerSec=1.2033617090743938, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:17:01,622] [INFO] [logging.py:68:log_dist] [Rank 0] step=2850, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:17:01,638] [INFO] [timer.py:198:stop] 0/2850, RunningAvgSamplesPerSec=1.1608163405491723, CurrSamplesPerSec=1.205655419105925, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:17:09,985] [INFO] [logging.py:68:log_dist] [Rank 0] step=2860, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:17:10,001] [INFO] [timer.py:198:stop] 0/2860, RunningAvgSamplesPerSec=1.160952197086995, CurrSamplesPerSec=1.2018262767690695, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:17:18,370] [INFO] [logging.py:68:log_dist] [Rank 0] step=2870, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:17:18,386] [INFO] [timer.py:198:stop] 0/2870, RunningAvgSamplesPerSec=1.1610758888645851, CurrSamplesPerSec=1.1966681740435408, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:17:26,772] [INFO] [logging.py:68:log_dist] [Rank 0] step=2880, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:17:26,788] [INFO] [timer.py:198:stop] 0/2880, RunningAvgSamplesPerSec=1.161191054268428, CurrSamplesPerSec=1.1842224663527603, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:17:35,135] [INFO] [logging.py:68:log_dist] [Rank 0] step=2890, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:17:35,150] [INFO] [timer.py:198:stop] 0/2890, RunningAvgSamplesPerSec=1.1613240275812535, CurrSamplesPerSec=1.1909049170813757, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:17:46,722] [INFO] [logging.py:68:log_dist] [Rank 0] step=2900, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:17:46,738] [INFO] [timer.py:198:stop] 0/2900, RunningAvgSamplesPerSec=1.1599562405251571, CurrSamplesPerSec=0.2488507506245885, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:17:55,111] [INFO] [logging.py:68:log_dist] [Rank 0] step=2910, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:17:55,127] [INFO] [timer.py:198:stop] 0/2910, RunningAvgSamplesPerSec=1.1600803046093626, CurrSamplesPerSec=1.200198587851517, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:18:03,479] [INFO] [logging.py:68:log_dist] [Rank 0] step=2920, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:18:03,495] [INFO] [timer.py:198:stop] 0/2920, RunningAvgSamplesPerSec=1.1602124247832164, CurrSamplesPerSec=1.1953101867301688, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:18:11,844] [INFO] [logging.py:68:log_dist] [Rank 0] step=2930, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:18:11,859] [INFO] [timer.py:198:stop] 0/2930, RunningAvgSamplesPerSec=1.160345279402696, CurrSamplesPerSec=1.1947286490489721, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:18:20,207] [INFO] [logging.py:68:log_dist] [Rank 0] step=2940, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:18:20,223] [INFO] [timer.py:198:stop] 0/2940, RunningAvgSamplesPerSec=1.1604777580945194, CurrSamplesPerSec=1.2022972130454412, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:18:28,571] [INFO] [logging.py:68:log_dist] [Rank 0] step=2950, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:18:28,587] [INFO] [timer.py:198:stop] 0/2950, RunningAvgSamplesPerSec=1.1606099365180116, CurrSamplesPerSec=1.1992572806567587, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:18:36,937] [INFO] [logging.py:68:log_dist] [Rank 0] step=2960, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:18:36,952] [INFO] [timer.py:198:stop] 0/2960, RunningAvgSamplesPerSec=1.1607402187389992, CurrSamplesPerSec=1.2031011025045866, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:18:45,318] [INFO] [logging.py:68:log_dist] [Rank 0] step=2970, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:18:45,333] [INFO] [timer.py:198:stop] 0/2970, RunningAvgSamplesPerSec=1.1608619034667573, CurrSamplesPerSec=1.2023654553392595, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:18:53,699] [INFO] [logging.py:68:log_dist] [Rank 0] step=2980, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:18:53,715] [INFO] [timer.py:198:stop] 0/2980, RunningAvgSamplesPerSec=1.1609828509405704, CurrSamplesPerSec=1.2035440281828118, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:19:02,060] [INFO] [logging.py:68:log_dist] [Rank 0] step=2990, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:19:02,075] [INFO] [timer.py:198:stop] 0/2990, RunningAvgSamplesPerSec=1.1611121915348155, CurrSamplesPerSec=1.2009762880210009, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:19:13,510] [INFO] [logging.py:68:log_dist] [Rank 0] step=3000, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:19:13,526] [INFO] [timer.py:198:stop] 0/3000, RunningAvgSamplesPerSec=1.1598528599222053, CurrSamplesPerSec=0.25548369962801004, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:19:21,877] [INFO] [logging.py:68:log_dist] [Rank 0] step=3010, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:19:21,892] [INFO] [timer.py:198:stop] 0/3010, RunningAvgSamplesPerSec=1.1599825580783374, CurrSamplesPerSec=1.2028430234425544, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:19:30,229] [INFO] [logging.py:68:log_dist] [Rank 0] step=3020, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:19:30,245] [INFO] [timer.py:198:stop] 0/3020, RunningAvgSamplesPerSec=1.160117146423619, CurrSamplesPerSec=1.2066935856254486, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:19:38,601] [INFO] [logging.py:68:log_dist] [Rank 0] step=3030, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:19:38,617] [INFO] [timer.py:198:stop] 0/3030, RunningAvgSamplesPerSec=1.1602424037183252, CurrSamplesPerSec=1.1980408852520545, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:19:46,960] [INFO] [logging.py:68:log_dist] [Rank 0] step=3040, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:19:46,976] [INFO] [timer.py:198:stop] 0/3040, RunningAvgSamplesPerSec=1.1603726505462368, CurrSamplesPerSec=1.2034939539859046, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:19:55,332] [INFO] [logging.py:68:log_dist] [Rank 0] step=3050, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:19:55,348] [INFO] [timer.py:198:stop] 0/3050, RunningAvgSamplesPerSec=1.160496457952866, CurrSamplesPerSec=1.2042116215512417, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:20:03,734] [INFO] [logging.py:68:log_dist] [Rank 0] step=3060, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:20:03,750] [INFO] [timer.py:198:stop] 0/3060, RunningAvgSamplesPerSec=1.1606065369720608, CurrSamplesPerSec=1.196620035913117, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:20:12,096] [INFO] [logging.py:68:log_dist] [Rank 0] step=3070, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:20:12,112] [INFO] [timer.py:198:stop] 0/3070, RunningAvgSamplesPerSec=1.160733372111958, CurrSamplesPerSec=1.199608167841448, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:20:20,460] [INFO] [logging.py:68:log_dist] [Rank 0] step=3080, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:20:20,476] [INFO] [timer.py:198:stop] 0/3080, RunningAvgSamplesPerSec=1.1608584835405131, CurrSamplesPerSec=1.191090245666566, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:20:28,820] [INFO] [logging.py:68:log_dist] [Rank 0] step=3090, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:20:28,836] [INFO] [timer.py:198:stop] 0/3090, RunningAvgSamplesPerSec=1.1609842903806298, CurrSamplesPerSec=1.2027995611275557, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:20:40,166] [INFO] [logging.py:68:log_dist] [Rank 0] step=3100, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:20:40,182] [INFO] [timer.py:198:stop] 0/3100, RunningAvgSamplesPerSec=1.15981127473471, CurrSamplesPerSec=0.26153977772850273, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:20:48,526] [INFO] [logging.py:68:log_dist] [Rank 0] step=3110, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:20:48,542] [INFO] [timer.py:198:stop] 0/3110, RunningAvgSamplesPerSec=1.1599400820858916, CurrSamplesPerSec=1.2006486582421247, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:20:56,885] [INFO] [logging.py:68:log_dist] [Rank 0] step=3120, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:20:56,901] [INFO] [timer.py:198:stop] 0/3120, RunningAvgSamplesPerSec=1.1600678478822535, CurrSamplesPerSec=1.1829699644993712, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:21:05,246] [INFO] [logging.py:68:log_dist] [Rank 0] step=3130, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:21:05,261] [INFO] [timer.py:198:stop] 0/3130, RunningAvgSamplesPerSec=1.160194157145367, CurrSamplesPerSec=1.201182996445111, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:21:13,605] [INFO] [logging.py:68:log_dist] [Rank 0] step=3140, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:21:13,620] [INFO] [timer.py:198:stop] 0/3140, RunningAvgSamplesPerSec=1.1603206254166496, CurrSamplesPerSec=1.2011217675657053, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:21:21,985] [INFO] [logging.py:68:log_dist] [Rank 0] step=3150, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:21:22,001] [INFO] [timer.py:198:stop] 0/3150, RunningAvgSamplesPerSec=1.1604367510823739, CurrSamplesPerSec=1.1974956624620314, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:21:30,346] [INFO] [logging.py:68:log_dist] [Rank 0] step=3160, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:21:30,361] [INFO] [timer.py:198:stop] 0/3160, RunningAvgSamplesPerSec=1.1605610995757096, CurrSamplesPerSec=1.2037236388849746, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:21:38,707] [INFO] [logging.py:68:log_dist] [Rank 0] step=3170, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:21:38,723] [INFO] [timer.py:198:stop] 0/3170, RunningAvgSamplesPerSec=1.160683883717105, CurrSamplesPerSec=1.2019233965486609, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:21:47,090] [INFO] [logging.py:68:log_dist] [Rank 0] step=3180, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:21:47,106] [INFO] [timer.py:198:stop] 0/3180, RunningAvgSamplesPerSec=1.1607970743181923, CurrSamplesPerSec=1.1934789666712289, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:21:55,460] [INFO] [logging.py:68:log_dist] [Rank 0] step=3190, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:21:55,476] [INFO] [timer.py:198:stop] 0/3190, RunningAvgSamplesPerSec=1.1609152228838684, CurrSamplesPerSec=1.2023689021252637, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:22:06,916] [INFO] [logging.py:68:log_dist] [Rank 0] step=3200, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:22:06,932] [INFO] [timer.py:198:stop] 0/3200, RunningAvgSamplesPerSec=1.1597327538552546, CurrSamplesPerSec=0.25515375975638416, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:22:15,283] [INFO] [logging.py:68:log_dist] [Rank 0] step=3210, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:22:15,299] [INFO] [timer.py:198:stop] 0/3210, RunningAvgSamplesPerSec=1.1598545830188864, CurrSamplesPerSec=1.2032474424955963, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:22:23,651] [INFO] [logging.py:68:log_dist] [Rank 0] step=3220, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:22:23,667] [INFO] [timer.py:198:stop] 0/3220, RunningAvgSamplesPerSec=1.159974999287975, CurrSamplesPerSec=1.2049838011237082, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:22:32,035] [INFO] [logging.py:68:log_dist] [Rank 0] step=3230, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:22:32,050] [INFO] [timer.py:198:stop] 0/3230, RunningAvgSamplesPerSec=1.1600881239471699, CurrSamplesPerSec=1.2033945086265714, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:22:40,410] [INFO] [logging.py:68:log_dist] [Rank 0] step=3240, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:22:40,425] [INFO] [timer.py:198:stop] 0/3240, RunningAvgSamplesPerSec=1.1602039689349772, CurrSamplesPerSec=1.1954369199066863, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:22:48,796] [INFO] [logging.py:68:log_dist] [Rank 0] step=3250, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:22:48,811] [INFO] [timer.py:198:stop] 0/3250, RunningAvgSamplesPerSec=1.1603145194460447, CurrSamplesPerSec=1.1916577502427748, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:22:57,169] [INFO] [logging.py:68:log_dist] [Rank 0] step=3260, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:22:57,184] [INFO] [timer.py:198:stop] 0/3260, RunningAvgSamplesPerSec=1.1604307334207031, CurrSamplesPerSec=1.1957426209439923, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:23:05,540] [INFO] [logging.py:68:log_dist] [Rank 0] step=3270, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:23:05,556] [INFO] [timer.py:198:stop] 0/3270, RunningAvgSamplesPerSec=1.1605460200717082, CurrSamplesPerSec=1.1949114251348523, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:23:13,916] [INFO] [logging.py:68:log_dist] [Rank 0] step=3280, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:23:13,932] [INFO] [timer.py:198:stop] 0/3280, RunningAvgSamplesPerSec=1.1606588561012878, CurrSamplesPerSec=1.198098035821506, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:23:22,256] [INFO] [logging.py:68:log_dist] [Rank 0] step=3290, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:23:22,271] [INFO] [timer.py:198:stop] 0/3290, RunningAvgSamplesPerSec=1.1607860237315724, CurrSamplesPerSec=1.2085220851920384, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:23:33,902] [INFO] [logging.py:68:log_dist] [Rank 0] step=3300, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:23:33,918] [INFO] [timer.py:198:stop] 0/3300, RunningAvgSamplesPerSec=1.1595624371650486, CurrSamplesPerSec=0.24407986291550363, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:23:42,276] [INFO] [logging.py:68:log_dist] [Rank 0] step=3310, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:23:42,291] [INFO] [timer.py:198:stop] 0/3310, RunningAvgSamplesPerSec=1.1596785568106232, CurrSamplesPerSec=1.2059143598349793, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:23:50,647] [INFO] [logging.py:68:log_dist] [Rank 0] step=3320, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:23:50,662] [INFO] [timer.py:198:stop] 0/3320, RunningAvgSamplesPerSec=1.1597945293353495, CurrSamplesPerSec=1.207532568483784, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:23:59,014] [INFO] [logging.py:68:log_dist] [Rank 0] step=3330, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:23:59,030] [INFO] [timer.py:198:stop] 0/3330, RunningAvgSamplesPerSec=1.1599110130385182, CurrSamplesPerSec=1.2032350160190208, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:24:07,400] [INFO] [logging.py:68:log_dist] [Rank 0] step=3340, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:24:07,416] [INFO] [timer.py:198:stop] 0/3340, RunningAvgSamplesPerSec=1.160019886411588, CurrSamplesPerSec=1.1899170210767998, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:24:15,774] [INFO] [logging.py:68:log_dist] [Rank 0] step=3350, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:24:15,790] [INFO] [timer.py:198:stop] 0/3350, RunningAvgSamplesPerSec=1.1601326183599512, CurrSamplesPerSec=1.19715250200724, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:24:24,121] [INFO] [logging.py:68:log_dist] [Rank 0] step=3360, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:24:24,137] [INFO] [timer.py:198:stop] 0/3360, RunningAvgSamplesPerSec=1.1602561180977733, CurrSamplesPerSec=1.2055469535702614, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:24:32,490] [INFO] [logging.py:68:log_dist] [Rank 0] step=3370, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:24:32,506] [INFO] [timer.py:198:stop] 0/3370, RunningAvgSamplesPerSec=1.1603695369013376, CurrSamplesPerSec=1.2019233965486609, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:24:40,841] [INFO] [logging.py:68:log_dist] [Rank 0] step=3380, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:24:40,857] [INFO] [timer.py:198:stop] 0/3380, RunningAvgSamplesPerSec=1.160489788456834, CurrSamplesPerSec=1.2085986976664844, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:24:49,210] [INFO] [logging.py:68:log_dist] [Rank 0] step=3390, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:24:49,226] [INFO] [timer.py:198:stop] 0/3390, RunningAvgSamplesPerSec=1.1606018446736297, CurrSamplesPerSec=1.200444195125302, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:25:00,729] [INFO] [logging.py:68:log_dist] [Rank 0] step=3400, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:25:00,746] [INFO] [timer.py:198:stop] 0/3400, RunningAvgSamplesPerSec=1.1594656790526658, CurrSamplesPerSec=0.25071801235315394, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:25:09,109] [INFO] [logging.py:68:log_dist] [Rank 0] step=3410, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:25:09,125] [INFO] [timer.py:198:stop] 0/3410, RunningAvgSamplesPerSec=1.1595771052487034, CurrSamplesPerSec=1.2021783245244946, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:25:17,485] [INFO] [logging.py:68:log_dist] [Rank 0] step=3420, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:25:17,501] [INFO] [timer.py:198:stop] 0/3420, RunningAvgSamplesPerSec=1.1596881828604064, CurrSamplesPerSec=1.2028326749874247, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:25:25,851] [INFO] [logging.py:68:log_dist] [Rank 0] step=3430, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:25:25,866] [INFO] [timer.py:198:stop] 0/3430, RunningAvgSamplesPerSec=1.1598027497315453, CurrSamplesPerSec=1.2021221621841613, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:25:34,222] [INFO] [logging.py:68:log_dist] [Rank 0] step=3440, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:25:34,238] [INFO] [timer.py:198:stop] 0/3440, RunningAvgSamplesPerSec=1.1599144202805962, CurrSamplesPerSec=1.2053453219237114, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:25:42,613] [INFO] [logging.py:68:log_dist] [Rank 0] step=3450, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:25:42,629] [INFO] [timer.py:198:stop] 0/3450, RunningAvgSamplesPerSec=1.160017625896308, CurrSamplesPerSec=1.1915368944844704, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:25:51,000] [INFO] [logging.py:68:log_dist] [Rank 0] step=3460, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:25:51,015] [INFO] [timer.py:198:stop] 0/3460, RunningAvgSamplesPerSec=1.1601226792933939, CurrSamplesPerSec=1.2013846142829523, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:25:59,360] [INFO] [logging.py:68:log_dist] [Rank 0] step=3470, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:25:59,376] [INFO] [timer.py:198:stop] 0/3470, RunningAvgSamplesPerSec=1.1602365506377486, CurrSamplesPerSec=1.2008583482093649, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:26:07,701] [INFO] [logging.py:68:log_dist] [Rank 0] step=3480, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:26:07,717] [INFO] [timer.py:198:stop] 0/3480, RunningAvgSamplesPerSec=1.1603572930524086, CurrSamplesPerSec=1.2082561063787496, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:26:16,094] [INFO] [logging.py:68:log_dist] [Rank 0] step=3490, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:26:16,110] [INFO] [timer.py:198:stop] 0/3490, RunningAvgSamplesPerSec=1.1604574686067521, CurrSamplesPerSec=1.2047186852571676, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:26:27,505] [INFO] [logging.py:68:log_dist] [Rank 0] step=3500, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:26:27,520] [INFO] [timer.py:198:stop] 0/3500, RunningAvgSamplesPerSec=1.1593962421626778, CurrSamplesPerSec=0.2580222239829838, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:26:35,879] [INFO] [logging.py:68:log_dist] [Rank 0] step=3510, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:26:35,895] [INFO] [timer.py:198:stop] 0/3510, RunningAvgSamplesPerSec=1.1595059179488945, CurrSamplesPerSec=1.2013385045765848, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:26:44,258] [INFO] [logging.py:68:log_dist] [Rank 0] step=3520, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:26:44,274] [INFO] [timer.py:198:stop] 0/3520, RunningAvgSamplesPerSec=1.159612877264, CurrSamplesPerSec=1.203465638008624, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:26:52,627] [INFO] [logging.py:68:log_dist] [Rank 0] step=3530, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:26:52,643] [INFO] [timer.py:198:stop] 0/3530, RunningAvgSamplesPerSec=1.159722884604132, CurrSamplesPerSec=1.2040667752181171, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:27:00,990] [INFO] [logging.py:68:log_dist] [Rank 0] step=3540, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:27:01,006] [INFO] [timer.py:198:stop] 0/3540, RunningAvgSamplesPerSec=1.159834450181443, CurrSamplesPerSec=1.2020170872370743, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:27:09,349] [INFO] [logging.py:68:log_dist] [Rank 0] step=3550, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:27:09,364] [INFO] [timer.py:198:stop] 0/3550, RunningAvgSamplesPerSec=1.1599472924555758, CurrSamplesPerSec=1.1970769921011202, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:27:17,717] [INFO] [logging.py:68:log_dist] [Rank 0] step=3560, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:27:17,733] [INFO] [timer.py:198:stop] 0/3560, RunningAvgSamplesPerSec=1.1600563211162345, CurrSamplesPerSec=1.1879010539379578, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:27:26,091] [INFO] [logging.py:68:log_dist] [Rank 0] step=3570, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:27:26,107] [INFO] [timer.py:198:stop] 0/3570, RunningAvgSamplesPerSec=1.1601622592489211, CurrSamplesPerSec=1.199738559474144, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:27:34,483] [INFO] [logging.py:68:log_dist] [Rank 0] step=3580, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:27:34,498] [INFO] [timer.py:198:stop] 0/3580, RunningAvgSamplesPerSec=1.1602611959205706, CurrSamplesPerSec=1.191633373998592, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:27:42,852] [INFO] [logging.py:68:log_dist] [Rank 0] step=3590, skipped=22, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:27:42,868] [INFO] [timer.py:198:stop] 0/3590, RunningAvgSamplesPerSec=1.1603677431607546, CurrSamplesPerSec=1.2022389719624025, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:27:49,950] [INFO] [stage_1_and_2.py:1720:step] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384.0, reducing to 16384.0\n",
"[2023-08-29 22:27:53,816] [INFO] [logging.py:68:log_dist] [Rank 0] step=3600, skipped=23, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:27:53,832] [INFO] [timer.py:198:stop] 0/3600, RunningAvgSamplesPerSec=1.1595030163314841, CurrSamplesPerSec=0.2578660347172944, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:28:02,190] [INFO] [logging.py:68:log_dist] [Rank 0] step=3610, skipped=23, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:28:02,205] [INFO] [timer.py:198:stop] 0/3610, RunningAvgSamplesPerSec=1.1596096167397183, CurrSamplesPerSec=1.2048315009865747, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:28:10,554] [INFO] [logging.py:68:log_dist] [Rank 0] step=3620, skipped=23, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:28:10,570] [INFO] [timer.py:198:stop] 0/3620, RunningAvgSamplesPerSec=1.1597190271770774, CurrSamplesPerSec=1.2061033492966058, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:28:18,924] [INFO] [logging.py:68:log_dist] [Rank 0] step=3630, skipped=23, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:28:18,940] [INFO] [timer.py:198:stop] 0/3630, RunningAvgSamplesPerSec=1.1598254716463734, CurrSamplesPerSec=1.1956904668983945, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:28:27,313] [INFO] [logging.py:68:log_dist] [Rank 0] step=3640, skipped=23, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:28:27,329] [INFO] [timer.py:198:stop] 0/3640, RunningAvgSamplesPerSec=1.15992461251603, CurrSamplesPerSec=1.204374831481581, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:28:35,692] [INFO] [logging.py:68:log_dist] [Rank 0] step=3650, skipped=23, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:28:35,707] [INFO] [timer.py:198:stop] 0/3650, RunningAvgSamplesPerSec=1.1600270300968827, CurrSamplesPerSec=1.1945425267898184, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:28:44,053] [INFO] [logging.py:68:log_dist] [Rank 0] step=3660, skipped=23, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:28:44,068] [INFO] [timer.py:198:stop] 0/3660, RunningAvgSamplesPerSec=1.1601355988854705, CurrSamplesPerSec=1.201420059224513, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:28:52,407] [INFO] [logging.py:68:log_dist] [Rank 0] step=3670, skipped=23, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:28:52,422] [INFO] [timer.py:198:stop] 0/3670, RunningAvgSamplesPerSec=1.1602460048276162, CurrSamplesPerSec=1.2052632335150388, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:29:00,793] [INFO] [logging.py:68:log_dist] [Rank 0] step=3680, skipped=23, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:29:00,809] [INFO] [timer.py:198:stop] 0/3680, RunningAvgSamplesPerSec=1.1603444581023579, CurrSamplesPerSec=1.1824297028268282, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:29:09,167] [INFO] [logging.py:68:log_dist] [Rank 0] step=3690, skipped=23, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:29:09,183] [INFO] [timer.py:198:stop] 0/3690, RunningAvgSamplesPerSec=1.160446326612676, CurrSamplesPerSec=1.1895879740635298, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:29:20,723] [INFO] [logging.py:68:log_dist] [Rank 0] step=3700, skipped=23, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:29:20,739] [INFO] [timer.py:198:stop] 0/3700, RunningAvgSamplesPerSec=1.1593896275007727, CurrSamplesPerSec=0.24905793567906656, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:29:29,122] [INFO] [logging.py:68:log_dist] [Rank 0] step=3710, skipped=23, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:29:29,137] [INFO] [timer.py:198:stop] 0/3710, RunningAvgSamplesPerSec=1.1594850960179062, CurrSamplesPerSec=1.1927295973137377, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:29:37,538] [INFO] [logging.py:68:log_dist] [Rank 0] step=3720, skipped=23, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:29:37,554] [INFO] [timer.py:198:stop] 0/3720, RunningAvgSamplesPerSec=1.1595731194077756, CurrSamplesPerSec=1.203957558548742, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:29:45,901] [INFO] [logging.py:68:log_dist] [Rank 0] step=3730, skipped=23, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:29:45,917] [INFO] [timer.py:198:stop] 0/3730, RunningAvgSamplesPerSec=1.1596796456769172, CurrSamplesPerSec=1.1984119478211428, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:29:54,271] [INFO] [logging.py:68:log_dist] [Rank 0] step=3740, skipped=23, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:29:54,287] [INFO] [timer.py:198:stop] 0/3740, RunningAvgSamplesPerSec=1.1597833228147931, CurrSamplesPerSec=1.2027367877727755, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:30:02,646] [INFO] [logging.py:68:log_dist] [Rank 0] step=3750, skipped=23, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:30:02,661] [INFO] [timer.py:198:stop] 0/3750, RunningAvgSamplesPerSec=1.1598846235145097, CurrSamplesPerSec=1.1918223159799457, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:30:11,013] [INFO] [logging.py:68:log_dist] [Rank 0] step=3760, skipped=23, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:30:11,029] [INFO] [timer.py:198:stop] 0/3760, RunningAvgSamplesPerSec=1.1599881172318394, CurrSamplesPerSec=1.1935835731093785, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:30:18,920] [INFO] [stage_1_and_2.py:1720:step] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384.0, reducing to 8192.0\n",
"[2023-08-29 22:30:18,921] [INFO] [logging.py:68:log_dist] [Rank 0] step=3770, skipped=24, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:30:18,921] [INFO] [timer.py:198:stop] 0/3770, RunningAvgSamplesPerSec=1.160260815946333, CurrSamplesPerSec=2.7685869348538117, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:30:27,286] [INFO] [logging.py:68:log_dist] [Rank 0] step=3780, skipped=24, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:30:27,301] [INFO] [timer.py:198:stop] 0/3780, RunningAvgSamplesPerSec=1.1603579141579747, CurrSamplesPerSec=1.2035032778310046, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:30:35,657] [INFO] [logging.py:68:log_dist] [Rank 0] step=3790, skipped=24, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:30:35,672] [INFO] [timer.py:198:stop] 0/3790, RunningAvgSamplesPerSec=1.160458059451504, CurrSamplesPerSec=1.2004191145304008, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:30:47,309] [INFO] [logging.py:68:log_dist] [Rank 0] step=3800, skipped=24, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:30:47,325] [INFO] [timer.py:198:stop] 0/3800, RunningAvgSamplesPerSec=1.1593950473447734, CurrSamplesPerSec=0.24276087899692966, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:30:55,679] [INFO] [logging.py:68:log_dist] [Rank 0] step=3810, skipped=24, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:30:55,694] [INFO] [timer.py:198:stop] 0/3810, RunningAvgSamplesPerSec=1.1594982467472426, CurrSamplesPerSec=1.2029254724954708, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:31:04,023] [INFO] [logging.py:68:log_dist] [Rank 0] step=3820, skipped=24, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:31:04,039] [INFO] [timer.py:198:stop] 0/3820, RunningAvgSamplesPerSec=1.1596090470459317, CurrSamplesPerSec=1.2051049764584836, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:31:12,405] [INFO] [logging.py:68:log_dist] [Rank 0] step=3830, skipped=24, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:31:12,420] [INFO] [timer.py:198:stop] 0/3830, RunningAvgSamplesPerSec=1.1597069428137055, CurrSamplesPerSec=1.2008425329699572, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:31:20,782] [INFO] [logging.py:68:log_dist] [Rank 0] step=3840, skipped=24, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:31:20,797] [INFO] [timer.py:198:stop] 0/3840, RunningAvgSamplesPerSec=1.1598057832673943, CurrSamplesPerSec=1.2016816593808894, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:31:29,148] [INFO] [logging.py:68:log_dist] [Rank 0] step=3850, skipped=24, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:31:29,164] [INFO] [timer.py:198:stop] 0/3850, RunningAvgSamplesPerSec=1.1599073235180541, CurrSamplesPerSec=1.2029037379660985, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:31:37,504] [INFO] [logging.py:68:log_dist] [Rank 0] step=3860, skipped=24, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:31:37,520] [INFO] [timer.py:198:stop] 0/3860, RunningAvgSamplesPerSec=1.1600126558494113, CurrSamplesPerSec=1.206529746684513, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:31:46,033] [INFO] [logging.py:68:log_dist] [Rank 0] step=3870, skipped=24, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:31:46,049] [INFO] [timer.py:198:stop] 0/3870, RunningAvgSamplesPerSec=1.1600565457882341, CurrSamplesPerSec=1.2028664806000933, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:31:54,406] [INFO] [logging.py:68:log_dist] [Rank 0] step=3880, skipped=24, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:31:54,422] [INFO] [timer.py:198:stop] 0/3880, RunningAvgSamplesPerSec=1.1601546529144418, CurrSamplesPerSec=1.1971699287230313, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:32:02,757] [INFO] [logging.py:68:log_dist] [Rank 0] step=3890, skipped=24, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:32:02,773] [INFO] [timer.py:198:stop] 0/3890, RunningAvgSamplesPerSec=1.160259651473614, CurrSamplesPerSec=1.2057850488891138, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:32:14,167] [INFO] [logging.py:68:log_dist] [Rank 0] step=3900, skipped=24, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:32:14,183] [INFO] [timer.py:198:stop] 0/3900, RunningAvgSamplesPerSec=1.1593085295706727, CurrSamplesPerSec=0.25711147671884554, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:32:22,521] [INFO] [logging.py:68:log_dist] [Rank 0] step=3910, skipped=24, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:32:22,537] [INFO] [timer.py:198:stop] 0/3910, RunningAvgSamplesPerSec=1.1594142920447512, CurrSamplesPerSec=1.208289869451351, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:32:30,885] [INFO] [logging.py:68:log_dist] [Rank 0] step=3920, skipped=24, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:32:30,901] [INFO] [timer.py:198:stop] 0/3920, RunningAvgSamplesPerSec=1.159515874717374, CurrSamplesPerSec=1.200124410174738, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:32:39,242] [INFO] [logging.py:68:log_dist] [Rank 0] step=3930, skipped=24, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:32:39,257] [INFO] [timer.py:198:stop] 0/3930, RunningAvgSamplesPerSec=1.1596192818098925, CurrSamplesPerSec=1.2034746161010592, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:32:47,614] [INFO] [logging.py:68:log_dist] [Rank 0] step=3940, skipped=24, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:32:47,630] [INFO] [timer.py:198:stop] 0/3940, RunningAvgSamplesPerSec=1.1597171735970315, CurrSamplesPerSec=1.199625666124386, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:32:55,992] [INFO] [logging.py:68:log_dist] [Rank 0] step=3950, skipped=24, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:32:56,008] [INFO] [timer.py:198:stop] 0/3950, RunningAvgSamplesPerSec=1.1598126609543384, CurrSamplesPerSec=1.1739539929595855, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:33:04,362] [INFO] [logging.py:68:log_dist] [Rank 0] step=3960, skipped=24, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:33:04,378] [INFO] [timer.py:198:stop] 0/3960, RunningAvgSamplesPerSec=1.1599106484582253, CurrSamplesPerSec=1.2022468979319356, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:33:12,716] [INFO] [logging.py:68:log_dist] [Rank 0] step=3970, skipped=24, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:33:12,732] [INFO] [timer.py:198:stop] 0/3970, RunningAvgSamplesPerSec=1.160013313893012, CurrSamplesPerSec=1.203174958333692, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:33:21,089] [INFO] [logging.py:68:log_dist] [Rank 0] step=3980, skipped=24, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:33:21,105] [INFO] [timer.py:198:stop] 0/3980, RunningAvgSamplesPerSec=1.1601089409497143, CurrSamplesPerSec=1.1984410537745014, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:33:29,475] [INFO] [logging.py:68:log_dist] [Rank 0] step=3990, skipped=24, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:33:29,491] [INFO] [timer.py:198:stop] 0/3990, RunningAvgSamplesPerSec=1.1601997260092032, CurrSamplesPerSec=1.193441272092286, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:33:41,060] [INFO] [logging.py:68:log_dist] [Rank 0] step=4000, skipped=24, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:33:41,076] [INFO] [timer.py:198:stop] 0/4000, RunningAvgSamplesPerSec=1.1592138863709063, CurrSamplesPerSec=0.24738025369206257, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:33:49,437] [INFO] [logging.py:68:log_dist] [Rank 0] step=4010, skipped=24, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:33:49,452] [INFO] [timer.py:198:stop] 0/4010, RunningAvgSamplesPerSec=1.1593097487793012, CurrSamplesPerSec=1.2065654958785859, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:33:57,818] [INFO] [logging.py:68:log_dist] [Rank 0] step=4020, skipped=24, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:33:57,834] [INFO] [timer.py:198:stop] 0/4020, RunningAvgSamplesPerSec=1.1594030315827628, CurrSamplesPerSec=1.182891228992441, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:34:06,176] [INFO] [logging.py:68:log_dist] [Rank 0] step=4030, skipped=24, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:34:06,192] [INFO] [timer.py:198:stop] 0/4030, RunningAvgSamplesPerSec=1.1595038398132245, CurrSamplesPerSec=1.2018920547626464, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:34:14,560] [INFO] [logging.py:68:log_dist] [Rank 0] step=4040, skipped=24, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:34:14,575] [INFO] [timer.py:198:stop] 0/4040, RunningAvgSamplesPerSec=1.1595956622877126, CurrSamplesPerSec=1.1990200422513873, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:34:22,950] [INFO] [logging.py:68:log_dist] [Rank 0] step=4050, skipped=24, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:34:22,966] [INFO] [timer.py:198:stop] 0/4050, RunningAvgSamplesPerSec=1.159684832084364, CurrSamplesPerSec=1.1946469794911012, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:34:31,336] [INFO] [logging.py:68:log_dist] [Rank 0] step=4060, skipped=24, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:34:31,352] [INFO] [timer.py:198:stop] 0/4060, RunningAvgSamplesPerSec=1.159775244592649, CurrSamplesPerSec=1.195480873977402, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:34:39,707] [INFO] [logging.py:68:log_dist] [Rank 0] step=4070, skipped=24, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:34:39,723] [INFO] [timer.py:198:stop] 0/4070, RunningAvgSamplesPerSec=1.1598699520271276, CurrSamplesPerSec=1.1957930749383472, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:34:48,074] [INFO] [logging.py:68:log_dist] [Rank 0] step=4080, skipped=24, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:34:48,090] [INFO] [timer.py:198:stop] 0/4080, RunningAvgSamplesPerSec=1.1599654752731383, CurrSamplesPerSec=1.2002033959803253, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:34:56,437] [INFO] [logging.py:68:log_dist] [Rank 0] step=4090, skipped=24, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:34:56,452] [INFO] [timer.py:198:stop] 0/4090, RunningAvgSamplesPerSec=1.1600620016954772, CurrSamplesPerSec=1.2046041608388998, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:35:07,980] [INFO] [logging.py:68:log_dist] [Rank 0] step=4100, skipped=24, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:35:07,996] [INFO] [timer.py:198:stop] 0/4100, RunningAvgSamplesPerSec=1.1591142157496206, CurrSamplesPerSec=0.24908164481500852, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:35:16,360] [INFO] [logging.py:68:log_dist] [Rank 0] step=4110, skipped=24, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:35:16,376] [INFO] [timer.py:198:stop] 0/4110, RunningAvgSamplesPerSec=1.159206927486573, CurrSamplesPerSec=1.1989494373864453, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:35:24,727] [INFO] [logging.py:68:log_dist] [Rank 0] step=4120, skipped=24, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:35:24,743] [INFO] [timer.py:198:stop] 0/4120, RunningAvgSamplesPerSec=1.1593030846988435, CurrSamplesPerSec=1.206296214276059, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:35:33,083] [INFO] [logging.py:68:log_dist] [Rank 0] step=4130, skipped=24, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:35:33,099] [INFO] [timer.py:198:stop] 0/4130, RunningAvgSamplesPerSec=1.1594022046278096, CurrSamplesPerSec=1.19621937655078, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:35:41,446] [INFO] [logging.py:68:log_dist] [Rank 0] step=4140, skipped=24, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:35:41,462] [INFO] [timer.py:198:stop] 0/4140, RunningAvgSamplesPerSec=1.1594990137500525, CurrSamplesPerSec=1.2036897850763042, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:35:49,807] [INFO] [logging.py:68:log_dist] [Rank 0] step=4150, skipped=24, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:35:49,823] [INFO] [timer.py:198:stop] 0/4150, RunningAvgSamplesPerSec=1.1595955109588914, CurrSamplesPerSec=1.2097696383673966, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:35:58,190] [INFO] [logging.py:68:log_dist] [Rank 0] step=4160, skipped=24, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:35:58,206] [INFO] [timer.py:198:stop] 0/4160, RunningAvgSamplesPerSec=1.1596851714574568, CurrSamplesPerSec=1.2078163146476133, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:36:06,557] [INFO] [logging.py:68:log_dist] [Rank 0] step=4170, skipped=24, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:36:06,573] [INFO] [timer.py:198:stop] 0/4170, RunningAvgSamplesPerSec=1.1597796112042587, CurrSamplesPerSec=1.1978438089471932, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:36:11,956] [INFO] [stage_1_and_2.py:1720:step] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 8192.0, reducing to 4096.0\n",
"[2023-08-29 22:36:14,466] [INFO] [logging.py:68:log_dist] [Rank 0] step=4180, skipped=25, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:36:14,481] [INFO] [timer.py:198:stop] 0/4180, RunningAvgSamplesPerSec=1.1600223747488714, CurrSamplesPerSec=1.194919935626004, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:36:22,875] [INFO] [logging.py:68:log_dist] [Rank 0] step=4190, skipped=25, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:36:22,891] [INFO] [timer.py:198:stop] 0/4190, RunningAvgSamplesPerSec=1.1601013942166498, CurrSamplesPerSec=1.1903134690638837, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:36:34,309] [INFO] [logging.py:68:log_dist] [Rank 0] step=4200, skipped=25, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:36:34,324] [INFO] [timer.py:198:stop] 0/4200, RunningAvgSamplesPerSec=1.159211443501191, CurrSamplesPerSec=0.256776362957264, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:36:42,673] [INFO] [logging.py:68:log_dist] [Rank 0] step=4210, skipped=25, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:36:42,688] [INFO] [timer.py:198:stop] 0/4210, RunningAvgSamplesPerSec=1.1593070470963478, CurrSamplesPerSec=1.2034183325801755, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:36:51,041] [INFO] [logging.py:68:log_dist] [Rank 0] step=4220, skipped=25, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:36:51,057] [INFO] [timer.py:198:stop] 0/4220, RunningAvgSamplesPerSec=1.1594001950009687, CurrSamplesPerSec=1.1992775119740353, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:36:59,397] [INFO] [logging.py:68:log_dist] [Rank 0] step=4230, skipped=25, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:36:59,412] [INFO] [timer.py:198:stop] 0/4230, RunningAvgSamplesPerSec=1.159496957136286, CurrSamplesPerSec=1.1975117315666794, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:37:07,771] [INFO] [logging.py:68:log_dist] [Rank 0] step=4240, skipped=25, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:37:07,787] [INFO] [timer.py:198:stop] 0/4240, RunningAvgSamplesPerSec=1.1595874072221093, CurrSamplesPerSec=1.2020570480031365, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:37:16,143] [INFO] [logging.py:68:log_dist] [Rank 0] step=4250, skipped=25, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:37:16,158] [INFO] [timer.py:198:stop] 0/4250, RunningAvgSamplesPerSec=1.1596785233240532, CurrSamplesPerSec=1.206839064684072, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:37:24,522] [INFO] [logging.py:68:log_dist] [Rank 0] step=4260, skipped=25, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:37:24,537] [INFO] [timer.py:198:stop] 0/4260, RunningAvgSamplesPerSec=1.1597671692907234, CurrSamplesPerSec=1.2032926632295653, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:37:32,895] [INFO] [logging.py:68:log_dist] [Rank 0] step=4270, skipped=25, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:37:32,911] [INFO] [timer.py:198:stop] 0/4270, RunningAvgSamplesPerSec=1.1598567145994696, CurrSamplesPerSec=1.203371030870513, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:37:41,254] [INFO] [logging.py:68:log_dist] [Rank 0] step=4280, skipped=25, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:37:41,270] [INFO] [timer.py:198:stop] 0/4280, RunningAvgSamplesPerSec=1.1599504291900997, CurrSamplesPerSec=1.2033561851152128, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:37:49,622] [INFO] [logging.py:68:log_dist] [Rank 0] step=4290, skipped=25, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:37:49,637] [INFO] [timer.py:198:stop] 0/4290, RunningAvgSamplesPerSec=1.1600408893826315, CurrSamplesPerSec=1.194360883842727, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:38:01,097] [INFO] [logging.py:68:log_dist] [Rank 0] step=4300, skipped=25, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:38:01,112] [INFO] [timer.py:198:stop] 0/4300, RunningAvgSamplesPerSec=1.1591587908883842, CurrSamplesPerSec=0.25269161224059344, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:38:09,472] [INFO] [logging.py:68:log_dist] [Rank 0] step=4310, skipped=25, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:38:09,488] [INFO] [timer.py:198:stop] 0/4310, RunningAvgSamplesPerSec=1.1592487710468873, CurrSamplesPerSec=1.2044872369216066, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:38:17,828] [INFO] [logging.py:68:log_dist] [Rank 0] step=4320, skipped=25, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:38:17,843] [INFO] [timer.py:198:stop] 0/4320, RunningAvgSamplesPerSec=1.1593437096795816, CurrSamplesPerSec=1.2036628415691022, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:38:26,193] [INFO] [logging.py:68:log_dist] [Rank 0] step=4330, skipped=25, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:38:26,209] [INFO] [timer.py:198:stop] 0/4330, RunningAvgSamplesPerSec=1.159435379671053, CurrSamplesPerSec=1.2067890631518678, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:38:34,586] [INFO] [logging.py:68:log_dist] [Rank 0] step=4340, skipped=25, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:38:34,602] [INFO] [timer.py:198:stop] 0/4340, RunningAvgSamplesPerSec=1.1595180649974823, CurrSamplesPerSec=1.1780121460707829, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:38:41,658] [INFO] [stage_1_and_2.py:1720:step] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 4096.0, reducing to 2048.0\n",
"[2023-08-29 22:38:42,487] [INFO] [logging.py:68:log_dist] [Rank 0] step=4350, skipped=26, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:38:42,503] [INFO] [timer.py:198:stop] 0/4350, RunningAvgSamplesPerSec=1.1597522778888452, CurrSamplesPerSec=1.1886453768900331, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:38:50,867] [INFO] [logging.py:68:log_dist] [Rank 0] step=4360, skipped=26, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:38:50,883] [INFO] [timer.py:198:stop] 0/4360, RunningAvgSamplesPerSec=1.1598384821557877, CurrSamplesPerSec=1.2070172288213903, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:38:59,241] [INFO] [logging.py:68:log_dist] [Rank 0] step=4370, skipped=26, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:38:59,256] [INFO] [timer.py:198:stop] 0/4370, RunningAvgSamplesPerSec=1.1599258746079761, CurrSamplesPerSec=1.193963774906602, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:39:07,610] [INFO] [logging.py:68:log_dist] [Rank 0] step=4380, skipped=26, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:39:07,626] [INFO] [timer.py:198:stop] 0/4380, RunningAvgSamplesPerSec=1.1600141460999724, CurrSamplesPerSec=1.2023985453006796, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:39:15,994] [INFO] [logging.py:68:log_dist] [Rank 0] step=4390, skipped=26, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:39:16,010] [INFO] [timer.py:198:stop] 0/4390, RunningAvgSamplesPerSec=1.1600974621133298, CurrSamplesPerSec=1.204725951891432, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:39:27,545] [INFO] [logging.py:68:log_dist] [Rank 0] step=4400, skipped=26, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:39:27,560] [INFO] [timer.py:198:stop] 0/4400, RunningAvgSamplesPerSec=1.1592119552252402, CurrSamplesPerSec=0.24995032939921516, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:39:35,927] [INFO] [logging.py:68:log_dist] [Rank 0] step=4410, skipped=26, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:39:35,943] [INFO] [timer.py:198:stop] 0/4410, RunningAvgSamplesPerSec=1.1592972542922244, CurrSamplesPerSec=1.1973302099572172, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:39:44,308] [INFO] [logging.py:68:log_dist] [Rank 0] step=4420, skipped=26, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:39:44,324] [INFO] [timer.py:198:stop] 0/4420, RunningAvgSamplesPerSec=1.1593823214248289, CurrSamplesPerSec=1.2093922076843606, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:39:52,685] [INFO] [logging.py:68:log_dist] [Rank 0] step=4430, skipped=26, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:39:52,700] [INFO] [timer.py:198:stop] 0/4430, RunningAvgSamplesPerSec=1.159468671315671, CurrSamplesPerSec=1.1966248154124604, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:40:01,079] [INFO] [logging.py:68:log_dist] [Rank 0] step=4440, skipped=26, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:40:01,095] [INFO] [timer.py:198:stop] 0/4440, RunningAvgSamplesPerSec=1.1595490256552614, CurrSamplesPerSec=1.1579156599364822, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:40:09,474] [INFO] [logging.py:68:log_dist] [Rank 0] step=4450, skipped=26, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:40:09,490] [INFO] [timer.py:198:stop] 0/4450, RunningAvgSamplesPerSec=1.1596287157732228, CurrSamplesPerSec=1.1965411796858352, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:40:12,366] [INFO] [stage_1_and_2.py:1720:step] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 2048.0, reducing to 1024.0\n",
"[2023-08-29 22:40:17,385] [INFO] [logging.py:68:log_dist] [Rank 0] step=4460, skipped=27, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:40:17,401] [INFO] [timer.py:198:stop] 0/4460, RunningAvgSamplesPerSec=1.1598540766031873, CurrSamplesPerSec=1.1954655407949413, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:40:25,755] [INFO] [logging.py:68:log_dist] [Rank 0] step=4470, skipped=27, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:40:25,771] [INFO] [timer.py:198:stop] 0/4470, RunningAvgSamplesPerSec=1.1599406270569694, CurrSamplesPerSec=1.2025357501957492, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:40:34,108] [INFO] [logging.py:68:log_dist] [Rank 0] step=4480, skipped=27, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:40:34,124] [INFO] [timer.py:198:stop] 0/4480, RunningAvgSamplesPerSec=1.1600318287707292, CurrSamplesPerSec=1.204284576455043, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:40:42,470] [INFO] [logging.py:68:log_dist] [Rank 0] step=4490, skipped=27, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:40:42,485] [INFO] [timer.py:198:stop] 0/4490, RunningAvgSamplesPerSec=1.1601201847801028, CurrSamplesPerSec=1.2071273487487537, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:40:53,856] [INFO] [logging.py:68:log_dist] [Rank 0] step=4500, skipped=27, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:40:53,872] [INFO] [timer.py:198:stop] 0/4500, RunningAvgSamplesPerSec=1.1593036053816606, CurrSamplesPerSec=0.26047236972869675, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:41:02,217] [INFO] [logging.py:68:log_dist] [Rank 0] step=4510, skipped=27, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:41:02,233] [INFO] [timer.py:198:stop] 0/4510, RunningAvgSamplesPerSec=1.1593932838072707, CurrSamplesPerSec=1.2081381244799032, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:41:10,583] [INFO] [logging.py:68:log_dist] [Rank 0] step=4520, skipped=27, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:41:10,599] [INFO] [timer.py:198:stop] 0/4520, RunningAvgSamplesPerSec=1.1594807692979334, CurrSamplesPerSec=1.209705786496924, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:41:18,953] [INFO] [logging.py:68:log_dist] [Rank 0] step=4530, skipped=27, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:41:18,969] [INFO] [timer.py:198:stop] 0/4530, RunningAvgSamplesPerSec=1.1595669547293421, CurrSamplesPerSec=1.2027029894494405, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:41:27,309] [INFO] [logging.py:68:log_dist] [Rank 0] step=4540, skipped=27, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:41:27,325] [INFO] [timer.py:198:stop] 0/4540, RunningAvgSamplesPerSec=1.15965679704441, CurrSamplesPerSec=1.1943489803351053, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:41:35,669] [INFO] [logging.py:68:log_dist] [Rank 0] step=4550, skipped=27, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:41:35,685] [INFO] [timer.py:198:stop] 0/4550, RunningAvgSamplesPerSec=1.1597449318297672, CurrSamplesPerSec=1.20514029499949, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:41:44,025] [INFO] [logging.py:68:log_dist] [Rank 0] step=4560, skipped=27, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:41:44,040] [INFO] [timer.py:198:stop] 0/4560, RunningAvgSamplesPerSec=1.1598344628101827, CurrSamplesPerSec=1.1939644546619594, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:41:52,392] [INFO] [logging.py:68:log_dist] [Rank 0] step=4570, skipped=27, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:41:52,408] [INFO] [timer.py:198:stop] 0/4570, RunningAvgSamplesPerSec=1.1599197072033807, CurrSamplesPerSec=1.2064415975304543, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:42:00,760] [INFO] [logging.py:68:log_dist] [Rank 0] step=4580, skipped=27, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:42:00,776] [INFO] [timer.py:198:stop] 0/4580, RunningAvgSamplesPerSec=1.160004531089363, CurrSamplesPerSec=1.1997227737159049, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:42:09,119] [INFO] [logging.py:68:log_dist] [Rank 0] step=4590, skipped=27, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:42:09,135] [INFO] [timer.py:198:stop] 0/4590, RunningAvgSamplesPerSec=1.1600916707679918, CurrSamplesPerSec=1.2000886982670933, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:42:16,195] [INFO] [stage_1_and_2.py:1720:step] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 1024.0, reducing to 512.0\n",
"[2023-08-29 22:42:20,279] [INFO] [logging.py:68:log_dist] [Rank 0] step=4600, skipped=28, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:42:20,295] [INFO] [timer.py:198:stop] 0/4600, RunningAvgSamplesPerSec=1.1593586789286938, CurrSamplesPerSec=0.2440772778545661, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:42:28,649] [INFO] [logging.py:68:log_dist] [Rank 0] step=4610, skipped=28, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:42:28,664] [INFO] [timer.py:198:stop] 0/4610, RunningAvgSamplesPerSec=1.159443975398885, CurrSamplesPerSec=1.2022944559438602, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:42:29,863] [INFO] [stage_1_and_2.py:1720:step] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 512.0, reducing to 256.0\n",
"[2023-08-29 22:42:31,069] [INFO] [stage_1_and_2.py:1720:step] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 256.0, reducing to 128.0\n",
"[2023-08-29 22:42:36,075] [INFO] [logging.py:68:log_dist] [Rank 0] step=4620, skipped=30, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:42:36,090] [INFO] [timer.py:198:stop] 0/4620, RunningAvgSamplesPerSec=1.1598024983579118, CurrSamplesPerSec=1.2042292543909263, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:42:44,440] [INFO] [logging.py:68:log_dist] [Rank 0] step=4630, skipped=30, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:42:44,455] [INFO] [timer.py:198:stop] 0/4630, RunningAvgSamplesPerSec=1.1598874621135118, CurrSamplesPerSec=1.1905506536341948, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:42:52,797] [INFO] [logging.py:68:log_dist] [Rank 0] step=4640, skipped=30, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:42:52,813] [INFO] [timer.py:198:stop] 0/4640, RunningAvgSamplesPerSec=1.1599744654853297, CurrSamplesPerSec=1.1942068367799297, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:43:01,161] [INFO] [logging.py:68:log_dist] [Rank 0] step=4650, skipped=30, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:43:01,176] [INFO] [timer.py:198:stop] 0/4650, RunningAvgSamplesPerSec=1.1600594627589471, CurrSamplesPerSec=1.2012015727325813, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:43:03,218] [INFO] [stage_1_and_2.py:1720:step] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 128.0, reducing to 64.0\n",
"[2023-08-29 22:43:09,079] [INFO] [logging.py:68:log_dist] [Rank 0] step=4660, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:43:09,094] [INFO] [timer.py:198:stop] 0/4660, RunningAvgSamplesPerSec=1.1602724908504205, CurrSamplesPerSec=1.1968047571966438, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:43:17,439] [INFO] [logging.py:68:log_dist] [Rank 0] step=4670, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:43:17,454] [INFO] [timer.py:198:stop] 0/4670, RunningAvgSamplesPerSec=1.1603573148209023, CurrSamplesPerSec=1.2000900717627028, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:43:25,815] [INFO] [logging.py:68:log_dist] [Rank 0] step=4680, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:43:25,831] [INFO] [timer.py:198:stop] 0/4680, RunningAvgSamplesPerSec=1.1604370965948956, CurrSamplesPerSec=1.1802287463074799, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:43:34,172] [INFO] [logging.py:68:log_dist] [Rank 0] step=4690, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:43:34,187] [INFO] [timer.py:198:stop] 0/4690, RunningAvgSamplesPerSec=1.1605222173931622, CurrSamplesPerSec=1.2061834709626769, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:43:45,657] [INFO] [logging.py:68:log_dist] [Rank 0] step=4700, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:43:45,673] [INFO] [timer.py:198:stop] 0/4700, RunningAvgSamplesPerSec=1.1597105773407677, CurrSamplesPerSec=0.2533221663841208, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:43:54,027] [INFO] [logging.py:68:log_dist] [Rank 0] step=4710, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:43:54,043] [INFO] [timer.py:198:stop] 0/4710, RunningAvgSamplesPerSec=1.1597934557233847, CurrSamplesPerSec=1.1945598776020878, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:44:02,399] [INFO] [logging.py:68:log_dist] [Rank 0] step=4720, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:44:02,415] [INFO] [timer.py:198:stop] 0/4720, RunningAvgSamplesPerSec=1.1598748837630846, CurrSamplesPerSec=1.2041680602786842, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:44:10,770] [INFO] [logging.py:68:log_dist] [Rank 0] step=4730, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:44:10,786] [INFO] [timer.py:198:stop] 0/4730, RunningAvgSamplesPerSec=1.1599563497151775, CurrSamplesPerSec=1.2013130425243732, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:44:19,137] [INFO] [logging.py:68:log_dist] [Rank 0] step=4740, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:44:19,152] [INFO] [timer.py:198:stop] 0/4740, RunningAvgSamplesPerSec=1.1600388269072526, CurrSamplesPerSec=1.1966159392297822, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:44:27,514] [INFO] [logging.py:68:log_dist] [Rank 0] step=4750, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:44:27,529] [INFO] [timer.py:198:stop] 0/4750, RunningAvgSamplesPerSec=1.160117929417823, CurrSamplesPerSec=1.2065228053390373, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:44:35,884] [INFO] [logging.py:68:log_dist] [Rank 0] step=4760, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:44:35,900] [INFO] [timer.py:198:stop] 0/4760, RunningAvgSamplesPerSec=1.1601986632852066, CurrSamplesPerSec=1.2020945997650438, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:44:44,247] [INFO] [logging.py:68:log_dist] [Rank 0] step=4770, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:44:44,263] [INFO] [timer.py:198:stop] 0/4770, RunningAvgSamplesPerSec=1.1602810601888784, CurrSamplesPerSec=1.2071739039085538, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:44:52,604] [INFO] [logging.py:68:log_dist] [Rank 0] step=4780, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:44:52,619] [INFO] [timer.py:198:stop] 0/4780, RunningAvgSamplesPerSec=1.1603649781717393, CurrSamplesPerSec=1.198500982248098, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:45:00,975] [INFO] [logging.py:68:log_dist] [Rank 0] step=4790, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:45:00,990] [INFO] [timer.py:198:stop] 0/4790, RunningAvgSamplesPerSec=1.1604443738728978, CurrSamplesPerSec=1.2044163325854853, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:45:12,440] [INFO] [logging.py:68:log_dist] [Rank 0] step=4800, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:45:12,456] [INFO] [timer.py:198:stop] 0/4800, RunningAvgSamplesPerSec=1.159655380163433, CurrSamplesPerSec=0.2546019580227372, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:45:20,838] [INFO] [logging.py:68:log_dist] [Rank 0] step=4810, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:45:20,853] [INFO] [timer.py:198:stop] 0/4810, RunningAvgSamplesPerSec=1.1597289285748946, CurrSamplesPerSec=1.2015408630357018, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:45:29,230] [INFO] [logging.py:68:log_dist] [Rank 0] step=4820, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:45:29,246] [INFO] [timer.py:198:stop] 0/4820, RunningAvgSamplesPerSec=1.1598033759808715, CurrSamplesPerSec=1.208925105975041, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:45:37,588] [INFO] [logging.py:68:log_dist] [Rank 0] step=4830, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:45:37,604] [INFO] [timer.py:198:stop] 0/4830, RunningAvgSamplesPerSec=1.1598873237366476, CurrSamplesPerSec=1.2036542060848718, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:45:45,967] [INFO] [logging.py:68:log_dist] [Rank 0] step=4840, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:45:45,983] [INFO] [timer.py:198:stop] 0/4840, RunningAvgSamplesPerSec=1.1599647295734903, CurrSamplesPerSec=1.1922257958610536, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:45:54,340] [INFO] [logging.py:68:log_dist] [Rank 0] step=4850, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:45:54,356] [INFO] [timer.py:198:stop] 0/4850, RunningAvgSamplesPerSec=1.1600436228644828, CurrSamplesPerSec=1.181219908393094, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:46:02,700] [INFO] [logging.py:68:log_dist] [Rank 0] step=4860, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:46:02,715] [INFO] [timer.py:198:stop] 0/4860, RunningAvgSamplesPerSec=1.1601259002562667, CurrSamplesPerSec=1.203754039846135, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:46:11,066] [INFO] [logging.py:68:log_dist] [Rank 0] step=4870, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:46:11,081] [INFO] [timer.py:198:stop] 0/4870, RunningAvgSamplesPerSec=1.1602056029218204, CurrSamplesPerSec=1.2038044811918465, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:46:19,424] [INFO] [logging.py:68:log_dist] [Rank 0] step=4880, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:46:19,440] [INFO] [timer.py:198:stop] 0/4880, RunningAvgSamplesPerSec=1.1602870017320992, CurrSamplesPerSec=1.1958899039878925, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:46:27,787] [INFO] [logging.py:68:log_dist] [Rank 0] step=4890, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:46:27,802] [INFO] [timer.py:198:stop] 0/4890, RunningAvgSamplesPerSec=1.1603671333161938, CurrSamplesPerSec=1.2005916077458594, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:46:39,246] [INFO] [logging.py:68:log_dist] [Rank 0] step=4900, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:46:39,262] [INFO] [timer.py:198:stop] 0/4900, RunningAvgSamplesPerSec=1.1595962084585647, CurrSamplesPerSec=0.25554326943610245, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:46:47,606] [INFO] [logging.py:68:log_dist] [Rank 0] step=4910, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:46:47,622] [INFO] [timer.py:198:stop] 0/4910, RunningAvgSamplesPerSec=1.159678366876383, CurrSamplesPerSec=1.20018897170947, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:46:55,957] [INFO] [logging.py:68:log_dist] [Rank 0] step=4920, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:46:55,973] [INFO] [timer.py:198:stop] 0/4920, RunningAvgSamplesPerSec=1.1597622397287222, CurrSamplesPerSec=1.200765181822294, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:47:04,327] [INFO] [logging.py:68:log_dist] [Rank 0] step=4930, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:47:04,343] [INFO] [timer.py:198:stop] 0/4930, RunningAvgSamplesPerSec=1.159840664423047, CurrSamplesPerSec=1.2049117998968688, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:47:12,698] [INFO] [logging.py:68:log_dist] [Rank 0] step=4940, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:47:12,714] [INFO] [timer.py:198:stop] 0/4940, RunningAvgSamplesPerSec=1.159918339647066, CurrSamplesPerSec=1.20350500448484, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:47:21,083] [INFO] [logging.py:68:log_dist] [Rank 0] step=4950, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:47:21,099] [INFO] [timer.py:198:stop] 0/4950, RunningAvgSamplesPerSec=1.1599920199560039, CurrSamplesPerSec=1.2023034165702324, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:47:29,450] [INFO] [logging.py:68:log_dist] [Rank 0] step=4960, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:47:29,466] [INFO] [timer.py:198:stop] 0/4960, RunningAvgSamplesPerSec=1.1600705415447117, CurrSamplesPerSec=1.198412290235905, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:47:37,819] [INFO] [logging.py:68:log_dist] [Rank 0] step=4970, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:47:37,834] [INFO] [timer.py:198:stop] 0/4970, RunningAvgSamplesPerSec=1.1601479663911893, CurrSamplesPerSec=1.1888303397897957, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:47:46,188] [INFO] [logging.py:68:log_dist] [Rank 0] step=4980, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:47:46,204] [INFO] [timer.py:198:stop] 0/4980, RunningAvgSamplesPerSec=1.1602250255843505, CurrSamplesPerSec=1.207205174315391, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:47:54,548] [INFO] [logging.py:68:log_dist] [Rank 0] step=4990, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:47:54,564] [INFO] [timer.py:198:stop] 0/4990, RunningAvgSamplesPerSec=1.1603041255263467, CurrSamplesPerSec=1.201781854867709, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:48:05,930] [INFO] [logging.py:68:log_dist] [Rank 0] step=5000, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:48:05,945] [INFO] [timer.py:198:stop] 0/5000, RunningAvgSamplesPerSec=1.159569670796858, CurrSamplesPerSec=0.25970303494312985, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:48:14,289] [INFO] [logging.py:68:log_dist] [Rank 0] step=5010, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:48:14,304] [INFO] [timer.py:198:stop] 0/5010, RunningAvgSamplesPerSec=1.1596504001645476, CurrSamplesPerSec=1.2062278719673578, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:48:22,648] [INFO] [logging.py:68:log_dist] [Rank 0] step=5020, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:48:22,664] [INFO] [timer.py:198:stop] 0/5020, RunningAvgSamplesPerSec=1.1597302888692345, CurrSamplesPerSec=1.201477188483613, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:48:31,002] [INFO] [logging.py:68:log_dist] [Rank 0] step=5030, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:48:31,018] [INFO] [timer.py:198:stop] 0/5030, RunningAvgSamplesPerSec=1.1598114666433357, CurrSamplesPerSec=1.2039679263686607, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:48:39,365] [INFO] [logging.py:68:log_dist] [Rank 0] step=5040, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:48:39,380] [INFO] [timer.py:198:stop] 0/5040, RunningAvgSamplesPerSec=1.1598901268717248, CurrSamplesPerSec=1.2041881119015634, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:48:47,730] [INFO] [logging.py:68:log_dist] [Rank 0] step=5050, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:48:47,746] [INFO] [timer.py:198:stop] 0/5050, RunningAvgSamplesPerSec=1.159967639641035, CurrSamplesPerSec=1.2061411542975227, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:48:56,088] [INFO] [logging.py:68:log_dist] [Rank 0] step=5060, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:48:56,103] [INFO] [timer.py:198:stop] 0/5060, RunningAvgSamplesPerSec=1.1600472457156723, CurrSamplesPerSec=1.2042672877639922, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:49:04,433] [INFO] [logging.py:68:log_dist] [Rank 0] step=5070, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:49:04,449] [INFO] [timer.py:198:stop] 0/5070, RunningAvgSamplesPerSec=1.160129496379917, CurrSamplesPerSec=1.2069915254846968, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:49:12,787] [INFO] [logging.py:68:log_dist] [Rank 0] step=5080, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:49:12,803] [INFO] [timer.py:198:stop] 0/5080, RunningAvgSamplesPerSec=1.1602093277106194, CurrSamplesPerSec=1.195650928301207, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:49:21,173] [INFO] [logging.py:68:log_dist] [Rank 0] step=5090, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:49:21,188] [INFO] [timer.py:198:stop] 0/5090, RunningAvgSamplesPerSec=1.1602805051725003, CurrSamplesPerSec=1.190327319125363, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:49:32,694] [INFO] [logging.py:68:log_dist] [Rank 0] step=5100, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:49:32,710] [INFO] [timer.py:198:stop] 0/5100, RunningAvgSamplesPerSec=1.1595237954036028, CurrSamplesPerSec=0.25296952269908207, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:49:41,078] [INFO] [logging.py:68:log_dist] [Rank 0] step=5110, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:49:41,093] [INFO] [timer.py:198:stop] 0/5110, RunningAvgSamplesPerSec=1.1595969649007456, CurrSamplesPerSec=1.1961603582859472, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:49:49,462] [INFO] [logging.py:68:log_dist] [Rank 0] step=5120, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:49:49,477] [INFO] [timer.py:198:stop] 0/5120, RunningAvgSamplesPerSec=1.1596697377401755, CurrSamplesPerSec=1.183946688435706, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:49:57,800] [INFO] [logging.py:68:log_dist] [Rank 0] step=5130, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:49:57,816] [INFO] [timer.py:198:stop] 0/5130, RunningAvgSamplesPerSec=1.1597535401234298, CurrSamplesPerSec=1.2059594345109994, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:50:06,153] [INFO] [logging.py:68:log_dist] [Rank 0] step=5140, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:50:06,168] [INFO] [timer.py:198:stop] 0/5140, RunningAvgSamplesPerSec=1.159833307359668, CurrSamplesPerSec=1.2050159967638732, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:50:14,526] [INFO] [logging.py:68:log_dist] [Rank 0] step=5150, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:50:14,542] [INFO] [timer.py:198:stop] 0/5150, RunningAvgSamplesPerSec=1.1599072050062273, CurrSamplesPerSec=1.2037885882569377, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:50:22,883] [INFO] [logging.py:68:log_dist] [Rank 0] step=5160, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:50:22,898] [INFO] [timer.py:198:stop] 0/5160, RunningAvgSamplesPerSec=1.1599860107581745, CurrSamplesPerSec=1.203453207024857, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:50:31,242] [INFO] [logging.py:68:log_dist] [Rank 0] step=5170, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:50:31,257] [INFO] [timer.py:198:stop] 0/5170, RunningAvgSamplesPerSec=1.1600633595916086, CurrSamplesPerSec=1.19242712975959, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:50:39,622] [INFO] [logging.py:68:log_dist] [Rank 0] step=5180, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:50:39,637] [INFO] [timer.py:198:stop] 0/5180, RunningAvgSamplesPerSec=1.160134950934658, CurrSamplesPerSec=1.1921647990108648, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:50:48,000] [INFO] [logging.py:68:log_dist] [Rank 0] step=5190, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:50:48,015] [INFO] [timer.py:198:stop] 0/5190, RunningAvgSamplesPerSec=1.160206660958648, CurrSamplesPerSec=1.1961606994152527, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:50:59,491] [INFO] [logging.py:68:log_dist] [Rank 0] step=5200, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:50:59,506] [INFO] [timer.py:198:stop] 0/5200, RunningAvgSamplesPerSec=1.1594725128429226, CurrSamplesPerSec=0.2527451807596642, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:51:07,895] [INFO] [logging.py:68:log_dist] [Rank 0] step=5210, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:51:07,911] [INFO] [timer.py:198:stop] 0/5210, RunningAvgSamplesPerSec=1.1595386981650082, CurrSamplesPerSec=1.2039426983182477, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:51:16,277] [INFO] [logging.py:68:log_dist] [Rank 0] step=5220, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:51:16,293] [INFO] [timer.py:198:stop] 0/5220, RunningAvgSamplesPerSec=1.1596101499863374, CurrSamplesPerSec=1.2014431167655624, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:51:24,663] [INFO] [logging.py:68:log_dist] [Rank 0] step=5230, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:51:24,678] [INFO] [timer.py:198:stop] 0/5230, RunningAvgSamplesPerSec=1.1596804850235736, CurrSamplesPerSec=1.203422475972746, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:51:33,056] [INFO] [logging.py:68:log_dist] [Rank 0] step=5240, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:51:33,071] [INFO] [timer.py:198:stop] 0/5240, RunningAvgSamplesPerSec=1.1597483743853723, CurrSamplesPerSec=1.2026264329256053, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:51:41,457] [INFO] [logging.py:68:log_dist] [Rank 0] step=5250, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:51:41,473] [INFO] [timer.py:198:stop] 0/5250, RunningAvgSamplesPerSec=1.15981396367072, CurrSamplesPerSec=1.19955601912401, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:51:49,827] [INFO] [logging.py:68:log_dist] [Rank 0] step=5260, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:51:49,843] [INFO] [timer.py:198:stop] 0/5260, RunningAvgSamplesPerSec=1.1598877056593566, CurrSamplesPerSec=1.203849398276789, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:51:58,210] [INFO] [logging.py:68:log_dist] [Rank 0] step=5270, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:51:58,225] [INFO] [timer.py:198:stop] 0/5270, RunningAvgSamplesPerSec=1.1599577628236306, CurrSamplesPerSec=1.2011836844455124, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:52:06,594] [INFO] [logging.py:68:log_dist] [Rank 0] step=5280, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:52:06,609] [INFO] [timer.py:198:stop] 0/5280, RunningAvgSamplesPerSec=1.1600271134184768, CurrSamplesPerSec=1.2025953992734488, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:52:15,008] [INFO] [logging.py:68:log_dist] [Rank 0] step=5290, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:52:15,024] [INFO] [timer.py:198:stop] 0/5290, RunningAvgSamplesPerSec=1.160089543289274, CurrSamplesPerSec=1.1901020422929454, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:52:26,532] [INFO] [logging.py:68:log_dist] [Rank 0] step=5300, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:52:26,547] [INFO] [timer.py:198:stop] 0/5300, RunningAvgSamplesPerSec=1.1593613866282915, CurrSamplesPerSec=0.2523131237589789, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:52:34,911] [INFO] [logging.py:68:log_dist] [Rank 0] step=5310, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:52:34,926] [INFO] [timer.py:198:stop] 0/5310, RunningAvgSamplesPerSec=1.1594331039342889, CurrSamplesPerSec=1.1962992139908375, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:52:43,275] [INFO] [logging.py:68:log_dist] [Rank 0] step=5320, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:52:43,290] [INFO] [timer.py:198:stop] 0/5320, RunningAvgSamplesPerSec=1.1595080479262505, CurrSamplesPerSec=1.1985013247137406, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:52:51,648] [INFO] [logging.py:68:log_dist] [Rank 0] step=5330, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:52:51,663] [INFO] [timer.py:198:stop] 0/5330, RunningAvgSamplesPerSec=1.1595804672940913, CurrSamplesPerSec=1.1961446665482207, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:53:00,028] [INFO] [logging.py:68:log_dist] [Rank 0] step=5340, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:53:00,043] [INFO] [timer.py:198:stop] 0/5340, RunningAvgSamplesPerSec=1.1596507880959277, CurrSamplesPerSec=1.1962551997250581, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:53:08,391] [INFO] [logging.py:68:log_dist] [Rank 0] step=5350, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:53:08,407] [INFO] [timer.py:198:stop] 0/5350, RunningAvgSamplesPerSec=1.159725046880068, CurrSamplesPerSec=1.1973288427716295, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:53:16,756] [INFO] [logging.py:68:log_dist] [Rank 0] step=5360, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:53:16,771] [INFO] [timer.py:198:stop] 0/5360, RunningAvgSamplesPerSec=1.1597991782943569, CurrSamplesPerSec=1.2042548402137987, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:53:25,116] [INFO] [logging.py:68:log_dist] [Rank 0] step=5370, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:53:25,131] [INFO] [timer.py:198:stop] 0/5370, RunningAvgSamplesPerSec=1.1598737174108524, CurrSamplesPerSec=1.2066783106232069, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:53:33,489] [INFO] [logging.py:68:log_dist] [Rank 0] step=5380, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:53:33,505] [INFO] [timer.py:198:stop] 0/5380, RunningAvgSamplesPerSec=1.159944645173836, CurrSamplesPerSec=1.203534703706297, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:53:41,888] [INFO] [logging.py:68:log_dist] [Rank 0] step=5390, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:53:41,904] [INFO] [timer.py:198:stop] 0/5390, RunningAvgSamplesPerSec=1.1600090991560486, CurrSamplesPerSec=1.200655875859921, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:53:53,397] [INFO] [logging.py:68:log_dist] [Rank 0] step=5400, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:53:53,412] [INFO] [timer.py:198:stop] 0/5400, RunningAvgSamplesPerSec=1.1592984332599456, CurrSamplesPerSec=0.2525062468792695, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:54:01,745] [INFO] [logging.py:68:log_dist] [Rank 0] step=5410, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:54:01,761] [INFO] [timer.py:198:stop] 0/5410, RunningAvgSamplesPerSec=1.1593765635250122, CurrSamplesPerSec=1.2084151921987096, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:54:10,093] [INFO] [logging.py:68:log_dist] [Rank 0] step=5420, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:54:10,108] [INFO] [timer.py:198:stop] 0/5420, RunningAvgSamplesPerSec=1.1594542185048784, CurrSamplesPerSec=1.2084931839182365, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:54:18,471] [INFO] [logging.py:68:log_dist] [Rank 0] step=5430, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:54:18,487] [INFO] [timer.py:198:stop] 0/5430, RunningAvgSamplesPerSec=1.1595240235600122, CurrSamplesPerSec=1.2016007576927386, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:54:26,828] [INFO] [logging.py:68:log_dist] [Rank 0] step=5440, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:54:26,844] [INFO] [timer.py:198:stop] 0/5440, RunningAvgSamplesPerSec=1.1595986736469621, CurrSamplesPerSec=1.1995718004942655, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:54:35,198] [INFO] [logging.py:68:log_dist] [Rank 0] step=5450, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:54:35,213] [INFO] [timer.py:198:stop] 0/5450, RunningAvgSamplesPerSec=1.1596702661144114, CurrSamplesPerSec=1.205010803802318, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:54:43,576] [INFO] [logging.py:68:log_dist] [Rank 0] step=5460, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:54:43,592] [INFO] [timer.py:198:stop] 0/5460, RunningAvgSamplesPerSec=1.1597394848824398, CurrSamplesPerSec=1.1752536531375652, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:54:51,929] [INFO] [logging.py:68:log_dist] [Rank 0] step=5470, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:54:51,945] [INFO] [timer.py:198:stop] 0/5470, RunningAvgSamplesPerSec=1.1598147173227116, CurrSamplesPerSec=1.198038832039646, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:55:00,287] [INFO] [logging.py:68:log_dist] [Rank 0] step=5480, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:55:00,303] [INFO] [timer.py:198:stop] 0/5480, RunningAvgSamplesPerSec=1.1598882510651614, CurrSamplesPerSec=1.206896710480882, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:55:08,630] [INFO] [logging.py:68:log_dist] [Rank 0] step=5490, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:55:08,646] [INFO] [timer.py:198:stop] 0/5490, RunningAvgSamplesPerSec=1.1599652302171914, CurrSamplesPerSec=1.2033941633588159, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:55:20,107] [INFO] [logging.py:68:log_dist] [Rank 0] step=5500, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:55:20,122] [INFO] [timer.py:198:stop] 0/5500, RunningAvgSamplesPerSec=1.1592754715453653, CurrSamplesPerSec=0.2530649317911529, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:55:28,471] [INFO] [logging.py:68:log_dist] [Rank 0] step=5510, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:55:28,487] [INFO] [timer.py:198:stop] 0/5510, RunningAvgSamplesPerSec=1.1593482645218434, CurrSamplesPerSec=1.2064062026759337, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:55:36,841] [INFO] [logging.py:68:log_dist] [Rank 0] step=5520, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:55:36,857] [INFO] [timer.py:198:stop] 0/5520, RunningAvgSamplesPerSec=1.1594206686996538, CurrSamplesPerSec=1.194975767454171, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:55:45,218] [INFO] [logging.py:68:log_dist] [Rank 0] step=5530, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:55:45,234] [INFO] [timer.py:198:stop] 0/5530, RunningAvgSamplesPerSec=1.1594910296650072, CurrSamplesPerSec=1.1944928586237615, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:55:53,596] [INFO] [logging.py:68:log_dist] [Rank 0] step=5540, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:55:53,612] [INFO] [timer.py:198:stop] 0/5540, RunningAvgSamplesPerSec=1.1595603069474612, CurrSamplesPerSec=1.2033689593477834, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:56:01,962] [INFO] [logging.py:68:log_dist] [Rank 0] step=5550, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:56:01,978] [INFO] [timer.py:198:stop] 0/5550, RunningAvgSamplesPerSec=1.1596327378730924, CurrSamplesPerSec=1.2026647099692072, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:56:10,343] [INFO] [logging.py:68:log_dist] [Rank 0] step=5560, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:56:10,359] [INFO] [timer.py:198:stop] 0/5560, RunningAvgSamplesPerSec=1.1597016660472055, CurrSamplesPerSec=1.1969786043453396, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:56:18,705] [INFO] [logging.py:68:log_dist] [Rank 0] step=5570, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:56:18,720] [INFO] [timer.py:198:stop] 0/5570, RunningAvgSamplesPerSec=1.159774446118736, CurrSamplesPerSec=1.204692733707525, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:56:27,077] [INFO] [logging.py:68:log_dist] [Rank 0] step=5580, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:56:27,093] [INFO] [timer.py:198:stop] 0/5580, RunningAvgSamplesPerSec=1.1598432808634536, CurrSamplesPerSec=1.1979994801618925, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:56:35,459] [INFO] [logging.py:68:log_dist] [Rank 0] step=5590, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:56:35,475] [INFO] [timer.py:198:stop] 0/5590, RunningAvgSamplesPerSec=1.1599095485966724, CurrSamplesPerSec=1.1959700384741396, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:56:46,869] [INFO] [logging.py:68:log_dist] [Rank 0] step=5600, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:56:46,885] [INFO] [timer.py:198:stop] 0/5600, RunningAvgSamplesPerSec=1.1592482436525289, CurrSamplesPerSec=0.25764862726664756, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:56:55,257] [INFO] [logging.py:68:log_dist] [Rank 0] step=5610, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:56:55,272] [INFO] [timer.py:198:stop] 0/5610, RunningAvgSamplesPerSec=1.1593141533715277, CurrSamplesPerSec=1.196540838339479, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:57:03,633] [INFO] [logging.py:68:log_dist] [Rank 0] step=5620, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:57:03,648] [INFO] [timer.py:198:stop] 0/5620, RunningAvgSamplesPerSec=1.1593823676395905, CurrSamplesPerSec=1.2065634133431524, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:57:12,001] [INFO] [logging.py:68:log_dist] [Rank 0] step=5630, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:57:12,017] [INFO] [timer.py:198:stop] 0/5630, RunningAvgSamplesPerSec=1.1594521693015798, CurrSamplesPerSec=1.1958871761943892, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:57:20,369] [INFO] [logging.py:68:log_dist] [Rank 0] step=5640, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:57:20,385] [INFO] [timer.py:198:stop] 0/5640, RunningAvgSamplesPerSec=1.1595219291153929, CurrSamplesPerSec=1.2057746497498645, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:57:28,725] [INFO] [logging.py:68:log_dist] [Rank 0] step=5650, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:57:28,740] [INFO] [timer.py:198:stop] 0/5650, RunningAvgSamplesPerSec=1.159594353736598, CurrSamplesPerSec=1.2041473179232405, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:57:37,101] [INFO] [logging.py:68:log_dist] [Rank 0] step=5660, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:57:37,116] [INFO] [timer.py:198:stop] 0/5660, RunningAvgSamplesPerSec=1.1596619585423749, CurrSamplesPerSec=1.2006919652505883, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:57:45,489] [INFO] [logging.py:68:log_dist] [Rank 0] step=5670, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:57:45,504] [INFO] [timer.py:198:stop] 0/5670, RunningAvgSamplesPerSec=1.1597262621044682, CurrSamplesPerSec=1.177118189086935, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:57:53,850] [INFO] [logging.py:68:log_dist] [Rank 0] step=5680, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:57:53,866] [INFO] [timer.py:198:stop] 0/5680, RunningAvgSamplesPerSec=1.1597965581487242, CurrSamplesPerSec=1.2014407077273335, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:58:02,233] [INFO] [logging.py:68:log_dist] [Rank 0] step=5690, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:58:02,249] [INFO] [timer.py:198:stop] 0/5690, RunningAvgSamplesPerSec=1.1598614940193444, CurrSamplesPerSec=1.2090617133128685, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:58:13,718] [INFO] [logging.py:68:log_dist] [Rank 0] step=5700, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:58:13,733] [INFO] [timer.py:198:stop] 0/5700, RunningAvgSamplesPerSec=1.1591943462093404, CurrSamplesPerSec=0.25339470832510447, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:58:22,087] [INFO] [logging.py:68:log_dist] [Rank 0] step=5710, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:58:22,102] [INFO] [timer.py:198:stop] 0/5710, RunningAvgSamplesPerSec=1.159263758703892, CurrSamplesPerSec=1.2059455650210278, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:58:30,447] [INFO] [logging.py:68:log_dist] [Rank 0] step=5720, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:58:30,463] [INFO] [timer.py:198:stop] 0/5720, RunningAvgSamplesPerSec=1.1593345276559264, CurrSamplesPerSec=1.194228938221212, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:58:38,813] [INFO] [logging.py:68:log_dist] [Rank 0] step=5730, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:58:38,829] [INFO] [timer.py:198:stop] 0/5730, RunningAvgSamplesPerSec=1.1594038580706343, CurrSamplesPerSec=1.2038614919108324, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:58:47,186] [INFO] [logging.py:68:log_dist] [Rank 0] step=5740, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:58:47,202] [INFO] [timer.py:198:stop] 0/5740, RunningAvgSamplesPerSec=1.159471364526154, CurrSamplesPerSec=1.1969406883822125, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:58:55,559] [INFO] [logging.py:68:log_dist] [Rank 0] step=5750, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:58:55,574] [INFO] [timer.py:198:stop] 0/5750, RunningAvgSamplesPerSec=1.15953868457122, CurrSamplesPerSec=1.195550048556793, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:59:03,928] [INFO] [logging.py:68:log_dist] [Rank 0] step=5760, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:59:03,944] [INFO] [timer.py:198:stop] 0/5760, RunningAvgSamplesPerSec=1.1596065943114178, CurrSamplesPerSec=1.203026565628173, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:59:12,293] [INFO] [logging.py:68:log_dist] [Rank 0] step=5770, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:59:12,309] [INFO] [timer.py:198:stop] 0/5770, RunningAvgSamplesPerSec=1.1596753643673294, CurrSamplesPerSec=1.2040042151446093, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:59:20,706] [INFO] [logging.py:68:log_dist] [Rank 0] step=5780, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:59:20,721] [INFO] [timer.py:198:stop] 0/5780, RunningAvgSamplesPerSec=1.1597328252413777, CurrSamplesPerSec=1.1930155905653357, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:59:29,135] [INFO] [logging.py:68:log_dist] [Rank 0] step=5790, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:59:29,151] [INFO] [timer.py:198:stop] 0/5790, RunningAvgSamplesPerSec=1.1597860051499855, CurrSamplesPerSec=1.1963005788260448, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:59:40,672] [INFO] [logging.py:68:log_dist] [Rank 0] step=5800, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:59:40,688] [INFO] [timer.py:198:stop] 0/5800, RunningAvgSamplesPerSec=1.1591185614779815, CurrSamplesPerSec=0.2510106125388009, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:59:49,052] [INFO] [logging.py:68:log_dist] [Rank 0] step=5810, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:59:49,067] [INFO] [timer.py:198:stop] 0/5810, RunningAvgSamplesPerSec=1.1591847053405484, CurrSamplesPerSec=1.2029513479592897, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 22:59:57,418] [INFO] [logging.py:68:log_dist] [Rank 0] step=5820, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 22:59:57,433] [INFO] [timer.py:198:stop] 0/5820, RunningAvgSamplesPerSec=1.1592531918950917, CurrSamplesPerSec=1.2072687624722636, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:00:05,788] [INFO] [logging.py:68:log_dist] [Rank 0] step=5830, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:00:05,803] [INFO] [timer.py:198:stop] 0/5830, RunningAvgSamplesPerSec=1.1593205205590393, CurrSamplesPerSec=1.2044595659435455, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:00:14,152] [INFO] [logging.py:68:log_dist] [Rank 0] step=5840, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:00:14,167] [INFO] [timer.py:198:stop] 0/5840, RunningAvgSamplesPerSec=1.1593889836296385, CurrSamplesPerSec=1.2039043398840041, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:00:22,513] [INFO] [logging.py:68:log_dist] [Rank 0] step=5850, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:00:22,529] [INFO] [timer.py:198:stop] 0/5850, RunningAvgSamplesPerSec=1.1594578027172464, CurrSamplesPerSec=1.1999709327109425, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:00:30,872] [INFO] [logging.py:68:log_dist] [Rank 0] step=5860, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:00:30,888] [INFO] [timer.py:198:stop] 0/5860, RunningAvgSamplesPerSec=1.159527253216666, CurrSamplesPerSec=1.1920980485001516, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:00:39,251] [INFO] [logging.py:68:log_dist] [Rank 0] step=5870, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:00:39,266] [INFO] [timer.py:198:stop] 0/5870, RunningAvgSamplesPerSec=1.1595920281411254, CurrSamplesPerSec=1.193583912770536, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:00:47,591] [INFO] [logging.py:68:log_dist] [Rank 0] step=5880, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:00:47,607] [INFO] [timer.py:198:stop] 0/5880, RunningAvgSamplesPerSec=1.1596648535499183, CurrSamplesPerSec=1.2100275568562104, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:00:55,966] [INFO] [logging.py:68:log_dist] [Rank 0] step=5890, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:00:55,982] [INFO] [timer.py:198:stop] 0/5890, RunningAvgSamplesPerSec=1.159729908896796, CurrSamplesPerSec=1.195781824692388, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:01:07,539] [INFO] [logging.py:68:log_dist] [Rank 0] step=5900, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:01:07,555] [INFO] [timer.py:198:stop] 0/5900, RunningAvgSamplesPerSec=1.1590655407016563, CurrSamplesPerSec=0.24822723325288923, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:01:15,919] [INFO] [logging.py:68:log_dist] [Rank 0] step=5910, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:01:15,934] [INFO] [timer.py:198:stop] 0/5910, RunningAvgSamplesPerSec=1.1591305032644361, CurrSamplesPerSec=1.195076890814246, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:01:24,282] [INFO] [logging.py:68:log_dist] [Rank 0] step=5920, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:01:24,297] [INFO] [timer.py:198:stop] 0/5920, RunningAvgSamplesPerSec=1.1591987176408738, CurrSamplesPerSec=1.2071770308763372, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:01:32,635] [INFO] [logging.py:68:log_dist] [Rank 0] step=5930, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:01:32,651] [INFO] [timer.py:198:stop] 0/5930, RunningAvgSamplesPerSec=1.1592688367487602, CurrSamplesPerSec=1.2054998308571219, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:01:41,021] [INFO] [logging.py:68:log_dist] [Rank 0] step=5940, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:01:41,037] [INFO] [timer.py:198:stop] 0/5940, RunningAvgSamplesPerSec=1.1593313694712308, CurrSamplesPerSec=1.2041262305944174, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:01:49,468] [INFO] [logging.py:68:log_dist] [Rank 0] step=5950, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:01:49,484] [INFO] [timer.py:198:stop] 0/5950, RunningAvgSamplesPerSec=1.1593800409231374, CurrSamplesPerSec=1.1858245152155646, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:01:57,878] [INFO] [logging.py:68:log_dist] [Rank 0] step=5960, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:01:57,893] [INFO] [timer.py:198:stop] 0/5960, RunningAvgSamplesPerSec=1.1594372166844533, CurrSamplesPerSec=1.196594773479402, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:02:06,274] [INFO] [logging.py:68:log_dist] [Rank 0] step=5970, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:02:06,290] [INFO] [timer.py:198:stop] 0/5970, RunningAvgSamplesPerSec=1.1594967009233788, CurrSamplesPerSec=1.203262975952263, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:02:14,680] [INFO] [logging.py:68:log_dist] [Rank 0] step=5980, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:02:14,695] [INFO] [timer.py:198:stop] 0/5980, RunningAvgSamplesPerSec=1.1595544596771152, CurrSamplesPerSec=1.2035447188905235, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:02:23,080] [INFO] [logging.py:68:log_dist] [Rank 0] step=5990, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:02:23,096] [INFO] [timer.py:198:stop] 0/5990, RunningAvgSamplesPerSec=1.1596135050037861, CurrSamplesPerSec=1.2009439640189594, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:02:34,627] [INFO] [logging.py:68:log_dist] [Rank 0] step=6000, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:02:34,643] [INFO] [timer.py:198:stop] 0/6000, RunningAvgSamplesPerSec=1.1589670061106598, CurrSamplesPerSec=0.2505628892956706, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:02:43,028] [INFO] [logging.py:68:log_dist] [Rank 0] step=6010, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:02:43,044] [INFO] [timer.py:198:stop] 0/6010, RunningAvgSamplesPerSec=1.1590265976887806, CurrSamplesPerSec=1.1901847802460672, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:02:51,424] [INFO] [logging.py:68:log_dist] [Rank 0] step=6020, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:02:51,440] [INFO] [timer.py:198:stop] 0/6020, RunningAvgSamplesPerSec=1.1590870737526027, CurrSamplesPerSec=1.1937653194471167, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:02:59,801] [INFO] [logging.py:68:log_dist] [Rank 0] step=6030, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:02:59,817] [INFO] [timer.py:198:stop] 0/6030, RunningAvgSamplesPerSec=1.1591514653094142, CurrSamplesPerSec=1.1930810863703636, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:03:08,176] [INFO] [logging.py:68:log_dist] [Rank 0] step=6040, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:03:08,192] [INFO] [timer.py:198:stop] 0/6040, RunningAvgSamplesPerSec=1.1592161182715388, CurrSamplesPerSec=1.2007754947328884, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:03:16,570] [INFO] [logging.py:68:log_dist] [Rank 0] step=6050, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:03:16,585] [INFO] [timer.py:198:stop] 0/6050, RunningAvgSamplesPerSec=1.1592766897780349, CurrSamplesPerSec=1.2005586171620497, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:03:24,987] [INFO] [logging.py:68:log_dist] [Rank 0] step=6060, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:03:25,003] [INFO] [timer.py:198:stop] 0/6060, RunningAvgSamplesPerSec=1.1593315029895637, CurrSamplesPerSec=1.1960286969051221, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:03:33,387] [INFO] [logging.py:68:log_dist] [Rank 0] step=6070, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:03:33,402] [INFO] [timer.py:198:stop] 0/6070, RunningAvgSamplesPerSec=1.1593902029674212, CurrSamplesPerSec=1.2006393785753215, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:03:41,767] [INFO] [logging.py:68:log_dist] [Rank 0] step=6080, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:03:41,783] [INFO] [timer.py:198:stop] 0/6080, RunningAvgSamplesPerSec=1.159452745416707, CurrSamplesPerSec=1.1995868961065443, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:03:50,169] [INFO] [logging.py:68:log_dist] [Rank 0] step=6090, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:03:50,185] [INFO] [timer.py:198:stop] 0/6090, RunningAvgSamplesPerSec=1.159510411550704, CurrSamplesPerSec=1.1876968732478919, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:04:01,701] [INFO] [logging.py:68:log_dist] [Rank 0] step=6100, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:04:01,717] [INFO] [timer.py:198:stop] 0/6100, RunningAvgSamplesPerSec=1.1588782477461397, CurrSamplesPerSec=0.2510060459706336, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:04:10,107] [INFO] [logging.py:68:log_dist] [Rank 0] step=6110, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:04:10,122] [INFO] [timer.py:198:stop] 0/6110, RunningAvgSamplesPerSec=1.158936244029505, CurrSamplesPerSec=1.2027429958345783, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:04:18,503] [INFO] [logging.py:68:log_dist] [Rank 0] step=6120, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:04:18,519] [INFO] [timer.py:198:stop] 0/6120, RunningAvgSamplesPerSec=1.1589955657199413, CurrSamplesPerSec=1.1982458997191725, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:04:26,904] [INFO] [logging.py:68:log_dist] [Rank 0] step=6130, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:04:26,919] [INFO] [timer.py:198:stop] 0/6130, RunningAvgSamplesPerSec=1.159053934706277, CurrSamplesPerSec=1.1920543428246457, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:04:35,294] [INFO] [logging.py:68:log_dist] [Rank 0] step=6140, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:04:35,309] [INFO] [timer.py:198:stop] 0/6140, RunningAvgSamplesPerSec=1.159114352449531, CurrSamplesPerSec=1.190548963951178, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:04:43,692] [INFO] [logging.py:68:log_dist] [Rank 0] step=6150, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:04:43,708] [INFO] [timer.py:198:stop] 0/6150, RunningAvgSamplesPerSec=1.1591729067331376, CurrSamplesPerSec=1.1906858438168453, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:04:52,086] [INFO] [logging.py:68:log_dist] [Rank 0] step=6160, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:04:52,101] [INFO] [timer.py:198:stop] 0/6160, RunningAvgSamplesPerSec=1.159232695443641, CurrSamplesPerSec=1.196157288130953, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:05:00,476] [INFO] [logging.py:68:log_dist] [Rank 0] step=6170, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:05:00,491] [INFO] [timer.py:198:stop] 0/6170, RunningAvgSamplesPerSec=1.1592924941446843, CurrSamplesPerSec=1.2046235350409755, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:05:08,861] [INFO] [logging.py:68:log_dist] [Rank 0] step=6180, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:05:08,877] [INFO] [timer.py:198:stop] 0/6180, RunningAvgSamplesPerSec=1.1593531910563122, CurrSamplesPerSec=1.1967070970657694, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:05:17,246] [INFO] [logging.py:68:log_dist] [Rank 0] step=6190, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:05:17,262] [INFO] [timer.py:198:stop] 0/6190, RunningAvgSamplesPerSec=1.1594137559175064, CurrSamplesPerSec=1.1972601457160374, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:05:28,712] [INFO] [logging.py:68:log_dist] [Rank 0] step=6200, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:05:28,728] [INFO] [timer.py:198:stop] 0/6200, RunningAvgSamplesPerSec=1.158806217783768, CurrSamplesPerSec=0.2554942199993007, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:05:37,110] [INFO] [logging.py:68:log_dist] [Rank 0] step=6210, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:05:37,126] [INFO] [timer.py:198:stop] 0/6210, RunningAvgSamplesPerSec=1.1588650649837533, CurrSamplesPerSec=1.190073677642792, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:05:45,496] [INFO] [logging.py:68:log_dist] [Rank 0] step=6220, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:05:45,512] [INFO] [timer.py:198:stop] 0/6220, RunningAvgSamplesPerSec=1.1589255950769295, CurrSamplesPerSec=1.1959635591166364, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:05:53,883] [INFO] [logging.py:68:log_dist] [Rank 0] step=6230, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:05:53,899] [INFO] [timer.py:198:stop] 0/6230, RunningAvgSamplesPerSec=1.158986086977488, CurrSamplesPerSec=1.199848728178153, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:06:02,265] [INFO] [logging.py:68:log_dist] [Rank 0] step=6240, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:06:02,280] [INFO] [timer.py:198:stop] 0/6240, RunningAvgSamplesPerSec=1.1590474716145434, CurrSamplesPerSec=1.2026533300530862, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:06:10,673] [INFO] [logging.py:68:log_dist] [Rank 0] step=6250, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:06:10,689] [INFO] [timer.py:198:stop] 0/6250, RunningAvgSamplesPerSec=1.1591029571423666, CurrSamplesPerSec=1.198547216880174, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:06:19,076] [INFO] [logging.py:68:log_dist] [Rank 0] step=6260, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:06:19,092] [INFO] [timer.py:198:stop] 0/6260, RunningAvgSamplesPerSec=1.1591595871513796, CurrSamplesPerSec=1.1925949599596923, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:06:27,492] [INFO] [logging.py:68:log_dist] [Rank 0] step=6270, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:06:27,508] [INFO] [timer.py:198:stop] 0/6270, RunningAvgSamplesPerSec=1.1592132179304493, CurrSamplesPerSec=1.189500933608311, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:06:35,874] [INFO] [logging.py:68:log_dist] [Rank 0] step=6280, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:06:35,890] [INFO] [timer.py:198:stop] 0/6280, RunningAvgSamplesPerSec=1.1592736649601916, CurrSamplesPerSec=1.1941653562833343, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:06:44,266] [INFO] [logging.py:68:log_dist] [Rank 0] step=6290, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:06:44,282] [INFO] [timer.py:198:stop] 0/6290, RunningAvgSamplesPerSec=1.1593318174115936, CurrSamplesPerSec=1.203683567236811, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:06:55,899] [INFO] [logging.py:68:log_dist] [Rank 0] step=6300, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:06:55,915] [INFO] [timer.py:198:stop] 0/6300, RunningAvgSamplesPerSec=1.1586985347949486, CurrSamplesPerSec=0.24503232100763261, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:07:04,297] [INFO] [logging.py:68:log_dist] [Rank 0] step=6310, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:07:04,312] [INFO] [timer.py:198:stop] 0/6310, RunningAvgSamplesPerSec=1.1587567008538395, CurrSamplesPerSec=1.2071457619274713, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:07:12,697] [INFO] [logging.py:68:log_dist] [Rank 0] step=6320, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:07:12,713] [INFO] [timer.py:198:stop] 0/6320, RunningAvgSamplesPerSec=1.158813657323637, CurrSamplesPerSec=1.2001587505518927, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:07:21,076] [INFO] [logging.py:68:log_dist] [Rank 0] step=6330, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:07:21,091] [INFO] [timer.py:198:stop] 0/6330, RunningAvgSamplesPerSec=1.1588751296613022, CurrSamplesPerSec=1.2007607129493831, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:07:29,461] [INFO] [logging.py:68:log_dist] [Rank 0] step=6340, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:07:29,477] [INFO] [timer.py:198:stop] 0/6340, RunningAvgSamplesPerSec=1.1589348840567413, CurrSamplesPerSec=1.1893140758190899, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:07:37,859] [INFO] [logging.py:68:log_dist] [Rank 0] step=6350, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:07:37,874] [INFO] [timer.py:198:stop] 0/6350, RunningAvgSamplesPerSec=1.1589920070553843, CurrSamplesPerSec=1.1923311995733616, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:07:46,239] [INFO] [logging.py:68:log_dist] [Rank 0] step=6360, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:07:46,255] [INFO] [timer.py:198:stop] 0/6360, RunningAvgSamplesPerSec=1.1590527098460905, CurrSamplesPerSec=1.2037122389204225, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:07:54,625] [INFO] [logging.py:68:log_dist] [Rank 0] step=6370, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:07:54,640] [INFO] [timer.py:198:stop] 0/6370, RunningAvgSamplesPerSec=1.1591117358459246, CurrSamplesPerSec=1.1990419794365197, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:08:03,004] [INFO] [logging.py:68:log_dist] [Rank 0] step=6380, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:08:03,020] [INFO] [timer.py:198:stop] 0/6380, RunningAvgSamplesPerSec=1.1591719630685207, CurrSamplesPerSec=1.201202948776732, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:08:11,406] [INFO] [logging.py:68:log_dist] [Rank 0] step=6390, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:08:11,421] [INFO] [timer.py:198:stop] 0/6390, RunningAvgSamplesPerSec=1.1592275109596646, CurrSamplesPerSec=1.1990707732102293, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:08:22,851] [INFO] [logging.py:68:log_dist] [Rank 0] step=6400, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:08:22,866] [INFO] [timer.py:198:stop] 0/6400, RunningAvgSamplesPerSec=1.15864397901343, CurrSamplesPerSec=0.2564419648291553, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:08:31,240] [INFO] [logging.py:68:log_dist] [Rank 0] step=6410, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:08:31,255] [INFO] [timer.py:198:stop] 0/6410, RunningAvgSamplesPerSec=1.1587030754612082, CurrSamplesPerSec=1.2033896748960236, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:08:39,628] [INFO] [logging.py:68:log_dist] [Rank 0] step=6420, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:08:39,643] [INFO] [timer.py:198:stop] 0/6420, RunningAvgSamplesPerSec=1.1587619988887667, CurrSamplesPerSec=1.2012841409622237, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:08:48,031] [INFO] [logging.py:68:log_dist] [Rank 0] step=6430, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:08:48,046] [INFO] [timer.py:198:stop] 0/6430, RunningAvgSamplesPerSec=1.158817325657336, CurrSamplesPerSec=1.1997670435237298, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:08:56,473] [INFO] [logging.py:68:log_dist] [Rank 0] step=6440, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:08:56,489] [INFO] [timer.py:198:stop] 0/6440, RunningAvgSamplesPerSec=1.1588644518038667, CurrSamplesPerSec=1.2007820263345566, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:09:04,864] [INFO] [logging.py:68:log_dist] [Rank 0] step=6450, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:09:04,880] [INFO] [timer.py:198:stop] 0/6450, RunningAvgSamplesPerSec=1.1589220728805336, CurrSamplesPerSec=1.188523784196273, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:09:13,255] [INFO] [logging.py:68:log_dist] [Rank 0] step=6460, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:09:13,271] [INFO] [timer.py:198:stop] 0/6460, RunningAvgSamplesPerSec=1.1589798548535004, CurrSamplesPerSec=1.1994421311117713, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:09:21,643] [INFO] [logging.py:68:log_dist] [Rank 0] step=6470, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:09:21,659] [INFO] [timer.py:198:stop] 0/6470, RunningAvgSamplesPerSec=1.159038040581079, CurrSamplesPerSec=1.199771161811483, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:09:30,040] [INFO] [logging.py:68:log_dist] [Rank 0] step=6480, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:09:30,056] [INFO] [timer.py:198:stop] 0/6480, RunningAvgSamplesPerSec=1.159093998962152, CurrSamplesPerSec=1.1979697113285046, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:09:38,431] [INFO] [logging.py:68:log_dist] [Rank 0] step=6490, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:09:38,447] [INFO] [timer.py:198:stop] 0/6490, RunningAvgSamplesPerSec=1.1591510028785843, CurrSamplesPerSec=1.1985852346944987, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:09:50,143] [INFO] [logging.py:68:log_dist] [Rank 0] step=6500, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:09:50,159] [INFO] [timer.py:198:stop] 0/6500, RunningAvgSamplesPerSec=1.1585215398206732, CurrSamplesPerSec=0.24111935658335756, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:09:58,525] [INFO] [logging.py:68:log_dist] [Rank 0] step=6510, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:09:58,541] [INFO] [timer.py:198:stop] 0/6510, RunningAvgSamplesPerSec=1.1585814399858485, CurrSamplesPerSec=1.1997553751952965, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:10:06,911] [INFO] [logging.py:68:log_dist] [Rank 0] step=6520, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:10:06,927] [INFO] [timer.py:198:stop] 0/6520, RunningAvgSamplesPerSec=1.1586400020260228, CurrSamplesPerSec=1.1996610073504659, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:10:15,317] [INFO] [logging.py:68:log_dist] [Rank 0] step=6530, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:10:15,333] [INFO] [timer.py:198:stop] 0/6530, RunningAvgSamplesPerSec=1.1586941226531362, CurrSamplesPerSec=1.1945323206651075, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:10:23,714] [INFO] [logging.py:68:log_dist] [Rank 0] step=6540, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:10:23,730] [INFO] [timer.py:198:stop] 0/6540, RunningAvgSamplesPerSec=1.1587501754637117, CurrSamplesPerSec=1.1921647990108648, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:10:32,100] [INFO] [logging.py:68:log_dist] [Rank 0] step=6550, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:10:32,116] [INFO] [timer.py:198:stop] 0/6550, RunningAvgSamplesPerSec=1.1588082563317004, CurrSamplesPerSec=1.2057427601746216, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:10:40,490] [INFO] [logging.py:68:log_dist] [Rank 0] step=6560, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:10:40,505] [INFO] [timer.py:198:stop] 0/6560, RunningAvgSamplesPerSec=1.1588656100026127, CurrSamplesPerSec=1.2002459839457988, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:10:48,893] [INFO] [logging.py:68:log_dist] [Rank 0] step=6570, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:10:48,908] [INFO] [timer.py:198:stop] 0/6570, RunningAvgSamplesPerSec=1.158919771460779, CurrSamplesPerSec=1.1833203978760458, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:10:57,294] [INFO] [logging.py:68:log_dist] [Rank 0] step=6580, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:10:57,310] [INFO] [timer.py:198:stop] 0/6580, RunningAvgSamplesPerSec=1.1589740753735147, CurrSamplesPerSec=1.1946449378952133, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:11:05,690] [INFO] [logging.py:68:log_dist] [Rank 0] step=6590, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:11:05,706] [INFO] [timer.py:198:stop] 0/6590, RunningAvgSamplesPerSec=1.1590293525287063, CurrSamplesPerSec=1.2004122433174311, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:11:17,337] [INFO] [logging.py:68:log_dist] [Rank 0] step=6600, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:11:17,352] [INFO] [timer.py:198:stop] 0/6600, RunningAvgSamplesPerSec=1.1584229947752607, CurrSamplesPerSec=0.24530329987394758, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:11:25,759] [INFO] [logging.py:68:log_dist] [Rank 0] step=6610, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:11:25,775] [INFO] [timer.py:198:stop] 0/6610, RunningAvgSamplesPerSec=1.1584739511105147, CurrSamplesPerSec=1.1987674796868797, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:11:34,173] [INFO] [logging.py:68:log_dist] [Rank 0] step=6620, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:11:34,188] [INFO] [timer.py:198:stop] 0/6620, RunningAvgSamplesPerSec=1.1585260851117436, CurrSamplesPerSec=1.171835519073775, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:11:42,555] [INFO] [logging.py:68:log_dist] [Rank 0] step=6630, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:11:42,571] [INFO] [timer.py:198:stop] 0/6630, RunningAvgSamplesPerSec=1.1585844992631948, CurrSamplesPerSec=1.201613838929524, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:11:50,958] [INFO] [logging.py:68:log_dist] [Rank 0] step=6640, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:11:50,973] [INFO] [timer.py:198:stop] 0/6640, RunningAvgSamplesPerSec=1.158638674003851, CurrSamplesPerSec=1.1830864188476773, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:11:59,344] [INFO] [logging.py:68:log_dist] [Rank 0] step=6650, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:11:59,359] [INFO] [timer.py:198:stop] 0/6650, RunningAvgSamplesPerSec=1.1586958932424998, CurrSamplesPerSec=1.193028485543898, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:12:07,736] [INFO] [logging.py:68:log_dist] [Rank 0] step=6660, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:12:07,752] [INFO] [timer.py:198:stop] 0/6660, RunningAvgSamplesPerSec=1.158751910311366, CurrSamplesPerSec=1.197737770410254, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:12:16,133] [INFO] [logging.py:68:log_dist] [Rank 0] step=6670, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:12:16,149] [INFO] [timer.py:198:stop] 0/6670, RunningAvgSamplesPerSec=1.1588065237879999, CurrSamplesPerSec=1.1877012454101676, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:12:24,538] [INFO] [logging.py:68:log_dist] [Rank 0] step=6680, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:12:24,554] [INFO] [timer.py:198:stop] 0/6680, RunningAvgSamplesPerSec=1.1588596682609162, CurrSamplesPerSec=1.20391989030599, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:12:32,920] [INFO] [logging.py:68:log_dist] [Rank 0] step=6690, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:12:32,935] [INFO] [timer.py:198:stop] 0/6690, RunningAvgSamplesPerSec=1.1589171023307092, CurrSamplesPerSec=1.2029472078102728, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:12:44,328] [INFO] [logging.py:68:log_dist] [Rank 0] step=6700, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:12:44,344] [INFO] [timer.py:198:stop] 0/6700, RunningAvgSamplesPerSec=1.158367667195154, CurrSamplesPerSec=0.2589269939297935, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:12:52,707] [INFO] [logging.py:68:log_dist] [Rank 0] step=6710, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:12:52,723] [INFO] [timer.py:198:stop] 0/6710, RunningAvgSamplesPerSec=1.1584264697154447, CurrSamplesPerSec=1.1937530880501284, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:13:01,098] [INFO] [logging.py:68:log_dist] [Rank 0] step=6720, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:13:01,114] [INFO] [timer.py:198:stop] 0/6720, RunningAvgSamplesPerSec=1.1584826462719817, CurrSamplesPerSec=1.1883863908170513, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:13:09,488] [INFO] [logging.py:68:log_dist] [Rank 0] step=6730, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:13:09,503] [INFO] [timer.py:198:stop] 0/6730, RunningAvgSamplesPerSec=1.158538767183873, CurrSamplesPerSec=1.196621401480462, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:13:17,894] [INFO] [logging.py:68:log_dist] [Rank 0] step=6740, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:13:17,910] [INFO] [timer.py:198:stop] 0/6740, RunningAvgSamplesPerSec=1.158591345818074, CurrSamplesPerSec=1.1940184976897104, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:13:26,310] [INFO] [logging.py:68:log_dist] [Rank 0] step=6750, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:13:26,326] [INFO] [timer.py:198:stop] 0/6750, RunningAvgSamplesPerSec=1.158642157434812, CurrSamplesPerSec=1.1970800669791102, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:13:34,716] [INFO] [logging.py:68:log_dist] [Rank 0] step=6760, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:13:34,732] [INFO] [timer.py:198:stop] 0/6760, RunningAvgSamplesPerSec=1.158694559853147, CurrSamplesPerSec=1.2031048986111943, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:13:43,100] [INFO] [logging.py:68:log_dist] [Rank 0] step=6770, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:13:43,116] [INFO] [timer.py:198:stop] 0/6770, RunningAvgSamplesPerSec=1.1587510628199749, CurrSamplesPerSec=1.1975366908183802, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:13:51,496] [INFO] [logging.py:68:log_dist] [Rank 0] step=6780, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:13:51,511] [INFO] [timer.py:198:stop] 0/6780, RunningAvgSamplesPerSec=1.1588053777590654, CurrSamplesPerSec=1.199417435360118, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:13:59,900] [INFO] [logging.py:68:log_dist] [Rank 0] step=6790, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:13:59,916] [INFO] [timer.py:198:stop] 0/6790, RunningAvgSamplesPerSec=1.1588575565792225, CurrSamplesPerSec=1.192751644109161, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:14:11,628] [INFO] [logging.py:68:log_dist] [Rank 0] step=6800, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:14:11,644] [INFO] [timer.py:198:stop] 0/6800, RunningAvgSamplesPerSec=1.1582534760338288, CurrSamplesPerSec=0.23928143641519634, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:14:20,023] [INFO] [logging.py:68:log_dist] [Rank 0] step=6810, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:14:20,039] [INFO] [timer.py:198:stop] 0/6810, RunningAvgSamplesPerSec=1.1583085163324915, CurrSamplesPerSec=1.1963203692865154, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:14:28,407] [INFO] [logging.py:68:log_dist] [Rank 0] step=6820, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:14:28,422] [INFO] [timer.py:198:stop] 0/6820, RunningAvgSamplesPerSec=1.1583654273061397, CurrSamplesPerSec=1.198067577653803, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:14:36,798] [INFO] [logging.py:68:log_dist] [Rank 0] step=6830, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:14:36,814] [INFO] [timer.py:198:stop] 0/6830, RunningAvgSamplesPerSec=1.1584207241895608, CurrSamplesPerSec=1.1996610073504659, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:14:45,182] [INFO] [logging.py:68:log_dist] [Rank 0] step=6840, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:14:45,198] [INFO] [timer.py:198:stop] 0/6840, RunningAvgSamplesPerSec=1.1584772645164778, CurrSamplesPerSec=1.201047819657729, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:14:53,579] [INFO] [logging.py:68:log_dist] [Rank 0] step=6850, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:14:53,595] [INFO] [timer.py:198:stop] 0/6850, RunningAvgSamplesPerSec=1.1585310790496548, CurrSamplesPerSec=1.1927570711376554, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:15:01,964] [INFO] [logging.py:68:log_dist] [Rank 0] step=6860, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:15:01,979] [INFO] [timer.py:198:stop] 0/6860, RunningAvgSamplesPerSec=1.158587361815747, CurrSamplesPerSec=1.187951521269277, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:15:10,369] [INFO] [logging.py:68:log_dist] [Rank 0] step=6870, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:15:10,385] [INFO] [timer.py:198:stop] 0/6870, RunningAvgSamplesPerSec=1.1586391343541016, CurrSamplesPerSec=1.200293383783592, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:15:18,751] [INFO] [logging.py:68:log_dist] [Rank 0] step=6880, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:15:18,766] [INFO] [timer.py:198:stop] 0/6880, RunningAvgSamplesPerSec=1.1586955157726015, CurrSamplesPerSec=1.1992205917041336, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:15:27,159] [INFO] [logging.py:68:log_dist] [Rank 0] step=6890, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:15:27,175] [INFO] [timer.py:198:stop] 0/6890, RunningAvgSamplesPerSec=1.1587464465738073, CurrSamplesPerSec=1.202023632353013, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:15:38,701] [INFO] [logging.py:68:log_dist] [Rank 0] step=6900, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:15:38,717] [INFO] [timer.py:198:stop] 0/6900, RunningAvgSamplesPerSec=1.1581874998665342, CurrSamplesPerSec=0.2526920537302211, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:15:47,097] [INFO] [logging.py:68:log_dist] [Rank 0] step=6910, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:15:47,113] [INFO] [timer.py:198:stop] 0/6910, RunningAvgSamplesPerSec=1.1582416768493404, CurrSamplesPerSec=1.1982342609595922, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:15:55,489] [INFO] [logging.py:68:log_dist] [Rank 0] step=6920, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:15:55,504] [INFO] [timer.py:198:stop] 0/6920, RunningAvgSamplesPerSec=1.1582961919127317, CurrSamplesPerSec=1.2015474029664668, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:16:03,883] [INFO] [logging.py:68:log_dist] [Rank 0] step=6930, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:16:03,899] [INFO] [timer.py:198:stop] 0/6930, RunningAvgSamplesPerSec=1.1583499358190483, CurrSamplesPerSec=1.1913632707226511, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:16:12,285] [INFO] [logging.py:68:log_dist] [Rank 0] step=6940, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:16:12,301] [INFO] [timer.py:198:stop] 0/6940, RunningAvgSamplesPerSec=1.1584023472848144, CurrSamplesPerSec=1.202215539272394, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:16:20,687] [INFO] [logging.py:68:log_dist] [Rank 0] step=6950, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:16:20,703] [INFO] [timer.py:198:stop] 0/6950, RunningAvgSamplesPerSec=1.1584543223465549, CurrSamplesPerSec=1.2001467311963456, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:16:29,079] [INFO] [logging.py:68:log_dist] [Rank 0] step=6960, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:16:29,095] [INFO] [timer.py:198:stop] 0/6960, RunningAvgSamplesPerSec=1.158508642680439, CurrSamplesPerSec=1.1974597649563132, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:16:37,466] [INFO] [logging.py:68:log_dist] [Rank 0] step=6970, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:16:37,482] [INFO] [timer.py:198:stop] 0/6970, RunningAvgSamplesPerSec=1.1585633925110763, CurrSamplesPerSec=1.1968812573818737, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:16:45,891] [INFO] [logging.py:68:log_dist] [Rank 0] step=6980, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:16:45,906] [INFO] [timer.py:198:stop] 0/6980, RunningAvgSamplesPerSec=1.1586106294226133, CurrSamplesPerSec=1.1632555482214075, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:16:54,313] [INFO] [logging.py:68:log_dist] [Rank 0] step=6990, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:16:54,328] [INFO] [timer.py:198:stop] 0/6990, RunningAvgSamplesPerSec=1.1586584035218266, CurrSamplesPerSec=1.1960044825531162, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:17:05,779] [INFO] [logging.py:68:log_dist] [Rank 0] step=7000, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:17:05,795] [INFO] [timer.py:198:stop] 0/7000, RunningAvgSamplesPerSec=1.1581221504685528, CurrSamplesPerSec=0.25611170497093033, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:17:14,168] [INFO] [logging.py:68:log_dist] [Rank 0] step=7010, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:17:14,183] [INFO] [timer.py:198:stop] 0/7010, RunningAvgSamplesPerSec=1.1581768335931921, CurrSamplesPerSec=1.201903420276366, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:17:22,546] [INFO] [logging.py:68:log_dist] [Rank 0] step=7020, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:17:22,561] [INFO] [timer.py:198:stop] 0/7020, RunningAvgSamplesPerSec=1.158233200107117, CurrSamplesPerSec=1.1974929273382324, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:17:30,926] [INFO] [logging.py:68:log_dist] [Rank 0] step=7030, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:17:30,942] [INFO] [timer.py:198:stop] 0/7030, RunningAvgSamplesPerSec=1.1582891497018526, CurrSamplesPerSec=1.199975052398546, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:17:39,311] [INFO] [logging.py:68:log_dist] [Rank 0] step=7040, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:17:39,327] [INFO] [timer.py:198:stop] 0/7040, RunningAvgSamplesPerSec=1.1583438037609848, CurrSamplesPerSec=1.1932606428346677, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:17:47,688] [INFO] [logging.py:68:log_dist] [Rank 0] step=7050, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:17:47,704] [INFO] [timer.py:198:stop] 0/7050, RunningAvgSamplesPerSec=1.158400121277741, CurrSamplesPerSec=1.199767386713296, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:17:56,089] [INFO] [logging.py:68:log_dist] [Rank 0] step=7060, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:17:56,104] [INFO] [timer.py:198:stop] 0/7060, RunningAvgSamplesPerSec=1.1584518310416836, CurrSamplesPerSec=1.205755238503178, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:18:04,472] [INFO] [logging.py:68:log_dist] [Rank 0] step=7070, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:18:04,488] [INFO] [timer.py:198:stop] 0/7070, RunningAvgSamplesPerSec=1.1585063855609028, CurrSamplesPerSec=1.196880915841457, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:18:12,858] [INFO] [logging.py:68:log_dist] [Rank 0] step=7080, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:18:12,874] [INFO] [timer.py:198:stop] 0/7080, RunningAvgSamplesPerSec=1.1585604117493673, CurrSamplesPerSec=1.200985572896459, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:18:21,290] [INFO] [logging.py:68:log_dist] [Rank 0] step=7090, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:18:21,305] [INFO] [timer.py:198:stop] 0/7090, RunningAvgSamplesPerSec=1.1586057241345673, CurrSamplesPerSec=1.1884325218161016, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:18:32,992] [INFO] [logging.py:68:log_dist] [Rank 0] step=7100, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:18:33,008] [INFO] [timer.py:198:stop] 0/7100, RunningAvgSamplesPerSec=1.158032419203838, CurrSamplesPerSec=0.2409309485310797, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:18:41,363] [INFO] [logging.py:68:log_dist] [Rank 0] step=7110, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:18:41,379] [INFO] [timer.py:198:stop] 0/7110, RunningAvgSamplesPerSec=1.1580901027690644, CurrSamplesPerSec=1.2059608214775428, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:18:49,763] [INFO] [logging.py:68:log_dist] [Rank 0] step=7120, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:18:49,778] [INFO] [timer.py:198:stop] 0/7120, RunningAvgSamplesPerSec=1.1581417717631595, CurrSamplesPerSec=1.2008363445114103, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:18:58,162] [INFO] [logging.py:68:log_dist] [Rank 0] step=7130, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:18:58,178] [INFO] [timer.py:198:stop] 0/7130, RunningAvgSamplesPerSec=1.1581933726558222, CurrSamplesPerSec=1.2021097589391476, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:19:06,564] [INFO] [logging.py:68:log_dist] [Rank 0] step=7140, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:19:06,580] [INFO] [timer.py:198:stop] 0/7140, RunningAvgSamplesPerSec=1.1582443597823926, CurrSamplesPerSec=1.2003912866036999, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:19:14,944] [INFO] [logging.py:68:log_dist] [Rank 0] step=7150, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:19:14,960] [INFO] [timer.py:198:stop] 0/7150, RunningAvgSamplesPerSec=1.1582991501868334, CurrSamplesPerSec=1.199841520261234, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:19:23,342] [INFO] [logging.py:68:log_dist] [Rank 0] step=7160, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:19:23,358] [INFO] [timer.py:198:stop] 0/7160, RunningAvgSamplesPerSec=1.1583507477995194, CurrSamplesPerSec=1.189493512143355, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:19:31,734] [INFO] [logging.py:68:log_dist] [Rank 0] step=7170, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:19:31,750] [INFO] [timer.py:198:stop] 0/7170, RunningAvgSamplesPerSec=1.1584032080016626, CurrSamplesPerSec=1.2009419008438624, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:19:40,124] [INFO] [logging.py:68:log_dist] [Rank 0] step=7180, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:19:40,140] [INFO] [timer.py:198:stop] 0/7180, RunningAvgSamplesPerSec=1.1584560273931517, CurrSamplesPerSec=1.2009016703463207, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:19:48,508] [INFO] [logging.py:68:log_dist] [Rank 0] step=7190, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:19:48,524] [INFO] [timer.py:198:stop] 0/7190, RunningAvgSamplesPerSec=1.158509533042569, CurrSamplesPerSec=1.2005036368845612, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:20:00,126] [INFO] [logging.py:68:log_dist] [Rank 0] step=7200, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:20:00,142] [INFO] [timer.py:198:stop] 0/7200, RunningAvgSamplesPerSec=1.1579603148072914, CurrSamplesPerSec=0.24678201977913963, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:20:08,524] [INFO] [logging.py:68:log_dist] [Rank 0] step=7210, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:20:08,540] [INFO] [timer.py:198:stop] 0/7210, RunningAvgSamplesPerSec=1.1580120644909242, CurrSamplesPerSec=1.1934215767729306, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:20:16,892] [INFO] [logging.py:68:log_dist] [Rank 0] step=7220, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:20:16,908] [INFO] [timer.py:198:stop] 0/7220, RunningAvgSamplesPerSec=1.1580687534717484, CurrSamplesPerSec=1.2010577934954576, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:20:25,292] [INFO] [logging.py:68:log_dist] [Rank 0] step=7230, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:20:25,307] [INFO] [timer.py:198:stop] 0/7230, RunningAvgSamplesPerSec=1.1581198104692503, CurrSamplesPerSec=1.2049484917545725, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:20:33,687] [INFO] [logging.py:68:log_dist] [Rank 0] step=7240, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:20:33,703] [INFO] [timer.py:198:stop] 0/7240, RunningAvgSamplesPerSec=1.1581716000173439, CurrSamplesPerSec=1.1989676019212965, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:20:42,069] [INFO] [logging.py:68:log_dist] [Rank 0] step=7250, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:20:42,085] [INFO] [timer.py:198:stop] 0/7250, RunningAvgSamplesPerSec=1.1582254894518051, CurrSamplesPerSec=1.2013419454772354, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:20:50,497] [INFO] [logging.py:68:log_dist] [Rank 0] step=7260, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:20:50,512] [INFO] [timer.py:198:stop] 0/7260, RunningAvgSamplesPerSec=1.15827111092452, CurrSamplesPerSec=1.1933339744359388, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:20:58,889] [INFO] [logging.py:68:log_dist] [Rank 0] step=7270, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:20:58,905] [INFO] [timer.py:198:stop] 0/7270, RunningAvgSamplesPerSec=1.1583230269325213, CurrSamplesPerSec=1.1992737399817577, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:21:07,278] [INFO] [logging.py:68:log_dist] [Rank 0] step=7280, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:21:07,293] [INFO] [timer.py:198:stop] 0/7280, RunningAvgSamplesPerSec=1.158375198191718, CurrSamplesPerSec=1.198380446500077, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:21:15,657] [INFO] [logging.py:68:log_dist] [Rank 0] step=7290, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:21:15,673] [INFO] [timer.py:198:stop] 0/7290, RunningAvgSamplesPerSec=1.158429004938239, CurrSamplesPerSec=1.199963380023803, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:21:27,115] [INFO] [logging.py:68:log_dist] [Rank 0] step=7300, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:21:27,131] [INFO] [timer.py:198:stop] 0/7300, RunningAvgSamplesPerSec=1.1579167093364473, CurrSamplesPerSec=0.25501243384308414, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:21:35,532] [INFO] [logging.py:68:log_dist] [Rank 0] step=7310, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:21:35,547] [INFO] [timer.py:198:stop] 0/7310, RunningAvgSamplesPerSec=1.1579646518106754, CurrSamplesPerSec=1.2007937146412067, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:21:43,915] [INFO] [logging.py:68:log_dist] [Rank 0] step=7320, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:21:43,931] [INFO] [timer.py:198:stop] 0/7320, RunningAvgSamplesPerSec=1.1580179714672438, CurrSamplesPerSec=1.1978646768056058, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:21:52,300] [INFO] [logging.py:68:log_dist] [Rank 0] step=7330, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:21:52,316] [INFO] [timer.py:198:stop] 0/7330, RunningAvgSamplesPerSec=1.1580709589815525, CurrSamplesPerSec=1.2002789573496877, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:22:00,705] [INFO] [logging.py:68:log_dist] [Rank 0] step=7340, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:22:00,720] [INFO] [timer.py:198:stop] 0/7340, RunningAvgSamplesPerSec=1.1581202385778306, CurrSamplesPerSec=1.1974334414866055, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:22:09,099] [INFO] [logging.py:68:log_dist] [Rank 0] step=7350, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:22:09,114] [INFO] [timer.py:198:stop] 0/7350, RunningAvgSamplesPerSec=1.1581714820109217, CurrSamplesPerSec=1.188933795870838, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:22:17,487] [INFO] [logging.py:68:log_dist] [Rank 0] step=7360, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:22:17,503] [INFO] [timer.py:198:stop] 0/7360, RunningAvgSamplesPerSec=1.1582237161784992, CurrSamplesPerSec=1.191700749660117, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:22:25,903] [INFO] [logging.py:68:log_dist] [Rank 0] step=7370, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:22:25,918] [INFO] [timer.py:198:stop] 0/7370, RunningAvgSamplesPerSec=1.1582705197339145, CurrSamplesPerSec=1.186884882208657, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:22:34,299] [INFO] [logging.py:68:log_dist] [Rank 0] step=7380, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:22:34,315] [INFO] [timer.py:198:stop] 0/7380, RunningAvgSamplesPerSec=1.1583205529252494, CurrSamplesPerSec=1.2050842018131267, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:22:42,674] [INFO] [logging.py:68:log_dist] [Rank 0] step=7390, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:22:42,689] [INFO] [timer.py:198:stop] 0/7390, RunningAvgSamplesPerSec=1.1583741725108847, CurrSamplesPerSec=1.2034483728227456, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:22:54,149] [INFO] [logging.py:68:log_dist] [Rank 0] step=7400, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:22:54,164] [INFO] [timer.py:198:stop] 0/7400, RunningAvgSamplesPerSec=1.1578654619195954, CurrSamplesPerSec=0.2547719352061216, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:23:02,556] [INFO] [logging.py:68:log_dist] [Rank 0] step=7410, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:23:02,572] [INFO] [timer.py:198:stop] 0/7410, RunningAvgSamplesPerSec=1.157913725643581, CurrSamplesPerSec=1.1937316837085081, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:23:10,977] [INFO] [logging.py:68:log_dist] [Rank 0] step=7420, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:23:10,993] [INFO] [timer.py:198:stop] 0/7420, RunningAvgSamplesPerSec=1.157959179292855, CurrSamplesPerSec=1.169607250704739, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:23:19,373] [INFO] [logging.py:68:log_dist] [Rank 0] step=7430, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:23:19,388] [INFO] [timer.py:198:stop] 0/7430, RunningAvgSamplesPerSec=1.1580091125686247, CurrSamplesPerSec=1.197970395652888, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:23:27,769] [INFO] [logging.py:68:log_dist] [Rank 0] step=7440, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:23:27,784] [INFO] [timer.py:198:stop] 0/7440, RunningAvgSamplesPerSec=1.158059075611012, CurrSamplesPerSec=1.1961190830760562, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:23:36,173] [INFO] [logging.py:68:log_dist] [Rank 0] step=7450, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:23:36,188] [INFO] [timer.py:198:stop] 0/7450, RunningAvgSamplesPerSec=1.1581072145747917, CurrSamplesPerSec=1.1919618599850406, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:23:44,564] [INFO] [logging.py:68:log_dist] [Rank 0] step=7460, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:23:44,580] [INFO] [timer.py:198:stop] 0/7460, RunningAvgSamplesPerSec=1.1581576366004933, CurrSamplesPerSec=1.1995995904382197, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:23:52,982] [INFO] [logging.py:68:log_dist] [Rank 0] step=7470, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:23:52,998] [INFO] [timer.py:198:stop] 0/7470, RunningAvgSamplesPerSec=1.1582030523154891, CurrSamplesPerSec=1.1926064894245594, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:24:01,380] [INFO] [logging.py:68:log_dist] [Rank 0] step=7480, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:24:01,396] [INFO] [timer.py:198:stop] 0/7480, RunningAvgSamplesPerSec=1.1582518760641165, CurrSamplesPerSec=1.1919151158779209, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:24:09,783] [INFO] [logging.py:68:log_dist] [Rank 0] step=7490, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:24:09,798] [INFO] [timer.py:198:stop] 0/7490, RunningAvgSamplesPerSec=1.1582998052570095, CurrSamplesPerSec=1.1901543853754986, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:24:21,405] [INFO] [logging.py:68:log_dist] [Rank 0] step=7500, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:24:21,420] [INFO] [timer.py:198:stop] 0/7500, RunningAvgSamplesPerSec=1.1577716709293953, CurrSamplesPerSec=0.2458396530078857, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:24:29,791] [INFO] [logging.py:68:log_dist] [Rank 0] step=7510, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:24:29,807] [INFO] [timer.py:198:stop] 0/7510, RunningAvgSamplesPerSec=1.157823121534585, CurrSamplesPerSec=1.196834809674541, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:24:38,178] [INFO] [logging.py:68:log_dist] [Rank 0] step=7520, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:24:38,194] [INFO] [timer.py:198:stop] 0/7520, RunningAvgSamplesPerSec=1.1578740755375452, CurrSamplesPerSec=1.1976700523949808, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:24:46,571] [INFO] [logging.py:68:log_dist] [Rank 0] step=7530, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:24:46,587] [INFO] [timer.py:198:stop] 0/7530, RunningAvgSamplesPerSec=1.1579238470997353, CurrSamplesPerSec=1.204232020373417, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:24:54,962] [INFO] [logging.py:68:log_dist] [Rank 0] step=7540, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:24:54,978] [INFO] [timer.py:198:stop] 0/7540, RunningAvgSamplesPerSec=1.157973772310102, CurrSamplesPerSec=1.2023785532312083, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:25:03,340] [INFO] [logging.py:68:log_dist] [Rank 0] step=7550, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:25:03,356] [INFO] [timer.py:198:stop] 0/7550, RunningAvgSamplesPerSec=1.1580260639481383, CurrSamplesPerSec=1.2000437180233274, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:25:11,743] [INFO] [logging.py:68:log_dist] [Rank 0] step=7560, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:25:11,759] [INFO] [timer.py:198:stop] 0/7560, RunningAvgSamplesPerSec=1.1580739458683564, CurrSamplesPerSec=1.1924796775704973, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:25:20,149] [INFO] [logging.py:68:log_dist] [Rank 0] step=7570, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:25:20,165] [INFO] [timer.py:198:stop] 0/7570, RunningAvgSamplesPerSec=1.158120909595654, CurrSamplesPerSec=1.199421551248106, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:25:28,557] [INFO] [logging.py:68:log_dist] [Rank 0] step=7580, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:25:28,572] [INFO] [timer.py:198:stop] 0/7580, RunningAvgSamplesPerSec=1.1581676099375033, CurrSamplesPerSec=1.1900399119649925, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n",
"[2023-08-29 23:25:36,964] [INFO] [logging.py:68:log_dist] [Rank 0] step=7590, skipped=31, lr=[5e-05], mom=[[0.9, 0.999]]\n",
"[2023-08-29 23:25:36,980] [INFO] [timer.py:198:stop] 0/7590, RunningAvgSamplesPerSec=1.158214134039374, CurrSamplesPerSec=1.2005318136476892, MemAllocated=3.12GB, MaxMemAllocated=4.82GB\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Saving model checkpoint to ./results/checkpoint-7596\n",
"Configuration saved in ./results/checkpoint-7596/config.json\n",
"Model weights saved in ./results/checkpoint-7596/pytorch_model.bin\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[2023-08-29 23:25:44,538] [INFO] [logging.py:68:log_dist] [Rank 0] [Torch] Checkpoint global_step7596 is begin to save!\n",
"[2023-08-29 23:25:44,541] [INFO] [logging.py:68:log_dist] [Rank 0] Saving model checkpoint: ./results/checkpoint-7596/global_step7596/mp_rank_00_model_states.pt\n",
"[2023-08-29 23:25:44,541] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving ./results/checkpoint-7596/global_step7596/mp_rank_00_model_states.pt...\n",
"[2023-08-29 23:25:47,075] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved ./results/checkpoint-7596/global_step7596/mp_rank_00_model_states.pt.\n",
"[2023-08-29 23:25:47,076] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving ./results/checkpoint-7596/global_step7596/zero_pp_rank_0_mp_rank_00_optim_states.pt...\n",
"[2023-08-29 23:25:57,659] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved ./results/checkpoint-7596/global_step7596/zero_pp_rank_0_mp_rank_00_optim_states.pt.\n",
"[2023-08-29 23:25:57,661] [INFO] [engine.py:3196:_save_zero_checkpoint] zero checkpoint saved ./results/checkpoint-7596/global_step7596/zero_pp_rank_0_mp_rank_00_optim_states.pt\n",
"[2023-08-29 23:25:57,662] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7596 is ready now!\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"\n",
"Training completed. Do not forget to share your model on huggingface.co/models =)\n",
"\n",
"\n"
]
},
{
"data": {
"text/plain": [
"TrainOutput(global_step=7596, training_loss=0.5336930856006656, metrics={'train_runtime': 6606.6123, 'train_samples_per_second': 1.15, 'train_steps_per_second': 1.15, 'total_flos': 3194440023801856.0, 'train_loss': 0.5336930856006656, 'epoch': 1.0})"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"Trainer(model=model, args=training_args, train_dataset=train_dataset,\n",
" eval_dataset=val_dataset, data_collator=lambda data: {'input_ids': torch.stack([f[0] for f in data]),\n",
" 'attention_mask': torch.stack([f[1] for f in data]),\n",
" 'labels': torch.stack([f[0] for f in data])}).train()\n",
"\n",
"\n"
]
},
{
"cell_type": "markdown",
"id": "7d69539d",
"metadata": {},
"source": [
"## STORING THE TRAINED MODEL"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "bda753e0",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"tokenizer config file saved in ./gpt1_3B_deepspeed_tokenizer/tokenizer_config.json\n",
"Special tokens file saved in ./gpt1_3B_deepspeed_tokenizer/special_tokens_map.json\n",
"Configuration saved in ./gpt1_3B_deepspeed_model/config.json\n",
"Model weights saved in ./gpt1_3B_deepspeed_model/pytorch_model.bin\n"
]
}
],
"source": [
"tokenizer.save_pretrained('./gpt1_3B_deepspeed_tokenizer/')\n",
"model.save_pretrained('./gpt1_3B_deepspeed_model')"
]
},
{
"cell_type": "markdown",
"id": "b4095ac6",
"metadata": {},
"source": [
"## LOADING AND INFERENCE"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "4616e778",
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"loading file vocab.json\n",
"loading file merges.txt\n",
"loading file tokenizer.json\n",
"loading file added_tokens.json\n",
"loading file special_tokens_map.json\n",
"loading file tokenizer_config.json\n",
"Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
"loading configuration file ./gpt1_3B_deepspeed_model/config.json\n",
"Model config GPTNeoConfig {\n",
" \"_name_or_path\": \"./gpt1_3B_deepspeed_model\",\n",
" \"activation_function\": \"gelu_new\",\n",
" \"architectures\": [\n",
" \"GPTNeoForCausalLM\"\n",
" ],\n",
" \"attention_dropout\": 0,\n",
" \"attention_layers\": [\n",
" \"global\",\n",
" \"local\",\n",
" \"global\",\n",
" \"local\",\n",
" \"global\",\n",
" \"local\",\n",
" \"global\",\n",
" \"local\",\n",
" \"global\",\n",
" \"local\",\n",
" \"global\",\n",
" \"local\",\n",
" \"global\",\n",
" \"local\",\n",
" \"global\",\n",
" \"local\",\n",
" \"global\",\n",
" \"local\",\n",
" \"global\",\n",
" \"local\",\n",
" \"global\",\n",
" \"local\",\n",
" \"global\",\n",
" \"local\"\n",
" ],\n",
" \"attention_types\": [\n",
" [\n",
" [\n",
" \"global\",\n",
" \"local\"\n",
" ],\n",
" 12\n",
" ]\n",
" ],\n",
" \"bos_token_id\": 50256,\n",
" \"embed_dropout\": 0,\n",
" \"eos_token_id\": 50256,\n",
" \"gradient_checkpointing\": false,\n",
" \"hidden_size\": 2048,\n",
" \"initializer_range\": 0.02,\n",
" \"intermediate_size\": null,\n",
" \"layer_norm_epsilon\": 1e-05,\n",
" \"max_position_embeddings\": 2048,\n",
" \"model_type\": \"gpt_neo\",\n",
" \"num_heads\": 16,\n",
" \"num_layers\": 24,\n",
" \"resid_dropout\": 0,\n",
" \"summary_activation\": null,\n",
" \"summary_first_dropout\": 0.1,\n",
" \"summary_proj_to_labels\": true,\n",
" \"summary_type\": \"cls_index\",\n",
" \"summary_use_proj\": true,\n",
" \"task_specific_params\": {\n",
" \"text-generation\": {\n",
" \"do_sample\": true,\n",
" \"max_length\": 50,\n",
" \"temperature\": 0.9\n",
" }\n",
" },\n",
" \"tokenizer_class\": \"GPT2Tokenizer\",\n",
" \"torch_dtype\": \"float16\",\n",
" \"transformers_version\": \"4.25.1\",\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 50258,\n",
" \"window_size\": 256\n",
"}\n",
"\n",
"loading weights file ./gpt1_3B_deepspeed_model/pytorch_model.bin\n",
"All model checkpoint weights were used when initializing GPTNeoForCausalLM.\n",
"\n",
"All the weights of GPTNeoForCausalLM were initialized from the model checkpoint at ./gpt1_3B_deepspeed_model.\n",
"If your task is similar to the task the model of the checkpoint was trained on, you can already use GPTNeoForCausalLM for predictions without further training.\n"
]
}
],
"source": [
"# Reload the tokenizer and model from the directories written by the save step,\n",
"# registering explicit pad/bos/eos token strings on the tokenizer.\n",
"special_tokens = {\n",
"    'pad_token': '<|pad|>',\n",
"    'bos_token': '<|startoftext|>',\n",
"    'eos_token': '<|endoftext|>',\n",
"}\n",
"tokenizer = AutoTokenizer.from_pretrained('./gpt1_3B_deepspeed_tokenizer/', **special_tokens)\n",
"model = AutoModelForCausalLM.from_pretrained('./gpt1_3B_deepspeed_model')\n",
"model = model.cuda()  # move the weights to the GPU for generation"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "e671764f",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
"Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"0: SAD:You can learn a new activity in a virtual skill session when you reach the exhilarating Surprise at your day time\n",
"1: SAD:Explore hobbies about playing in a virtual playing day.\n",
"2: SAD:Cultivate a sense of growth within relationships who encourage emotional discovery.\n",
"3: SAD:Accept in some spiritual texts.\n",
"4: SAD: Take up potteryoamining as a project within for your community's benefit.\n",
"5: SAD:Seek out the humor.\n",
"6: SAD:Explore the grief community.\n",
"7: SAD:To reach up and improve mental health, make.\n",
"8: SAD: Try arts therapy for healing, like ceramics and artistic quotes.\n",
"9: SAD:Time each day for bed a deep breathing exercises: stretching before bed by enjoying each achievement and reflecting at the events\n"
]
}
],
"source": [
"# Sample ten continuations of the 'SAD:' prompt, print each one, and collect the\n",
"# text that follows the emotion tag in `outputs`.\n",
"input_sentence = \"\"\n",
"outputs = []\n",
"prompt = f'SAD:{input_sentence}'\n",
"\n",
"# Keep the whole tokenizer result so the attention mask can be handed to generate().\n",
"encoded = tokenizer(prompt, return_tensors=\"pt\").to(model.device)\n",
"generated = encoded.input_ids\n",
"sample_outputs = model.generate(\n",
"    generated,\n",
"    attention_mask=encoded.attention_mask,\n",
"    # Same id generate() falls back to when none is given; passing it explicitly\n",
"    # avoids the 'pad token id was not set' warning without changing sampling.\n",
"    pad_token_id=model.config.eos_token_id,\n",
"    do_sample=True, top_k=50, top_p=0.95, temperature=1.9,\n",
"    max_length=30, num_return_sequences=10,\n",
")\n",
"\n",
"for i, sample_output in enumerate(sample_outputs):\n",
"    # Decode once and reuse the text for both printing and collection.\n",
"    text = tokenizer.decode(sample_output, skip_special_tokens=True)\n",
"    print(\"{}: {}\".format(i, text))\n",
"    # Split on the first colon only: a completion may itself contain ':' and\n",
"    # splitting on every colon would keep just its tail.\n",
"    outputs.append(text.split(':', 1)[-1])"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "497480dc",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
"Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"0: ANGRY:A positive physical imagery can be used in a mantra like, future for addressing anger assertively face assert your\n",
"1: ANGRY: Reflect on the physical sensations anger managed using physical activities.\n",
"2: ANGRY:Imagine that overcoming even challenging goals you'll reach after addressing anger-related situations.\n",
"3: ANGRY:Accept it difficult time.\n",
"4: ANGRY:Time to disengage from physical symptoms is an activity the can be done within with practicing forgiveness.\n",
"5: ANGRY:Time-outs remind you that stress-based training challenges will not impact your relationships.\n",
"6: ANGRY:Communication skills training can be learned and learned in conflicts.\n",
"7: ANGRY:Write letters through a message you should leave a calming environment.\n",
"8: ANGRY:If anger has consequences consequences that reflect on your past feelings of responsibility, consider action.\n",
"9: ANGRY: Use guided breaths for stress and to help manage anger.\n"
]
}
],
"source": [
"# Sample ten continuations of the 'ANGRY:' prompt, print each one, and collect the\n",
"# text that follows the emotion tag in `outputs`.\n",
"input_sentence = \"\"\n",
"outputs = []\n",
"prompt = f'ANGRY:{input_sentence}'\n",
"\n",
"# Keep the whole tokenizer result so the attention mask can be handed to generate().\n",
"encoded = tokenizer(prompt, return_tensors=\"pt\").to(model.device)\n",
"generated = encoded.input_ids\n",
"sample_outputs = model.generate(\n",
"    generated,\n",
"    attention_mask=encoded.attention_mask,\n",
"    # Same id generate() falls back to when none is given; passing it explicitly\n",
"    # avoids the 'pad token id was not set' warning without changing sampling.\n",
"    pad_token_id=model.config.eos_token_id,\n",
"    do_sample=True, top_k=50, top_p=0.95, temperature=1.9,\n",
"    max_length=30, num_return_sequences=10,\n",
")\n",
"\n",
"for i, sample_output in enumerate(sample_outputs):\n",
"    # Decode once and reuse the text for both printing and collection.\n",
"    text = tokenizer.decode(sample_output, skip_special_tokens=True)\n",
"    print(\"{}: {}\".format(i, text))\n",
"    # Split on the first colon only: a completion may itself contain ':' and\n",
"    # splitting on every colon would keep just its tail.\n",
"    outputs.append(text.split(':', 1)[-1])"
]
},
{
"cell_type": "markdown",
"id": "a074fb55",
"metadata": {},
"source": [
"----- end of notebook"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}