diff --git "a/run.log" "b/run.log" --- "a/run.log" +++ "b/run.log" @@ -1,13 +1,13 @@ -[2022-12-16 11:39:50,518] [WARNING] [runner.py:179:fetch_hostfile] Unable to find hostfile, will proceed with training with local resources only. -[2022-12-16 11:39:50,529] [INFO] [runner.py:508:main] cmd = /home/milan/hf_env/bin/python3 -u -m deepspeed.launcher.launch --world_info=eyJsb2NhbGhvc3QiOiBbMF19 --master_addr=127.0.0.1 --master_port=29500 run_speech_recognition_seq2seq_streaming.py --deepspeed=ds_config.json --model_name_or_path=openai/whisper-large-v2 --dataset_name=mozilla-foundation/common_voice_11_0 --dataset_config_name=cs --language=czech --train_split_name=train+validation --eval_split_name=test --model_index_name=Whisper Large-v2 Czech CV11 v2 --max_steps=5000 --output_dir=./ --per_device_train_batch_size=32 --per_device_eval_batch_size=8 --gradient_accumulation_steps=2 --logging_steps=25 --learning_rate=1e-5 --warmup_steps=500 --evaluation_strategy=steps --eval_steps=1000 --save_strategy=steps --save_steps=1000 --generation_max_length=225 --length_column_name=input_length --max_duration_in_seconds=30 --text_column_name=sentence --freeze_feature_encoder=False --report_to=tensorboard --metric_for_best_model=wer --greater_is_better=False --load_best_model_at_end --gradient_checkpointing --fp16 --overwrite_output_dir --do_train --do_eval --predict_with_generate --do_normalize_eval --streaming=False --use_auth_token --push_to_hub -[2022-12-16 11:39:52,063] [INFO] [launch.py:142:main] WORLD INFO DICT: {'localhost': [0]} -[2022-12-16 11:39:52,063] [INFO] [launch.py:148:main] nnodes=1, num_local_procs=1, node_rank=0 -[2022-12-16 11:39:52,063] [INFO] [launch.py:161:main] global_rank_mapping=defaultdict(, {'localhost': [0]}) -[2022-12-16 11:39:52,063] [INFO] [launch.py:162:main] dist_world_size=1 -[2022-12-16 11:39:52,063] [INFO] [launch.py:164:main] Setting CUDA_VISIBLE_DEVICES=0 -[2022-12-16 11:39:56,169] [INFO] [comm.py:654:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl -12/16/2022 11:39:56 - WARNING - __main__ - Process rank: 0, device: cuda:0, n_gpu: 1distributed training: True, 16-bits training: True -12/16/2022 11:39:56 - INFO - __main__ - Training/evaluation parameters Seq2SeqTrainingArguments( +[2022-12-16 19:19:56,902] [WARNING] [runner.py:179:fetch_hostfile] Unable to find hostfile, will proceed with training with local resources only. +[2022-12-16 19:19:56,998] [INFO] [runner.py:508:main] cmd = /home/milan/hf_env/bin/python3 -u -m deepspeed.launcher.launch --world_info=eyJsb2NhbGhvc3QiOiBbMF19 --master_addr=127.0.0.1 --master_port=29500 run_speech_recognition_seq2seq_streaming.py --deepspeed=ds_config.json --model_name_or_path=openai/whisper-large-v2 --dataset_name=mozilla-foundation/common_voice_11_0 --dataset_config_name=cs --language=czech --train_split_name=train+validation --eval_split_name=test --model_index_name=Whisper Large-v2 Czech CV11 v2 --max_steps=5000 --output_dir=./ --per_device_train_batch_size=32 --per_device_eval_batch_size=8 --gradient_accumulation_steps=2 --logging_steps=25 --learning_rate=1e-5 --warmup_steps=500 --evaluation_strategy=steps --eval_steps=1000 --save_strategy=steps --save_steps=1000 --generation_max_length=225 --length_column_name=input_length --max_duration_in_seconds=30 --text_column_name=sentence --freeze_feature_encoder=False --report_to=tensorboard --metric_for_best_model=wer --greater_is_better=False --load_best_model_at_end --gradient_checkpointing --fp16 --overwrite_output_dir --do_train --do_eval --predict_with_generate --do_normalize_eval --streaming=False --use_auth_token --push_to_hub +[2022-12-16 19:19:58,537] [INFO] [launch.py:142:main] WORLD INFO DICT: {'localhost': [0]} +[2022-12-16 19:19:58,537] [INFO] [launch.py:148:main] nnodes=1, num_local_procs=1, node_rank=0 +[2022-12-16 19:19:58,537] [INFO] [launch.py:161:main] global_rank_mapping=defaultdict(, {'localhost': [0]}) +[2022-12-16 19:19:58,537] [INFO] [launch.py:162:main] dist_world_size=1 +[2022-12-16 19:19:58,537] [INFO] [launch.py:164:main] Setting CUDA_VISIBLE_DEVICES=0 +[2022-12-16 19:20:02,860] [INFO] [comm.py:654:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl +12/16/2022 19:20:03 - WARNING - __main__ - Process rank: 0, device: cuda:0, n_gpu: 1distributed training: True, 16-bits training: True +12/16/2022 19:20:03 - INFO - __main__ - Training/evaluation parameters Seq2SeqTrainingArguments( _n_gpu=1, adafactor=False, adam_beta1=0.9, @@ -64,7 +64,7 @@ local_rank=0, log_level=passive, log_level_replica=passive, log_on_each_node=True, -logging_dir=./runs/Dec16_11-39-56_129-146-123-136, +logging_dir=./runs/Dec16_19-20-02_129-146-123-136, logging_first_step=False, logging_nan_inf_filter=True, logging_steps=25, @@ -117,7 +117,7 @@ warmup_steps=500, weight_decay=0.0, xpu_backend=None, ) -12/16/2022 11:39:56 - INFO - __main__ - Training/evaluation parameters Seq2SeqTrainingArguments( +12/16/2022 19:20:03 - INFO - __main__ - Training/evaluation parameters Seq2SeqTrainingArguments( _n_gpu=1, adafactor=False, adam_beta1=0.9, @@ -174,7 +174,7 @@ local_rank=0, log_level=passive, log_level_replica=passive, log_on_each_node=True, -logging_dir=./runs/Dec16_11-39-56_129-146-123-136, +logging_dir=./runs/Dec16_19-20-02_129-146-123-136, logging_first_step=False, logging_nan_inf_filter=True, logging_steps=25, @@ -227,106 +227,60 @@ warmup_steps=500, weight_decay=0.0, xpu_backend=None, ) -12/16/2022 11:39:58 - INFO - datasets.info - Loading Dataset Infos from /home/milan/.cache/huggingface/modules/datasets_modules/datasets/mozilla-foundation--common_voice_11_0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f -12/16/2022 11:39:58 - INFO - datasets.builder - Generating dataset common_voice_11_0 (/home/milan/.cache/huggingface/datasets/mozilla-foundation___common_voice_11_0/cs/11.0.0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f) -Downloading and preparing dataset common_voice_11_0/cs to /home/milan/.cache/huggingface/datasets/mozilla-foundation___common_voice_11_0/cs/11.0.0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f... -12/16/2022 11:39:58 - INFO - datasets.builder - Dataset not on Hf google storage. Downloading and preparing it from source -12/16/2022 11:39:58 - INFO - datasets.utils.file_utils - https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/streaming/n_shards.json not found in cache or force_download set to True, downloading to /home/milan/.cache/huggingface/datasets/downloads/tmpkpv72a0e -12/16/2022 11:39:58 - INFO - datasets.utils.file_utils - storing https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/streaming/n_shards.json in cache at /home/milan/.cache/huggingface/datasets/downloads/a64fc6ccf85aaef9b75e8612e97f260d2435e8ffe8b7626c5c08499f466674da -12/16/2022 11:39:58 - INFO - datasets.utils.file_utils - creating metadata file for /home/milan/.cache/huggingface/datasets/downloads/a64fc6ccf85aaef9b75e8612e97f260d2435e8ffe8b7626c5c08499f466674da -12/16/2022 11:39:58 - INFO - datasets.download.download_manager - Downloading took 0.0 min -12/16/2022 11:39:58 - INFO - datasets.download.download_manager - Checksum Computation took 0.0 min -12/16/2022 11:39:59 - INFO - datasets.utils.file_utils - https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/streaming/audio/cs/train/cs_train_0.tar not found in cache or force_download set to True, downloading to /home/milan/.cache/huggingface/datasets/downloads/tmp38v7cdx_ -12/16/2022 11:40:10 - INFO - datasets.utils.file_utils - storing https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/streaming/audio/cs/train/cs_train_0.tar in cache at /home/milan/.cache/huggingface/datasets/downloads/e65067c9e6680c37326687e28407d42076ea8a79d0230ccfa9f1a30beefa496d -12/16/2022 11:40:10 - INFO - datasets.utils.file_utils - creating metadata file for /home/milan/.cache/huggingface/datasets/downloads/e65067c9e6680c37326687e28407d42076ea8a79d0230ccfa9f1a30beefa496d -12/16/2022 11:40:10 - INFO - datasets.utils.file_utils - https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/streaming/audio/cs/dev/cs_dev_0.tar not found in cache or force_download set to True, downloading to /home/milan/.cache/huggingface/datasets/downloads/tmp4h1ecyhc -12/16/2022 11:40:16 - INFO - datasets.utils.file_utils - storing https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/streaming/audio/cs/dev/cs_dev_0.tar in cache at /home/milan/.cache/huggingface/datasets/downloads/faef583dccc4451b03508c77f16c823c3e52f080e7d2696c47ef3bc4da88b993 -12/16/2022 11:40:16 - INFO - datasets.utils.file_utils - creating metadata file for /home/milan/.cache/huggingface/datasets/downloads/faef583dccc4451b03508c77f16c823c3e52f080e7d2696c47ef3bc4da88b993 -12/16/2022 11:40:16 - INFO - datasets.utils.file_utils - https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/streaming/audio/cs/test/cs_test_0.tar not found in cache or force_download set to True, downloading to /home/milan/.cache/huggingface/datasets/downloads/tmpns2ng_im -12/16/2022 11:40:22 - INFO - datasets.utils.file_utils - storing https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/streaming/audio/cs/test/cs_test_0.tar in cache at /home/milan/.cache/huggingface/datasets/downloads/3b199c261a4a515c549e967e06831827866d46648e11b24dc2f4b50880b69664 -12/16/2022 11:40:22 - INFO - datasets.utils.file_utils - creating metadata file for /home/milan/.cache/huggingface/datasets/downloads/3b199c261a4a515c549e967e06831827866d46648e11b24dc2f4b50880b69664 -12/16/2022 11:40:23 - INFO - datasets.utils.file_utils - https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/streaming/audio/cs/other/cs_other_0.tar not found in cache or force_download set to True, downloading to /home/milan/.cache/huggingface/datasets/downloads/tmp09yiimgu -12/16/2022 11:40:28 - INFO - datasets.utils.file_utils - storing https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/streaming/audio/cs/other/cs_other_0.tar in cache at /home/milan/.cache/huggingface/datasets/downloads/e9daf9a9975b7a926609592de3dd2542f6bfa853753253cd36e1ff97a21d5b16 -12/16/2022 11:40:28 - INFO - datasets.utils.file_utils - creating metadata file for /home/milan/.cache/huggingface/datasets/downloads/e9daf9a9975b7a926609592de3dd2542f6bfa853753253cd36e1ff97a21d5b16 -12/16/2022 11:40:29 - INFO - datasets.utils.file_utils - https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/streaming/audio/cs/invalidated/cs_invalidated_0.tar not found in cache or force_download set to True, downloading to /home/milan/.cache/huggingface/datasets/downloads/tmp9u2s9gxm -12/16/2022 11:40:30 - INFO - datasets.utils.file_utils - storing https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/streaming/audio/cs/invalidated/cs_invalidated_0.tar in cache at /home/milan/.cache/huggingface/datasets/downloads/8f0074a128edd8f8eb60cf804a3ae6c95ebcea9a743285d0c76e6b2168fd6695 -12/16/2022 11:40:30 - INFO - datasets.utils.file_utils - creating metadata file for /home/milan/.cache/huggingface/datasets/downloads/8f0074a128edd8f8eb60cf804a3ae6c95ebcea9a743285d0c76e6b2168fd6695 -12/16/2022 11:40:30 - INFO - datasets.download.download_manager - Downloading took 0.0 min -12/16/2022 11:40:31 - INFO - datasets.download.download_manager - Checksum Computation took 0.0 min -12/16/2022 11:40:38 - INFO - datasets.utils.file_utils - https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/streaming/transcript/cs/train.tsv not found in cache or force_download set to True, downloading to /home/milan/.cache/huggingface/datasets/downloads/tmpvbfyt60l -12/16/2022 11:40:39 - INFO - datasets.utils.file_utils - storing https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/streaming/transcript/cs/train.tsv in cache at /home/milan/.cache/huggingface/datasets/downloads/e03f690a1b87aaf6a10df06930bea3ea97524f22df3689d7d4f6de39a04ad6df -12/16/2022 11:40:39 - INFO - datasets.utils.file_utils - creating metadata file for /home/milan/.cache/huggingface/datasets/downloads/e03f690a1b87aaf6a10df06930bea3ea97524f22df3689d7d4f6de39a04ad6df -12/16/2022 11:40:39 - INFO - datasets.utils.file_utils - https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/streaming/transcript/cs/dev.tsv not found in cache or force_download set to True, downloading to /home/milan/.cache/huggingface/datasets/downloads/tmpzcp057ge -12/16/2022 11:40:41 - INFO - datasets.utils.file_utils - storing https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/streaming/transcript/cs/dev.tsv in cache at /home/milan/.cache/huggingface/datasets/downloads/5ede6c8121be47f2c2e4cb6be8b87ef855e5e3966ea6288c369f3d760770b29b -12/16/2022 11:40:41 - INFO - datasets.utils.file_utils - creating metadata file for /home/milan/.cache/huggingface/datasets/downloads/5ede6c8121be47f2c2e4cb6be8b87ef855e5e3966ea6288c369f3d760770b29b -12/16/2022 11:40:41 - INFO - datasets.utils.file_utils - https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/streaming/transcript/cs/test.tsv not found in cache or force_download set to True, downloading to /home/milan/.cache/huggingface/datasets/downloads/tmpq1s9j9wp -12/16/2022 11:40:42 - INFO - datasets.utils.file_utils - storing https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/streaming/transcript/cs/test.tsv in cache at /home/milan/.cache/huggingface/datasets/downloads/af41495b66bafe30b9cd41b7495379553a285ecf658214a8659ed096da46180b -12/16/2022 11:40:42 - INFO - datasets.utils.file_utils - creating metadata file for /home/milan/.cache/huggingface/datasets/downloads/af41495b66bafe30b9cd41b7495379553a285ecf658214a8659ed096da46180b -12/16/2022 11:40:43 - INFO - datasets.utils.file_utils - https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/streaming/transcript/cs/other.tsv not found in cache or force_download set to True, downloading to /home/milan/.cache/huggingface/datasets/downloads/tmpibvglp0c -12/16/2022 11:40:44 - INFO - datasets.utils.file_utils - storing https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/streaming/transcript/cs/other.tsv in cache at /home/milan/.cache/huggingface/datasets/downloads/0b1cf69c260eb7bfb5a97457676fa623c013d75427595de37d10c03c53b2a184 -12/16/2022 11:40:44 - INFO - datasets.utils.file_utils - creating metadata file for /home/milan/.cache/huggingface/datasets/downloads/0b1cf69c260eb7bfb5a97457676fa623c013d75427595de37d10c03c53b2a184 -12/16/2022 11:40:45 - INFO - datasets.utils.file_utils - https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/streaming/transcript/cs/invalidated.tsv not found in cache or force_download set to True, downloading to /home/milan/.cache/huggingface/datasets/downloads/tmpxneku9q5 -12/16/2022 11:40:45 - INFO - datasets.utils.file_utils - storing https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/streaming/transcript/cs/invalidated.tsv in cache at /home/milan/.cache/huggingface/datasets/downloads/678b61c8b7174e01bad8858c770f89625c71e9cbcfe3a7ca7905d4fab962dcbf -12/16/2022 11:40:45 - INFO - datasets.utils.file_utils - creating metadata file for /home/milan/.cache/huggingface/datasets/downloads/678b61c8b7174e01bad8858c770f89625c71e9cbcfe3a7ca7905d4fab962dcbf -12/16/2022 11:40:45 - INFO - datasets.download.download_manager - Downloading took 0.0 min -12/16/2022 11:40:45 - INFO - datasets.download.download_manager - Checksum Computation took 0.0 min -12/16/2022 11:40:45 - INFO - datasets.utils.info_utils - Unable to verify checksums. -12/16/2022 11:40:45 - INFO - datasets.builder - Generating train split -12/16/2022 11:40:50 - INFO - datasets.builder - Generating validation split -12/16/2022 11:40:52 - INFO - datasets.builder - Generating test split -12/16/2022 11:40:54 - INFO - datasets.builder - Generating other split -12/16/2022 11:40:57 - INFO - datasets.builder - Generating invalidated split -12/16/2022 11:40:57 - INFO - datasets.utils.info_utils - Unable to verify splits sizes. -Dataset common_voice_11_0 downloaded and prepared to /home/milan/.cache/huggingface/datasets/mozilla-foundation___common_voice_11_0/cs/11.0.0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f. Subsequent calls will reuse this data. -12/16/2022 11:40:59 - INFO - datasets.info - Loading Dataset Infos from /home/milan/.cache/huggingface/modules/datasets_modules/datasets/mozilla-foundation--common_voice_11_0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f -12/16/2022 11:40:59 - INFO - datasets.builder - Overwrite dataset info from restored data version. -12/16/2022 11:40:59 - INFO - datasets.info - Loading Dataset info from /home/milan/.cache/huggingface/datasets/mozilla-foundation___common_voice_11_0/cs/11.0.0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f -12/16/2022 11:40:59 - WARNING - datasets.builder - Found cached dataset common_voice_11_0 (/home/milan/.cache/huggingface/datasets/mozilla-foundation___common_voice_11_0/cs/11.0.0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f) -12/16/2022 11:40:59 - INFO - datasets.info - Loading Dataset info from /home/milan/.cache/huggingface/datasets/mozilla-foundation___common_voice_11_0/cs/11.0.0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f -12/16/2022 11:41:01 - INFO - datasets.info - Loading Dataset Infos from /home/milan/.cache/huggingface/modules/datasets_modules/datasets/mozilla-foundation--common_voice_11_0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f -12/16/2022 11:41:01 - INFO - datasets.builder - Overwrite dataset info from restored data version. -12/16/2022 11:41:01 - INFO - datasets.info - Loading Dataset info from /home/milan/.cache/huggingface/datasets/mozilla-foundation___common_voice_11_0/cs/11.0.0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f -12/16/2022 11:41:01 - WARNING - datasets.builder - Found cached dataset common_voice_11_0 (/home/milan/.cache/huggingface/datasets/mozilla-foundation___common_voice_11_0/cs/11.0.0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f) -12/16/2022 11:41:01 - INFO - datasets.info - Loading Dataset info from /home/milan/.cache/huggingface/datasets/mozilla-foundation___common_voice_11_0/cs/11.0.0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f -12/16/2022 11:42:37 - INFO - datasets.arrow_dataset - Caching processed dataset at /home/milan/.cache/huggingface/datasets/mozilla-foundation___common_voice_11_0/cs/11.0.0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f/cache-3d5c448b6a2bf0f7.arrow -12/16/2022 12:19:41 - INFO - datasets.arrow_dataset - Caching processed dataset at /home/milan/.cache/huggingface/datasets/mozilla-foundation___common_voice_11_0/cs/11.0.0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f/cache-7c1ff4193a3aa668.arrow -12/16/2022 12:39:01 - INFO - datasets.arrow_dataset - Caching processed dataset at /home/milan/.cache/huggingface/datasets/mozilla-foundation___common_voice_11_0/cs/11.0.0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f/cache-3470671e4cfe112f.arrow -12/16/2022 12:39:03 - WARNING - huggingface_hub.repository - /home/milan/whisper-large2-czech-cv11-v2/./ is already a clone of https://huggingface.co/mikr/whisper-large2-czech-cv11-v2. Make sure you pull the latest changes with `repo.git_pull()`. -[2022-12-16 12:39:07,229] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed info: version=0.7.7, git-hash=unknown, git-branch=unknown -[2022-12-16 12:39:08,450] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed Flops Profiler Enabled: False -[2022-12-16 12:39:09,626] [WARNING] [cpu_adam.py:83:__init__] FP16 params for CPUAdam may not work on AMD CPUs +12/16/2022 19:20:05 - INFO - datasets.info - Loading Dataset Infos from /home/milan/.cache/huggingface/modules/datasets_modules/datasets/mozilla-foundation--common_voice_11_0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f +12/16/2022 19:20:05 - INFO - datasets.builder - Overwrite dataset info from restored data version. +12/16/2022 19:20:05 - INFO - datasets.info - Loading Dataset info from /home/milan/.cache/huggingface/datasets/mozilla-foundation___common_voice_11_0/cs/11.0.0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f +12/16/2022 19:20:05 - WARNING - datasets.builder - Found cached dataset common_voice_11_0 (/home/milan/.cache/huggingface/datasets/mozilla-foundation___common_voice_11_0/cs/11.0.0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f) +12/16/2022 19:20:05 - INFO - datasets.info - Loading Dataset info from /home/milan/.cache/huggingface/datasets/mozilla-foundation___common_voice_11_0/cs/11.0.0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f +12/16/2022 19:20:06 - INFO - datasets.info - Loading Dataset Infos from /home/milan/.cache/huggingface/modules/datasets_modules/datasets/mozilla-foundation--common_voice_11_0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f +12/16/2022 19:20:06 - INFO - datasets.builder - Overwrite dataset info from restored data version. +12/16/2022 19:20:06 - INFO - datasets.info - Loading Dataset info from /home/milan/.cache/huggingface/datasets/mozilla-foundation___common_voice_11_0/cs/11.0.0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f +12/16/2022 19:20:06 - WARNING - datasets.builder - Found cached dataset common_voice_11_0 (/home/milan/.cache/huggingface/datasets/mozilla-foundation___common_voice_11_0/cs/11.0.0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f) +12/16/2022 19:20:06 - INFO - datasets.info - Loading Dataset info from /home/milan/.cache/huggingface/datasets/mozilla-foundation___common_voice_11_0/cs/11.0.0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f +12/16/2022 19:20:08 - INFO - datasets.info - Loading Dataset Infos from /home/milan/.cache/huggingface/modules/datasets_modules/datasets/mozilla-foundation--common_voice_11_0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f +12/16/2022 19:20:08 - INFO - datasets.builder - Overwrite dataset info from restored data version. +12/16/2022 19:20:08 - INFO - datasets.info - Loading Dataset info from /home/milan/.cache/huggingface/datasets/mozilla-foundation___common_voice_11_0/cs/11.0.0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f +12/16/2022 19:20:08 - WARNING - datasets.builder - Found cached dataset common_voice_11_0 (/home/milan/.cache/huggingface/datasets/mozilla-foundation___common_voice_11_0/cs/11.0.0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f) +12/16/2022 19:20:08 - INFO - datasets.info - Loading Dataset info from /home/milan/.cache/huggingface/datasets/mozilla-foundation___common_voice_11_0/cs/11.0.0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f +12/16/2022 19:20:27 - WARNING - datasets.arrow_dataset - Loading cached processed dataset at /home/milan/.cache/huggingface/datasets/mozilla-foundation___common_voice_11_0/cs/11.0.0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f/cache-3d5c448b6a2bf0f7.arrow +12/16/2022 19:20:29 - INFO - datasets.arrow_dataset - Caching processed dataset at /home/milan/.cache/huggingface/datasets/mozilla-foundation___common_voice_11_0/cs/11.0.0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f/cache-73e3a5936553e76c.arrow +12/16/2022 19:40:11 - WARNING - datasets.arrow_dataset - Loading cached processed dataset at /home/milan/.cache/huggingface/datasets/mozilla-foundation___common_voice_11_0/cs/11.0.0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f/cache-3470671e4cfe112f.arrow +12/16/2022 19:40:13 - WARNING - huggingface_hub.repository - /home/milan/whisper-large2-czech-cv11-v2/./ is already a clone of https://huggingface.co/mikr/whisper-large2-czech-cv11-v2. Make sure you pull the latest changes with `repo.git_pull()`. +[2022-12-16 19:40:17,786] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed info: version=0.7.7, git-hash=unknown, git-branch=unknown +[2022-12-16 19:40:18,780] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed Flops Profiler Enabled: False +[2022-12-16 19:40:19,982] [WARNING] [cpu_adam.py:83:__init__] FP16 params for CPUAdam may not work on AMD CPUs Installed CUDA version 11.6 does not match the version torch was compiled with 11.7 but since the APIs are compatible, accepting this combination ninja: no work to do. -Time to load cpu_adam op: 2.82881236076355 seconds +Time to load cpu_adam op: 3.031318426132202 seconds Adam Optimizer #0 is created with AVX2 arithmetic capability. Config: alpha=0.000010, betas=(0.900000, 0.999000), weight_decay=0.000000, adam_w=1 -[2022-12-16 12:39:14,051] [INFO] [logging.py:68:log_dist] [Rank 0] Using DeepSpeed Optimizer param name adamw as basic optimizer -[2022-12-16 12:39:14,354] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed Basic Optimizer = DeepSpeedCPUAdam -[2022-12-16 12:39:14,354] [INFO] [utils.py:52:is_zero_supported_optimizer] Checking ZeRO support for optimizer=DeepSpeedCPUAdam type= -[2022-12-16 12:39:14,354] [INFO] [logging.py:68:log_dist] [Rank 0] Creating fp16 ZeRO stage 2 optimizer -[2022-12-16 12:39:14,354] [INFO] [stage_1_and_2.py:140:__init__] Reduce bucket size 200000000 -[2022-12-16 12:39:14,355] [INFO] [stage_1_and_2.py:141:__init__] Allgather bucket size 200000000 -[2022-12-16 12:39:14,355] [INFO] [stage_1_and_2.py:142:__init__] CPU Offload: True -[2022-12-16 12:39:14,355] [INFO] [stage_1_and_2.py:143:__init__] Round robin gradient partitioning: False +[2022-12-16 19:40:24,909] [INFO] [logging.py:68:log_dist] [Rank 0] Using DeepSpeed Optimizer param name adamw as basic optimizer +[2022-12-16 19:40:25,211] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed Basic Optimizer = DeepSpeedCPUAdam +[2022-12-16 19:40:25,212] [INFO] [utils.py:52:is_zero_supported_optimizer] Checking ZeRO support for optimizer=DeepSpeedCPUAdam type= +[2022-12-16 19:40:25,212] [INFO] [logging.py:68:log_dist] [Rank 0] Creating fp16 ZeRO stage 2 optimizer +[2022-12-16 19:40:25,212] [INFO] [stage_1_and_2.py:140:__init__] Reduce bucket size 200000000 +[2022-12-16 19:40:25,212] [INFO] [stage_1_and_2.py:141:__init__] Allgather bucket size 200000000 +[2022-12-16 19:40:25,212] [INFO] [stage_1_and_2.py:142:__init__] CPU Offload: True +[2022-12-16 19:40:25,212] [INFO] [stage_1_and_2.py:143:__init__] Round robin gradient partitioning: False ninja: no work to do. -Time to load utils op: 0.3903844356536865 seconds +Time to load utils op: 0.5200150012969971 seconds Rank: 0 partition count [1] and sizes[(1543304960, False)] -[2022-12-16 12:39:18,143] [INFO] [utils.py:827:see_memory_usage] Before initializing optimizer states -[2022-12-16 12:39:18,144] [INFO] [utils.py:828:see_memory_usage] MA 3.0 GB Max_MA 3.0 GB CA 5.99 GB Max_CA 6 GB -[2022-12-16 12:39:18,144] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 15.41 GB, percent = 7.8% -[2022-12-16 12:39:22,046] [INFO] [utils.py:827:see_memory_usage] After initializing optimizer states -[2022-12-16 12:39:22,047] [INFO] [utils.py:828:see_memory_usage] MA 3.0 GB Max_MA 3.0 GB CA 5.99 GB Max_CA 6 GB -[2022-12-16 12:39:22,047] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 35.07 GB, percent = 17.8% -[2022-12-16 12:39:22,047] [INFO] [stage_1_and_2.py:525:__init__] optimizer state initialized -[2022-12-16 12:39:22,119] [INFO] [utils.py:827:see_memory_usage] After initializing ZeRO optimizer -[2022-12-16 12:39:22,120] [INFO] [utils.py:828:see_memory_usage] MA 3.0 GB Max_MA 3.0 GB CA 5.99 GB Max_CA 6 GB -[2022-12-16 12:39:22,120] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 35.07 GB, percent = 17.8% -[2022-12-16 12:39:22,144] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed Final Optimizer = adamw -[2022-12-16 12:39:22,144] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed using configured LR scheduler = WarmupLR -[2022-12-16 12:39:22,144] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed LR Scheduler = -[2022-12-16 12:39:22,144] [INFO] [logging.py:68:log_dist] [Rank 0] step=0, skipped=0, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-16 12:39:22,146] [INFO] [config.py:1020:print] DeepSpeedEngine configuration: -[2022-12-16 12:39:22,146] [INFO] [config.py:1024:print] activation_checkpointing_config { +[2022-12-16 19:40:29,582] [INFO] [utils.py:827:see_memory_usage] Before initializing optimizer states +[2022-12-16 19:40:29,583] [INFO] [utils.py:828:see_memory_usage] MA 3.0 GB Max_MA 3.0 GB CA 5.99 GB Max_CA 6 GB +[2022-12-16 19:40:29,583] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 15.46 GB, percent = 7.9% +[2022-12-16 19:40:33,634] [INFO] [utils.py:827:see_memory_usage] After initializing optimizer states +[2022-12-16 19:40:33,634] [INFO] [utils.py:828:see_memory_usage] MA 3.0 GB Max_MA 3.0 GB CA 5.99 GB Max_CA 6 GB +[2022-12-16 19:40:33,635] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 35.2 GB, percent = 17.9% +[2022-12-16 19:40:33,635] [INFO] [stage_1_and_2.py:525:__init__] optimizer state initialized +[2022-12-16 19:40:33,721] [INFO] [utils.py:827:see_memory_usage] After initializing ZeRO optimizer +[2022-12-16 19:40:33,722] [INFO] [utils.py:828:see_memory_usage] MA 3.0 GB Max_MA 3.0 GB CA 5.99 GB Max_CA 6 GB +[2022-12-16 19:40:33,723] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 35.13 GB, percent = 17.9% +[2022-12-16 19:40:33,756] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed Final Optimizer = adamw +[2022-12-16 19:40:33,756] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed using configured LR scheduler = WarmupDecayLR +[2022-12-16 19:40:33,757] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed LR Scheduler = +[2022-12-16 19:40:33,757] [INFO] [logging.py:68:log_dist] [Rank 0] step=0, skipped=0, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-16 19:40:33,759] [INFO] [config.py:1020:print] DeepSpeedEngine configuration: +[2022-12-16 19:40:33,759] [INFO] [config.py:1024:print] activation_checkpointing_config { "partition_activations": false, "contiguous_memory_optimization": false, "cpu_checkpointing": false, @@ -334,10 +288,10 @@ Rank: 0 partition count [1] and sizes[(1543304960, False)] "synchronize_checkpoint_boundary": false, "profile": false } -[2022-12-16 12:39:22,146] [INFO] [config.py:1024:print] aio_config ................... {'block_size': 1048576, 'queue_depth': 8, 'thread_count': 1, 'single_submit': False, 'overlap_events': True} -[2022-12-16 12:39:22,146] [INFO] [config.py:1024:print] amp_enabled .................. False -[2022-12-16 12:39:22,147] [INFO] [config.py:1024:print] amp_params ................... False -[2022-12-16 12:39:22,147] [INFO] [config.py:1024:print] autotuning_config ............ { +[2022-12-16 19:40:33,759] [INFO] [config.py:1024:print] aio_config ................... {'block_size': 1048576, 'queue_depth': 8, 'thread_count': 1, 'single_submit': False, 'overlap_events': True} +[2022-12-16 19:40:33,759] [INFO] [config.py:1024:print] amp_enabled .................. False +[2022-12-16 19:40:33,759] [INFO] [config.py:1024:print] amp_params ................... False +[2022-12-16 19:40:33,760] [INFO] [config.py:1024:print] autotuning_config ............ { "enabled": false, "start_step": null, "end_step": null, @@ -362,29 +316,29 @@ Rank: 0 partition count [1] and sizes[(1543304960, False)] "min_train_micro_batch_size_per_gpu": 1, "num_tuning_micro_batch_sizes": 3 } -[2022-12-16 12:39:22,147] [INFO] [config.py:1024:print] bfloat16_enabled ............. False -[2022-12-16 12:39:22,147] [INFO] [config.py:1024:print] checkpoint_parallel_write_pipeline False -[2022-12-16 12:39:22,147] [INFO] [config.py:1024:print] checkpoint_tag_validation_enabled True -[2022-12-16 12:39:22,147] [INFO] [config.py:1024:print] checkpoint_tag_validation_fail False -[2022-12-16 12:39:22,147] [INFO] [config.py:1024:print] comms_config ................. -[2022-12-16 12:39:22,147] [INFO] [config.py:1024:print] communication_data_type ...... None -[2022-12-16 12:39:22,147] [INFO] [config.py:1024:print] compression_config ........... {'weight_quantization': {'shared_parameters': {'enabled': False, 'quantizer_kernel': False, 'schedule_offset': 0, 'quantize_groups': 1, 'quantize_verbose': False, 'quantization_type': 'symmetric', 'quantize_weight_in_forward': False, 'rounding': 'nearest', 'fp16_mixed_quantize': False, 'quantize_change_ratio': 0.001}, 'different_groups': {}}, 'activation_quantization': {'shared_parameters': {'enabled': False, 'quantization_type': 'symmetric', 'range_calibration': 'dynamic', 'schedule_offset': 1000}, 'different_groups': {}}, 'sparse_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'row_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'head_pruning': {'shared_parameters': {'enabled': False, 'method': 'topk', 'schedule_offset': 1000}, 'different_groups': {}}, 'channel_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'layer_reduction': {'enabled': False}} -[2022-12-16 12:39:22,147] [INFO] [config.py:1024:print] curriculum_enabled ........... False -[2022-12-16 12:39:22,147] [INFO] [config.py:1024:print] curriculum_params ............ False -[2022-12-16 12:39:22,147] [INFO] [config.py:1024:print] dataloader_drop_last ......... False -[2022-12-16 12:39:22,147] [INFO] [config.py:1024:print] disable_allgather ............ False -[2022-12-16 12:39:22,147] [INFO] [config.py:1024:print] dump_state ................... False -[2022-12-16 12:39:22,147] [INFO] [config.py:1024:print] dynamic_loss_scale_args ...... {'init_scale': 65536, 'scale_window': 1000, 'delayed_shift': 2, 'min_scale': 1} -[2022-12-16 12:39:22,147] [INFO] [config.py:1024:print] eigenvalue_enabled ........... False -[2022-12-16 12:39:22,147] [INFO] [config.py:1024:print] eigenvalue_gas_boundary_resolution 1 -[2022-12-16 12:39:22,147] [INFO] [config.py:1024:print] eigenvalue_layer_name ........ bert.encoder.layer -[2022-12-16 12:39:22,147] [INFO] [config.py:1024:print] eigenvalue_layer_num ......... 0 -[2022-12-16 12:39:22,147] [INFO] [config.py:1024:print] eigenvalue_max_iter .......... 100 -[2022-12-16 12:39:22,148] [INFO] [config.py:1024:print] eigenvalue_stability ......... 1e-06 -[2022-12-16 12:39:22,148] [INFO] [config.py:1024:print] eigenvalue_tol ............... 0.01 -[2022-12-16 12:39:22,148] [INFO] [config.py:1024:print] eigenvalue_verbose ........... False -[2022-12-16 12:39:22,148] [INFO] [config.py:1024:print] elasticity_enabled ........... False -[2022-12-16 12:39:22,148] [INFO] [config.py:1024:print] flops_profiler_config ........ { +[2022-12-16 19:40:33,760] [INFO] [config.py:1024:print] bfloat16_enabled ............. False +[2022-12-16 19:40:33,760] [INFO] [config.py:1024:print] checkpoint_parallel_write_pipeline False +[2022-12-16 19:40:33,760] [INFO] [config.py:1024:print] checkpoint_tag_validation_enabled True +[2022-12-16 19:40:33,760] [INFO] [config.py:1024:print] checkpoint_tag_validation_fail False +[2022-12-16 19:40:33,760] [INFO] [config.py:1024:print] comms_config ................. +[2022-12-16 19:40:33,760] [INFO] [config.py:1024:print] communication_data_type ...... None +[2022-12-16 19:40:33,760] [INFO] [config.py:1024:print] compression_config ........... {'weight_quantization': {'shared_parameters': {'enabled': False, 'quantizer_kernel': False, 'schedule_offset': 0, 'quantize_groups': 1, 'quantize_verbose': False, 'quantization_type': 'symmetric', 'quantize_weight_in_forward': False, 'rounding': 'nearest', 'fp16_mixed_quantize': False, 'quantize_change_ratio': 0.001}, 'different_groups': {}}, 'activation_quantization': {'shared_parameters': {'enabled': False, 'quantization_type': 'symmetric', 'range_calibration': 'dynamic', 'schedule_offset': 1000}, 'different_groups': {}}, 'sparse_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'row_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'head_pruning': {'shared_parameters': {'enabled': False, 'method': 'topk', 'schedule_offset': 1000}, 'different_groups': {}}, 'channel_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'layer_reduction': {'enabled': False}} +[2022-12-16 19:40:33,760] [INFO] [config.py:1024:print] curriculum_enabled ........... False +[2022-12-16 19:40:33,760] [INFO] [config.py:1024:print] curriculum_params ............ False +[2022-12-16 19:40:33,760] [INFO] [config.py:1024:print] dataloader_drop_last ......... False +[2022-12-16 19:40:33,760] [INFO] [config.py:1024:print] disable_allgather ............ False +[2022-12-16 19:40:33,760] [INFO] [config.py:1024:print] dump_state ................... False +[2022-12-16 19:40:33,760] [INFO] [config.py:1024:print] dynamic_loss_scale_args ...... {'init_scale': 65536, 'scale_window': 1000, 'delayed_shift': 2, 'min_scale': 1} +[2022-12-16 19:40:33,760] [INFO] [config.py:1024:print] eigenvalue_enabled ........... False +[2022-12-16 19:40:33,760] [INFO] [config.py:1024:print] eigenvalue_gas_boundary_resolution 1 +[2022-12-16 19:40:33,760] [INFO] [config.py:1024:print] eigenvalue_layer_name ........ bert.encoder.layer +[2022-12-16 19:40:33,760] [INFO] [config.py:1024:print] eigenvalue_layer_num ......... 0 +[2022-12-16 19:40:33,760] [INFO] [config.py:1024:print] eigenvalue_max_iter .......... 100 +[2022-12-16 19:40:33,760] [INFO] [config.py:1024:print] eigenvalue_stability ......... 1e-06 +[2022-12-16 19:40:33,761] [INFO] [config.py:1024:print] eigenvalue_tol ............... 0.01 +[2022-12-16 19:40:33,761] [INFO] [config.py:1024:print] eigenvalue_verbose ........... False +[2022-12-16 19:40:33,761] [INFO] [config.py:1024:print] elasticity_enabled ........... False +[2022-12-16 19:40:33,761] [INFO] [config.py:1024:print] flops_profiler_config ........ { "enabled": false, "profile_step": 1, "module_depth": -1, @@ -392,20 +346,20 @@ Rank: 0 partition count [1] and sizes[(1543304960, False)] "detailed": true, "output_file": null } -[2022-12-16 12:39:22,148] [INFO] [config.py:1024:print] fp16_auto_cast ............... False -[2022-12-16 12:39:22,148] [INFO] [config.py:1024:print] fp16_enabled ................. True -[2022-12-16 12:39:22,148] [INFO] [config.py:1024:print] fp16_master_weights_and_gradients False -[2022-12-16 12:39:22,148] [INFO] [config.py:1024:print] global_rank .................. 0 -[2022-12-16 12:39:22,148] [INFO] [config.py:1024:print] grad_accum_dtype ............. None -[2022-12-16 12:39:22,148] [INFO] [config.py:1024:print] gradient_accumulation_steps .. 2 -[2022-12-16 12:39:22,148] [INFO] [config.py:1024:print] gradient_clipping ............ 1.0 -[2022-12-16 12:39:22,148] [INFO] [config.py:1024:print] gradient_predivide_factor .... 1.0 -[2022-12-16 12:39:22,148] [INFO] [config.py:1024:print] initial_dynamic_scale ........ 65536 -[2022-12-16 12:39:22,148] [INFO] [config.py:1024:print] load_universal_checkpoint .... False -[2022-12-16 12:39:22,148] [INFO] [config.py:1024:print] loss_scale ................... 0 -[2022-12-16 12:39:22,148] [INFO] [config.py:1024:print] memory_breakdown ............. False -[2022-12-16 12:39:22,148] [INFO] [config.py:1024:print] monitor_config ............... -[2022-12-16 12:39:22,148] [INFO] [config.py:1024:print] nebula_config ................ { +[2022-12-16 19:40:33,761] [INFO] [config.py:1024:print] fp16_auto_cast ............... False +[2022-12-16 19:40:33,761] [INFO] [config.py:1024:print] fp16_enabled ................. True +[2022-12-16 19:40:33,761] [INFO] [config.py:1024:print] fp16_master_weights_and_gradients False +[2022-12-16 19:40:33,761] [INFO] [config.py:1024:print] global_rank .................. 0 +[2022-12-16 19:40:33,761] [INFO] [config.py:1024:print] grad_accum_dtype ............. None +[2022-12-16 19:40:33,761] [INFO] [config.py:1024:print] gradient_accumulation_steps .. 2 +[2022-12-16 19:40:33,761] [INFO] [config.py:1024:print] gradient_clipping ............ 1.0 +[2022-12-16 19:40:33,761] [INFO] [config.py:1024:print] gradient_predivide_factor .... 1.0 +[2022-12-16 19:40:33,761] [INFO] [config.py:1024:print] initial_dynamic_scale ........ 65536 +[2022-12-16 19:40:33,761] [INFO] [config.py:1024:print] load_universal_checkpoint .... False +[2022-12-16 19:40:33,761] [INFO] [config.py:1024:print] loss_scale ................... 0 +[2022-12-16 19:40:33,761] [INFO] [config.py:1024:print] memory_breakdown ............. False +[2022-12-16 19:40:33,761] [INFO] [config.py:1024:print] monitor_config ............... +[2022-12-16 19:40:33,761] [INFO] [config.py:1024:print] nebula_config ................ { "enabled": false, "persistent_storage_path": null, "persistent_time_interval": 100, @@ -413,28 +367,28 @@ Rank: 0 partition count [1] and sizes[(1543304960, False)] "enable_nebula_load": true, "load_path": null } -[2022-12-16 12:39:22,148] [INFO] [config.py:1024:print] optimizer_legacy_fusion ...... False -[2022-12-16 12:39:22,148] [INFO] [config.py:1024:print] optimizer_name ............... adamw -[2022-12-16 12:39:22,148] [INFO] [config.py:1024:print] optimizer_params ............. {'lr': 1e-05, 'betas': [0.9, 0.999], 'eps': 1e-08, 'weight_decay': 0.0} -[2022-12-16 12:39:22,148] [INFO] [config.py:1024:print] pipeline ..................... {'stages': 'auto', 'partition': 'best', 'seed_layers': False, 'activation_checkpoint_interval': 0} -[2022-12-16 12:39:22,148] [INFO] [config.py:1024:print] pld_enabled .................. False -[2022-12-16 12:39:22,149] [INFO] [config.py:1024:print] pld_params ................... False -[2022-12-16 12:39:22,149] [INFO] [config.py:1024:print] prescale_gradients ........... False -[2022-12-16 12:39:22,149] [INFO] [config.py:1024:print] scheduler_name ............... WarmupLR -[2022-12-16 12:39:22,149] [INFO] [config.py:1024:print] scheduler_params ............. {'warmup_min_lr': 0, 'warmup_max_lr': 1e-05, 'warmup_num_steps': 500} -[2022-12-16 12:39:22,149] [INFO] [config.py:1024:print] sparse_attention ............. None -[2022-12-16 12:39:22,149] [INFO] [config.py:1024:print] sparse_gradients_enabled ..... False -[2022-12-16 12:39:22,149] [INFO] [config.py:1024:print] steps_per_print .............. 10 -[2022-12-16 12:39:22,149] [INFO] [config.py:1024:print] train_batch_size ............. 64 -[2022-12-16 12:39:22,149] [INFO] [config.py:1024:print] train_micro_batch_size_per_gpu 32 -[2022-12-16 12:39:22,149] [INFO] [config.py:1024:print] use_node_local_storage ....... False -[2022-12-16 12:39:22,149] [INFO] [config.py:1024:print] wall_clock_breakdown ......... False -[2022-12-16 12:39:22,149] [INFO] [config.py:1024:print] world_size ................... 1 -[2022-12-16 12:39:22,149] [INFO] [config.py:1024:print] zero_allow_untested_optimizer False -[2022-12-16 12:39:22,149] [INFO] [config.py:1024:print] zero_config .................. stage=2 contiguous_gradients=True reduce_scatter=True reduce_bucket_size=200000000 allgather_partitions=True allgather_bucket_size=200000000 overlap_comm=True load_from_fp32_weights=True elastic_checkpoint=False offload_param=None offload_optimizer=DeepSpeedZeroOffloadOptimizerConfig(device='cpu', nvme_path=None, buffer_count=4, pin_memory=True, pipeline=False, pipeline_read=False, pipeline_write=False, fast_init=False) sub_group_size=1,000,000,000 cpu_offload_param=None cpu_offload_use_pin_memory=None cpu_offload=None prefetch_bucket_size=50,000,000 param_persistence_threshold=100,000 model_persistence_threshold=sys.maxsize max_live_parameters=1,000,000,000 max_reuse_distance=1,000,000,000 gather_16bit_weights_on_model_save=False stage3_gather_fp16_weights_on_model_save=False ignore_unused_parameters=True legacy_stage1=False round_robin_gradients=False -[2022-12-16 12:39:22,149] [INFO] [config.py:1024:print] zero_enabled ................. True -[2022-12-16 12:39:22,149] [INFO] [config.py:1024:print] zero_optimization_stage ...... 2 -[2022-12-16 12:39:22,150] [INFO] [config.py:1009:print_user_config] json = { +[2022-12-16 19:40:33,761] [INFO] [config.py:1024:print] optimizer_legacy_fusion ...... False +[2022-12-16 19:40:33,761] [INFO] [config.py:1024:print] optimizer_name ............... adamw +[2022-12-16 19:40:33,762] [INFO] [config.py:1024:print] optimizer_params ............. {'lr': 1e-05, 'betas': [0.9, 0.999], 'eps': 1e-08, 'weight_decay': 0.0} +[2022-12-16 19:40:33,762] [INFO] [config.py:1024:print] pipeline ..................... {'stages': 'auto', 'partition': 'best', 'seed_layers': False, 'activation_checkpoint_interval': 0} +[2022-12-16 19:40:33,762] [INFO] [config.py:1024:print] pld_enabled .................. False +[2022-12-16 19:40:33,762] [INFO] [config.py:1024:print] pld_params ................... False +[2022-12-16 19:40:33,762] [INFO] [config.py:1024:print] prescale_gradients ........... False +[2022-12-16 19:40:33,762] [INFO] [config.py:1024:print] scheduler_name ............... WarmupDecayLR +[2022-12-16 19:40:33,762] [INFO] [config.py:1024:print] scheduler_params ............. {'last_batch_iteration': -1, 'total_num_steps': 5000, 'warmup_min_lr': 0, 'warmup_max_lr': 1e-05, 'warmup_num_steps': 500} +[2022-12-16 19:40:33,762] [INFO] [config.py:1024:print] sparse_attention ............. None +[2022-12-16 19:40:33,762] [INFO] [config.py:1024:print] sparse_gradients_enabled ..... False +[2022-12-16 19:40:33,762] [INFO] [config.py:1024:print] steps_per_print .............. 10 +[2022-12-16 19:40:33,762] [INFO] [config.py:1024:print] train_batch_size ............. 64 +[2022-12-16 19:40:33,762] [INFO] [config.py:1024:print] train_micro_batch_size_per_gpu 32 +[2022-12-16 19:40:33,762] [INFO] [config.py:1024:print] use_node_local_storage ....... False +[2022-12-16 19:40:33,762] [INFO] [config.py:1024:print] wall_clock_breakdown ......... False +[2022-12-16 19:40:33,762] [INFO] [config.py:1024:print] world_size ................... 1 +[2022-12-16 19:40:33,762] [INFO] [config.py:1024:print] zero_allow_untested_optimizer False +[2022-12-16 19:40:33,762] [INFO] [config.py:1024:print] zero_config .................. stage=2 contiguous_gradients=True reduce_scatter=True reduce_bucket_size=200000000 allgather_partitions=True allgather_bucket_size=200000000 overlap_comm=True load_from_fp32_weights=True elastic_checkpoint=False offload_param=None offload_optimizer=DeepSpeedZeroOffloadOptimizerConfig(device='cpu', nvme_path=None, buffer_count=4, pin_memory=True, pipeline=False, pipeline_read=False, pipeline_write=False, fast_init=False) sub_group_size=1,000,000,000 cpu_offload_param=None cpu_offload_use_pin_memory=None cpu_offload=None prefetch_bucket_size=50,000,000 param_persistence_threshold=100,000 model_persistence_threshold=sys.maxsize max_live_parameters=1,000,000,000 max_reuse_distance=1,000,000,000 gather_16bit_weights_on_model_save=False stage3_gather_fp16_weights_on_model_save=False ignore_unused_parameters=True legacy_stage1=False round_robin_gradients=False +[2022-12-16 19:40:33,762] [INFO] [config.py:1024:print] zero_enabled ................. True +[2022-12-16 19:40:33,762] [INFO] [config.py:1024:print] zero_optimization_stage ...... 2 +[2022-12-16 19:40:33,763] [INFO] [config.py:1009:print_user_config] json = { "fp16": { "enabled": true, "loss_scale": 0, @@ -453,8 +407,10 @@ Rank: 0 partition count [1] and sizes[(1543304960, False)] } }, "scheduler": { - "type": "WarmupLR", + "type": "WarmupDecayLR", "params": { + "last_batch_iteration": -1, + "total_num_steps": 5.000000e+03, "warmup_min_lr": 0, "warmup_max_lr": 1e-05, "warmup_num_steps": 500 @@ -478,1157 +434,1157 @@ Rank: 0 partition count [1] and sizes[(1543304960, False)] "train_batch_size": 64, "train_micro_batch_size_per_gpu": 32 } -Time to load utils op: 0.0003771781921386719 seconds -[2022-12-16 12:39:47,145] [INFO] [timer.py:197:stop] 0/4, RunningAvgSamplesPerSec=6.344583010207686, CurrSamplesPerSec=5.697173747342324, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:39:58,446] [INFO] [timer.py:197:stop] 0/6, RunningAvgSamplesPerSec=6.354105820400516, CurrSamplesPerSec=5.7200148121144485, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:40:09,779] [INFO] [timer.py:197:stop] 0/8, RunningAvgSamplesPerSec=6.3511059843491315, CurrSamplesPerSec=5.706168744069791, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:40:21,155] [INFO] [timer.py:197:stop] 0/10, RunningAvgSamplesPerSec=6.343221806642607, CurrSamplesPerSec=5.690943305296631, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:40:32,488] [INFO] [timer.py:197:stop] 0/12, RunningAvgSamplesPerSec=6.343072944394993, CurrSamplesPerSec=5.725201867756091, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:40:43,873] [INFO] [timer.py:197:stop] 0/14, RunningAvgSamplesPerSec=6.337683883211206, CurrSamplesPerSec=5.689833779599867, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:40:55,274] [INFO] [timer.py:197:stop] 0/16, RunningAvgSamplesPerSec=6.332389036245992, CurrSamplesPerSec=5.669597853035386, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:41:06,603] [INFO] [timer.py:197:stop] 0/18, RunningAvgSamplesPerSec=6.33282615782689, CurrSamplesPerSec=5.706427845798502, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:41:17,385] [INFO] [stage_1_and_2.py:1765:step] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 65536 -[2022-12-16 12:41:17,386] [INFO] [logging.py:68:log_dist] [Rank 0] step=10, skipped=1, lr=[3.535580269163017e-06], mom=[[0.9, 0.999]] -[2022-12-16 12:41:17,387] [INFO] [timer.py:197:stop] 0/20, RunningAvgSamplesPerSec=6.372488127559496, CurrSamplesPerSec=6.3796996062220375, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:41:28,769] [INFO] [timer.py:197:stop] 0/22, RunningAvgSamplesPerSec=6.366574823404308, CurrSamplesPerSec=5.682797328337197, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:41:40,119] [INFO] [timer.py:197:stop] 0/24, RunningAvgSamplesPerSec=6.363487793851469, CurrSamplesPerSec=5.685089104119074, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:41:50,879] [INFO] [stage_1_and_2.py:1765:step] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768.0 -[2022-12-16 12:41:50,881] [INFO] [timer.py:197:stop] 0/26, RunningAvgSamplesPerSec=6.392071905977898, CurrSamplesPerSec=6.394088716886378, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:42:02,274] [INFO] [timer.py:197:stop] 0/28, RunningAvgSamplesPerSec=6.384638482589085, CurrSamplesPerSec=5.6451629089442354, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:42:13,644] [INFO] [timer.py:197:stop] 0/30, RunningAvgSamplesPerSec=6.380158521481853, CurrSamplesPerSec=5.697199381370964, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:42:24,983] [INFO] [timer.py:197:stop] 0/32, RunningAvgSamplesPerSec=6.377276100579833, CurrSamplesPerSec=5.709599137836787, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:42:36,318] [INFO] [timer.py:197:stop] 0/34, RunningAvgSamplesPerSec=6.374173569489135, CurrSamplesPerSec=5.686944867860612, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:42:47,672] [INFO] [timer.py:197:stop] 0/36, RunningAvgSamplesPerSec=6.371391329214875, CurrSamplesPerSec=5.698267509700741, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:42:59,049] [INFO] [timer.py:197:stop] 0/38, RunningAvgSamplesPerSec=6.3681956430878595, CurrSamplesPerSec=5.687853197846394, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:43:10,396] [INFO] [logging.py:68:log_dist] [Rank 0] step=20, skipped=2, lr=[4.650931663140581e-06], mom=[[0.9, 0.999]] -[2022-12-16 12:43:10,397] [INFO] [timer.py:197:stop] 0/40, RunningAvgSamplesPerSec=6.366312333365177, CurrSamplesPerSec=5.696548687004366, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:43:21,770] [INFO] [timer.py:197:stop] 0/42, RunningAvgSamplesPerSec=6.36387061297112, CurrSamplesPerSec=5.692867363700557, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:43:32,465] [INFO] [stage_1_and_2.py:1765:step] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768.0, reducing to 16384.0 -[2022-12-16 12:43:32,467] [INFO] [timer.py:197:stop] 0/44, RunningAvgSamplesPerSec=6.381094936769063, CurrSamplesPerSec=6.406390958081914, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:43:43,780] [INFO] [timer.py:197:stop] 0/46, RunningAvgSamplesPerSec=6.379753390930982, CurrSamplesPerSec=5.7075707942970615, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:43:55,117] [INFO] [timer.py:197:stop] 0/48, RunningAvgSamplesPerSec=6.377019018004726, CurrSamplesPerSec=5.686304945402372, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:44:06,451] [INFO] [timer.py:197:stop] 0/50, RunningAvgSamplesPerSec=6.37543476454316, CurrSamplesPerSec=5.709647715323768, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +Time to load utils op: 0.0003948211669921875 seconds +[2022-12-16 19:40:58,606] [INFO] [timer.py:197:stop] 0/4, RunningAvgSamplesPerSec=6.327062880977527, CurrSamplesPerSec=5.683973434872449, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:41:09,925] [INFO] [timer.py:197:stop] 0/6, RunningAvgSamplesPerSec=6.337890979134199, CurrSamplesPerSec=5.698936745189652, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:41:21,308] [INFO] [timer.py:197:stop] 0/8, RunningAvgSamplesPerSec=6.3294469227923305, CurrSamplesPerSec=5.6523551541575205, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:41:33,017] [INFO] [timer.py:197:stop] 0/10, RunningAvgSamplesPerSec=6.328546175322321, CurrSamplesPerSec=5.701343759212486, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:41:44,654] [INFO] [timer.py:197:stop] 0/12, RunningAvgSamplesPerSec=6.330046764141762, CurrSamplesPerSec=5.7140661343466865, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:41:56,029] [INFO] [timer.py:197:stop] 0/14, RunningAvgSamplesPerSec=6.327367592679242, CurrSamplesPerSec=5.687009205382302, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:42:07,620] [INFO] [timer.py:197:stop] 0/16, RunningAvgSamplesPerSec=6.324036355417439, CurrSamplesPerSec=5.67106537300076, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:42:19,208] [INFO] [timer.py:197:stop] 0/18, RunningAvgSamplesPerSec=6.324517029843766, CurrSamplesPerSec=5.686187866744037, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:42:30,000] [INFO] [stage_1_and_2.py:1765:step] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 65536 +[2022-12-16 19:42:30,002] [INFO] [logging.py:68:log_dist] [Rank 0] step=10, skipped=1, lr=[3.535580269163017e-06], mom=[[0.9, 0.999]] +[2022-12-16 19:42:30,003] [INFO] [timer.py:197:stop] 0/20, RunningAvgSamplesPerSec=6.364053760974696, CurrSamplesPerSec=6.352128353972973, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:42:41,491] [INFO] [timer.py:197:stop] 0/22, RunningAvgSamplesPerSec=6.359150016371227, CurrSamplesPerSec=5.681020735272481, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:42:53,045] [INFO] [timer.py:197:stop] 0/24, RunningAvgSamplesPerSec=6.356683117345163, CurrSamplesPerSec=5.686370954837155, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:43:03,984] [INFO] [stage_1_and_2.py:1765:step] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768.0 +[2022-12-16 19:43:03,986] [INFO] [timer.py:197:stop] 0/26, RunningAvgSamplesPerSec=6.376183713003614, CurrSamplesPerSec=6.175548481452842, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:43:15,357] [INFO] [timer.py:197:stop] 0/28, RunningAvgSamplesPerSec=6.370924228753169, CurrSamplesPerSec=5.667562406103809, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:43:26,742] [INFO] [timer.py:197:stop] 0/30, RunningAvgSamplesPerSec=6.366664670767918, CurrSamplesPerSec=5.69276426047378, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:43:38,322] [INFO] [timer.py:197:stop] 0/32, RunningAvgSamplesPerSec=6.3545668974505904, CurrSamplesPerSec=5.480904775405896, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:43:49,702] [INFO] [timer.py:197:stop] 0/34, RunningAvgSamplesPerSec=6.350895118978619, CurrSamplesPerSec=5.64039048535495, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:44:01,055] [INFO] [timer.py:197:stop] 0/36, RunningAvgSamplesPerSec=6.34923010740825, CurrSamplesPerSec=5.697280637929649, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:44:12,507] [INFO] [timer.py:197:stop] 0/38, RunningAvgSamplesPerSec=6.344325528095929, CurrSamplesPerSec=5.606869186879086, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:44:23,841] [INFO] [logging.py:68:log_dist] [Rank 0] step=20, skipped=2, lr=[4.650931663140581e-06], mom=[[0.9, 0.999]] +[2022-12-16 19:44:23,843] [INFO] [timer.py:197:stop] 0/40, RunningAvgSamplesPerSec=6.343791060705328, CurrSamplesPerSec=5.697582226434801, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:44:35,251] [INFO] [timer.py:197:stop] 0/42, RunningAvgSamplesPerSec=6.341041313223634, CurrSamplesPerSec=5.669263060201902, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:44:45,976] [INFO] [stage_1_and_2.py:1765:step] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768.0, reducing to 16384.0 +[2022-12-16 19:44:45,978] [INFO] [timer.py:197:stop] 0/44, RunningAvgSamplesPerSec=6.358073639041419, CurrSamplesPerSec=6.36476204743189, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:44:57,311] [INFO] [timer.py:197:stop] 0/46, RunningAvgSamplesPerSec=6.357102766194718, CurrSamplesPerSec=5.694208770316031, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:45:08,706] [INFO] [timer.py:197:stop] 0/48, RunningAvgSamplesPerSec=6.354161775690528, CurrSamplesPerSec=5.654900942339342, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:45:20,362] [INFO] [timer.py:197:stop] 0/50, RunningAvgSamplesPerSec=6.35051370610784, CurrSamplesPerSec=5.6561494303952085, MemAllocated=3.0GB, MaxMemAllocated=19.53GB {'loss': 0.3246, 'learning_rate': 4.973833272194737e-06, 'epoch': 0.11} -[2022-12-16 12:44:17,771] [INFO] [timer.py:197:stop] 0/52, RunningAvgSamplesPerSec=6.373949262772854, CurrSamplesPerSec=5.691750812101744, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:44:29,101] [INFO] [timer.py:197:stop] 0/54, RunningAvgSamplesPerSec=6.372228449261265, CurrSamplesPerSec=5.69880753129992, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:44:40,455] [INFO] [timer.py:197:stop] 0/56, RunningAvgSamplesPerSec=6.370542883426897, CurrSamplesPerSec=5.680790624361286, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:44:51,786] [INFO] [timer.py:197:stop] 0/58, RunningAvgSamplesPerSec=6.369515579272822, CurrSamplesPerSec=5.699690125593197, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:45:03,101] [INFO] [logging.py:68:log_dist] [Rank 0] step=30, skipped=3, lr=[5.303370403744525e-06], mom=[[0.9, 0.999]] -[2022-12-16 12:45:03,102] [INFO] [timer.py:197:stop] 0/60, RunningAvgSamplesPerSec=6.368480625171759, CurrSamplesPerSec=5.7089997596752, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:45:14,482] [INFO] [timer.py:197:stop] 0/62, RunningAvgSamplesPerSec=6.3666495482652, CurrSamplesPerSec=5.675317465009016, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:45:25,861] [INFO] [timer.py:197:stop] 0/64, RunningAvgSamplesPerSec=6.364903051268984, CurrSamplesPerSec=5.685087177683414, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:45:37,183] [INFO] [timer.py:197:stop] 0/66, RunningAvgSamplesPerSec=6.364047355448845, CurrSamplesPerSec=5.707811575579985, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:45:48,536] [INFO] [timer.py:197:stop] 0/68, RunningAvgSamplesPerSec=6.362912684415659, CurrSamplesPerSec=5.695592380490683, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:45:59,895] [INFO] [timer.py:197:stop] 0/70, RunningAvgSamplesPerSec=6.361712214957278, CurrSamplesPerSec=5.683929625983731, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:46:11,275] [INFO] [timer.py:197:stop] 0/72, RunningAvgSamplesPerSec=6.360228512038433, CurrSamplesPerSec=5.675749697315712, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:46:22,617] [INFO] [timer.py:197:stop] 0/74, RunningAvgSamplesPerSec=6.359199676451197, CurrSamplesPerSec=5.6725835196614005, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:46:33,974] [INFO] [timer.py:197:stop] 0/76, RunningAvgSamplesPerSec=6.358251401819239, CurrSamplesPerSec=5.690369308760629, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:46:45,348] [INFO] [timer.py:197:stop] 0/78, RunningAvgSamplesPerSec=6.357095959071015, CurrSamplesPerSec=5.688037839526133, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:46:56,669] [INFO] [logging.py:68:log_dist] [Rank 0] step=40, skipped=3, lr=[5.810371073215365e-06], mom=[[0.9, 0.999]] -[2022-12-16 12:46:56,670] [INFO] [timer.py:197:stop] 0/80, RunningAvgSamplesPerSec=6.356839339793331, CurrSamplesPerSec=5.7071434090115964, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:47:08,047] [INFO] [timer.py:197:stop] 0/82, RunningAvgSamplesPerSec=6.355727674187336, CurrSamplesPerSec=5.6961031281183585, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:47:19,383] [INFO] [timer.py:197:stop] 0/84, RunningAvgSamplesPerSec=6.355341089950659, CurrSamplesPerSec=5.707567396319488, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:47:30,685] [INFO] [timer.py:197:stop] 0/86, RunningAvgSamplesPerSec=6.354985244848452, CurrSamplesPerSec=5.707153601441993, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:47:42,027] [INFO] [timer.py:197:stop] 0/88, RunningAvgSamplesPerSec=6.354542229262944, CurrSamplesPerSec=5.702634405066058, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:47:53,384] [INFO] [timer.py:197:stop] 0/90, RunningAvgSamplesPerSec=6.353934422195688, CurrSamplesPerSec=5.68178237423023, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:48:04,773] [INFO] [timer.py:197:stop] 0/92, RunningAvgSamplesPerSec=6.353584416009719, CurrSamplesPerSec=5.710814552007692, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:48:16,116] [INFO] [timer.py:197:stop] 0/94, RunningAvgSamplesPerSec=6.352760486107527, CurrSamplesPerSec=5.688323744419653, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:48:27,504] [INFO] [timer.py:197:stop] 0/96, RunningAvgSamplesPerSec=6.351592342068352, CurrSamplesPerSec=5.652738899857816, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:48:38,843] [INFO] [timer.py:197:stop] 0/98, RunningAvgSamplesPerSec=6.351100856548269, CurrSamplesPerSec=5.697337954217791, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:48:50,211] [INFO] [logging.py:68:log_dist] [Rank 0] step=50, skipped=3, lr=[6.195318418690893e-06], mom=[[0.9, 0.999]] -[2022-12-16 12:48:50,213] [INFO] [timer.py:197:stop] 0/100, RunningAvgSamplesPerSec=6.350433200108205, CurrSamplesPerSec=5.6906366285964465, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:45:31,689] [INFO] [timer.py:197:stop] 0/52, RunningAvgSamplesPerSec=6.349782726929668, CurrSamplesPerSec=5.685572439917749, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:45:43,248] [INFO] [timer.py:197:stop] 0/54, RunningAvgSamplesPerSec=6.34880004266226, CurrSamplesPerSec=5.6963738880964865, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:45:54,899] [INFO] [timer.py:197:stop] 0/56, RunningAvgSamplesPerSec=6.345344332473558, CurrSamplesPerSec=5.643884141232875, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:46:06,245] [INFO] [timer.py:197:stop] 0/58, RunningAvgSamplesPerSec=6.3448005032861134, CurrSamplesPerSec=5.690518647702642, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:46:17,576] [INFO] [logging.py:68:log_dist] [Rank 0] step=30, skipped=3, lr=[5.303370403744525e-06], mom=[[0.9, 0.999]] +[2022-12-16 19:46:17,578] [INFO] [timer.py:197:stop] 0/60, RunningAvgSamplesPerSec=6.344274046935118, CurrSamplesPerSec=5.7001045334137865, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:46:29,195] [INFO] [timer.py:197:stop] 0/62, RunningAvgSamplesPerSec=6.342096151339661, CurrSamplesPerSec=5.669778676768185, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:46:40,581] [INFO] [timer.py:197:stop] 0/64, RunningAvgSamplesPerSec=6.340969890059656, CurrSamplesPerSec=5.679663422407637, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:46:51,901] [INFO] [timer.py:197:stop] 0/66, RunningAvgSamplesPerSec=6.340862832858599, CurrSamplesPerSec=5.708144139586262, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:47:03,352] [INFO] [timer.py:197:stop] 0/68, RunningAvgSamplesPerSec=6.3395299478138005, CurrSamplesPerSec=5.699751121093182, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:47:14,725] [INFO] [timer.py:197:stop] 0/70, RunningAvgSamplesPerSec=6.338858901572622, CurrSamplesPerSec=5.6697978375746505, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:47:26,090] [INFO] [timer.py:197:stop] 0/72, RunningAvgSamplesPerSec=6.338275884730358, CurrSamplesPerSec=5.6821056575417135, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:47:37,502] [INFO] [timer.py:197:stop] 0/74, RunningAvgSamplesPerSec=6.336656821130215, CurrSamplesPerSec=5.693186355412951, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:47:48,841] [INFO] [timer.py:197:stop] 0/76, RunningAvgSamplesPerSec=6.336628677865746, CurrSamplesPerSec=5.704229867270073, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:48:00,193] [INFO] [timer.py:197:stop] 0/78, RunningAvgSamplesPerSec=6.336391619354949, CurrSamplesPerSec=5.6949493023399285, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:48:11,884] [INFO] [logging.py:68:log_dist] [Rank 0] step=40, skipped=3, lr=[5.810371073215365e-06], mom=[[0.9, 0.999]] +[2022-12-16 19:48:11,886] [INFO] [timer.py:197:stop] 0/80, RunningAvgSamplesPerSec=6.336877312300462, CurrSamplesPerSec=5.711746323017021, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:48:23,476] [INFO] [timer.py:197:stop] 0/82, RunningAvgSamplesPerSec=6.336437280416101, CurrSamplesPerSec=5.700073789642822, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:48:34,945] [INFO] [timer.py:197:stop] 0/84, RunningAvgSamplesPerSec=6.334408253614145, CurrSamplesPerSec=5.564071227950201, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:48:46,254] [INFO] [timer.py:197:stop] 0/86, RunningAvgSamplesPerSec=6.33464567786091, CurrSamplesPerSec=5.702617444594408, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:48:57,589] [INFO] [timer.py:197:stop] 0/88, RunningAvgSamplesPerSec=6.334718218022051, CurrSamplesPerSec=5.6997610450876905, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:49:09,242] [INFO] [timer.py:197:stop] 0/90, RunningAvgSamplesPerSec=6.330253477273904, CurrSamplesPerSec=5.385235114108773, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:49:20,553] [INFO] [timer.py:197:stop] 0/92, RunningAvgSamplesPerSec=6.330733283816013, CurrSamplesPerSec=5.722714157612833, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:49:31,916] [INFO] [timer.py:197:stop] 0/94, RunningAvgSamplesPerSec=6.330690353473429, CurrSamplesPerSec=5.702511080508184, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:49:43,602] [INFO] [timer.py:197:stop] 0/96, RunningAvgSamplesPerSec=6.330121487925524, CurrSamplesPerSec=5.651281561996539, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:49:54,961] [INFO] [timer.py:197:stop] 0/98, RunningAvgSamplesPerSec=6.3297297307440985, CurrSamplesPerSec=5.654540248857908, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:50:06,563] [INFO] [logging.py:68:log_dist] [Rank 0] step=50, skipped=3, lr=[6.195318418690893e-06], mom=[[0.9, 0.999]] +[2022-12-16 19:50:06,565] [INFO] [timer.py:197:stop] 0/100, RunningAvgSamplesPerSec=6.329719324883379, CurrSamplesPerSec=5.702907240231407, MemAllocated=3.0GB, MaxMemAllocated=19.53GB {'loss': 0.1691, 'learning_rate': 6.195318418690893e-06, 'epoch': 0.21} -[2022-12-16 12:49:01,599] [INFO] [timer.py:197:stop] 0/102, RunningAvgSamplesPerSec=6.3496171237170556, CurrSamplesPerSec=5.663726039473994, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:49:12,937] [INFO] [timer.py:197:stop] 0/104, RunningAvgSamplesPerSec=6.349453781242221, CurrSamplesPerSec=5.705724589763913, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:49:24,295] [INFO] [timer.py:197:stop] 0/106, RunningAvgSamplesPerSec=6.349035750526656, CurrSamplesPerSec=5.715350621958373, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:49:35,667] [INFO] [timer.py:197:stop] 0/108, RunningAvgSamplesPerSec=6.348487560326544, CurrSamplesPerSec=5.6962899981012045, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:49:46,955] [INFO] [timer.py:197:stop] 0/110, RunningAvgSamplesPerSec=6.348730187619528, CurrSamplesPerSec=5.728014403669337, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:49:58,281] [INFO] [timer.py:197:stop] 0/112, RunningAvgSamplesPerSec=6.348737425707652, CurrSamplesPerSec=5.726789458927852, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:50:09,648] [INFO] [timer.py:197:stop] 0/114, RunningAvgSamplesPerSec=6.348266706061173, CurrSamplesPerSec=5.67953604234834, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:50:21,004] [INFO] [timer.py:197:stop] 0/116, RunningAvgSamplesPerSec=6.347940860010868, CurrSamplesPerSec=5.693142887368156, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:50:32,344] [INFO] [timer.py:197:stop] 0/118, RunningAvgSamplesPerSec=6.347587204060109, CurrSamplesPerSec=5.698724779414815, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:50:43,692] [INFO] [logging.py:68:log_dist] [Rank 0] step=60, skipped=3, lr=[6.505722008216461e-06], mom=[[0.9, 0.999]] -[2022-12-16 12:50:43,694] [INFO] [timer.py:197:stop] 0/120, RunningAvgSamplesPerSec=6.347324281790804, CurrSamplesPerSec=5.701369430780287, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:50:55,056] [INFO] [timer.py:197:stop] 0/122, RunningAvgSamplesPerSec=6.346958901073795, CurrSamplesPerSec=5.704603232008679, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:51:06,414] [INFO] [timer.py:197:stop] 0/124, RunningAvgSamplesPerSec=6.346601483736209, CurrSamplesPerSec=5.687951784405772, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:51:17,801] [INFO] [timer.py:197:stop] 0/126, RunningAvgSamplesPerSec=6.34594390644621, CurrSamplesPerSec=5.660766399429713, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:51:29,174] [INFO] [timer.py:197:stop] 0/128, RunningAvgSamplesPerSec=6.345436031798535, CurrSamplesPerSec=5.684730568773232, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:51:40,485] [INFO] [timer.py:197:stop] 0/130, RunningAvgSamplesPerSec=6.345409778459219, CurrSamplesPerSec=5.714689448717287, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:51:51,825] [INFO] [timer.py:197:stop] 0/132, RunningAvgSamplesPerSec=6.345255907930603, CurrSamplesPerSec=5.712015898245238, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:52:03,176] [INFO] [timer.py:197:stop] 0/134, RunningAvgSamplesPerSec=6.345007895933262, CurrSamplesPerSec=5.699063303049912, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:52:14,543] [INFO] [timer.py:197:stop] 0/136, RunningAvgSamplesPerSec=6.344485483804747, CurrSamplesPerSec=5.668854321704523, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:52:25,910] [INFO] [timer.py:197:stop] 0/138, RunningAvgSamplesPerSec=6.344204784261706, CurrSamplesPerSec=5.686490691190197, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:52:37,258] [INFO] [logging.py:68:log_dist] [Rank 0] step=70, skipped=3, lr=[6.765821034569313e-06], mom=[[0.9, 0.999]] -[2022-12-16 12:52:37,259] [INFO] [timer.py:197:stop] 0/140, RunningAvgSamplesPerSec=6.343738385340108, CurrSamplesPerSec=5.668696780364775, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:52:48,644] [INFO] [timer.py:197:stop] 0/142, RunningAvgSamplesPerSec=6.343346662062031, CurrSamplesPerSec=5.707930760122382, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:52:59,988] [INFO] [timer.py:197:stop] 0/144, RunningAvgSamplesPerSec=6.343367542978673, CurrSamplesPerSec=5.698923436372281, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:53:11,361] [INFO] [timer.py:197:stop] 0/146, RunningAvgSamplesPerSec=6.343167017241652, CurrSamplesPerSec=5.693951984773787, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:53:22,734] [INFO] [timer.py:197:stop] 0/148, RunningAvgSamplesPerSec=6.342659205769162, CurrSamplesPerSec=5.658159989352935, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:53:34,026] [INFO] [timer.py:197:stop] 0/150, RunningAvgSamplesPerSec=6.342879909127556, CurrSamplesPerSec=5.734806308786819, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:50:18,089] [INFO] [timer.py:197:stop] 0/102, RunningAvgSamplesPerSec=6.328697909114363, CurrSamplesPerSec=5.685371341049817, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:50:29,422] [INFO] [timer.py:197:stop] 0/104, RunningAvgSamplesPerSec=6.329127259216088, CurrSamplesPerSec=5.715023301398578, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:50:40,909] [INFO] [timer.py:197:stop] 0/106, RunningAvgSamplesPerSec=6.329489892401154, CurrSamplesPerSec=5.732910611937274, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:50:52,391] [INFO] [timer.py:197:stop] 0/108, RunningAvgSamplesPerSec=6.3291082887675385, CurrSamplesPerSec=5.713503516490646, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:51:03,672] [INFO] [timer.py:197:stop] 0/110, RunningAvgSamplesPerSec=6.329714179875415, CurrSamplesPerSec=5.717954946591897, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:51:15,047] [INFO] [timer.py:197:stop] 0/112, RunningAvgSamplesPerSec=6.330206129328904, CurrSamplesPerSec=5.731479198460716, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:51:26,433] [INFO] [timer.py:197:stop] 0/114, RunningAvgSamplesPerSec=6.330185801461701, CurrSamplesPerSec=5.697561668064762, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:51:37,747] [INFO] [timer.py:197:stop] 0/116, RunningAvgSamplesPerSec=6.3305736060670466, CurrSamplesPerSec=5.721073711370483, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:51:49,305] [INFO] [timer.py:197:stop] 0/118, RunningAvgSamplesPerSec=6.330816278593838, CurrSamplesPerSec=5.722915222713305, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:52:00,804] [INFO] [logging.py:68:log_dist] [Rank 0] step=60, skipped=3, lr=[6.505722008216461e-06], mom=[[0.9, 0.999]] +[2022-12-16 19:52:00,806] [INFO] [timer.py:197:stop] 0/120, RunningAvgSamplesPerSec=6.330232978736771, CurrSamplesPerSec=5.6565146202221674, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:52:12,174] [INFO] [timer.py:197:stop] 0/122, RunningAvgSamplesPerSec=6.330056877233613, CurrSamplesPerSec=5.686394323508017, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:52:23,596] [INFO] [timer.py:197:stop] 0/124, RunningAvgSamplesPerSec=6.330396119925766, CurrSamplesPerSec=5.719701094321522, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:52:35,126] [INFO] [timer.py:197:stop] 0/126, RunningAvgSamplesPerSec=6.330297511420275, CurrSamplesPerSec=5.711849385744503, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:52:46,490] [INFO] [timer.py:197:stop] 0/128, RunningAvgSamplesPerSec=6.330124817932588, CurrSamplesPerSec=5.687188249668486, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:52:57,974] [INFO] [timer.py:197:stop] 0/130, RunningAvgSamplesPerSec=6.33039768866297, CurrSamplesPerSec=5.708138070552879, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:53:09,484] [INFO] [timer.py:197:stop] 0/132, RunningAvgSamplesPerSec=6.330326619642486, CurrSamplesPerSec=5.711341157262751, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:53:20,796] [INFO] [timer.py:197:stop] 0/134, RunningAvgSamplesPerSec=6.3306785190922925, CurrSamplesPerSec=5.725297601502475, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:53:32,117] [INFO] [timer.py:197:stop] 0/136, RunningAvgSamplesPerSec=6.330777395173404, CurrSamplesPerSec=5.700345895641132, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:53:43,732] [INFO] [timer.py:197:stop] 0/138, RunningAvgSamplesPerSec=6.328317929913682, CurrSamplesPerSec=5.704530009876204, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:53:55,069] [INFO] [logging.py:68:log_dist] [Rank 0] step=70, skipped=3, lr=[6.765821034569313e-06], mom=[[0.9, 0.999]] +[2022-12-16 19:53:55,070] [INFO] [timer.py:197:stop] 0/140, RunningAvgSamplesPerSec=6.3281437768124755, CurrSamplesPerSec=5.6646511134992545, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:54:06,392] [INFO] [timer.py:197:stop] 0/142, RunningAvgSamplesPerSec=6.328266137695921, CurrSamplesPerSec=5.723089458024658, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:54:17,774] [INFO] [timer.py:197:stop] 0/144, RunningAvgSamplesPerSec=6.327982292630433, CurrSamplesPerSec=5.701898171177177, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:54:29,098] [INFO] [timer.py:197:stop] 0/146, RunningAvgSamplesPerSec=6.3282000633196125, CurrSamplesPerSec=5.7053862444388335, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:54:40,438] [INFO] [timer.py:197:stop] 0/148, RunningAvgSamplesPerSec=6.328155482147814, CurrSamplesPerSec=5.673894035889023, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:54:52,053] [INFO] [timer.py:197:stop] 0/150, RunningAvgSamplesPerSec=6.328496355103054, CurrSamplesPerSec=5.7220768941292555, MemAllocated=3.0GB, MaxMemAllocated=19.53GB {'loss': 0.1644, 'learning_rate': 6.881634451095711e-06, 'epoch': 0.32} -[2022-12-16 12:53:45,320] [INFO] [timer.py:197:stop] 0/152, RunningAvgSamplesPerSec=6.342917910651529, CurrSamplesPerSec=5.710984649224423, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:53:56,669] [INFO] [timer.py:197:stop] 0/154, RunningAvgSamplesPerSec=6.342733602913983, CurrSamplesPerSec=5.707075945747419, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:54:07,978] [INFO] [timer.py:197:stop] 0/156, RunningAvgSamplesPerSec=6.3428841523302095, CurrSamplesPerSec=5.728329034097408, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:54:19,322] [INFO] [timer.py:197:stop] 0/158, RunningAvgSamplesPerSec=6.34275224260025, CurrSamplesPerSec=5.702134598060043, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:54:30,661] [INFO] [logging.py:68:log_dist] [Rank 0] step=80, skipped=3, lr=[6.9896691039239e-06], mom=[[0.9, 0.999]] -[2022-12-16 12:54:30,663] [INFO] [timer.py:197:stop] 0/160, RunningAvgSamplesPerSec=6.342683694089697, CurrSamplesPerSec=5.7029733933644255, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:54:42,087] [INFO] [timer.py:197:stop] 0/162, RunningAvgSamplesPerSec=6.342013189520437, CurrSamplesPerSec=5.697145453446462, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:54:53,432] [INFO] [timer.py:197:stop] 0/164, RunningAvgSamplesPerSec=6.341816271259256, CurrSamplesPerSec=5.701045404740299, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:55:04,839] [INFO] [timer.py:197:stop] 0/166, RunningAvgSamplesPerSec=6.341202369491997, CurrSamplesPerSec=5.623904352955541, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:55:16,160] [INFO] [timer.py:197:stop] 0/168, RunningAvgSamplesPerSec=6.3412941460944685, CurrSamplesPerSec=5.697974798834354, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:55:27,484] [INFO] [timer.py:197:stop] 0/170, RunningAvgSamplesPerSec=6.341284673747386, CurrSamplesPerSec=5.709030599807577, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:55:38,824] [INFO] [timer.py:197:stop] 0/172, RunningAvgSamplesPerSec=6.3411692034098985, CurrSamplesPerSec=5.697860383670918, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:55:50,137] [INFO] [timer.py:197:stop] 0/174, RunningAvgSamplesPerSec=6.341308142694129, CurrSamplesPerSec=5.705610833345307, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:56:01,451] [INFO] [timer.py:197:stop] 0/176, RunningAvgSamplesPerSec=6.341335680588037, CurrSamplesPerSec=5.69166705821747, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:56:12,805] [INFO] [timer.py:197:stop] 0/178, RunningAvgSamplesPerSec=6.3412695672311505, CurrSamplesPerSec=5.6963572066037775, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:56:24,116] [INFO] [logging.py:68:log_dist] [Rank 0] step=90, skipped=3, lr=[7.186146009413563e-06], mom=[[0.9, 0.999]] -[2022-12-16 12:56:24,118] [INFO] [timer.py:197:stop] 0/180, RunningAvgSamplesPerSec=6.3413842936177165, CurrSamplesPerSec=5.70348134547156, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:56:35,476] [INFO] [timer.py:197:stop] 0/182, RunningAvgSamplesPerSec=6.341238572121128, CurrSamplesPerSec=5.688341102134533, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:56:46,831] [INFO] [timer.py:197:stop] 0/184, RunningAvgSamplesPerSec=6.341057889364869, CurrSamplesPerSec=5.673437384747106, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:56:58,196] [INFO] [timer.py:197:stop] 0/186, RunningAvgSamplesPerSec=6.340863335113536, CurrSamplesPerSec=5.687693634667262, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:57:09,533] [INFO] [timer.py:197:stop] 0/188, RunningAvgSamplesPerSec=6.340707296916432, CurrSamplesPerSec=5.692211623233167, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:57:20,857] [INFO] [timer.py:197:stop] 0/190, RunningAvgSamplesPerSec=6.3407842661084945, CurrSamplesPerSec=5.694887684662681, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:57:32,184] [INFO] [timer.py:197:stop] 0/192, RunningAvgSamplesPerSec=6.340805746497473, CurrSamplesPerSec=5.7107445720947325, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:57:43,477] [INFO] [timer.py:197:stop] 0/194, RunningAvgSamplesPerSec=6.3409575863837775, CurrSamplesPerSec=5.7252595029953826, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:57:54,814] [INFO] [timer.py:197:stop] 0/196, RunningAvgSamplesPerSec=6.340975998120396, CurrSamplesPerSec=5.690191511550644, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:58:06,164] [INFO] [timer.py:197:stop] 0/198, RunningAvgSamplesPerSec=6.340876962362114, CurrSamplesPerSec=5.713173732372909, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:58:17,522] [INFO] [logging.py:68:log_dist] [Rank 0] step=100, skipped=3, lr=[7.361221988663844e-06], mom=[[0.9, 0.999]] -[2022-12-16 12:58:17,524] [INFO] [timer.py:197:stop] 0/200, RunningAvgSamplesPerSec=6.340767966334212, CurrSamplesPerSec=5.680717771813953, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:55:03,365] [INFO] [timer.py:197:stop] 0/152, RunningAvgSamplesPerSec=6.328569418735792, CurrSamplesPerSec=5.68189566352516, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:55:14,855] [INFO] [timer.py:197:stop] 0/154, RunningAvgSamplesPerSec=6.3275040965459715, CurrSamplesPerSec=5.571994189949849, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:55:26,452] [INFO] [timer.py:197:stop] 0/156, RunningAvgSamplesPerSec=6.327817724665443, CurrSamplesPerSec=5.721640992027081, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:55:37,782] [INFO] [timer.py:197:stop] 0/158, RunningAvgSamplesPerSec=6.327978159826033, CurrSamplesPerSec=5.712968001389319, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:55:49,157] [INFO] [logging.py:68:log_dist] [Rank 0] step=80, skipped=3, lr=[6.9896691039239e-06], mom=[[0.9, 0.999]] +[2022-12-16 19:55:49,159] [INFO] [timer.py:197:stop] 0/160, RunningAvgSamplesPerSec=6.328182428410735, CurrSamplesPerSec=5.717271739216092, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:56:00,611] [INFO] [timer.py:197:stop] 0/162, RunningAvgSamplesPerSec=6.327661285455147, CurrSamplesPerSec=5.696292899154895, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:56:11,980] [INFO] [timer.py:197:stop] 0/164, RunningAvgSamplesPerSec=6.327531151891573, CurrSamplesPerSec=5.687551433336378, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:56:23,313] [INFO] [timer.py:197:stop] 0/166, RunningAvgSamplesPerSec=6.327629144751145, CurrSamplesPerSec=5.690251580192909, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:56:34,645] [INFO] [timer.py:197:stop] 0/168, RunningAvgSamplesPerSec=6.327745174849396, CurrSamplesPerSec=5.682591613865132, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:56:45,962] [INFO] [timer.py:197:stop] 0/170, RunningAvgSamplesPerSec=6.3278631523075575, CurrSamplesPerSec=5.70634268898286, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:56:57,300] [INFO] [timer.py:197:stop] 0/172, RunningAvgSamplesPerSec=6.327832359726225, CurrSamplesPerSec=5.692901410316983, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:57:08,611] [INFO] [timer.py:197:stop] 0/174, RunningAvgSamplesPerSec=6.327975893412131, CurrSamplesPerSec=5.691621682502721, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:57:19,938] [INFO] [timer.py:197:stop] 0/176, RunningAvgSamplesPerSec=6.328006607076309, CurrSamplesPerSec=5.674112793988129, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:57:31,335] [INFO] [timer.py:197:stop] 0/178, RunningAvgSamplesPerSec=6.327706582184348, CurrSamplesPerSec=5.648043028531768, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:57:42,679] [INFO] [logging.py:68:log_dist] [Rank 0] step=90, skipped=3, lr=[7.186146009413563e-06], mom=[[0.9, 0.999]] +[2022-12-16 19:57:42,680] [INFO] [timer.py:197:stop] 0/180, RunningAvgSamplesPerSec=6.327737545730115, CurrSamplesPerSec=5.679434382740355, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:57:54,053] [INFO] [timer.py:197:stop] 0/182, RunningAvgSamplesPerSec=6.327567772018162, CurrSamplesPerSec=5.671728233835681, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:58:05,553] [INFO] [timer.py:197:stop] 0/184, RunningAvgSamplesPerSec=6.327321171217064, CurrSamplesPerSec=5.650057103503205, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:58:17,043] [INFO] [timer.py:197:stop] 0/186, RunningAvgSamplesPerSec=6.327257340860697, CurrSamplesPerSec=5.696403866676768, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:58:28,641] [INFO] [timer.py:197:stop] 0/188, RunningAvgSamplesPerSec=6.325485038741021, CurrSamplesPerSec=5.448433209976314, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:58:40,184] [INFO] [timer.py:197:stop] 0/190, RunningAvgSamplesPerSec=6.325371332058678, CurrSamplesPerSec=5.650139399406368, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:58:51,535] [INFO] [timer.py:197:stop] 0/192, RunningAvgSamplesPerSec=6.32535828915325, CurrSamplesPerSec=5.693068268772343, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:59:03,080] [INFO] [timer.py:197:stop] 0/194, RunningAvgSamplesPerSec=6.324000040671213, CurrSamplesPerSec=5.484969185042463, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:59:14,449] [INFO] [timer.py:197:stop] 0/196, RunningAvgSamplesPerSec=6.323911524635286, CurrSamplesPerSec=5.656116060308147, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:59:25,865] [INFO] [timer.py:197:stop] 0/198, RunningAvgSamplesPerSec=6.32353250731071, CurrSamplesPerSec=5.666084499205607, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:59:37,404] [INFO] [logging.py:68:log_dist] [Rank 0] step=100, skipped=3, lr=[7.361221988663844e-06], mom=[[0.9, 0.999]] +[2022-12-16 19:59:37,406] [INFO] [timer.py:197:stop] 0/200, RunningAvgSamplesPerSec=6.3223735336504, CurrSamplesPerSec=5.500500901581866, MemAllocated=3.0GB, MaxMemAllocated=19.53GB {'loss': 0.1458, 'learning_rate': 7.361221988663844e-06, 'epoch': 0.42} -[2022-12-16 12:58:28,862] [INFO] [timer.py:197:stop] 0/202, RunningAvgSamplesPerSec=6.340591964672333, CurrSamplesPerSec=5.677220163973008, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:58:40,200] [INFO] [timer.py:197:stop] 0/204, RunningAvgSamplesPerSec=6.34056087587368, CurrSamplesPerSec=5.678926379567298, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:58:51,575] [INFO] [timer.py:197:stop] 0/206, RunningAvgSamplesPerSec=6.340353070949561, CurrSamplesPerSec=5.706827947024672, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:59:02,889] [INFO] [timer.py:197:stop] 0/208, RunningAvgSamplesPerSec=6.340482329147856, CurrSamplesPerSec=5.71856619629364, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:59:14,195] [INFO] [timer.py:197:stop] 0/210, RunningAvgSamplesPerSec=6.340559967021615, CurrSamplesPerSec=5.71644504525915, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:59:25,530] [INFO] [timer.py:197:stop] 0/212, RunningAvgSamplesPerSec=6.340570532053464, CurrSamplesPerSec=5.703255954791647, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:59:36,887] [INFO] [timer.py:197:stop] 0/214, RunningAvgSamplesPerSec=6.340519276026067, CurrSamplesPerSec=5.718165909202066, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:59:48,209] [INFO] [timer.py:197:stop] 0/216, RunningAvgSamplesPerSec=6.340597670791518, CurrSamplesPerSec=5.722122268841553, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 12:59:59,540] [INFO] [timer.py:197:stop] 0/218, RunningAvgSamplesPerSec=6.340674298114713, CurrSamplesPerSec=5.7178209714922055, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:00:10,862] [INFO] [logging.py:68:log_dist] [Rank 0] step=110, skipped=3, lr=[7.5191046007362515e-06], mom=[[0.9, 0.999]] -[2022-12-16 13:00:10,863] [INFO] [timer.py:197:stop] 0/220, RunningAvgSamplesPerSec=6.340654507160751, CurrSamplesPerSec=5.698601865846052, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:00:22,219] [INFO] [timer.py:197:stop] 0/222, RunningAvgSamplesPerSec=6.340582477276827, CurrSamplesPerSec=5.696138180415054, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:00:33,530] [INFO] [timer.py:197:stop] 0/224, RunningAvgSamplesPerSec=6.3406193213495925, CurrSamplesPerSec=5.7056717130819, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:00:44,861] [INFO] [timer.py:197:stop] 0/226, RunningAvgSamplesPerSec=6.340673168930113, CurrSamplesPerSec=5.729910043329389, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:00:56,144] [INFO] [timer.py:197:stop] 0/228, RunningAvgSamplesPerSec=6.3409484317239055, CurrSamplesPerSec=5.753290344527706, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:01:07,484] [INFO] [timer.py:197:stop] 0/230, RunningAvgSamplesPerSec=6.34090809825985, CurrSamplesPerSec=5.719477344614187, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:01:18,773] [INFO] [timer.py:197:stop] 0/232, RunningAvgSamplesPerSec=6.341049096424349, CurrSamplesPerSec=5.733209617158469, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:01:30,106] [INFO] [timer.py:197:stop] 0/234, RunningAvgSamplesPerSec=6.341090554053744, CurrSamplesPerSec=5.708298782910727, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:01:41,423] [INFO] [timer.py:197:stop] 0/236, RunningAvgSamplesPerSec=6.3411969087420434, CurrSamplesPerSec=5.719391797881744, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:01:52,772] [INFO] [timer.py:197:stop] 0/238, RunningAvgSamplesPerSec=6.341103710545856, CurrSamplesPerSec=5.686260136951288, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:02:04,087] [INFO] [logging.py:68:log_dist] [Rank 0] step=120, skipped=3, lr=[7.662870867121632e-06], mom=[[0.9, 0.999]] -[2022-12-16 13:02:04,088] [INFO] [timer.py:197:stop] 0/240, RunningAvgSamplesPerSec=6.341185991377369, CurrSamplesPerSec=5.713273198646448, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:02:15,422] [INFO] [timer.py:197:stop] 0/242, RunningAvgSamplesPerSec=6.341224830608527, CurrSamplesPerSec=5.715415603808621, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:02:26,729] [INFO] [timer.py:197:stop] 0/244, RunningAvgSamplesPerSec=6.34135744332114, CurrSamplesPerSec=5.711197284995015, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:02:38,053] [INFO] [timer.py:197:stop] 0/246, RunningAvgSamplesPerSec=6.34141121362421, CurrSamplesPerSec=5.721091269531728, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:02:49,398] [INFO] [timer.py:197:stop] 0/248, RunningAvgSamplesPerSec=6.3413486959959195, CurrSamplesPerSec=5.699164456585958, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:03:00,728] [INFO] [timer.py:197:stop] 0/250, RunningAvgSamplesPerSec=6.341232802134687, CurrSamplesPerSec=5.691354752785703, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 19:59:48,750] [INFO] [timer.py:197:stop] 0/202, RunningAvgSamplesPerSec=6.322279298198284, CurrSamplesPerSec=5.673023008677945, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:00:00,166] [INFO] [timer.py:197:stop] 0/204, RunningAvgSamplesPerSec=6.322269734290734, CurrSamplesPerSec=5.667173773723511, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:00:11,649] [INFO] [timer.py:197:stop] 0/206, RunningAvgSamplesPerSec=6.321527490293199, CurrSamplesPerSec=5.603557346378851, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:00:23,040] [INFO] [timer.py:197:stop] 0/208, RunningAvgSamplesPerSec=6.321413257018045, CurrSamplesPerSec=5.6625088443919855, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:00:34,546] [INFO] [timer.py:197:stop] 0/210, RunningAvgSamplesPerSec=6.32141400676977, CurrSamplesPerSec=5.6849306591484146, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:00:46,129] [INFO] [timer.py:197:stop] 0/212, RunningAvgSamplesPerSec=6.321367640712001, CurrSamplesPerSec=5.6768530161234745, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:00:57,500] [INFO] [timer.py:197:stop] 0/214, RunningAvgSamplesPerSec=6.321339276033958, CurrSamplesPerSec=5.702498239554356, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:01:08,843] [INFO] [timer.py:197:stop] 0/216, RunningAvgSamplesPerSec=6.321463085232523, CurrSamplesPerSec=5.712604482269962, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:01:20,279] [INFO] [timer.py:197:stop] 0/218, RunningAvgSamplesPerSec=6.321074042641444, CurrSamplesPerSec=5.642485922448768, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:01:31,627] [INFO] [logging.py:68:log_dist] [Rank 0] step=110, skipped=3, lr=[7.5191046007362515e-06], mom=[[0.9, 0.999]] +[2022-12-16 20:01:31,629] [INFO] [timer.py:197:stop] 0/220, RunningAvgSamplesPerSec=6.321164389806482, CurrSamplesPerSec=5.698712681387359, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:01:43,012] [INFO] [timer.py:197:stop] 0/222, RunningAvgSamplesPerSec=6.321083383820289, CurrSamplesPerSec=5.675591772493804, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:01:54,342] [INFO] [timer.py:197:stop] 0/224, RunningAvgSamplesPerSec=6.321181478696793, CurrSamplesPerSec=5.701490768394447, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:02:05,720] [INFO] [timer.py:197:stop] 0/226, RunningAvgSamplesPerSec=6.321103908583707, CurrSamplesPerSec=5.6887073263966546, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:02:17,052] [INFO] [timer.py:197:stop] 0/228, RunningAvgSamplesPerSec=6.321279121862528, CurrSamplesPerSec=5.7162519815248904, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:02:28,413] [INFO] [timer.py:197:stop] 0/230, RunningAvgSamplesPerSec=6.321298670377185, CurrSamplesPerSec=5.709214433335775, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:02:39,723] [INFO] [timer.py:197:stop] 0/232, RunningAvgSamplesPerSec=6.321502115012851, CurrSamplesPerSec=5.7279660020557355, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:02:51,084] [INFO] [timer.py:197:stop] 0/234, RunningAvgSamplesPerSec=6.321540937522988, CurrSamplesPerSec=5.69602673978007, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:03:02,438] [INFO] [timer.py:197:stop] 0/236, RunningAvgSamplesPerSec=6.32159137687095, CurrSamplesPerSec=5.691098465896081, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:03:13,796] [INFO] [timer.py:197:stop] 0/238, RunningAvgSamplesPerSec=6.321628403692657, CurrSamplesPerSec=5.69404691784024, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:03:25,137] [INFO] [logging.py:68:log_dist] [Rank 0] step=120, skipped=3, lr=[7.662870867121632e-06], mom=[[0.9, 0.999]] +[2022-12-16 20:03:25,138] [INFO] [timer.py:197:stop] 0/240, RunningAvgSamplesPerSec=6.321748647888827, CurrSamplesPerSec=5.696099018567, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:03:36,598] [INFO] [timer.py:197:stop] 0/242, RunningAvgSamplesPerSec=6.321880011749018, CurrSamplesPerSec=5.714113571526187, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:03:47,936] [INFO] [timer.py:197:stop] 0/244, RunningAvgSamplesPerSec=6.322009363444331, CurrSamplesPerSec=5.6904749791235485, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:03:59,258] [INFO] [timer.py:197:stop] 0/246, RunningAvgSamplesPerSec=6.322221643460198, CurrSamplesPerSec=5.7263758040612815, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:04:10,613] [INFO] [timer.py:197:stop] 0/248, RunningAvgSamplesPerSec=6.322263624452999, CurrSamplesPerSec=5.692347539349729, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:04:21,955] [INFO] [timer.py:197:stop] 0/250, RunningAvgSamplesPerSec=6.3222029662188115, CurrSamplesPerSec=5.67385973653264, MemAllocated=3.0GB, MaxMemAllocated=19.53GB {'loss': 0.1389, 'learning_rate': 7.730207550743121e-06, 'epoch': 0.53} -[2022-12-16 13:03:12,074] [INFO] [timer.py:197:stop] 0/252, RunningAvgSamplesPerSec=6.341191703621708, CurrSamplesPerSec=5.709133078861001, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:03:23,470] [INFO] [timer.py:197:stop] 0/254, RunningAvgSamplesPerSec=6.340809226261937, CurrSamplesPerSec=5.66181732648358, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:03:34,837] [INFO] [timer.py:197:stop] 0/256, RunningAvgSamplesPerSec=6.340656210309537, CurrSamplesPerSec=5.695450750660555, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:03:46,216] [INFO] [timer.py:197:stop] 0/258, RunningAvgSamplesPerSec=6.340446151839456, CurrSamplesPerSec=5.6939783144927985, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:03:57,594] [INFO] [logging.py:68:log_dist] [Rank 0] step=130, skipped=3, lr=[7.794839207460995e-06], mom=[[0.9, 0.999]] -[2022-12-16 13:03:57,596] [INFO] [timer.py:197:stop] 0/260, RunningAvgSamplesPerSec=6.3402530863003035, CurrSamplesPerSec=5.676343794160837, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:04:08,987] [INFO] [timer.py:197:stop] 0/262, RunningAvgSamplesPerSec=6.339991907491181, CurrSamplesPerSec=5.662990020741442, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:04:20,372] [INFO] [timer.py:197:stop] 0/264, RunningAvgSamplesPerSec=6.33977236935012, CurrSamplesPerSec=5.682990304456265, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:04:31,812] [INFO] [timer.py:197:stop] 0/266, RunningAvgSamplesPerSec=6.33927565959383, CurrSamplesPerSec=5.644096081786364, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:04:43,213] [INFO] [timer.py:197:stop] 0/268, RunningAvgSamplesPerSec=6.338978242661073, CurrSamplesPerSec=5.671891216846216, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:04:54,555] [INFO] [timer.py:197:stop] 0/270, RunningAvgSamplesPerSec=6.338812609327249, CurrSamplesPerSec=5.693351540036939, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:05:06,009] [INFO] [timer.py:197:stop] 0/272, RunningAvgSamplesPerSec=6.338416777493823, CurrSamplesPerSec=5.65095011838482, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:05:17,395] [INFO] [timer.py:197:stop] 0/274, RunningAvgSamplesPerSec=6.338115050856076, CurrSamplesPerSec=5.681647924220384, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:05:28,788] [INFO] [timer.py:197:stop] 0/276, RunningAvgSamplesPerSec=6.337804017797665, CurrSamplesPerSec=5.6637430083955795, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:05:40,120] [INFO] [timer.py:197:stop] 0/278, RunningAvgSamplesPerSec=6.337846770383127, CurrSamplesPerSec=5.722553608490977, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:05:51,506] [INFO] [logging.py:68:log_dist] [Rank 0] step=140, skipped=3, lr=[7.916799978227501e-06], mom=[[0.9, 0.999]] -[2022-12-16 13:05:51,508] [INFO] [timer.py:197:stop] 0/280, RunningAvgSamplesPerSec=6.337560045613895, CurrSamplesPerSec=5.677672140398493, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:06:02,886] [INFO] [timer.py:197:stop] 0/282, RunningAvgSamplesPerSec=6.3373140118868525, CurrSamplesPerSec=5.672994234809722, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:06:14,273] [INFO] [timer.py:197:stop] 0/284, RunningAvgSamplesPerSec=6.337108122417633, CurrSamplesPerSec=5.683808312585445, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:06:25,659] [INFO] [timer.py:197:stop] 0/286, RunningAvgSamplesPerSec=6.336910867350083, CurrSamplesPerSec=5.670826723524054, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:06:37,005] [INFO] [timer.py:197:stop] 0/288, RunningAvgSamplesPerSec=6.336887675368015, CurrSamplesPerSec=5.709099323505792, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:06:48,367] [INFO] [timer.py:197:stop] 0/290, RunningAvgSamplesPerSec=6.33672835037139, CurrSamplesPerSec=5.678792064686691, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:06:59,737] [INFO] [timer.py:197:stop] 0/292, RunningAvgSamplesPerSec=6.336600926892866, CurrSamplesPerSec=5.689706184143832, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:07:11,154] [INFO] [timer.py:197:stop] 0/294, RunningAvgSamplesPerSec=6.336275402994384, CurrSamplesPerSec=5.6646066456642465, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:07:22,536] [INFO] [timer.py:197:stop] 0/296, RunningAvgSamplesPerSec=6.336103758210258, CurrSamplesPerSec=5.683237438898976, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:07:33,911] [INFO] [timer.py:197:stop] 0/298, RunningAvgSamplesPerSec=6.335964380155541, CurrSamplesPerSec=5.690570278821041, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:07:45,241] [INFO] [logging.py:68:log_dist] [Rank 0] step=150, skipped=3, lr=[8.03016458599496e-06], mom=[[0.9, 0.999]] -[2022-12-16 13:07:45,243] [INFO] [timer.py:197:stop] 0/300, RunningAvgSamplesPerSec=6.335939475961018, CurrSamplesPerSec=5.704075201910128, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:04:33,296] [INFO] [timer.py:197:stop] 0/252, RunningAvgSamplesPerSec=6.322313955883044, CurrSamplesPerSec=5.701953642527591, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:04:44,678] [INFO] [timer.py:197:stop] 0/254, RunningAvgSamplesPerSec=6.322151144538406, CurrSamplesPerSec=5.665580077082235, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:04:56,010] [INFO] [timer.py:197:stop] 0/256, RunningAvgSamplesPerSec=6.322301777645738, CurrSamplesPerSec=5.718121328046148, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:05:07,595] [INFO] [timer.py:197:stop] 0/258, RunningAvgSamplesPerSec=6.322330489705541, CurrSamplesPerSec=5.707673706391819, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:05:18,934] [INFO] [logging.py:68:log_dist] [Rank 0] step=130, skipped=3, lr=[7.794839207460995e-06], mom=[[0.9, 0.999]] +[2022-12-16 20:05:18,935] [INFO] [timer.py:197:stop] 0/260, RunningAvgSamplesPerSec=6.322448219599348, CurrSamplesPerSec=5.706156857008184, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:05:30,262] [INFO] [timer.py:197:stop] 0/262, RunningAvgSamplesPerSec=6.322617585015774, CurrSamplesPerSec=5.711549930757668, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:05:41,626] [INFO] [timer.py:197:stop] 0/264, RunningAvgSamplesPerSec=6.322613341576608, CurrSamplesPerSec=5.687131619368439, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:05:53,023] [INFO] [timer.py:197:stop] 0/266, RunningAvgSamplesPerSec=6.322451874197788, CurrSamplesPerSec=5.676991081195374, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:06:04,411] [INFO] [timer.py:197:stop] 0/268, RunningAvgSamplesPerSec=6.322351880434625, CurrSamplesPerSec=5.67797814191006, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:06:15,738] [INFO] [timer.py:197:stop] 0/270, RunningAvgSamplesPerSec=6.322369845515796, CurrSamplesPerSec=5.689833538393228, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:06:27,100] [INFO] [timer.py:197:stop] 0/272, RunningAvgSamplesPerSec=6.322374139413545, CurrSamplesPerSec=5.697104101334954, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:06:38,457] [INFO] [timer.py:197:stop] 0/274, RunningAvgSamplesPerSec=6.322333531347227, CurrSamplesPerSec=5.704905353797042, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:06:49,837] [INFO] [timer.py:197:stop] 0/276, RunningAvgSamplesPerSec=6.3221861198507225, CurrSamplesPerSec=5.6565828005700505, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:07:01,155] [INFO] [timer.py:197:stop] 0/278, RunningAvgSamplesPerSec=6.32240021581047, CurrSamplesPerSec=5.725513502620633, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:07:12,519] [INFO] [logging.py:68:log_dist] [Rank 0] step=140, skipped=3, lr=[7.916799978227501e-06], mom=[[0.9, 0.999]] +[2022-12-16 20:07:12,521] [INFO] [timer.py:197:stop] 0/280, RunningAvgSamplesPerSec=6.322314987769751, CurrSamplesPerSec=5.682835585653388, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:07:23,835] [INFO] [timer.py:197:stop] 0/282, RunningAvgSamplesPerSec=6.322476369079172, CurrSamplesPerSec=5.728346147854608, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:07:35,189] [INFO] [timer.py:197:stop] 0/284, RunningAvgSamplesPerSec=6.322524555667764, CurrSamplesPerSec=5.702553722581063, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:07:46,547] [INFO] [timer.py:197:stop] 0/286, RunningAvgSamplesPerSec=6.322601907843428, CurrSamplesPerSec=5.701636331907292, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:07:57,869] [INFO] [timer.py:197:stop] 0/288, RunningAvgSamplesPerSec=6.32279292584737, CurrSamplesPerSec=5.722436496272999, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:08:09,214] [INFO] [timer.py:197:stop] 0/290, RunningAvgSamplesPerSec=6.3228057686994665, CurrSamplesPerSec=5.686518156577721, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:08:20,583] [INFO] [timer.py:197:stop] 0/292, RunningAvgSamplesPerSec=6.3227806640395166, CurrSamplesPerSec=5.679599491324056, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:08:31,994] [INFO] [timer.py:197:stop] 0/294, RunningAvgSamplesPerSec=6.322781839290583, CurrSamplesPerSec=5.6987397810401985, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:08:43,359] [INFO] [timer.py:197:stop] 0/296, RunningAvgSamplesPerSec=6.3228034210183734, CurrSamplesPerSec=5.697710417265229, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:08:54,709] [INFO] [timer.py:197:stop] 0/298, RunningAvgSamplesPerSec=6.322863685882392, CurrSamplesPerSec=5.703479406551039, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:09:06,039] [INFO] [logging.py:68:log_dist] [Rank 0] step=150, skipped=3, lr=[8.03016458599496e-06], mom=[[0.9, 0.999]] +[2022-12-16 20:09:06,041] [INFO] [timer.py:197:stop] 0/300, RunningAvgSamplesPerSec=6.322928885435723, CurrSamplesPerSec=5.696764601432161, MemAllocated=3.0GB, MaxMemAllocated=19.53GB {'loss': 0.1376, 'learning_rate': 8.03016458599496e-06, 'epoch': 0.64} -[2022-12-16 13:07:56,649] [INFO] [timer.py:197:stop] 0/302, RunningAvgSamplesPerSec=6.335681670539102, CurrSamplesPerSec=5.672674624718876, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:08:08,021] [INFO] [timer.py:197:stop] 0/304, RunningAvgSamplesPerSec=6.335556580135721, CurrSamplesPerSec=5.6922780112891544, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:08:19,398] [INFO] [timer.py:197:stop] 0/306, RunningAvgSamplesPerSec=6.335409090735576, CurrSamplesPerSec=5.698753088999794, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:08:30,775] [INFO] [timer.py:197:stop] 0/308, RunningAvgSamplesPerSec=6.335265510045217, CurrSamplesPerSec=5.695196269706352, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:08:42,145] [INFO] [timer.py:197:stop] 0/310, RunningAvgSamplesPerSec=6.3351501550714735, CurrSamplesPerSec=5.7040398094490365, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:08:53,530] [INFO] [timer.py:197:stop] 0/312, RunningAvgSamplesPerSec=6.33497950697826, CurrSamplesPerSec=5.686301090884195, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:09:04,886] [INFO] [timer.py:197:stop] 0/314, RunningAvgSamplesPerSec=6.334928011074155, CurrSamplesPerSec=5.684328504353401, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:09:16,247] [INFO] [timer.py:197:stop] 0/316, RunningAvgSamplesPerSec=6.334859913066805, CurrSamplesPerSec=5.694762278685135, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:09:27,593] [INFO] [timer.py:197:stop] 0/318, RunningAvgSamplesPerSec=6.334788013895496, CurrSamplesPerSec=5.692994618031218, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:09:38,950] [INFO] [logging.py:68:log_dist] [Rank 0] step=160, skipped=3, lr=[8.136065420813943e-06], mom=[[0.9, 0.999]] -[2022-12-16 13:09:38,952] [INFO] [timer.py:197:stop] 0/320, RunningAvgSamplesPerSec=6.334726194202973, CurrSamplesPerSec=5.69166705821747, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:09:50,346] [INFO] [timer.py:197:stop] 0/322, RunningAvgSamplesPerSec=6.334528318138216, CurrSamplesPerSec=5.664160810534791, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:10:01,694] [INFO] [timer.py:197:stop] 0/324, RunningAvgSamplesPerSec=6.334444913892337, CurrSamplesPerSec=5.695642170124778, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:10:13,030] [INFO] [timer.py:197:stop] 0/326, RunningAvgSamplesPerSec=6.334481645980198, CurrSamplesPerSec=5.728451521955659, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:10:24,375] [INFO] [timer.py:197:stop] 0/328, RunningAvgSamplesPerSec=6.334359224662135, CurrSamplesPerSec=5.693915993166506, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:10:35,730] [INFO] [timer.py:197:stop] 0/330, RunningAvgSamplesPerSec=6.334316250489746, CurrSamplesPerSec=5.70256511006069, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:10:47,124] [INFO] [timer.py:197:stop] 0/332, RunningAvgSamplesPerSec=6.334127316131262, CurrSamplesPerSec=5.681846113935855, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:10:58,487] [INFO] [timer.py:197:stop] 0/334, RunningAvgSamplesPerSec=6.334067896845867, CurrSamplesPerSec=5.688566279869823, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:11:09,868] [INFO] [timer.py:197:stop] 0/336, RunningAvgSamplesPerSec=6.33393047819036, CurrSamplesPerSec=5.69602673978007, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:11:21,244] [INFO] [timer.py:197:stop] 0/338, RunningAvgSamplesPerSec=6.33380854480409, CurrSamplesPerSec=5.695747311289742, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:11:32,639] [INFO] [logging.py:68:log_dist] [Rank 0] step=170, skipped=3, lr=[8.235424875329062e-06], mom=[[0.9, 0.999]] -[2022-12-16 13:11:32,641] [INFO] [timer.py:197:stop] 0/340, RunningAvgSamplesPerSec=6.333611534013795, CurrSamplesPerSec=5.6619486899466605, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:11:44,035] [INFO] [timer.py:197:stop] 0/342, RunningAvgSamplesPerSec=6.333436692059929, CurrSamplesPerSec=5.672640100352739, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:11:55,425] [INFO] [timer.py:197:stop] 0/344, RunningAvgSamplesPerSec=6.333279569434337, CurrSamplesPerSec=5.672706032667005, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:12:06,796] [INFO] [timer.py:197:stop] 0/346, RunningAvgSamplesPerSec=6.333207104012073, CurrSamplesPerSec=5.714714753968254, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:12:18,158] [INFO] [timer.py:197:stop] 0/348, RunningAvgSamplesPerSec=6.333079636533842, CurrSamplesPerSec=5.695123047206334, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:12:29,545] [INFO] [timer.py:197:stop] 0/350, RunningAvgSamplesPerSec=6.332933225418649, CurrSamplesPerSec=5.690190546602886, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:09:17,433] [INFO] [timer.py:197:stop] 0/302, RunningAvgSamplesPerSec=6.322817514724475, CurrSamplesPerSec=5.675178281500542, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:09:28,800] [INFO] [timer.py:197:stop] 0/304, RunningAvgSamplesPerSec=6.322797946509351, CurrSamplesPerSec=5.68351227186433, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:09:40,183] [INFO] [timer.py:197:stop] 0/306, RunningAvgSamplesPerSec=6.322896759479059, CurrSamplesPerSec=5.728840535096935, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:09:51,543] [INFO] [timer.py:197:stop] 0/308, RunningAvgSamplesPerSec=6.322910192350933, CurrSamplesPerSec=5.701197726295654, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:10:02,885] [INFO] [timer.py:197:stop] 0/310, RunningAvgSamplesPerSec=6.3230024007557315, CurrSamplesPerSec=5.718039475930382, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:10:14,283] [INFO] [timer.py:197:stop] 0/312, RunningAvgSamplesPerSec=6.322866428944482, CurrSamplesPerSec=5.6628476184268015, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:10:25,595] [INFO] [timer.py:197:stop] 0/314, RunningAvgSamplesPerSec=6.323075360071841, CurrSamplesPerSec=5.708922539388064, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:10:36,913] [INFO] [timer.py:197:stop] 0/316, RunningAvgSamplesPerSec=6.323254930695002, CurrSamplesPerSec=5.7192082831832405, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:10:48,220] [INFO] [timer.py:197:stop] 0/318, RunningAvgSamplesPerSec=6.323405337207393, CurrSamplesPerSec=5.70960253823397, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:10:59,560] [INFO] [logging.py:68:log_dist] [Rank 0] step=160, skipped=3, lr=[8.136065420813943e-06], mom=[[0.9, 0.999]] +[2022-12-16 20:10:59,562] [INFO] [timer.py:197:stop] 0/320, RunningAvgSamplesPerSec=6.323477777341223, CurrSamplesPerSec=5.689709560887472, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:11:10,948] [INFO] [timer.py:197:stop] 0/322, RunningAvgSamplesPerSec=6.323460607589859, CurrSamplesPerSec=5.670344454119559, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:11:22,293] [INFO] [timer.py:197:stop] 0/324, RunningAvgSamplesPerSec=6.323463740424428, CurrSamplesPerSec=5.677860204925066, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:11:33,643] [INFO] [timer.py:197:stop] 0/326, RunningAvgSamplesPerSec=6.323565869006063, CurrSamplesPerSec=5.704370236980602, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:11:44,948] [INFO] [timer.py:197:stop] 0/328, RunningAvgSamplesPerSec=6.323660017579624, CurrSamplesPerSec=5.709596951869308, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:11:56,267] [INFO] [timer.py:197:stop] 0/330, RunningAvgSamplesPerSec=6.323823018324491, CurrSamplesPerSec=5.712549046630418, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:12:07,628] [INFO] [timer.py:197:stop] 0/332, RunningAvgSamplesPerSec=6.323832251532743, CurrSamplesPerSec=5.6960204547548505, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:12:18,964] [INFO] [timer.py:197:stop] 0/334, RunningAvgSamplesPerSec=6.323933543769019, CurrSamplesPerSec=5.692614320759774, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:12:30,318] [INFO] [timer.py:197:stop] 0/336, RunningAvgSamplesPerSec=6.323971450955896, CurrSamplesPerSec=5.704066717376007, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:12:41,675] [INFO] [timer.py:197:stop] 0/338, RunningAvgSamplesPerSec=6.3239844985564035, CurrSamplesPerSec=5.698655579391169, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:12:53,043] [INFO] [logging.py:68:log_dist] [Rank 0] step=170, skipped=3, lr=[8.235424875329062e-06], mom=[[0.9, 0.999]] +[2022-12-16 20:12:53,045] [INFO] [timer.py:197:stop] 0/340, RunningAvgSamplesPerSec=6.323948740973426, CurrSamplesPerSec=5.685194337650517, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:13:04,403] [INFO] [timer.py:197:stop] 0/342, RunningAvgSamplesPerSec=6.32396319355309, CurrSamplesPerSec=5.698694534442487, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:13:15,751] [INFO] [timer.py:197:stop] 0/344, RunningAvgSamplesPerSec=6.3240232984687195, CurrSamplesPerSec=5.707178839997753, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:13:27,102] [INFO] [timer.py:197:stop] 0/346, RunningAvgSamplesPerSec=6.324050524078457, CurrSamplesPerSec=5.720586271562472, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:13:38,482] [INFO] [timer.py:197:stop] 0/348, RunningAvgSamplesPerSec=6.323927304234203, CurrSamplesPerSec=5.6728395802686995, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:13:49,894] [INFO] [timer.py:197:stop] 0/350, RunningAvgSamplesPerSec=6.323775163215871, CurrSamplesPerSec=5.6732903798269225, MemAllocated=3.0GB, MaxMemAllocated=19.53GB {'loss': 0.1374, 'learning_rate': 8.282894746203441e-06, 'epoch': 0.74} -[2022-12-16 13:12:40,934] [INFO] [timer.py:197:stop] 0/352, RunningAvgSamplesPerSec=6.3327789258231455, CurrSamplesPerSec=5.684669172023788, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:12:52,331] [INFO] [timer.py:197:stop] 0/354, RunningAvgSamplesPerSec=6.332533138818452, CurrSamplesPerSec=5.640574665860056, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:13:03,731] [INFO] [timer.py:197:stop] 0/356, RunningAvgSamplesPerSec=6.332345649627949, CurrSamplesPerSec=5.663396002221176, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:13:15,103] [INFO] [timer.py:197:stop] 0/358, RunningAvgSamplesPerSec=6.3322621358120035, CurrSamplesPerSec=5.684914285399642, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:13:26,447] [INFO] [logging.py:68:log_dist] [Rank 0] step=180, skipped=3, lr=[8.329004259959669e-06], mom=[[0.9, 0.999]] -[2022-12-16 13:13:26,449] [INFO] [timer.py:197:stop] 0/360, RunningAvgSamplesPerSec=6.332159145240631, CurrSamplesPerSec=5.67905349198733, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:13:37,820] [INFO] [timer.py:197:stop] 0/362, RunningAvgSamplesPerSec=6.332024007287478, CurrSamplesPerSec=5.678510240339759, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:13:49,169] [INFO] [timer.py:197:stop] 0/364, RunningAvgSamplesPerSec=6.331968726588887, CurrSamplesPerSec=5.682525210969754, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:14:00,560] [INFO] [timer.py:197:stop] 0/366, RunningAvgSamplesPerSec=6.331832648381136, CurrSamplesPerSec=5.684769092901, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:14:11,953] [INFO] [timer.py:197:stop] 0/368, RunningAvgSamplesPerSec=6.331687391665875, CurrSamplesPerSec=5.67988911544788, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:14:23,301] [INFO] [timer.py:197:stop] 0/370, RunningAvgSamplesPerSec=6.3316890358546, CurrSamplesPerSec=5.702466985398981, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:14:34,670] [INFO] [timer.py:197:stop] 0/372, RunningAvgSamplesPerSec=6.331570795387366, CurrSamplesPerSec=5.695797345345154, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:14:46,028] [INFO] [timer.py:197:stop] 0/374, RunningAvgSamplesPerSec=6.331543357774669, CurrSamplesPerSec=5.711262415461503, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:14:57,328] [INFO] [timer.py:197:stop] 0/376, RunningAvgSamplesPerSec=6.331652133552371, CurrSamplesPerSec=5.7202161751546035, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:15:08,672] [INFO] [timer.py:197:stop] 0/378, RunningAvgSamplesPerSec=6.33161517038932, CurrSamplesPerSec=5.693738698870023, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:15:20,018] [INFO] [logging.py:68:log_dist] [Rank 0] step=190, skipped=3, lr=[8.417439256037237e-06], mom=[[0.9, 0.999]] -[2022-12-16 13:15:20,019] [INFO] [timer.py:197:stop] 0/380, RunningAvgSamplesPerSec=6.331634947341557, CurrSamplesPerSec=5.706364766431756, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:15:31,375] [INFO] [timer.py:197:stop] 0/382, RunningAvgSamplesPerSec=6.331554282858503, CurrSamplesPerSec=5.694370148810471, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:15:42,826] [INFO] [timer.py:197:stop] 0/384, RunningAvgSamplesPerSec=6.331223208258476, CurrSamplesPerSec=5.598517748356675, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:15:54,187] [INFO] [timer.py:197:stop] 0/386, RunningAvgSamplesPerSec=6.33117774424811, CurrSamplesPerSec=5.698536781871162, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:16:05,614] [INFO] [timer.py:197:stop] 0/388, RunningAvgSamplesPerSec=6.331056018601255, CurrSamplesPerSec=5.66092851427846, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:16:16,986] [INFO] [timer.py:197:stop] 0/390, RunningAvgSamplesPerSec=6.3309740768420495, CurrSamplesPerSec=5.668828223825785, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:16:28,308] [INFO] [timer.py:197:stop] 0/392, RunningAvgSamplesPerSec=6.331003137668457, CurrSamplesPerSec=5.705271046407015, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:16:39,640] [INFO] [timer.py:197:stop] 0/394, RunningAvgSamplesPerSec=6.330996505918037, CurrSamplesPerSec=5.6957204817960045, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:16:50,988] [INFO] [timer.py:197:stop] 0/396, RunningAvgSamplesPerSec=6.33099225627066, CurrSamplesPerSec=5.69033022619594, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:17:02,323] [INFO] [timer.py:197:stop] 0/398, RunningAvgSamplesPerSec=6.331037308014052, CurrSamplesPerSec=5.711416741713082, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:17:13,685] [INFO] [logging.py:68:log_dist] [Rank 0] step=200, skipped=3, lr=[8.501266121799902e-06], mom=[[0.9, 0.999]] -[2022-12-16 13:17:13,687] [INFO] [timer.py:197:stop] 0/400, RunningAvgSamplesPerSec=6.330927735043576, CurrSamplesPerSec=5.676714717671288, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:14:01,273] [INFO] [timer.py:197:stop] 0/352, RunningAvgSamplesPerSec=6.323744099538408, CurrSamplesPerSec=5.701922878806938, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:14:12,649] [INFO] [timer.py:197:stop] 0/354, RunningAvgSamplesPerSec=6.32363656035121, CurrSamplesPerSec=5.668106198002945, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:14:24,018] [INFO] [timer.py:197:stop] 0/356, RunningAvgSamplesPerSec=6.323612329000168, CurrSamplesPerSec=5.690963815932577, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:14:35,387] [INFO] [timer.py:197:stop] 0/358, RunningAvgSamplesPerSec=6.323592321881347, CurrSamplesPerSec=5.682652484549207, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:14:46,747] [INFO] [logging.py:68:log_dist] [Rank 0] step=180, skipped=3, lr=[8.329004259959669e-06], mom=[[0.9, 0.999]] +[2022-12-16 20:14:46,749] [INFO] [timer.py:197:stop] 0/360, RunningAvgSamplesPerSec=6.323480334411763, CurrSamplesPerSec=5.657225111507851, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:14:58,121] [INFO] [timer.py:197:stop] 0/362, RunningAvgSamplesPerSec=6.323431372663673, CurrSamplesPerSec=5.678844444331616, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:15:09,479] [INFO] [timer.py:197:stop] 0/364, RunningAvgSamplesPerSec=6.32339248589731, CurrSamplesPerSec=5.671576284583911, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:15:20,883] [INFO] [timer.py:197:stop] 0/366, RunningAvgSamplesPerSec=6.323253600927474, CurrSamplesPerSec=5.6644067881016404, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:15:32,248] [INFO] [timer.py:197:stop] 0/368, RunningAvgSamplesPerSec=6.323253715509086, CurrSamplesPerSec=5.701989251343157, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:15:43,589] [INFO] [timer.py:197:stop] 0/370, RunningAvgSamplesPerSec=6.323333571683783, CurrSamplesPerSec=5.709123607893714, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:15:54,984] [INFO] [timer.py:197:stop] 0/372, RunningAvgSamplesPerSec=6.3231740893087585, CurrSamplesPerSec=5.660488032411503, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:16:06,407] [INFO] [timer.py:197:stop] 0/374, RunningAvgSamplesPerSec=6.3231801853876135, CurrSamplesPerSec=5.70347698290224, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:16:17,744] [INFO] [timer.py:197:stop] 0/376, RunningAvgSamplesPerSec=6.323287920909756, CurrSamplesPerSec=5.702711939935516, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:16:29,091] [INFO] [timer.py:197:stop] 0/378, RunningAvgSamplesPerSec=6.323286122683462, CurrSamplesPerSec=5.687698937227285, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:16:40,418] [INFO] [logging.py:68:log_dist] [Rank 0] step=190, skipped=3, lr=[8.417439256037237e-06], mom=[[0.9, 0.999]] +[2022-12-16 20:16:40,420] [INFO] [timer.py:197:stop] 0/380, RunningAvgSamplesPerSec=6.323397786842634, CurrSamplesPerSec=5.71710686834569, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:16:51,771] [INFO] [timer.py:197:stop] 0/382, RunningAvgSamplesPerSec=6.323379578682893, CurrSamplesPerSec=5.697321750762022, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:17:03,330] [INFO] [timer.py:197:stop] 0/384, RunningAvgSamplesPerSec=6.323437318866515, CurrSamplesPerSec=5.70628228013329, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:17:14,693] [INFO] [timer.py:197:stop] 0/386, RunningAvgSamplesPerSec=6.323427514274825, CurrSamplesPerSec=5.700057328547446, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:17:26,094] [INFO] [timer.py:197:stop] 0/388, RunningAvgSamplesPerSec=6.323306500731329, CurrSamplesPerSec=5.678972033656878, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:17:37,448] [INFO] [timer.py:197:stop] 0/390, RunningAvgSamplesPerSec=6.32331374462535, CurrSamplesPerSec=5.687762568718759, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:17:48,794] [INFO] [timer.py:197:stop] 0/392, RunningAvgSamplesPerSec=6.323321526858228, CurrSamplesPerSec=5.692411274897352, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:18:00,146] [INFO] [timer.py:197:stop] 0/394, RunningAvgSamplesPerSec=6.323302134655659, CurrSamplesPerSec=5.682476612732748, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:18:11,512] [INFO] [timer.py:197:stop] 0/396, RunningAvgSamplesPerSec=6.323284170376101, CurrSamplesPerSec=5.6785734261450145, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:18:23,016] [INFO] [timer.py:197:stop] 0/398, RunningAvgSamplesPerSec=6.323343518822214, CurrSamplesPerSec=5.709000973847339, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:18:34,373] [INFO] [logging.py:68:log_dist] [Rank 0] step=200, skipped=3, lr=[8.501266121799902e-06], mom=[[0.9, 0.999]] +[2022-12-16 20:18:34,375] [INFO] [timer.py:197:stop] 0/400, RunningAvgSamplesPerSec=6.32328421011125, CurrSamplesPerSec=5.6872694618968955, MemAllocated=3.0GB, MaxMemAllocated=19.53GB {'loss': 0.1287, 'learning_rate': 8.501266121799902e-06, 'epoch': 0.85} -[2022-12-16 13:17:25,016] [INFO] [timer.py:197:stop] 0/402, RunningAvgSamplesPerSec=6.330933021275755, CurrSamplesPerSec=5.702418045471839, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:17:36,361] [INFO] [timer.py:197:stop] 0/404, RunningAvgSamplesPerSec=6.330935328095778, CurrSamplesPerSec=5.709033271015826, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:17:47,092] [INFO] [stage_1_and_2.py:1765:step] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384.0, reducing to 8192.0 -[2022-12-16 13:17:47,094] [INFO] [timer.py:197:stop] 0/406, RunningAvgSamplesPerSec=6.332789283396464, CurrSamplesPerSec=6.394784222406185, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:17:58,451] [INFO] [timer.py:197:stop] 0/408, RunningAvgSamplesPerSec=6.3327465412974275, CurrSamplesPerSec=5.706926464484751, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:18:09,815] [INFO] [timer.py:197:stop] 0/410, RunningAvgSamplesPerSec=6.332696453161811, CurrSamplesPerSec=5.677898635990999, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:18:21,204] [INFO] [timer.py:197:stop] 0/412, RunningAvgSamplesPerSec=6.3325963497900535, CurrSamplesPerSec=5.681225614591862, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:18:32,608] [INFO] [timer.py:197:stop] 0/414, RunningAvgSamplesPerSec=6.332589791376257, CurrSamplesPerSec=5.693203259831853, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:18:43,964] [INFO] [timer.py:197:stop] 0/416, RunningAvgSamplesPerSec=6.332529284688327, CurrSamplesPerSec=5.675976039549646, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:18:55,323] [INFO] [timer.py:197:stop] 0/418, RunningAvgSamplesPerSec=6.332541567747686, CurrSamplesPerSec=5.696589063934239, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:19:06,712] [INFO] [logging.py:68:log_dist] [Rank 0] step=210, skipped=4, lr=[8.573149077803088e-06], mom=[[0.9, 0.999]] -[2022-12-16 13:19:06,713] [INFO] [timer.py:197:stop] 0/420, RunningAvgSamplesPerSec=6.332525376024386, CurrSamplesPerSec=5.685732365818978, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:19:18,077] [INFO] [timer.py:197:stop] 0/422, RunningAvgSamplesPerSec=6.332480645100248, CurrSamplesPerSec=5.688045312222482, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:19:29,426] [INFO] [timer.py:197:stop] 0/424, RunningAvgSamplesPerSec=6.332436469764684, CurrSamplesPerSec=5.694331252857661, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:19:40,762] [INFO] [timer.py:197:stop] 0/426, RunningAvgSamplesPerSec=6.332425390489938, CurrSamplesPerSec=5.710604131472058, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:19:52,108] [INFO] [timer.py:197:stop] 0/428, RunningAvgSamplesPerSec=6.33234514899996, CurrSamplesPerSec=5.6711051498664125, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:20:03,460] [INFO] [timer.py:197:stop] 0/430, RunningAvgSamplesPerSec=6.332341452878609, CurrSamplesPerSec=5.702187166947249, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:20:14,798] [INFO] [timer.py:197:stop] 0/432, RunningAvgSamplesPerSec=6.3323660481354285, CurrSamplesPerSec=5.70455498277639, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:20:26,129] [INFO] [timer.py:197:stop] 0/434, RunningAvgSamplesPerSec=6.332410466165831, CurrSamplesPerSec=5.709264704274692, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:20:37,449] [INFO] [timer.py:197:stop] 0/436, RunningAvgSamplesPerSec=6.332434604656064, CurrSamplesPerSec=5.724072597754097, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:20:48,804] [INFO] [timer.py:197:stop] 0/438, RunningAvgSamplesPerSec=6.332414739569677, CurrSamplesPerSec=5.7062907712597895, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:21:00,111] [INFO] [logging.py:68:log_dist] [Rank 0] step=220, skipped=4, lr=[8.64942458567722e-06], mom=[[0.9, 0.999]] -[2022-12-16 13:21:00,113] [INFO] [timer.py:197:stop] 0/440, RunningAvgSamplesPerSec=6.332536560073241, CurrSamplesPerSec=5.718785488696217, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:21:11,432] [INFO] [timer.py:197:stop] 0/442, RunningAvgSamplesPerSec=6.332569559767877, CurrSamplesPerSec=5.701313970796222, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:21:22,769] [INFO] [timer.py:197:stop] 0/444, RunningAvgSamplesPerSec=6.3326005766264215, CurrSamplesPerSec=5.708380841954919, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:21:34,090] [INFO] [timer.py:197:stop] 0/446, RunningAvgSamplesPerSec=6.3326758013028215, CurrSamplesPerSec=5.7286749963346875, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:21:45,409] [INFO] [timer.py:197:stop] 0/448, RunningAvgSamplesPerSec=6.33271448435985, CurrSamplesPerSec=5.709920007439783, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:21:56,742] [INFO] [timer.py:197:stop] 0/450, RunningAvgSamplesPerSec=6.3327607879641326, CurrSamplesPerSec=5.704823879601781, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:18:45,718] [INFO] [timer.py:197:stop] 0/402, RunningAvgSamplesPerSec=6.323284665364629, CurrSamplesPerSec=5.690986739759475, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:18:57,149] [INFO] [timer.py:197:stop] 0/404, RunningAvgSamplesPerSec=6.323064316517982, CurrSamplesPerSec=5.632093995709347, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:19:07,878] [INFO] [stage_1_and_2.py:1765:step] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384.0, reducing to 8192.0 +[2022-12-16 20:19:07,880] [INFO] [timer.py:197:stop] 0/406, RunningAvgSamplesPerSec=6.324963121796406, CurrSamplesPerSec=6.401309166670601, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:19:19,240] [INFO] [timer.py:197:stop] 0/408, RunningAvgSamplesPerSec=6.3249501610981325, CurrSamplesPerSec=5.702367168220342, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:19:30,606] [INFO] [timer.py:197:stop] 0/410, RunningAvgSamplesPerSec=6.324919708467864, CurrSamplesPerSec=5.679302927602853, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:19:41,978] [INFO] [timer.py:197:stop] 0/412, RunningAvgSamplesPerSec=6.324875286613773, CurrSamplesPerSec=5.687440087427545, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:19:53,395] [INFO] [timer.py:197:stop] 0/414, RunningAvgSamplesPerSec=6.32487659033467, CurrSamplesPerSec=5.692081989929908, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:20:04,724] [INFO] [timer.py:197:stop] 0/416, RunningAvgSamplesPerSec=6.32491419453475, CurrSamplesPerSec=5.70353587815117, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:20:16,092] [INFO] [timer.py:197:stop] 0/418, RunningAvgSamplesPerSec=6.324881861283918, CurrSamplesPerSec=5.668392257188035, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:20:27,452] [INFO] [logging.py:68:log_dist] [Rank 0] step=210, skipped=4, lr=[8.573149077803088e-06], mom=[[0.9, 0.999]] +[2022-12-16 20:20:27,454] [INFO] [timer.py:197:stop] 0/420, RunningAvgSamplesPerSec=6.324865240196057, CurrSamplesPerSec=5.678834112490291, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:20:38,813] [INFO] [timer.py:197:stop] 0/422, RunningAvgSamplesPerSec=6.324857186084627, CurrSamplesPerSec=5.690756785945908, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:20:50,150] [INFO] [timer.py:197:stop] 0/424, RunningAvgSamplesPerSec=6.324868863513124, CurrSamplesPerSec=5.700334274925125, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:21:01,502] [INFO] [timer.py:197:stop] 0/426, RunningAvgSamplesPerSec=6.324832543627125, CurrSamplesPerSec=5.698868991857596, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:21:12,881] [INFO] [timer.py:197:stop] 0/428, RunningAvgSamplesPerSec=6.324672136242403, CurrSamplesPerSec=5.638443932850549, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:21:24,242] [INFO] [timer.py:197:stop] 0/430, RunningAvgSamplesPerSec=6.324660692835897, CurrSamplesPerSec=5.690038088967808, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:21:35,582] [INFO] [timer.py:197:stop] 0/432, RunningAvgSamplesPerSec=6.324708429204134, CurrSamplesPerSec=5.704986345333319, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:21:46,949] [INFO] [timer.py:197:stop] 0/434, RunningAvgSamplesPerSec=6.324708609830103, CurrSamplesPerSec=5.6855702723091985, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:21:58,286] [INFO] [timer.py:197:stop] 0/436, RunningAvgSamplesPerSec=6.324717537651673, CurrSamplesPerSec=5.70938394951087, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:22:09,632] [INFO] [timer.py:197:stop] 0/438, RunningAvgSamplesPerSec=6.324745496447643, CurrSamplesPerSec=5.714860750176948, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:22:20,926] [INFO] [logging.py:68:log_dist] [Rank 0] step=220, skipped=4, lr=[8.64942458567722e-06], mom=[[0.9, 0.999]] +[2022-12-16 20:22:20,928] [INFO] [timer.py:197:stop] 0/440, RunningAvgSamplesPerSec=6.324929046177664, CurrSamplesPerSec=5.729245008296917, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:22:32,230] [INFO] [timer.py:197:stop] 0/442, RunningAvgSamplesPerSec=6.325051638910014, CurrSamplesPerSec=5.723627116481493, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:22:43,588] [INFO] [timer.py:197:stop] 0/444, RunningAvgSamplesPerSec=6.325065678448945, CurrSamplesPerSec=5.693626627517337, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:22:54,930] [INFO] [timer.py:197:stop] 0/446, RunningAvgSamplesPerSec=6.325110434850466, CurrSamplesPerSec=5.710919039097566, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:23:06,252] [INFO] [timer.py:197:stop] 0/448, RunningAvgSamplesPerSec=6.325167702164579, CurrSamplesPerSec=5.705739870822447, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:23:17,632] [INFO] [timer.py:197:stop] 0/450, RunningAvgSamplesPerSec=6.325104594839814, CurrSamplesPerSec=5.653316282959546, MemAllocated=3.0GB, MaxMemAllocated=19.53GB {'loss': 0.1225, 'learning_rate': 8.686247975778677e-06, 'epoch': 0.95} -[2022-12-16 13:22:08,082] [INFO] [timer.py:197:stop] 0/452, RunningAvgSamplesPerSec=6.332790148344536, CurrSamplesPerSec=5.718563272508554, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:22:19,449] [INFO] [timer.py:197:stop] 0/454, RunningAvgSamplesPerSec=6.3327388875050685, CurrSamplesPerSec=5.680145595658538, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:22:30,809] [INFO] [timer.py:197:stop] 0/456, RunningAvgSamplesPerSec=6.332711167657343, CurrSamplesPerSec=5.712535674159144, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:22:42,108] [INFO] [timer.py:197:stop] 0/458, RunningAvgSamplesPerSec=6.33280234507031, CurrSamplesPerSec=5.7229347443492955, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:22:53,449] [INFO] [logging.py:68:log_dist] [Rank 0] step=230, skipped=4, lr=[8.722247506883805e-06], mom=[[0.9, 0.999]] -[2022-12-16 13:22:53,450] [INFO] [timer.py:197:stop] 0/460, RunningAvgSamplesPerSec=6.3328301209678255, CurrSamplesPerSec=5.712547830948624, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:23:04,788] [INFO] [timer.py:197:stop] 0/462, RunningAvgSamplesPerSec=6.332812979171486, CurrSamplesPerSec=5.6831748711853285, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:23:16,134] [INFO] [timer.py:197:stop] 0/464, RunningAvgSamplesPerSec=6.332817304905055, CurrSamplesPerSec=5.701674358874692, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:23:26,839] [INFO] [stage_1_and_2.py:1765:step] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 8192.0, reducing to 4096.0 -[2022-12-16 13:23:26,841] [INFO] [timer.py:197:stop] 0/466, RunningAvgSamplesPerSec=6.334509821736817, CurrSamplesPerSec=6.410038609411664, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:23:38,196] [INFO] [timer.py:197:stop] 0/468, RunningAvgSamplesPerSec=6.33448448174401, CurrSamplesPerSec=5.692148857768002, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:23:49,581] [INFO] [timer.py:197:stop] 0/470, RunningAvgSamplesPerSec=6.334334324008184, CurrSamplesPerSec=5.6524056189787935, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:23:58,075] [INFO] [timer.py:197:stop] 0/472, RunningAvgSamplesPerSec=6.341034963749929, CurrSamplesPerSec=10.244828838544047, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:24:09,412] [INFO] [timer.py:197:stop] 0/474, RunningAvgSamplesPerSec=6.341031354651321, CurrSamplesPerSec=5.709517043762645, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:24:20,810] [INFO] [timer.py:197:stop] 0/476, RunningAvgSamplesPerSec=6.340830579777299, CurrSamplesPerSec=5.65173441535856, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:24:32,186] [INFO] [timer.py:197:stop] 0/478, RunningAvgSamplesPerSec=6.34072547922669, CurrSamplesPerSec=5.666355761890989, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:24:43,527] [INFO] [logging.py:68:log_dist] [Rank 0] step=240, skipped=5, lr=[8.785084156039184e-06], mom=[[0.9, 0.999]] -[2022-12-16 13:24:43,529] [INFO] [timer.py:197:stop] 0/480, RunningAvgSamplesPerSec=6.340705507213309, CurrSamplesPerSec=5.708322574935123, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:24:54,875] [INFO] [timer.py:197:stop] 0/482, RunningAvgSamplesPerSec=6.340681063176589, CurrSamplesPerSec=5.698540411049955, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:25:06,236] [INFO] [timer.py:197:stop] 0/484, RunningAvgSamplesPerSec=6.340634152728032, CurrSamplesPerSec=5.68413736313393, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:25:17,613] [INFO] [timer.py:197:stop] 0/486, RunningAvgSamplesPerSec=6.340556261832466, CurrSamplesPerSec=5.685238647671385, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:25:28,931] [INFO] [timer.py:197:stop] 0/488, RunningAvgSamplesPerSec=6.340556874315241, CurrSamplesPerSec=5.725152048399553, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:25:40,233] [INFO] [timer.py:197:stop] 0/490, RunningAvgSamplesPerSec=6.3406058880671585, CurrSamplesPerSec=5.7148970070583704, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:25:51,607] [INFO] [timer.py:197:stop] 0/492, RunningAvgSamplesPerSec=6.340504282447385, CurrSamplesPerSec=5.659161271286198, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:26:02,916] [INFO] [timer.py:197:stop] 0/494, RunningAvgSamplesPerSec=6.340538253413768, CurrSamplesPerSec=5.714366826410801, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:26:14,293] [INFO] [timer.py:197:stop] 0/496, RunningAvgSamplesPerSec=6.34043777677742, CurrSamplesPerSec=5.673726380737918, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:26:25,621] [INFO] [timer.py:197:stop] 0/498, RunningAvgSamplesPerSec=6.340464247689892, CurrSamplesPerSec=5.718628814741922, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:26:36,937] [INFO] [logging.py:68:log_dist] [Rank 0] step=250, skipped=5, lr=[8.852140188761744e-06], mom=[[0.9, 0.999]] -[2022-12-16 13:26:36,939] [INFO] [timer.py:197:stop] 0/500, RunningAvgSamplesPerSec=6.340507097510456, CurrSamplesPerSec=5.714383370258332, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:23:29,039] [INFO] [timer.py:197:stop] 0/452, RunningAvgSamplesPerSec=6.3249838784193, CurrSamplesPerSec=5.654148160062199, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:23:40,379] [INFO] [timer.py:197:stop] 0/454, RunningAvgSamplesPerSec=6.325056824708796, CurrSamplesPerSec=5.716592834256081, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:23:51,801] [INFO] [timer.py:197:stop] 0/456, RunningAvgSamplesPerSec=6.324918730493177, CurrSamplesPerSec=5.666096698292425, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:24:03,158] [INFO] [timer.py:197:stop] 0/458, RunningAvgSamplesPerSec=6.3249018266408745, CurrSamplesPerSec=5.6668892730962055, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:24:14,505] [INFO] [logging.py:68:log_dist] [Rank 0] step=230, skipped=4, lr=[8.722247506883805e-06], mom=[[0.9, 0.999]] +[2022-12-16 20:24:14,507] [INFO] [timer.py:197:stop] 0/460, RunningAvgSamplesPerSec=6.324939407967614, CurrSamplesPerSec=5.702041090822069, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:24:25,830] [INFO] [timer.py:197:stop] 0/462, RunningAvgSamplesPerSec=6.3249985635148835, CurrSamplesPerSec=5.695622592543446, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:24:37,171] [INFO] [timer.py:197:stop] 0/464, RunningAvgSamplesPerSec=6.325064934601947, CurrSamplesPerSec=5.706983489691964, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:24:47,866] [INFO] [stage_1_and_2.py:1765:step] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 8192.0, reducing to 4096.0 +[2022-12-16 20:24:47,868] [INFO] [timer.py:197:stop] 0/466, RunningAvgSamplesPerSec=6.326814356293397, CurrSamplesPerSec=6.417096329026, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:24:59,202] [INFO] [timer.py:197:stop] 0/468, RunningAvgSamplesPerSec=6.326888453208697, CurrSamplesPerSec=5.710727563347197, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:25:10,596] [INFO] [timer.py:197:stop] 0/470, RunningAvgSamplesPerSec=6.32680186680534, CurrSamplesPerSec=5.657430662739473, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:25:19,121] [INFO] [timer.py:197:stop] 0/472, RunningAvgSamplesPerSec=6.333450758411454, CurrSamplesPerSec=10.137390170749411, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:25:30,504] [INFO] [timer.py:197:stop] 0/474, RunningAvgSamplesPerSec=6.333370618689054, CurrSamplesPerSec=5.659949520087447, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:25:41,898] [INFO] [timer.py:197:stop] 0/476, RunningAvgSamplesPerSec=6.333330907948472, CurrSamplesPerSec=5.689830402708782, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:25:53,272] [INFO] [timer.py:197:stop] 0/478, RunningAvgSamplesPerSec=6.333309972082303, CurrSamplesPerSec=5.679410830915424, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:26:04,786] [INFO] [logging.py:68:log_dist] [Rank 0] step=240, skipped=5, lr=[8.785084156039184e-06], mom=[[0.9, 0.999]] +[2022-12-16 20:26:04,788] [INFO] [timer.py:197:stop] 0/480, RunningAvgSamplesPerSec=6.333324209339847, CurrSamplesPerSec=5.711554548737424, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:26:16,153] [INFO] [timer.py:197:stop] 0/482, RunningAvgSamplesPerSec=6.333275648262772, CurrSamplesPerSec=5.67584738469762, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:26:27,536] [INFO] [timer.py:197:stop] 0/484, RunningAvgSamplesPerSec=6.333180443017503, CurrSamplesPerSec=5.658684801072698, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:26:38,936] [INFO] [timer.py:197:stop] 0/486, RunningAvgSamplesPerSec=6.333063356107658, CurrSamplesPerSec=5.66522088952818, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:26:50,275] [INFO] [timer.py:197:stop] 0/488, RunningAvgSamplesPerSec=6.333064958163027, CurrSamplesPerSec=5.716922526210353, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:27:01,596] [INFO] [timer.py:197:stop] 0/490, RunningAvgSamplesPerSec=6.333097497150548, CurrSamplesPerSec=5.708483783263635, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:27:12,974] [INFO] [timer.py:197:stop] 0/492, RunningAvgSamplesPerSec=6.333030716341655, CurrSamplesPerSec=5.669087297819405, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:27:24,283] [INFO] [timer.py:197:stop] 0/494, RunningAvgSamplesPerSec=6.33308950716907, CurrSamplesPerSec=5.72159806401921, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:27:35,659] [INFO] [timer.py:197:stop] 0/496, RunningAvgSamplesPerSec=6.333030116862234, CurrSamplesPerSec=5.685264896868218, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:27:47,023] [INFO] [timer.py:197:stop] 0/498, RunningAvgSamplesPerSec=6.332996476016625, CurrSamplesPerSec=5.69285118566385, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:27:58,379] [INFO] [logging.py:68:log_dist] [Rank 0] step=250, skipped=5, lr=[8.852140188761744e-06], mom=[[0.9, 0.999]] +[2022-12-16 20:27:58,380] [INFO] [timer.py:197:stop] 0/500, RunningAvgSamplesPerSec=6.332979581463783, CurrSamplesPerSec=5.68674728607022, MemAllocated=3.0GB, MaxMemAllocated=19.53GB {'loss': 0.0911, 'learning_rate': 8.852140188761744e-06, 'epoch': 1.06} -[2022-12-16 13:26:48,258] [INFO] [timer.py:197:stop] 0/502, RunningAvgSamplesPerSec=6.340515051225799, CurrSamplesPerSec=5.708866203892745, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:26:59,598] [INFO] [timer.py:197:stop] 0/504, RunningAvgSamplesPerSec=6.340501513927871, CurrSamplesPerSec=5.696083789104848, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:27:10,938] [INFO] [timer.py:197:stop] 0/506, RunningAvgSamplesPerSec=6.340491594057469, CurrSamplesPerSec=5.696348986773738, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:27:22,273] [INFO] [timer.py:197:stop] 0/508, RunningAvgSamplesPerSec=6.340492785987803, CurrSamplesPerSec=5.7040514452781945, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:27:33,596] [INFO] [timer.py:197:stop] 0/510, RunningAvgSamplesPerSec=6.34044975201933, CurrSamplesPerSec=5.6719408326517975, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:27:44,949] [INFO] [timer.py:197:stop] 0/512, RunningAvgSamplesPerSec=6.340408872057434, CurrSamplesPerSec=5.685037331614662, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:27:56,297] [INFO] [timer.py:197:stop] 0/514, RunningAvgSamplesPerSec=6.340377687654657, CurrSamplesPerSec=5.704021628561016, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:28:07,656] [INFO] [timer.py:197:stop] 0/516, RunningAvgSamplesPerSec=6.340319751692764, CurrSamplesPerSec=5.679723028624139, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:28:18,978] [INFO] [timer.py:197:stop] 0/518, RunningAvgSamplesPerSec=6.340350792432082, CurrSamplesPerSec=5.7098451913181485, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:28:30,309] [INFO] [logging.py:68:log_dist] [Rank 0] step=260, skipped=5, lr=[8.916513249749862e-06], mom=[[0.9, 0.999]] -[2022-12-16 13:28:30,311] [INFO] [timer.py:197:stop] 0/520, RunningAvgSamplesPerSec=6.340314121727346, CurrSamplesPerSec=5.703653187538543, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:28:41,680] [INFO] [timer.py:197:stop] 0/522, RunningAvgSamplesPerSec=6.340257665480462, CurrSamplesPerSec=5.697301194271666, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:28:53,006] [INFO] [timer.py:197:stop] 0/524, RunningAvgSamplesPerSec=6.340306276076679, CurrSamplesPerSec=5.723158032534116, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:29:04,397] [INFO] [timer.py:197:stop] 0/526, RunningAvgSamplesPerSec=6.340176406507254, CurrSamplesPerSec=5.6733251519178785, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:29:15,729] [INFO] [timer.py:197:stop] 0/528, RunningAvgSamplesPerSec=6.340154943695952, CurrSamplesPerSec=5.701712386349338, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:29:27,011] [INFO] [timer.py:197:stop] 0/530, RunningAvgSamplesPerSec=6.3402431766411205, CurrSamplesPerSec=5.734707806521304, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:29:38,377] [INFO] [timer.py:197:stop] 0/532, RunningAvgSamplesPerSec=6.3401683159730045, CurrSamplesPerSec=5.680019395846795, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:29:49,771] [INFO] [timer.py:197:stop] 0/534, RunningAvgSamplesPerSec=6.34005297999423, CurrSamplesPerSec=5.670300615513904, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:30:01,082] [INFO] [timer.py:197:stop] 0/536, RunningAvgSamplesPerSec=6.340096536081388, CurrSamplesPerSec=5.722985013174985, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:30:12,416] [INFO] [timer.py:197:stop] 0/538, RunningAvgSamplesPerSec=6.340091748500424, CurrSamplesPerSec=5.710884290641021, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:30:23,758] [INFO] [logging.py:68:log_dist] [Rank 0] step=270, skipped=5, lr=[8.978409800937961e-06], mom=[[0.9, 0.999]] -[2022-12-16 13:30:23,759] [INFO] [timer.py:197:stop] 0/540, RunningAvgSamplesPerSec=6.3400577102176525, CurrSamplesPerSec=5.694194275689367, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:30:35,096] [INFO] [timer.py:197:stop] 0/542, RunningAvgSamplesPerSec=6.34004129781036, CurrSamplesPerSec=5.695970658506859, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:30:46,447] [INFO] [timer.py:197:stop] 0/544, RunningAvgSamplesPerSec=6.340026029384895, CurrSamplesPerSec=5.702012506319923, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:30:57,794] [INFO] [timer.py:197:stop] 0/546, RunningAvgSamplesPerSec=6.3400386927289, CurrSamplesPerSec=5.706275244647619, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:31:09,140] [INFO] [timer.py:197:stop] 0/548, RunningAvgSamplesPerSec=6.34005752347985, CurrSamplesPerSec=5.710305535873723, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:31:20,489] [INFO] [timer.py:197:stop] 0/550, RunningAvgSamplesPerSec=6.340052862396717, CurrSamplesPerSec=5.700895028486481, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:28:09,749] [INFO] [timer.py:197:stop] 0/502, RunningAvgSamplesPerSec=6.332968543294653, CurrSamplesPerSec=5.698553960024957, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:28:21,117] [INFO] [timer.py:197:stop] 0/504, RunningAvgSamplesPerSec=6.3329375267007215, CurrSamplesPerSec=5.680363153261814, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:28:32,487] [INFO] [timer.py:197:stop] 0/506, RunningAvgSamplesPerSec=6.332904182885525, CurrSamplesPerSec=5.687939491029711, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:28:43,855] [INFO] [timer.py:197:stop] 0/508, RunningAvgSamplesPerSec=6.33285409700931, CurrSamplesPerSec=5.677202633924842, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:28:55,191] [INFO] [timer.py:197:stop] 0/510, RunningAvgSamplesPerSec=6.332775886204445, CurrSamplesPerSec=5.648280477264634, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:29:06,683] [INFO] [timer.py:197:stop] 0/512, RunningAvgSamplesPerSec=6.332755158221311, CurrSamplesPerSec=5.688899257349144, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:29:18,066] [INFO] [timer.py:197:stop] 0/514, RunningAvgSamplesPerSec=6.332672689917679, CurrSamplesPerSec=5.677710568918598, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:29:29,434] [INFO] [timer.py:197:stop] 0/516, RunningAvgSamplesPerSec=6.3326342128881095, CurrSamplesPerSec=5.682176861785842, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:29:40,802] [INFO] [timer.py:197:stop] 0/518, RunningAvgSamplesPerSec=6.332589814616271, CurrSamplesPerSec=5.674145177376899, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:29:52,188] [INFO] [logging.py:68:log_dist] [Rank 0] step=260, skipped=5, lr=[8.916513249749862e-06], mom=[[0.9, 0.999]] +[2022-12-16 20:29:52,189] [INFO] [timer.py:197:stop] 0/520, RunningAvgSamplesPerSec=6.332535441063731, CurrSamplesPerSec=5.690993254986084, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:30:03,651] [INFO] [timer.py:197:stop] 0/522, RunningAvgSamplesPerSec=6.332321192236601, CurrSamplesPerSec=5.627202311158195, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:30:15,079] [INFO] [timer.py:197:stop] 0/524, RunningAvgSamplesPerSec=6.332264757881449, CurrSamplesPerSec=5.681202047908641, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:30:26,506] [INFO] [timer.py:197:stop] 0/526, RunningAvgSamplesPerSec=6.332111989659685, CurrSamplesPerSec=5.663871593105844, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:30:37,853] [INFO] [timer.py:197:stop] 0/528, RunningAvgSamplesPerSec=6.332086845890149, CurrSamplesPerSec=5.697914808915609, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:30:49,299] [INFO] [timer.py:197:stop] 0/530, RunningAvgSamplesPerSec=6.332118305953814, CurrSamplesPerSec=5.707362068898067, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:31:00,725] [INFO] [timer.py:197:stop] 0/532, RunningAvgSamplesPerSec=6.332145135171992, CurrSamplesPerSec=5.7186139518732695, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:31:12,098] [INFO] [timer.py:197:stop] 0/534, RunningAvgSamplesPerSec=6.332096332695988, CurrSamplesPerSec=5.686162813500998, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:31:23,474] [INFO] [timer.py:197:stop] 0/536, RunningAvgSamplesPerSec=6.332142347447645, CurrSamplesPerSec=5.717022610095439, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:31:34,884] [INFO] [timer.py:197:stop] 0/538, RunningAvgSamplesPerSec=6.3320406629010915, CurrSamplesPerSec=5.6585333113317695, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:31:46,283] [INFO] [logging.py:68:log_dist] [Rank 0] step=270, skipped=5, lr=[8.978409800937961e-06], mom=[[0.9, 0.999]] +[2022-12-16 20:31:46,285] [INFO] [timer.py:197:stop] 0/540, RunningAvgSamplesPerSec=6.331949549833869, CurrSamplesPerSec=5.658519236318848, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:31:57,631] [INFO] [timer.py:197:stop] 0/542, RunningAvgSamplesPerSec=6.331908691993134, CurrSamplesPerSec=5.674752614311713, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:32:09,011] [INFO] [timer.py:197:stop] 0/544, RunningAvgSamplesPerSec=6.331837872115681, CurrSamplesPerSec=5.664258577485706, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:32:20,410] [INFO] [timer.py:197:stop] 0/546, RunningAvgSamplesPerSec=6.331805803303936, CurrSamplesPerSec=5.676086937031499, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:32:31,832] [INFO] [timer.py:197:stop] 0/548, RunningAvgSamplesPerSec=6.331781192265678, CurrSamplesPerSec=5.680064586753917, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:32:43,212] [INFO] [timer.py:197:stop] 0/550, RunningAvgSamplesPerSec=6.3318012701947355, CurrSamplesPerSec=5.7015488960272975, MemAllocated=3.0GB, MaxMemAllocated=19.53GB {'loss': 0.0701, 'learning_rate': 9.00848753507038e-06, 'epoch': 1.17} -[2022-12-16 13:31:31,787] [INFO] [timer.py:197:stop] 0/552, RunningAvgSamplesPerSec=6.340167350310263, CurrSamplesPerSec=5.770476218524265, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:31:43,153] [INFO] [timer.py:197:stop] 0/554, RunningAvgSamplesPerSec=6.340122552433611, CurrSamplesPerSec=5.6745541993997355, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:31:54,491] [INFO] [timer.py:197:stop] 0/556, RunningAvgSamplesPerSec=6.340139825646102, CurrSamplesPerSec=5.708953135855733, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:32:05,850] [INFO] [timer.py:197:stop] 0/558, RunningAvgSamplesPerSec=6.340071243133932, CurrSamplesPerSec=5.691814534287232, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:32:17,195] [INFO] [logging.py:68:log_dist] [Rank 0] step=280, skipped=5, lr=[9.038013352913754e-06], mom=[[0.9, 0.999]] -[2022-12-16 13:32:17,197] [INFO] [timer.py:197:stop] 0/560, RunningAvgSamplesPerSec=6.340015870411556, CurrSamplesPerSec=5.687219336567981, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:32:28,543] [INFO] [timer.py:197:stop] 0/562, RunningAvgSamplesPerSec=6.340002215824556, CurrSamplesPerSec=5.7009136737425115, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:32:39,909] [INFO] [timer.py:197:stop] 0/564, RunningAvgSamplesPerSec=6.339945455396239, CurrSamplesPerSec=5.670917772159438, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:32:51,253] [INFO] [timer.py:197:stop] 0/566, RunningAvgSamplesPerSec=6.339925029302221, CurrSamplesPerSec=5.697719124776133, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:33:02,598] [INFO] [timer.py:197:stop] 0/568, RunningAvgSamplesPerSec=6.339901880491667, CurrSamplesPerSec=5.690123242304234, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:33:13,971] [INFO] [timer.py:197:stop] 0/570, RunningAvgSamplesPerSec=6.339801948583433, CurrSamplesPerSec=5.675292747439151, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:33:25,288] [INFO] [timer.py:197:stop] 0/572, RunningAvgSamplesPerSec=6.339862092630734, CurrSamplesPerSec=5.711445663623603, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:33:36,663] [INFO] [timer.py:197:stop] 0/574, RunningAvgSamplesPerSec=6.339783864658929, CurrSamplesPerSec=5.6676619658311305, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:33:48,016] [INFO] [timer.py:197:stop] 0/576, RunningAvgSamplesPerSec=6.339745950695436, CurrSamplesPerSec=5.689556646658774, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:33:59,343] [INFO] [timer.py:197:stop] 0/578, RunningAvgSamplesPerSec=6.339769267297791, CurrSamplesPerSec=5.712568254471431, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:34:10,697] [INFO] [logging.py:68:log_dist] [Rank 0] step=290, skipped=5, lr=[9.095487745564754e-06], mom=[[0.9, 0.999]] -[2022-12-16 13:34:10,699] [INFO] [timer.py:197:stop] 0/580, RunningAvgSamplesPerSec=6.339728316436596, CurrSamplesPerSec=5.70156197490803, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:34:22,038] [INFO] [timer.py:197:stop] 0/582, RunningAvgSamplesPerSec=6.339692731446194, CurrSamplesPerSec=5.697978911092788, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:34:33,374] [INFO] [timer.py:197:stop] 0/584, RunningAvgSamplesPerSec=6.339692650355827, CurrSamplesPerSec=5.7080191201038675, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:34:44,700] [INFO] [timer.py:197:stop] 0/586, RunningAvgSamplesPerSec=6.339714029901691, CurrSamplesPerSec=5.718308183769478, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:34:56,048] [INFO] [timer.py:197:stop] 0/588, RunningAvgSamplesPerSec=6.339682913023013, CurrSamplesPerSec=5.709339262321484, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:35:07,358] [INFO] [timer.py:197:stop] 0/590, RunningAvgSamplesPerSec=6.339736119920085, CurrSamplesPerSec=5.729573470814689, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:35:18,701] [INFO] [timer.py:197:stop] 0/592, RunningAvgSamplesPerSec=6.339704018750019, CurrSamplesPerSec=5.694426440167608, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:35:30,045] [INFO] [timer.py:197:stop] 0/594, RunningAvgSamplesPerSec=6.3396871452533015, CurrSamplesPerSec=5.697992457397849, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:35:41,402] [INFO] [timer.py:197:stop] 0/596, RunningAvgSamplesPerSec=6.339643448245747, CurrSamplesPerSec=5.680999815328403, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:35:52,674] [INFO] [timer.py:197:stop] 0/598, RunningAvgSamplesPerSec=6.339779092319611, CurrSamplesPerSec=5.7476326050363165, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:36:03,998] [INFO] [logging.py:68:log_dist] [Rank 0] step=300, skipped=5, lr=[9.150979862726452e-06], mom=[[0.9, 0.999]] -[2022-12-16 13:36:04,000] [INFO] [timer.py:197:stop] 0/600, RunningAvgSamplesPerSec=6.339731948606691, CurrSamplesPerSec=5.685480679268439, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:32:54,503] [INFO] [timer.py:197:stop] 0/552, RunningAvgSamplesPerSec=6.331942303093126, CurrSamplesPerSec=5.7685950512679565, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:33:05,886] [INFO] [timer.py:197:stop] 0/554, RunningAvgSamplesPerSec=6.331877715290052, CurrSamplesPerSec=5.652510360163067, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:33:17,264] [INFO] [timer.py:197:stop] 0/556, RunningAvgSamplesPerSec=6.331889387981903, CurrSamplesPerSec=5.6983301682483605, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:33:28,711] [INFO] [timer.py:197:stop] 0/558, RunningAvgSamplesPerSec=6.331829135305767, CurrSamplesPerSec=5.683874023366699, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:33:40,068] [INFO] [logging.py:68:log_dist] [Rank 0] step=280, skipped=5, lr=[9.038013352913754e-06], mom=[[0.9, 0.999]] +[2022-12-16 20:33:40,070] [INFO] [timer.py:197:stop] 0/560, RunningAvgSamplesPerSec=6.331762277422967, CurrSamplesPerSec=5.6758255427552475, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:33:51,539] [INFO] [timer.py:197:stop] 0/562, RunningAvgSamplesPerSec=6.33149639148276, CurrSamplesPerSec=5.578608736601625, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:34:02,921] [INFO] [timer.py:197:stop] 0/564, RunningAvgSamplesPerSec=6.331446008676222, CurrSamplesPerSec=5.665100373416714, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:34:14,469] [INFO] [timer.py:197:stop] 0/566, RunningAvgSamplesPerSec=6.331452750529917, CurrSamplesPerSec=5.698296540516734, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:34:25,876] [INFO] [timer.py:197:stop] 0/568, RunningAvgSamplesPerSec=6.331366092485218, CurrSamplesPerSec=5.647619758537656, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:34:37,224] [INFO] [timer.py:197:stop] 0/570, RunningAvgSamplesPerSec=6.331366844477702, CurrSamplesPerSec=5.709335862237885, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:34:48,577] [INFO] [timer.py:197:stop] 0/572, RunningAvgSamplesPerSec=6.331423638643338, CurrSamplesPerSec=5.7014241652722255, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:35:00,234] [INFO] [timer.py:197:stop] 0/574, RunningAvgSamplesPerSec=6.331173142949745, CurrSamplesPerSec=5.674498540151139, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:35:11,592] [INFO] [timer.py:197:stop] 0/576, RunningAvgSamplesPerSec=6.331154306508723, CurrSamplesPerSec=5.684172749770408, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:35:23,056] [INFO] [timer.py:197:stop] 0/578, RunningAvgSamplesPerSec=6.331166114638882, CurrSamplesPerSec=5.694031457786667, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:35:34,645] [INFO] [logging.py:68:log_dist] [Rank 0] step=290, skipped=5, lr=[9.095487745564754e-06], mom=[[0.9, 0.999]] +[2022-12-16 20:35:34,647] [INFO] [timer.py:197:stop] 0/580, RunningAvgSamplesPerSec=6.33109363132063, CurrSamplesPerSec=5.688584603467747, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:35:45,976] [INFO] [timer.py:197:stop] 0/582, RunningAvgSamplesPerSec=6.331107874787885, CurrSamplesPerSec=5.708166230976763, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:35:57,335] [INFO] [timer.py:197:stop] 0/584, RunningAvgSamplesPerSec=6.331087853249488, CurrSamplesPerSec=5.686966795466654, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:36:08,878] [INFO] [timer.py:197:stop] 0/586, RunningAvgSamplesPerSec=6.331042606888964, CurrSamplesPerSec=5.681727775695035, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:36:20,260] [INFO] [timer.py:197:stop] 0/588, RunningAvgSamplesPerSec=6.330978840033063, CurrSamplesPerSec=5.674192433800074, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:36:31,673] [INFO] [timer.py:197:stop] 0/590, RunningAvgSamplesPerSec=6.331058742308961, CurrSamplesPerSec=5.72288667256049, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:36:43,263] [INFO] [timer.py:197:stop] 0/592, RunningAvgSamplesPerSec=6.331048536749523, CurrSamplesPerSec=5.697260565409411, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:36:54,663] [INFO] [timer.py:197:stop] 0/594, RunningAvgSamplesPerSec=6.330940201893569, CurrSamplesPerSec=5.636410670239449, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:37:06,011] [INFO] [timer.py:197:stop] 0/596, RunningAvgSamplesPerSec=6.330946589807912, CurrSamplesPerSec=5.697826519598953, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:37:17,405] [INFO] [timer.py:197:stop] 0/598, RunningAvgSamplesPerSec=6.330878178920464, CurrSamplesPerSec=5.708957749637251, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:37:28,767] [INFO] [logging.py:68:log_dist] [Rank 0] step=300, skipped=5, lr=[9.150979862726452e-06], mom=[[0.9, 0.999]] +[2022-12-16 20:37:28,769] [INFO] [timer.py:197:stop] 0/600, RunningAvgSamplesPerSec=6.330782670927434, CurrSamplesPerSec=5.643513461533772, MemAllocated=3.0GB, MaxMemAllocated=19.53GB {'loss': 0.068, 'learning_rate': 9.150979862726452e-06, 'epoch': 1.27} -[2022-12-16 13:36:15,345] [INFO] [timer.py:197:stop] 0/602, RunningAvgSamplesPerSec=6.339726669949757, CurrSamplesPerSec=5.694057788240676, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:36:26,664] [INFO] [timer.py:197:stop] 0/604, RunningAvgSamplesPerSec=6.339747213370603, CurrSamplesPerSec=5.705463611623002, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:36:37,987] [INFO] [timer.py:197:stop] 0/606, RunningAvgSamplesPerSec=6.339748462522443, CurrSamplesPerSec=5.70420465478969, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:36:49,282] [INFO] [timer.py:197:stop] 0/608, RunningAvgSamplesPerSec=6.3397972823506645, CurrSamplesPerSec=5.703827706300002, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:37:00,638] [INFO] [timer.py:197:stop] 0/610, RunningAvgSamplesPerSec=6.339776098051399, CurrSamplesPerSec=5.684790522173071, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:37:12,026] [INFO] [timer.py:197:stop] 0/612, RunningAvgSamplesPerSec=6.339667566301425, CurrSamplesPerSec=5.709359905773123, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:37:23,357] [INFO] [timer.py:197:stop] 0/614, RunningAvgSamplesPerSec=6.339668227082106, CurrSamplesPerSec=5.71898944572501, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:37:34,717] [INFO] [timer.py:197:stop] 0/616, RunningAvgSamplesPerSec=6.339607910614529, CurrSamplesPerSec=5.684605850491065, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:37:46,081] [INFO] [timer.py:197:stop] 0/618, RunningAvgSamplesPerSec=6.3395661909756, CurrSamplesPerSec=5.7021224855506185, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:37:57,317] [INFO] [logging.py:68:log_dist] [Rank 0] step=310, skipped=5, lr=[9.204621894113846e-06], mom=[[0.9, 0.999]] -[2022-12-16 13:37:57,319] [INFO] [timer.py:197:stop] 0/620, RunningAvgSamplesPerSec=6.339629943446628, CurrSamplesPerSec=5.731616506771275, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:38:08,653] [INFO] [timer.py:197:stop] 0/622, RunningAvgSamplesPerSec=6.339628430644952, CurrSamplesPerSec=5.696671512017795, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:38:19,973] [INFO] [timer.py:197:stop] 0/624, RunningAvgSamplesPerSec=6.33967461194956, CurrSamplesPerSec=5.721700019089708, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:38:31,257] [INFO] [timer.py:197:stop] 0/626, RunningAvgSamplesPerSec=6.339773691276162, CurrSamplesPerSec=5.75074564508181, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:38:42,544] [INFO] [timer.py:197:stop] 0/628, RunningAvgSamplesPerSec=6.3398743535812185, CurrSamplesPerSec=5.729527488686639, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:38:53,868] [INFO] [timer.py:197:stop] 0/630, RunningAvgSamplesPerSec=6.339870442560419, CurrSamplesPerSec=5.700656767380682, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:39:05,212] [INFO] [timer.py:197:stop] 0/632, RunningAvgSamplesPerSec=6.339829317517667, CurrSamplesPerSec=5.68684029237345, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:39:16,558] [INFO] [timer.py:197:stop] 0/634, RunningAvgSamplesPerSec=6.339819890170821, CurrSamplesPerSec=5.692244696362038, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:39:27,870] [INFO] [timer.py:197:stop] 0/636, RunningAvgSamplesPerSec=6.339825679711417, CurrSamplesPerSec=5.71962529044465, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:39:39,187] [INFO] [timer.py:197:stop] 0/638, RunningAvgSamplesPerSec=6.339859209258358, CurrSamplesPerSec=5.729349926332938, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:39:50,542] [INFO] [logging.py:68:log_dist] [Rank 0] step=320, skipped=5, lr=[9.256533232218034e-06], mom=[[0.9, 0.999]] -[2022-12-16 13:39:50,543] [INFO] [timer.py:197:stop] 0/640, RunningAvgSamplesPerSec=6.339814401453925, CurrSamplesPerSec=5.71844510536356, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:40:01,872] [INFO] [timer.py:197:stop] 0/642, RunningAvgSamplesPerSec=6.339826688723115, CurrSamplesPerSec=5.720280048717393, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:40:13,247] [INFO] [timer.py:197:stop] 0/644, RunningAvgSamplesPerSec=6.339761846868838, CurrSamplesPerSec=5.681088545805493, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:40:24,576] [INFO] [timer.py:197:stop] 0/646, RunningAvgSamplesPerSec=6.3397561426574445, CurrSamplesPerSec=5.706065158483307, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:40:35,928] [INFO] [timer.py:197:stop] 0/648, RunningAvgSamplesPerSec=6.339725988395178, CurrSamplesPerSec=5.689788915516862, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:40:47,268] [INFO] [timer.py:197:stop] 0/650, RunningAvgSamplesPerSec=6.339717893553607, CurrSamplesPerSec=5.6690195340352, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:37:40,125] [INFO] [timer.py:197:stop] 0/602, RunningAvgSamplesPerSec=6.330782383440869, CurrSamplesPerSec=5.690124207229166, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:37:51,504] [INFO] [timer.py:197:stop] 0/604, RunningAvgSamplesPerSec=6.3306832707920435, CurrSamplesPerSec=5.6678194496573235, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:38:02,851] [INFO] [timer.py:197:stop] 0/606, RunningAvgSamplesPerSec=6.330655808571751, CurrSamplesPerSec=5.682673657266739, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:38:14,168] [INFO] [timer.py:197:stop] 0/608, RunningAvgSamplesPerSec=6.330691143435459, CurrSamplesPerSec=5.6872265661282, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:38:25,693] [INFO] [timer.py:197:stop] 0/610, RunningAvgSamplesPerSec=6.330329932714655, CurrSamplesPerSec=5.668653446060722, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:38:37,073] [INFO] [timer.py:197:stop] 0/612, RunningAvgSamplesPerSec=6.330272091806936, CurrSamplesPerSec=5.659673142650592, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:38:48,518] [INFO] [timer.py:197:stop] 0/614, RunningAvgSamplesPerSec=6.330101699475937, CurrSamplesPerSec=5.62189192221988, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:39:00,031] [INFO] [timer.py:197:stop] 0/616, RunningAvgSamplesPerSec=6.330042213874827, CurrSamplesPerSec=5.6853116162218384, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:39:11,430] [INFO] [timer.py:197:stop] 0/618, RunningAvgSamplesPerSec=6.329934203787018, CurrSamplesPerSec=5.662486866094601, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:39:22,683] [INFO] [logging.py:68:log_dist] [Rank 0] step=310, skipped=5, lr=[9.204621894113846e-06], mom=[[0.9, 0.999]] +[2022-12-16 20:39:22,684] [INFO] [timer.py:197:stop] 0/620, RunningAvgSamplesPerSec=6.329990960271664, CurrSamplesPerSec=5.721851007076893, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:39:34,075] [INFO] [timer.py:197:stop] 0/622, RunningAvgSamplesPerSec=6.330011262521667, CurrSamplesPerSec=5.712285012692834, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:39:45,443] [INFO] [timer.py:197:stop] 0/624, RunningAvgSamplesPerSec=6.329971505255353, CurrSamplesPerSec=5.673877006086818, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:39:56,776] [INFO] [timer.py:197:stop] 0/626, RunningAvgSamplesPerSec=6.330001807060071, CurrSamplesPerSec=5.713938909287577, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:40:08,120] [INFO] [timer.py:197:stop] 0/628, RunningAvgSamplesPerSec=6.330083907626634, CurrSamplesPerSec=5.71931502735193, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:40:19,422] [INFO] [timer.py:197:stop] 0/630, RunningAvgSamplesPerSec=6.330141783936141, CurrSamplesPerSec=5.7165628862802125, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:40:30,734] [INFO] [timer.py:197:stop] 0/632, RunningAvgSamplesPerSec=6.33018163456153, CurrSamplesPerSec=5.710113372472082, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:40:42,220] [INFO] [timer.py:197:stop] 0/634, RunningAvgSamplesPerSec=6.330237538427551, CurrSamplesPerSec=5.711593437284221, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:40:53,732] [INFO] [timer.py:197:stop] 0/636, RunningAvgSamplesPerSec=6.330298913515279, CurrSamplesPerSec=5.720521903529705, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:41:05,190] [INFO] [timer.py:197:stop] 0/638, RunningAvgSamplesPerSec=6.33008533267941, CurrSamplesPerSec=5.573157273614473, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:41:16,628] [INFO] [logging.py:68:log_dist] [Rank 0] step=320, skipped=5, lr=[9.256533232218034e-06], mom=[[0.9, 0.999]] +[2022-12-16 20:41:16,630] [INFO] [timer.py:197:stop] 0/640, RunningAvgSamplesPerSec=6.330096923728828, CurrSamplesPerSec=5.724061856567901, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:41:28,236] [INFO] [timer.py:197:stop] 0/642, RunningAvgSamplesPerSec=6.330130764892317, CurrSamplesPerSec=5.707274214632692, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:41:39,681] [INFO] [timer.py:197:stop] 0/644, RunningAvgSamplesPerSec=6.329947385873928, CurrSamplesPerSec=5.589888718849701, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:41:51,204] [INFO] [timer.py:197:stop] 0/646, RunningAvgSamplesPerSec=6.33000563881282, CurrSamplesPerSec=5.708520444862126, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:42:02,680] [INFO] [timer.py:197:stop] 0/648, RunningAvgSamplesPerSec=6.330004564500237, CurrSamplesPerSec=5.6796963498806345, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:42:14,226] [INFO] [timer.py:197:stop] 0/650, RunningAvgSamplesPerSec=6.329629407966906, CurrSamplesPerSec=5.467900776983032, MemAllocated=3.0GB, MaxMemAllocated=19.53GB {'loss': 0.0673, 'learning_rate': 9.281874101213678e-06, 'epoch': 1.38} -[2022-12-16 13:40:58,588] [INFO] [timer.py:197:stop] 0/652, RunningAvgSamplesPerSec=6.339732138996129, CurrSamplesPerSec=5.710183336916263, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:41:09,932] [INFO] [timer.py:197:stop] 0/654, RunningAvgSamplesPerSec=6.339713588073108, CurrSamplesPerSec=5.698703244961602, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:41:21,251] [INFO] [timer.py:197:stop] 0/656, RunningAvgSamplesPerSec=6.3397462039918056, CurrSamplesPerSec=5.704568317909728, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:41:32,597] [INFO] [timer.py:197:stop] 0/658, RunningAvgSamplesPerSec=6.339728329434604, CurrSamplesPerSec=5.688525052206091, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:41:43,927] [INFO] [logging.py:68:log_dist] [Rank 0] step=330, skipped=5, lr=[9.306822072655195e-06], mom=[[0.9, 0.999]] -[2022-12-16 13:41:43,928] [INFO] [timer.py:197:stop] 0/660, RunningAvgSamplesPerSec=6.339748008707906, CurrSamplesPerSec=5.718520634315761, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:41:55,285] [INFO] [timer.py:197:stop] 0/662, RunningAvgSamplesPerSec=6.339712410854758, CurrSamplesPerSec=5.693721308198521, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:42:06,580] [INFO] [timer.py:197:stop] 0/664, RunningAvgSamplesPerSec=6.339760320122381, CurrSamplesPerSec=5.715314115949173, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:42:17,869] [INFO] [timer.py:197:stop] 0/666, RunningAvgSamplesPerSec=6.339852918004305, CurrSamplesPerSec=5.742691326057452, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:42:29,166] [INFO] [timer.py:197:stop] 0/668, RunningAvgSamplesPerSec=6.3399289149016615, CurrSamplesPerSec=5.718510644888239, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:42:40,498] [INFO] [timer.py:197:stop] 0/670, RunningAvgSamplesPerSec=6.33993930819201, CurrSamplesPerSec=5.716523930033197, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:42:51,850] [INFO] [timer.py:197:stop] 0/672, RunningAvgSamplesPerSec=6.3399087697760335, CurrSamplesPerSec=5.702439850286114, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:43:03,189] [INFO] [timer.py:197:stop] 0/674, RunningAvgSamplesPerSec=6.339904910312488, CurrSamplesPerSec=5.697196479393878, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:43:14,515] [INFO] [timer.py:197:stop] 0/676, RunningAvgSamplesPerSec=6.339919649025721, CurrSamplesPerSec=5.707675162721676, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:43:25,854] [INFO] [timer.py:197:stop] 0/678, RunningAvgSamplesPerSec=6.339914916778764, CurrSamplesPerSec=5.711340914229161, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:43:37,180] [INFO] [logging.py:68:log_dist] [Rank 0] step=340, skipped=5, lr=[9.355586771917604e-06], mom=[[0.9, 0.999]] -[2022-12-16 13:43:37,182] [INFO] [timer.py:197:stop] 0/680, RunningAvgSamplesPerSec=6.3399149903217955, CurrSamplesPerSec=5.695608574111107, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:43:48,529] [INFO] [timer.py:197:stop] 0/682, RunningAvgSamplesPerSec=6.339897700238457, CurrSamplesPerSec=5.704236412854308, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:43:59,879] [INFO] [timer.py:197:stop] 0/684, RunningAvgSamplesPerSec=6.339888606119401, CurrSamplesPerSec=5.7055628096091775, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:44:11,185] [INFO] [timer.py:197:stop] 0/686, RunningAvgSamplesPerSec=6.339917140932174, CurrSamplesPerSec=5.706974511179494, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:44:22,550] [INFO] [timer.py:197:stop] 0/688, RunningAvgSamplesPerSec=6.339882372446429, CurrSamplesPerSec=5.692362024576135, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:44:33,878] [INFO] [timer.py:197:stop] 0/690, RunningAvgSamplesPerSec=6.339900812437982, CurrSamplesPerSec=5.707581716395236, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:44:45,202] [INFO] [timer.py:197:stop] 0/692, RunningAvgSamplesPerSec=6.339926640281542, CurrSamplesPerSec=5.708136128464922, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:44:56,496] [INFO] [timer.py:197:stop] 0/694, RunningAvgSamplesPerSec=6.339975244187676, CurrSamplesPerSec=5.722016639575232, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:45:07,847] [INFO] [timer.py:197:stop] 0/696, RunningAvgSamplesPerSec=6.339976987812794, CurrSamplesPerSec=5.701426102795623, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:45:19,156] [INFO] [timer.py:197:stop] 0/698, RunningAvgSamplesPerSec=6.340008928018111, CurrSamplesPerSec=5.707072548359031, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:45:30,483] [INFO] [logging.py:68:log_dist] [Rank 0] step=350, skipped=5, lr=[9.402917005361869e-06], mom=[[0.9, 0.999]] -[2022-12-16 13:45:30,485] [INFO] [timer.py:197:stop] 0/700, RunningAvgSamplesPerSec=6.33998969805123, CurrSamplesPerSec=5.688489611358794, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:42:25,619] [INFO] [timer.py:197:stop] 0/652, RunningAvgSamplesPerSec=6.3297077715609475, CurrSamplesPerSec=5.719371812973972, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:42:36,953] [INFO] [timer.py:197:stop] 0/654, RunningAvgSamplesPerSec=6.3297367998254535, CurrSamplesPerSec=5.705432082423167, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:42:48,561] [INFO] [timer.py:197:stop] 0/656, RunningAvgSamplesPerSec=6.329244035664368, CurrSamplesPerSec=5.4295023895352434, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:42:59,916] [INFO] [timer.py:197:stop] 0/658, RunningAvgSamplesPerSec=6.329237844863367, CurrSamplesPerSec=5.688861882844252, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:43:11,315] [INFO] [logging.py:68:log_dist] [Rank 0] step=330, skipped=5, lr=[9.306822072655195e-06], mom=[[0.9, 0.999]] +[2022-12-16 20:43:11,317] [INFO] [timer.py:197:stop] 0/660, RunningAvgSamplesPerSec=6.329147547268884, CurrSamplesPerSec=5.661335155406955, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:43:22,771] [INFO] [timer.py:197:stop] 0/662, RunningAvgSamplesPerSec=6.328950035472537, CurrSamplesPerSec=5.603190307343656, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:43:34,098] [INFO] [timer.py:197:stop] 0/664, RunningAvgSamplesPerSec=6.328962025890756, CurrSamplesPerSec=5.695756979637728, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:43:45,431] [INFO] [timer.py:197:stop] 0/666, RunningAvgSamplesPerSec=6.3289940857266, CurrSamplesPerSec=5.71611467805298, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:43:57,131] [INFO] [timer.py:197:stop] 0/668, RunningAvgSamplesPerSec=6.328336634156604, CurrSamplesPerSec=5.346707418970935, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:44:08,494] [INFO] [timer.py:197:stop] 0/670, RunningAvgSamplesPerSec=6.328314264906798, CurrSamplesPerSec=5.697728074190073, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:44:19,922] [INFO] [timer.py:197:stop] 0/672, RunningAvgSamplesPerSec=6.328180041670801, CurrSamplesPerSec=5.648812729681554, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:44:31,398] [INFO] [timer.py:197:stop] 0/674, RunningAvgSamplesPerSec=6.3281297840632105, CurrSamplesPerSec=5.669640962282183, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:44:42,725] [INFO] [timer.py:197:stop] 0/676, RunningAvgSamplesPerSec=6.328147856603661, CurrSamplesPerSec=5.706415715042744, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:44:54,300] [INFO] [timer.py:197:stop] 0/678, RunningAvgSamplesPerSec=6.328126766932452, CurrSamplesPerSec=5.706175536698659, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:45:05,797] [INFO] [logging.py:68:log_dist] [Rank 0] step=340, skipped=5, lr=[9.355586771917604e-06], mom=[[0.9, 0.999]] +[2022-12-16 20:45:05,799] [INFO] [timer.py:197:stop] 0/680, RunningAvgSamplesPerSec=6.328068517423547, CurrSamplesPerSec=5.6591135490713995, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:45:17,174] [INFO] [timer.py:197:stop] 0/682, RunningAvgSamplesPerSec=6.328021957238399, CurrSamplesPerSec=5.682638529889822, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:45:28,596] [INFO] [timer.py:197:stop] 0/684, RunningAvgSamplesPerSec=6.327975433273524, CurrSamplesPerSec=5.680331179663342, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:45:40,277] [INFO] [timer.py:197:stop] 0/686, RunningAvgSamplesPerSec=6.327854974024864, CurrSamplesPerSec=5.683192919422884, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:45:51,668] [INFO] [timer.py:197:stop] 0/688, RunningAvgSamplesPerSec=6.3277861583761235, CurrSamplesPerSec=5.6660746921346865, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:46:03,229] [INFO] [timer.py:197:stop] 0/690, RunningAvgSamplesPerSec=6.327768422044784, CurrSamplesPerSec=5.680019395846795, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:46:14,838] [INFO] [timer.py:197:stop] 0/692, RunningAvgSamplesPerSec=6.327696014574452, CurrSamplesPerSec=5.657069646483641, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:46:26,177] [INFO] [timer.py:197:stop] 0/694, RunningAvgSamplesPerSec=6.327699873718016, CurrSamplesPerSec=5.690042913451338, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:46:37,632] [INFO] [timer.py:197:stop] 0/696, RunningAvgSamplesPerSec=6.3276904427181755, CurrSamplesPerSec=5.683944790522634, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:46:49,248] [INFO] [timer.py:197:stop] 0/698, RunningAvgSamplesPerSec=6.327720553451032, CurrSamplesPerSec=5.706627525449706, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:47:00,579] [INFO] [logging.py:68:log_dist] [Rank 0] step=350, skipped=5, lr=[9.402917005361869e-06], mom=[[0.9, 0.999]] +[2022-12-16 20:47:00,581] [INFO] [timer.py:197:stop] 0/700, RunningAvgSamplesPerSec=6.327703647205433, CurrSamplesPerSec=5.679101551095431, MemAllocated=3.0GB, MaxMemAllocated=19.53GB {'loss': 0.0679, 'learning_rate': 9.402917005361869e-06, 'epoch': 1.48} -[2022-12-16 13:45:41,855] [INFO] [timer.py:197:stop] 0/702, RunningAvgSamplesPerSec=6.339953341047152, CurrSamplesPerSec=5.691112944766182, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:45:53,172] [INFO] [timer.py:197:stop] 0/704, RunningAvgSamplesPerSec=6.339972100121671, CurrSamplesPerSec=5.700012061025391, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:46:04,520] [INFO] [timer.py:197:stop] 0/706, RunningAvgSamplesPerSec=6.339948693119949, CurrSamplesPerSec=5.700656525255543, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:46:15,841] [INFO] [timer.py:197:stop] 0/708, RunningAvgSamplesPerSec=6.339976446041088, CurrSamplesPerSec=5.715632950845321, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:46:27,186] [INFO] [timer.py:197:stop] 0/710, RunningAvgSamplesPerSec=6.339961938542896, CurrSamplesPerSec=5.688084845523418, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:46:38,529] [INFO] [timer.py:197:stop] 0/712, RunningAvgSamplesPerSec=6.339936097781599, CurrSamplesPerSec=5.675824342653396, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:46:49,905] [INFO] [timer.py:197:stop] 0/714, RunningAvgSamplesPerSec=6.339865841099534, CurrSamplesPerSec=5.675659933496987, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:47:01,280] [INFO] [timer.py:197:stop] 0/716, RunningAvgSamplesPerSec=6.339805621469403, CurrSamplesPerSec=5.6676813516656495, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:47:12,616] [INFO] [timer.py:197:stop] 0/718, RunningAvgSamplesPerSec=6.339809820815214, CurrSamplesPerSec=5.699940409091249, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:47:23,994] [INFO] [logging.py:68:log_dist] [Rank 0] step=360, skipped=5, lr=[9.44889475969735e-06], mom=[[0.9, 0.999]] -[2022-12-16 13:47:23,996] [INFO] [timer.py:197:stop] 0/720, RunningAvgSamplesPerSec=6.3397379119814845, CurrSamplesPerSec=5.660866675560336, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:47:35,325] [INFO] [timer.py:197:stop] 0/722, RunningAvgSamplesPerSec=6.339759354188557, CurrSamplesPerSec=5.711746566085115, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:47:46,687] [INFO] [timer.py:197:stop] 0/724, RunningAvgSamplesPerSec=6.339718324350815, CurrSamplesPerSec=5.6913977108507305, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:47:58,043] [INFO] [timer.py:197:stop] 0/726, RunningAvgSamplesPerSec=6.3396828195960975, CurrSamplesPerSec=5.696925883021481, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:48:09,400] [INFO] [timer.py:197:stop] 0/728, RunningAvgSamplesPerSec=6.339658217967878, CurrSamplesPerSec=5.686045499809826, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:48:20,741] [INFO] [timer.py:197:stop] 0/730, RunningAvgSamplesPerSec=6.339619498024999, CurrSamplesPerSec=5.6979312576386025, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:48:32,064] [INFO] [timer.py:197:stop] 0/732, RunningAvgSamplesPerSec=6.339645849883508, CurrSamplesPerSec=5.720338072503252, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:48:43,365] [INFO] [timer.py:197:stop] 0/734, RunningAvgSamplesPerSec=6.339677598600247, CurrSamplesPerSec=5.718338150034742, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:48:54,704] [INFO] [timer.py:197:stop] 0/736, RunningAvgSamplesPerSec=6.33964311122071, CurrSamplesPerSec=5.676220403816082, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:49:06,037] [INFO] [timer.py:197:stop] 0/738, RunningAvgSamplesPerSec=6.339608348646385, CurrSamplesPerSec=5.6891620983826305, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:49:17,398] [INFO] [logging.py:68:log_dist] [Rank 0] step=370, skipped=5, lr=[9.493595187571683e-06], mom=[[0.9, 0.999]] -[2022-12-16 13:49:17,400] [INFO] [timer.py:197:stop] 0/740, RunningAvgSamplesPerSec=6.339572544644652, CurrSamplesPerSec=5.697713077890738, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:49:28,737] [INFO] [timer.py:197:stop] 0/742, RunningAvgSamplesPerSec=6.339578787219607, CurrSamplesPerSec=5.709940655091284, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:49:40,073] [INFO] [timer.py:197:stop] 0/744, RunningAvgSamplesPerSec=6.339529466412074, CurrSamplesPerSec=5.689445704571697, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:49:51,401] [INFO] [timer.py:197:stop] 0/746, RunningAvgSamplesPerSec=6.339517662796265, CurrSamplesPerSec=5.697881911754523, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:50:02,732] [INFO] [timer.py:197:stop] 0/748, RunningAvgSamplesPerSec=6.3395072908390535, CurrSamplesPerSec=5.7089184113183284, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:50:14,073] [INFO] [timer.py:197:stop] 0/750, RunningAvgSamplesPerSec=6.33949993468579, CurrSamplesPerSec=5.695194578076193, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:47:11,950] [INFO] [timer.py:197:stop] 0/702, RunningAvgSamplesPerSec=6.327654490303129, CurrSamplesPerSec=5.675178761431945, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:47:23,335] [INFO] [timer.py:197:stop] 0/704, RunningAvgSamplesPerSec=6.327571946536897, CurrSamplesPerSec=5.68916306298161, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:47:34,792] [INFO] [timer.py:197:stop] 0/706, RunningAvgSamplesPerSec=6.3273886387415645, CurrSamplesPerSec=5.601829945397302, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:47:46,190] [INFO] [timer.py:197:stop] 0/708, RunningAvgSamplesPerSec=6.327378689010138, CurrSamplesPerSec=5.6861664269358885, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:47:57,593] [INFO] [timer.py:197:stop] 0/710, RunningAvgSamplesPerSec=6.327316883649861, CurrSamplesPerSec=5.729989789413974, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:48:08,972] [INFO] [timer.py:197:stop] 0/712, RunningAvgSamplesPerSec=6.327270752057368, CurrSamplesPerSec=5.654470688433138, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:48:20,337] [INFO] [timer.py:197:stop] 0/714, RunningAvgSamplesPerSec=6.3272791064630285, CurrSamplesPerSec=5.7056947555522, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:48:31,825] [INFO] [timer.py:197:stop] 0/716, RunningAvgSamplesPerSec=6.327258717853537, CurrSamplesPerSec=5.680939701404044, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:48:43,188] [INFO] [timer.py:197:stop] 0/718, RunningAvgSamplesPerSec=6.327295608249954, CurrSamplesPerSec=5.7009349827558875, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:48:54,690] [INFO] [logging.py:68:log_dist] [Rank 0] step=360, skipped=5, lr=[9.44889475969735e-06], mom=[[0.9, 0.999]] +[2022-12-16 20:48:54,691] [INFO] [timer.py:197:stop] 0/720, RunningAvgSamplesPerSec=6.327035979572224, CurrSamplesPerSec=5.538819333340514, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:49:06,214] [INFO] [timer.py:197:stop] 0/722, RunningAvgSamplesPerSec=6.3271111128208615, CurrSamplesPerSec=5.722535309390941, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:49:17,701] [INFO] [timer.py:197:stop] 0/724, RunningAvgSamplesPerSec=6.327128664425442, CurrSamplesPerSec=5.708196333806104, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:49:29,078] [INFO] [timer.py:197:stop] 0/726, RunningAvgSamplesPerSec=6.327087552167851, CurrSamplesPerSec=5.676013965040711, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:49:40,463] [INFO] [timer.py:197:stop] 0/728, RunningAvgSamplesPerSec=6.327109934448353, CurrSamplesPerSec=5.6931926341853965, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:49:51,845] [INFO] [timer.py:197:stop] 0/730, RunningAvgSamplesPerSec=6.327038089475233, CurrSamplesPerSec=5.669245100330484, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:50:03,266] [INFO] [timer.py:197:stop] 0/732, RunningAvgSamplesPerSec=6.326924670910652, CurrSamplesPerSec=5.621069038048019, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:50:14,582] [INFO] [timer.py:197:stop] 0/734, RunningAvgSamplesPerSec=6.326959216687658, CurrSamplesPerSec=5.701227997844524, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:50:25,927] [INFO] [timer.py:197:stop] 0/736, RunningAvgSamplesPerSec=6.326940875652652, CurrSamplesPerSec=5.674422250368862, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:50:37,316] [INFO] [timer.py:197:stop] 0/738, RunningAvgSamplesPerSec=6.326823613295239, CurrSamplesPerSec=5.6246842242873045, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:50:48,661] [INFO] [logging.py:68:log_dist] [Rank 0] step=370, skipped=5, lr=[9.493595187571683e-06], mom=[[0.9, 0.999]] +[2022-12-16 20:50:48,663] [INFO] [timer.py:197:stop] 0/740, RunningAvgSamplesPerSec=6.3268314840496505, CurrSamplesPerSec=5.707714969358892, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:51:00,070] [INFO] [timer.py:197:stop] 0/742, RunningAvgSamplesPerSec=6.326866428321336, CurrSamplesPerSec=5.711735384974162, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:51:11,574] [INFO] [timer.py:197:stop] 0/744, RunningAvgSamplesPerSec=6.326713261271251, CurrSamplesPerSec=5.631804735285801, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:51:22,930] [INFO] [timer.py:197:stop] 0/746, RunningAvgSamplesPerSec=6.326691157515512, CurrSamplesPerSec=5.673347214431089, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:51:34,299] [INFO] [timer.py:197:stop] 0/748, RunningAvgSamplesPerSec=6.326638394503159, CurrSamplesPerSec=5.668620646503149, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:51:45,734] [INFO] [timer.py:197:stop] 0/750, RunningAvgSamplesPerSec=6.326511199481409, CurrSamplesPerSec=5.6745774710472, MemAllocated=3.0GB, MaxMemAllocated=19.53GB {'loss': 0.0671, 'learning_rate': 9.51548820454122e-06, 'epoch': 1.59} -[2022-12-16 13:50:25,431] [INFO] [timer.py:197:stop] 0/752, RunningAvgSamplesPerSec=6.339481347956785, CurrSamplesPerSec=5.709314976098911, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:50:36,763] [INFO] [timer.py:197:stop] 0/754, RunningAvgSamplesPerSec=6.3395013841517045, CurrSamplesPerSec=5.719909016876233, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:50:48,093] [INFO] [timer.py:197:stop] 0/756, RunningAvgSamplesPerSec=6.339493013298119, CurrSamplesPerSec=5.700768147125971, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:50:59,392] [INFO] [timer.py:197:stop] 0/758, RunningAvgSamplesPerSec=6.339503676973275, CurrSamplesPerSec=5.708950221892302, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:51:10,704] [INFO] [logging.py:68:log_dist] [Rank 0] step=380, skipped=5, lr=[9.53708734662638e-06], mom=[[0.9, 0.999]] -[2022-12-16 13:51:10,706] [INFO] [timer.py:197:stop] 0/760, RunningAvgSamplesPerSec=6.3395140683285325, CurrSamplesPerSec=5.71419847388043, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:51:22,052] [INFO] [timer.py:197:stop] 0/762, RunningAvgSamplesPerSec=6.3395024927179175, CurrSamplesPerSec=5.709905432716866, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:51:33,437] [INFO] [timer.py:197:stop] 0/764, RunningAvgSamplesPerSec=6.339423427139638, CurrSamplesPerSec=5.66285024659257, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:51:44,783] [INFO] [timer.py:197:stop] 0/766, RunningAvgSamplesPerSec=6.339410511132895, CurrSamplesPerSec=5.694031457786667, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:51:56,141] [INFO] [timer.py:197:stop] 0/768, RunningAvgSamplesPerSec=6.3393775231254565, CurrSamplesPerSec=5.6868733031141225, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:52:07,511] [INFO] [timer.py:197:stop] 0/770, RunningAvgSamplesPerSec=6.339326280601822, CurrSamplesPerSec=5.685077063917621, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:52:18,846] [INFO] [timer.py:197:stop] 0/772, RunningAvgSamplesPerSec=6.33933460713198, CurrSamplesPerSec=5.72251066678781, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:52:30,187] [INFO] [timer.py:197:stop] 0/774, RunningAvgSamplesPerSec=6.339317582023612, CurrSamplesPerSec=5.691922189402574, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:52:41,541] [INFO] [timer.py:197:stop] 0/776, RunningAvgSamplesPerSec=6.3393378217346354, CurrSamplesPerSec=5.718710927392318, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:52:52,900] [INFO] [timer.py:197:stop] 0/778, RunningAvgSamplesPerSec=6.339364210343123, CurrSamplesPerSec=5.7224716294350815, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:53:04,209] [INFO] [logging.py:68:log_dist] [Rank 0] step=390, skipped=5, lr=[9.57943484127219e-06], mom=[[0.9, 0.999]] -[2022-12-16 13:53:04,211] [INFO] [timer.py:197:stop] 0/780, RunningAvgSamplesPerSec=6.339405370022461, CurrSamplesPerSec=5.728916094373603, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:53:15,543] [INFO] [timer.py:197:stop] 0/782, RunningAvgSamplesPerSec=6.339414951131689, CurrSamplesPerSec=5.707265720579158, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:53:26,878] [INFO] [timer.py:197:stop] 0/784, RunningAvgSamplesPerSec=6.339417908910616, CurrSamplesPerSec=5.702162457027009, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:53:38,239] [INFO] [timer.py:197:stop] 0/786, RunningAvgSamplesPerSec=6.3393991975344575, CurrSamplesPerSec=5.719818094602671, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:53:49,623] [INFO] [timer.py:197:stop] 0/788, RunningAvgSamplesPerSec=6.339324809483852, CurrSamplesPerSec=5.669576298657823, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:54:00,943] [INFO] [timer.py:197:stop] 0/790, RunningAvgSamplesPerSec=6.339353765970902, CurrSamplesPerSec=5.719103005166369, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:54:12,273] [INFO] [timer.py:197:stop] 0/792, RunningAvgSamplesPerSec=6.339365994979452, CurrSamplesPerSec=5.6981792094536186, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:54:23,570] [INFO] [timer.py:197:stop] 0/794, RunningAvgSamplesPerSec=6.339428107509493, CurrSamplesPerSec=5.714707454353629, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:54:34,890] [INFO] [timer.py:197:stop] 0/796, RunningAvgSamplesPerSec=6.33942946769859, CurrSamplesPerSec=5.700301591915373, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:54:46,228] [INFO] [timer.py:197:stop] 0/798, RunningAvgSamplesPerSec=6.339429936314821, CurrSamplesPerSec=5.698297024366172, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:54:57,531] [INFO] [logging.py:68:log_dist] [Rank 0] step=400, skipped=5, lr=[9.620696382156558e-06], mom=[[0.9, 0.999]] -[2022-12-16 13:54:57,533] [INFO] [timer.py:197:stop] 0/800, RunningAvgSamplesPerSec=6.339431347765237, CurrSamplesPerSec=5.699998505115901, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:51:57,109] [INFO] [timer.py:197:stop] 0/752, RunningAvgSamplesPerSec=6.326485323384035, CurrSamplesPerSec=5.6853226941193356, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:52:08,661] [INFO] [timer.py:197:stop] 0/754, RunningAvgSamplesPerSec=6.326162477755519, CurrSamplesPerSec=5.498609592683866, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:52:19,985] [INFO] [timer.py:197:stop] 0/756, RunningAvgSamplesPerSec=6.326188890677415, CurrSamplesPerSec=5.701028695875893, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:52:31,324] [INFO] [timer.py:197:stop] 0/758, RunningAvgSamplesPerSec=6.326163715081771, CurrSamplesPerSec=5.669143569330741, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:52:42,739] [INFO] [logging.py:68:log_dist] [Rank 0] step=380, skipped=5, lr=[9.53708734662638e-06], mom=[[0.9, 0.999]] +[2022-12-16 20:52:42,741] [INFO] [timer.py:197:stop] 0/760, RunningAvgSamplesPerSec=6.326036630362106, CurrSamplesPerSec=5.680581689222482, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:52:54,099] [INFO] [timer.py:197:stop] 0/762, RunningAvgSamplesPerSec=6.326032010296816, CurrSamplesPerSec=5.697016078956156, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:53:05,616] [INFO] [timer.py:197:stop] 0/764, RunningAvgSamplesPerSec=6.325775425618378, CurrSamplesPerSec=5.5389985402003274, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:53:17,029] [INFO] [timer.py:197:stop] 0/766, RunningAvgSamplesPerSec=6.325833860421, CurrSamplesPerSec=5.722426005204077, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:53:28,386] [INFO] [timer.py:197:stop] 0/768, RunningAvgSamplesPerSec=6.325830415376826, CurrSamplesPerSec=5.686190275721349, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:53:39,816] [INFO] [timer.py:197:stop] 0/770, RunningAvgSamplesPerSec=6.325709680080751, CurrSamplesPerSec=5.627372182760118, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:53:51,153] [INFO] [timer.py:197:stop] 0/772, RunningAvgSamplesPerSec=6.32574588936009, CurrSamplesPerSec=5.720313205022317, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:54:02,473] [INFO] [timer.py:197:stop] 0/774, RunningAvgSamplesPerSec=6.325779808632974, CurrSamplesPerSec=5.700795024194597, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:54:14,088] [INFO] [timer.py:197:stop] 0/776, RunningAvgSamplesPerSec=6.325366824020554, CurrSamplesPerSec=5.711546041938402, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:54:25,416] [INFO] [timer.py:197:stop] 0/778, RunningAvgSamplesPerSec=6.325388120195118, CurrSamplesPerSec=5.688599551753485, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:54:36,837] [INFO] [logging.py:68:log_dist] [Rank 0] step=390, skipped=5, lr=[9.57943484127219e-06], mom=[[0.9, 0.999]] +[2022-12-16 20:54:36,839] [INFO] [timer.py:197:stop] 0/780, RunningAvgSamplesPerSec=6.325287416370516, CurrSamplesPerSec=5.61396389033286, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:54:48,184] [INFO] [timer.py:197:stop] 0/782, RunningAvgSamplesPerSec=6.325311851558554, CurrSamplesPerSec=5.685149787510647, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:54:59,503] [INFO] [timer.py:197:stop] 0/784, RunningAvgSamplesPerSec=6.325374166849946, CurrSamplesPerSec=5.711454413166849, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:55:10,900] [INFO] [timer.py:197:stop] 0/786, RunningAvgSamplesPerSec=6.3253531728228385, CurrSamplesPerSec=5.716634956736923, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:55:22,255] [INFO] [timer.py:197:stop] 0/788, RunningAvgSamplesPerSec=6.3253672114173085, CurrSamplesPerSec=5.696488243568338, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:55:33,802] [INFO] [timer.py:197:stop] 0/790, RunningAvgSamplesPerSec=6.325068850147272, CurrSamplesPerSec=5.498009999649353, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:55:45,139] [INFO] [timer.py:197:stop] 0/792, RunningAvgSamplesPerSec=6.325112867831402, CurrSamplesPerSec=5.697386081443267, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:55:56,474] [INFO] [timer.py:197:stop] 0/794, RunningAvgSamplesPerSec=6.325150134664613, CurrSamplesPerSec=5.681703002256421, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:56:07,910] [INFO] [timer.py:197:stop] 0/796, RunningAvgSamplesPerSec=6.325005608906404, CurrSamplesPerSec=5.594190346236312, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:56:19,261] [INFO] [timer.py:197:stop] 0/798, RunningAvgSamplesPerSec=6.3250222821537205, CurrSamplesPerSec=5.69781031336407, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:56:30,630] [INFO] [logging.py:68:log_dist] [Rank 0] step=400, skipped=5, lr=[9.620696382156558e-06], mom=[[0.9, 0.999]] +[2022-12-16 20:56:30,632] [INFO] [timer.py:197:stop] 0/800, RunningAvgSamplesPerSec=6.324955084688922, CurrSamplesPerSec=5.645958662739079, MemAllocated=3.0GB, MaxMemAllocated=19.53GB {'loss': 0.0739, 'learning_rate': 9.620696382156558e-06, 'epoch': 1.69} -[2022-12-16 13:55:08,867] [INFO] [timer.py:197:stop] 0/802, RunningAvgSamplesPerSec=6.339439738546504, CurrSamplesPerSec=5.716013652228299, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:55:20,218] [INFO] [timer.py:197:stop] 0/804, RunningAvgSamplesPerSec=6.339440725751527, CurrSamplesPerSec=5.691773018155927, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:55:31,550] [INFO] [timer.py:197:stop] 0/806, RunningAvgSamplesPerSec=6.33944356531772, CurrSamplesPerSec=5.699770000917279, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:55:42,902] [INFO] [timer.py:197:stop] 0/808, RunningAvgSamplesPerSec=6.339425456518778, CurrSamplesPerSec=5.682000057235838, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:55:54,240] [INFO] [timer.py:197:stop] 0/810, RunningAvgSamplesPerSec=6.339427784324849, CurrSamplesPerSec=5.713082537732757, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:56:05,570] [INFO] [timer.py:197:stop] 0/812, RunningAvgSamplesPerSec=6.3394158974920405, CurrSamplesPerSec=5.694748264487391, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:56:16,915] [INFO] [timer.py:197:stop] 0/814, RunningAvgSamplesPerSec=6.339398309772514, CurrSamplesPerSec=5.6812792415486575, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:56:28,242] [INFO] [timer.py:197:stop] 0/816, RunningAvgSamplesPerSec=6.339398027966287, CurrSamplesPerSec=5.7140795139842355, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:56:39,574] [INFO] [timer.py:197:stop] 0/818, RunningAvgSamplesPerSec=6.339407071520012, CurrSamplesPerSec=5.71556626029901, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:56:50,917] [INFO] [logging.py:68:log_dist] [Rank 0] step=410, skipped=5, lr=[9.660926275674324e-06], mom=[[0.9, 0.999]] -[2022-12-16 13:56:50,919] [INFO] [timer.py:197:stop] 0/820, RunningAvgSamplesPerSec=6.339398405559651, CurrSamplesPerSec=5.700964767211974, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:57:02,288] [INFO] [timer.py:197:stop] 0/822, RunningAvgSamplesPerSec=6.339328253275692, CurrSamplesPerSec=5.679953533791154, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:57:13,645] [INFO] [timer.py:197:stop] 0/824, RunningAvgSamplesPerSec=6.3393217920803755, CurrSamplesPerSec=5.70598171053535, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:57:24,985] [INFO] [timer.py:197:stop] 0/826, RunningAvgSamplesPerSec=6.339315280868552, CurrSamplesPerSec=5.702338095912809, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:57:36,365] [INFO] [timer.py:197:stop] 0/828, RunningAvgSamplesPerSec=6.339260275276137, CurrSamplesPerSec=5.690148330458817, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:57:47,721] [INFO] [timer.py:197:stop] 0/830, RunningAvgSamplesPerSec=6.33923096134003, CurrSamplesPerSec=5.700336453805767, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:57:59,082] [INFO] [timer.py:197:stop] 0/832, RunningAvgSamplesPerSec=6.339196943996223, CurrSamplesPerSec=5.693846426785746, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:58:10,433] [INFO] [timer.py:197:stop] 0/834, RunningAvgSamplesPerSec=6.339177669502994, CurrSamplesPerSec=5.695319036406316, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:58:21,779] [INFO] [timer.py:197:stop] 0/836, RunningAvgSamplesPerSec=6.339162945202363, CurrSamplesPerSec=5.700884374109222, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:58:33,178] [INFO] [timer.py:197:stop] 0/838, RunningAvgSamplesPerSec=6.339079736344104, CurrSamplesPerSec=5.647579597527192, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:58:44,466] [INFO] [logging.py:68:log_dist] [Rank 0] step=420, skipped=5, lr=[9.700174853763023e-06], mom=[[0.9, 0.999]] -[2022-12-16 13:58:44,468] [INFO] [timer.py:197:stop] 0/840, RunningAvgSamplesPerSec=6.33911936727273, CurrSamplesPerSec=5.727943757144047, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:58:55,801] [INFO] [timer.py:197:stop] 0/842, RunningAvgSamplesPerSec=6.339122279687559, CurrSamplesPerSec=5.704047081836697, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:59:07,134] [INFO] [timer.py:197:stop] 0/844, RunningAvgSamplesPerSec=6.33912318026892, CurrSamplesPerSec=5.696926850254961, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:59:18,493] [INFO] [timer.py:197:stop] 0/846, RunningAvgSamplesPerSec=6.339085626899937, CurrSamplesPerSec=5.686659583431364, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:59:29,826] [INFO] [timer.py:197:stop] 0/848, RunningAvgSamplesPerSec=6.339094325820955, CurrSamplesPerSec=5.702049811574694, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 13:59:41,137] [INFO] [timer.py:197:stop] 0/850, RunningAvgSamplesPerSec=6.339128153912398, CurrSamplesPerSec=5.726174495728036, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:56:42,048] [INFO] [timer.py:197:stop] 0/802, RunningAvgSamplesPerSec=6.324866416048774, CurrSamplesPerSec=5.690673302590797, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:56:53,416] [INFO] [timer.py:197:stop] 0/804, RunningAvgSamplesPerSec=6.324849206965416, CurrSamplesPerSec=5.679540608704553, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:57:04,806] [INFO] [timer.py:197:stop] 0/806, RunningAvgSamplesPerSec=6.324771230961369, CurrSamplesPerSec=5.641830353065095, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:57:16,167] [INFO] [timer.py:197:stop] 0/808, RunningAvgSamplesPerSec=6.324769497199892, CurrSamplesPerSec=5.689590894872664, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:57:27,551] [INFO] [timer.py:197:stop] 0/810, RunningAvgSamplesPerSec=6.324736821415485, CurrSamplesPerSec=5.685807755696759, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:57:39,228] [INFO] [timer.py:197:stop] 0/812, RunningAvgSamplesPerSec=6.324646795857594, CurrSamplesPerSec=5.683070915568724, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:57:50,570] [INFO] [timer.py:197:stop] 0/814, RunningAvgSamplesPerSec=6.324672285175087, CurrSamplesPerSec=5.70057977262278, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:58:01,922] [INFO] [timer.py:197:stop] 0/816, RunningAvgSamplesPerSec=6.324660014668366, CurrSamplesPerSec=5.694164803509382, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:58:13,588] [INFO] [timer.py:197:stop] 0/818, RunningAvgSamplesPerSec=6.324670226228126, CurrSamplesPerSec=5.698775107760346, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:58:24,936] [INFO] [logging.py:68:log_dist] [Rank 0] step=410, skipped=5, lr=[9.660926275674324e-06], mom=[[0.9, 0.999]] +[2022-12-16 20:58:24,938] [INFO] [timer.py:197:stop] 0/820, RunningAvgSamplesPerSec=6.3246971948466655, CurrSamplesPerSec=5.701889208658522, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:58:36,585] [INFO] [timer.py:197:stop] 0/822, RunningAvgSamplesPerSec=6.324232119390502, CurrSamplesPerSec=5.404802825653943, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:58:47,931] [INFO] [timer.py:197:stop] 0/824, RunningAvgSamplesPerSec=6.324252861625952, CurrSamplesPerSec=5.691459011684438, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:58:59,279] [INFO] [timer.py:197:stop] 0/826, RunningAvgSamplesPerSec=6.324276862279914, CurrSamplesPerSec=5.691145039857559, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:59:10,688] [INFO] [timer.py:197:stop] 0/828, RunningAvgSamplesPerSec=6.324198814394731, CurrSamplesPerSec=5.654148874633871, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:59:22,046] [INFO] [timer.py:197:stop] 0/830, RunningAvgSamplesPerSec=6.324261490328659, CurrSamplesPerSec=5.718737973915092, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:59:33,412] [INFO] [timer.py:197:stop] 0/832, RunningAvgSamplesPerSec=6.324283135016285, CurrSamplesPerSec=5.693500069483766, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:59:44,762] [INFO] [timer.py:197:stop] 0/834, RunningAvgSamplesPerSec=6.3243167430060785, CurrSamplesPerSec=5.695427790856719, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 20:59:56,411] [INFO] [timer.py:197:stop] 0/836, RunningAvgSamplesPerSec=6.3243108942685415, CurrSamplesPerSec=5.677710568918598, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:00:08,280] [INFO] [timer.py:197:stop] 0/838, RunningAvgSamplesPerSec=6.324302607187967, CurrSamplesPerSec=5.6794915808427024, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:00:20,196] [INFO] [logging.py:68:log_dist] [Rank 0] step=420, skipped=5, lr=[9.700174853763023e-06], mom=[[0.9, 0.999]] +[2022-12-16 21:00:20,197] [INFO] [timer.py:197:stop] 0/840, RunningAvgSamplesPerSec=6.324301713290257, CurrSamplesPerSec=5.685020234855203, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:00:32,195] [INFO] [timer.py:197:stop] 0/842, RunningAvgSamplesPerSec=6.324186164489815, CurrSamplesPerSec=5.6956958278894705, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:00:43,595] [INFO] [timer.py:197:stop] 0/844, RunningAvgSamplesPerSec=6.324133462309744, CurrSamplesPerSec=5.6358783857465875, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:00:55,109] [INFO] [timer.py:197:stop] 0/846, RunningAvgSamplesPerSec=6.324170851164534, CurrSamplesPerSec=5.717252256195339, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:01:06,656] [INFO] [timer.py:197:stop] 0/848, RunningAvgSamplesPerSec=6.324132295042807, CurrSamplesPerSec=5.647554883343113, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:01:18,191] [INFO] [timer.py:197:stop] 0/850, RunningAvgSamplesPerSec=6.323868844928134, CurrSamplesPerSec=5.5051387907455664, MemAllocated=3.0GB, MaxMemAllocated=19.53GB {'loss': 0.0675, 'learning_rate': 9.719445885591654e-06, 'epoch': 1.8} -[2022-12-16 13:59:52,450] [INFO] [timer.py:197:stop] 0/852, RunningAvgSamplesPerSec=6.339135535487173, CurrSamplesPerSec=5.704070111186627, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:00:03,799] [INFO] [timer.py:197:stop] 0/854, RunningAvgSamplesPerSec=6.339113121822704, CurrSamplesPerSec=5.703067658172513, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:00:15,141] [INFO] [timer.py:197:stop] 0/856, RunningAvgSamplesPerSec=6.339054028633934, CurrSamplesPerSec=5.676935614246414, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:00:26,484] [INFO] [timer.py:197:stop] 0/858, RunningAvgSamplesPerSec=6.339043425322181, CurrSamplesPerSec=5.724982083002299, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:00:37,788] [INFO] [logging.py:68:log_dist] [Rank 0] step=430, skipped=5, lr=[9.738488852516646e-06], mom=[[0.9, 0.999]] -[2022-12-16 14:00:37,790] [INFO] [timer.py:197:stop] 0/860, RunningAvgSamplesPerSec=6.339088069486757, CurrSamplesPerSec=5.7306706569410135, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:00:49,138] [INFO] [timer.py:197:stop] 0/862, RunningAvgSamplesPerSec=6.3390868336999295, CurrSamplesPerSec=5.698970864441769, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:01:00,453] [INFO] [timer.py:197:stop] 0/864, RunningAvgSamplesPerSec=6.339104410312392, CurrSamplesPerSec=5.71063547488767, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:01:11,787] [INFO] [timer.py:197:stop] 0/866, RunningAvgSamplesPerSec=6.339121311704636, CurrSamplesPerSec=5.716024363230836, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:01:23,132] [INFO] [timer.py:197:stop] 0/868, RunningAvgSamplesPerSec=6.339120228266978, CurrSamplesPerSec=5.683368354264379, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:01:34,468] [INFO] [timer.py:197:stop] 0/870, RunningAvgSamplesPerSec=6.33910997682536, CurrSamplesPerSec=5.710466856224587, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:01:45,820] [INFO] [timer.py:197:stop] 0/872, RunningAvgSamplesPerSec=6.339100845900049, CurrSamplesPerSec=5.703384401060349, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:01:57,207] [INFO] [timer.py:197:stop] 0/874, RunningAvgSamplesPerSec=6.339047018285702, CurrSamplesPerSec=5.665372737563986, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:02:08,567] [INFO] [timer.py:197:stop] 0/876, RunningAvgSamplesPerSec=6.33901200536423, CurrSamplesPerSec=5.6864747902976305, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:02:19,890] [INFO] [timer.py:197:stop] 0/878, RunningAvgSamplesPerSec=6.339029791739453, CurrSamplesPerSec=5.7194973302594745, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:02:31,207] [INFO] [logging.py:68:log_dist] [Rank 0] step=440, skipped=5, lr=[9.775911746761854e-06], mom=[[0.9, 0.999]] -[2022-12-16 14:02:31,208] [INFO] [timer.py:197:stop] 0/880, RunningAvgSamplesPerSec=6.33903170931319, CurrSamplesPerSec=5.708432797717439, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:02:42,490] [INFO] [timer.py:197:stop] 0/882, RunningAvgSamplesPerSec=6.339064910628587, CurrSamplesPerSec=5.726082885578239, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:02:53,825] [INFO] [timer.py:197:stop] 0/884, RunningAvgSamplesPerSec=6.3390667056076735, CurrSamplesPerSec=5.704602019706414, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:03:05,198] [INFO] [timer.py:197:stop] 0/886, RunningAvgSamplesPerSec=6.339014928405507, CurrSamplesPerSec=5.678717101004349, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:03:16,505] [INFO] [timer.py:197:stop] 0/888, RunningAvgSamplesPerSec=6.339056665813401, CurrSamplesPerSec=5.714859533511053, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:03:27,875] [INFO] [timer.py:197:stop] 0/890, RunningAvgSamplesPerSec=6.339010932760778, CurrSamplesPerSec=5.69104489471728, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:03:39,219] [INFO] [timer.py:197:stop] 0/892, RunningAvgSamplesPerSec=6.338978175113135, CurrSamplesPerSec=5.678695236969803, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:03:50,613] [INFO] [timer.py:197:stop] 0/894, RunningAvgSamplesPerSec=6.338863748832803, CurrSamplesPerSec=5.6349953324302575, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:04:01,935] [INFO] [timer.py:197:stop] 0/896, RunningAvgSamplesPerSec=6.338875518439516, CurrSamplesPerSec=5.706703470467459, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:04:13,303] [INFO] [timer.py:197:stop] 0/898, RunningAvgSamplesPerSec=6.338837763473147, CurrSamplesPerSec=5.676794430539827, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:04:24,632] [INFO] [logging.py:68:log_dist] [Rank 0] step=450, skipped=5, lr=[9.812484046603779e-06], mom=[[0.9, 0.999]] -[2022-12-16 14:04:24,634] [INFO] [timer.py:197:stop] 0/900, RunningAvgSamplesPerSec=6.338848880013622, CurrSamplesPerSec=5.690834722040079, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:01:29,778] [INFO] [timer.py:197:stop] 0/852, RunningAvgSamplesPerSec=6.323895984034452, CurrSamplesPerSec=5.701067683378528, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:01:41,155] [INFO] [timer.py:197:stop] 0/854, RunningAvgSamplesPerSec=6.323866025614896, CurrSamplesPerSec=5.680294398450153, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:01:52,629] [INFO] [timer.py:197:stop] 0/856, RunningAvgSamplesPerSec=6.323646641240253, CurrSamplesPerSec=5.551309222463983, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:02:03,977] [INFO] [timer.py:197:stop] 0/858, RunningAvgSamplesPerSec=6.323661475227139, CurrSamplesPerSec=5.7159530384475685, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:02:15,298] [INFO] [logging.py:68:log_dist] [Rank 0] step=430, skipped=5, lr=[9.738488852516646e-06], mom=[[0.9, 0.999]] +[2022-12-16 21:02:15,300] [INFO] [timer.py:197:stop] 0/860, RunningAvgSamplesPerSec=6.323710149257949, CurrSamplesPerSec=5.719581661471569, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:02:26,710] [INFO] [timer.py:197:stop] 0/862, RunningAvgSamplesPerSec=6.323634804344302, CurrSamplesPerSec=5.62526862139189, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:02:38,037] [INFO] [timer.py:197:stop] 0/864, RunningAvgSamplesPerSec=6.323649281830787, CurrSamplesPerSec=5.681464178147183, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:02:49,370] [INFO] [timer.py:197:stop] 0/866, RunningAvgSamplesPerSec=6.3236838467673975, CurrSamplesPerSec=5.705553350483724, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:03:00,794] [INFO] [timer.py:197:stop] 0/868, RunningAvgSamplesPerSec=6.323587287957588, CurrSamplesPerSec=5.591940037322728, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:03:12,130] [INFO] [timer.py:197:stop] 0/870, RunningAvgSamplesPerSec=6.323606237462717, CurrSamplesPerSec=5.7091401213955075, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:03:23,687] [INFO] [timer.py:197:stop] 0/872, RunningAvgSamplesPerSec=6.323648030461905, CurrSamplesPerSec=5.714125248491172, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:03:35,327] [INFO] [timer.py:197:stop] 0/874, RunningAvgSamplesPerSec=6.32362972848686, CurrSamplesPerSec=5.665148674893301, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:03:46,681] [INFO] [timer.py:197:stop] 0/876, RunningAvgSamplesPerSec=6.323635151245853, CurrSamplesPerSec=5.690080062248546, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:03:58,094] [INFO] [timer.py:197:stop] 0/878, RunningAvgSamplesPerSec=6.323670798847649, CurrSamplesPerSec=5.71115840184258, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:04:09,744] [INFO] [logging.py:68:log_dist] [Rank 0] step=440, skipped=5, lr=[9.775911746761854e-06], mom=[[0.9, 0.999]] +[2022-12-16 21:04:09,746] [INFO] [timer.py:197:stop] 0/880, RunningAvgSamplesPerSec=6.323698927835154, CurrSamplesPerSec=5.699957111577041, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:04:21,036] [INFO] [timer.py:197:stop] 0/882, RunningAvgSamplesPerSec=6.323752552261068, CurrSamplesPerSec=5.719672819971721, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:04:32,626] [INFO] [timer.py:197:stop] 0/884, RunningAvgSamplesPerSec=6.323770566065425, CurrSamplesPerSec=5.690295486589126, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:04:44,097] [INFO] [timer.py:197:stop] 0/886, RunningAvgSamplesPerSec=6.323742611986993, CurrSamplesPerSec=5.6883239854983, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:04:55,411] [INFO] [timer.py:197:stop] 0/888, RunningAvgSamplesPerSec=6.323806743121909, CurrSamplesPerSec=5.707230045830403, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:05:06,778] [INFO] [timer.py:197:stop] 0/890, RunningAvgSamplesPerSec=6.323798891507006, CurrSamplesPerSec=5.698139293801706, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:05:18,153] [INFO] [timer.py:197:stop] 0/892, RunningAvgSamplesPerSec=6.323797970601452, CurrSamplesPerSec=5.689324155599842, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:05:29,477] [INFO] [timer.py:197:stop] 0/894, RunningAvgSamplesPerSec=6.323804280166466, CurrSamplesPerSec=5.693873238627004, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:05:40,811] [INFO] [timer.py:197:stop] 0/896, RunningAvgSamplesPerSec=6.32381869024255, CurrSamplesPerSec=5.687272353769752, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:05:52,541] [INFO] [timer.py:197:stop] 0/898, RunningAvgSamplesPerSec=6.323304078211752, CurrSamplesPerSec=5.66975376791339, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:06:03,859] [INFO] [logging.py:68:log_dist] [Rank 0] step=450, skipped=5, lr=[9.812484046603779e-06], mom=[[0.9, 0.999]] +[2022-12-16 21:06:03,860] [INFO] [timer.py:197:stop] 0/900, RunningAvgSamplesPerSec=6.323361252627297, CurrSamplesPerSec=5.696158486770488, MemAllocated=3.0GB, MaxMemAllocated=19.53GB {'loss': 0.0662, 'learning_rate': 9.812484046603779e-06, 'epoch': 1.91} -[2022-12-16 14:04:35,954] [INFO] [timer.py:197:stop] 0/902, RunningAvgSamplesPerSec=6.338874271877185, CurrSamplesPerSec=5.710595141563633, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:04:47,356] [INFO] [timer.py:197:stop] 0/904, RunningAvgSamplesPerSec=6.338794422774433, CurrSamplesPerSec=5.671700431786738, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:04:58,670] [INFO] [timer.py:197:stop] 0/906, RunningAvgSamplesPerSec=6.338806125893732, CurrSamplesPerSec=5.714749549054262, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:05:09,998] [INFO] [timer.py:197:stop] 0/908, RunningAvgSamplesPerSec=6.338802150105241, CurrSamplesPerSec=5.70888028766234, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:05:21,343] [INFO] [timer.py:197:stop] 0/910, RunningAvgSamplesPerSec=6.338808628157282, CurrSamplesPerSec=5.69326556555695, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:05:32,694] [INFO] [timer.py:197:stop] 0/912, RunningAvgSamplesPerSec=6.338800685386852, CurrSamplesPerSec=5.699132028984794, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:05:44,031] [INFO] [timer.py:197:stop] 0/914, RunningAvgSamplesPerSec=6.338801852033477, CurrSamplesPerSec=5.7040834440531585, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:05:55,373] [INFO] [timer.py:197:stop] 0/916, RunningAvgSamplesPerSec=6.338794724643747, CurrSamplesPerSec=5.721220520424191, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:06:06,679] [INFO] [timer.py:197:stop] 0/918, RunningAvgSamplesPerSec=6.338835604451315, CurrSamplesPerSec=5.713366345020804, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:06:18,011] [INFO] [logging.py:68:log_dist] [Rank 0] step=460, skipped=5, lr=[9.84824356101363e-06], mom=[[0.9, 0.999]] -[2022-12-16 14:06:18,013] [INFO] [timer.py:197:stop] 0/920, RunningAvgSamplesPerSec=6.3388391544457265, CurrSamplesPerSec=5.708192692317932, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:06:29,378] [INFO] [timer.py:197:stop] 0/922, RunningAvgSamplesPerSec=6.3387960574018845, CurrSamplesPerSec=5.702751919676429, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:06:40,716] [INFO] [timer.py:197:stop] 0/924, RunningAvgSamplesPerSec=6.338775821963682, CurrSamplesPerSec=5.704209745753348, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:06:52,042] [INFO] [timer.py:197:stop] 0/926, RunningAvgSamplesPerSec=6.338796058710522, CurrSamplesPerSec=5.7262526720404985, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:07:03,367] [INFO] [timer.py:197:stop] 0/928, RunningAvgSamplesPerSec=6.338807359023693, CurrSamplesPerSec=5.711466808399003, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:07:14,713] [INFO] [timer.py:197:stop] 0/930, RunningAvgSamplesPerSec=6.3387945089784, CurrSamplesPerSec=5.711466565354717, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:07:26,078] [INFO] [timer.py:197:stop] 0/932, RunningAvgSamplesPerSec=6.338758976723027, CurrSamplesPerSec=5.670853079407315, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:07:37,451] [INFO] [timer.py:197:stop] 0/934, RunningAvgSamplesPerSec=6.338711815063571, CurrSamplesPerSec=5.681176798107868, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:07:48,763] [INFO] [timer.py:197:stop] 0/936, RunningAvgSamplesPerSec=6.338725117276131, CurrSamplesPerSec=5.705479133818621, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:08:00,080] [INFO] [timer.py:197:stop] 0/938, RunningAvgSamplesPerSec=6.338733055113867, CurrSamplesPerSec=5.7039661169667735, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:08:11,453] [INFO] [logging.py:68:log_dist] [Rank 0] step=470, skipped=5, lr=[9.883225632758308e-06], mom=[[0.9, 0.999]] -[2022-12-16 14:08:11,454] [INFO] [timer.py:197:stop] 0/940, RunningAvgSamplesPerSec=6.338727478621597, CurrSamplesPerSec=5.71444078788409, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:08:22,842] [INFO] [timer.py:197:stop] 0/942, RunningAvgSamplesPerSec=6.338678473769538, CurrSamplesPerSec=5.684653281316449, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:08:31,392] [INFO] [timer.py:197:stop] 0/944, RunningAvgSamplesPerSec=6.341940146686614, CurrSamplesPerSec=10.143572907692075, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:08:42,755] [INFO] [timer.py:197:stop] 0/946, RunningAvgSamplesPerSec=6.341880087981473, CurrSamplesPerSec=5.65531315172878, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:08:54,074] [INFO] [timer.py:197:stop] 0/948, RunningAvgSamplesPerSec=6.341899798124187, CurrSamplesPerSec=5.714895547040833, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:09:05,413] [INFO] [timer.py:197:stop] 0/950, RunningAvgSamplesPerSec=6.341895452676643, CurrSamplesPerSec=5.71196582193226, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:06:15,202] [INFO] [timer.py:197:stop] 0/902, RunningAvgSamplesPerSec=6.323392772084618, CurrSamplesPerSec=5.693829035456161, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:06:26,792] [INFO] [timer.py:197:stop] 0/904, RunningAvgSamplesPerSec=6.323328765368771, CurrSamplesPerSec=5.675209477210574, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:06:38,301] [INFO] [timer.py:197:stop] 0/906, RunningAvgSamplesPerSec=6.323365724637102, CurrSamplesPerSec=5.71127262260955, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:06:49,669] [INFO] [timer.py:197:stop] 0/908, RunningAvgSamplesPerSec=6.323331986885105, CurrSamplesPerSec=5.659760969514658, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:07:01,224] [INFO] [timer.py:197:stop] 0/910, RunningAvgSamplesPerSec=6.323368369110442, CurrSamplesPerSec=5.691273423843159, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:07:12,732] [INFO] [timer.py:197:stop] 0/912, RunningAvgSamplesPerSec=6.3233877302253045, CurrSamplesPerSec=5.696535631113576, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:07:24,152] [INFO] [timer.py:197:stop] 0/914, RunningAvgSamplesPerSec=6.323333379923915, CurrSamplesPerSec=5.6425592209689155, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:07:35,594] [INFO] [timer.py:197:stop] 0/916, RunningAvgSamplesPerSec=6.323348683822525, CurrSamplesPerSec=5.708248043439559, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:07:47,094] [INFO] [timer.py:197:stop] 0/918, RunningAvgSamplesPerSec=6.323396275366316, CurrSamplesPerSec=5.695279160815137, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:07:58,467] [INFO] [logging.py:68:log_dist] [Rank 0] step=460, skipped=5, lr=[9.84824356101363e-06], mom=[[0.9, 0.999]] +[2022-12-16 21:07:58,468] [INFO] [timer.py:197:stop] 0/920, RunningAvgSamplesPerSec=6.323380406432734, CurrSamplesPerSec=5.670665239188501, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:08:10,097] [INFO] [timer.py:197:stop] 0/922, RunningAvgSamplesPerSec=6.323363664915916, CurrSamplesPerSec=5.695054176276154, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:08:21,496] [INFO] [timer.py:197:stop] 0/924, RunningAvgSamplesPerSec=6.323303982435558, CurrSamplesPerSec=5.652892698429706, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:08:32,970] [INFO] [timer.py:197:stop] 0/926, RunningAvgSamplesPerSec=6.323196876502637, CurrSamplesPerSec=5.609877765122621, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:08:44,327] [INFO] [timer.py:197:stop] 0/928, RunningAvgSamplesPerSec=6.3232058943138485, CurrSamplesPerSec=5.68537928839975, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:08:55,686] [INFO] [timer.py:197:stop] 0/930, RunningAvgSamplesPerSec=6.323205497567241, CurrSamplesPerSec=5.706007909046854, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:09:07,353] [INFO] [timer.py:197:stop] 0/932, RunningAvgSamplesPerSec=6.322831418899166, CurrSamplesPerSec=5.399923920213184, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:09:18,735] [INFO] [timer.py:197:stop] 0/934, RunningAvgSamplesPerSec=6.32280846623546, CurrSamplesPerSec=5.669265454860022, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:09:30,257] [INFO] [timer.py:197:stop] 0/936, RunningAvgSamplesPerSec=6.322880930092381, CurrSamplesPerSec=5.72270659354622, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:09:41,888] [INFO] [timer.py:197:stop] 0/938, RunningAvgSamplesPerSec=6.322911512653137, CurrSamplesPerSec=5.6971461789274604, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:09:53,289] [INFO] [logging.py:68:log_dist] [Rank 0] step=470, skipped=5, lr=[9.883225632758308e-06], mom=[[0.9, 0.999]] +[2022-12-16 21:09:53,292] [INFO] [timer.py:197:stop] 0/940, RunningAvgSamplesPerSec=6.322874284590231, CurrSamplesPerSec=5.65775308872727, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:10:04,647] [INFO] [timer.py:197:stop] 0/942, RunningAvgSamplesPerSec=6.322877183552834, CurrSamplesPerSec=5.6954686352675905, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:10:13,197] [INFO] [timer.py:197:stop] 0/944, RunningAvgSamplesPerSec=6.326150664923261, CurrSamplesPerSec=10.148292026847534, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:10:24,559] [INFO] [timer.py:197:stop] 0/946, RunningAvgSamplesPerSec=6.326127489531616, CurrSamplesPerSec=5.6568171532502465, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:10:35,885] [INFO] [timer.py:197:stop] 0/948, RunningAvgSamplesPerSec=6.3261891603196, CurrSamplesPerSec=5.717331163249521, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:10:47,312] [INFO] [timer.py:197:stop] 0/950, RunningAvgSamplesPerSec=6.32620549424659, CurrSamplesPerSec=5.708362390603286, MemAllocated=3.0GB, MaxMemAllocated=19.53GB {'loss': 0.0615, 'learning_rate': 9.900435550016748e-06, 'epoch': 2.01} -[2022-12-16 14:09:16,729] [INFO] [timer.py:197:stop] 0/952, RunningAvgSamplesPerSec=6.341902533421902, CurrSamplesPerSec=5.701898413407802, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:09:28,062] [INFO] [timer.py:197:stop] 0/954, RunningAvgSamplesPerSec=6.341888653558534, CurrSamplesPerSec=5.6888196864128115, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:09:39,353] [INFO] [timer.py:197:stop] 0/956, RunningAvgSamplesPerSec=6.341922296957011, CurrSamplesPerSec=5.706562500651045, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:09:50,669] [INFO] [timer.py:197:stop] 0/958, RunningAvgSamplesPerSec=6.341944759675942, CurrSamplesPerSec=5.709868753326772, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:10:01,997] [INFO] [logging.py:68:log_dist] [Rank 0] step=480, skipped=5, lr=[9.917463348331534e-06], mom=[[0.9, 0.999]] -[2022-12-16 14:10:01,999] [INFO] [timer.py:197:stop] 0/960, RunningAvgSamplesPerSec=6.341948353997388, CurrSamplesPerSec=5.681460811183431, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:10:13,331] [INFO] [timer.py:197:stop] 0/962, RunningAvgSamplesPerSec=6.341906485418308, CurrSamplesPerSec=5.683349823598064, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:10:24,663] [INFO] [timer.py:197:stop] 0/964, RunningAvgSamplesPerSec=6.34190638859447, CurrSamplesPerSec=5.716055766174184, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:10:36,011] [INFO] [timer.py:197:stop] 0/966, RunningAvgSamplesPerSec=6.341885312885797, CurrSamplesPerSec=5.682461215444921, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:10:47,359] [INFO] [timer.py:197:stop] 0/968, RunningAvgSamplesPerSec=6.341865407507428, CurrSamplesPerSec=5.686475753985557, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:10:58,703] [INFO] [timer.py:197:stop] 0/970, RunningAvgSamplesPerSec=6.341830751002844, CurrSamplesPerSec=5.699992211322698, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:11:09,981] [INFO] [timer.py:197:stop] 0/972, RunningAvgSamplesPerSec=6.3418804675108085, CurrSamplesPerSec=5.728405313345917, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:11:21,335] [INFO] [timer.py:197:stop] 0/974, RunningAvgSamplesPerSec=6.3418344045834445, CurrSamplesPerSec=5.673371435431231, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:11:32,674] [INFO] [timer.py:197:stop] 0/976, RunningAvgSamplesPerSec=6.341824861899911, CurrSamplesPerSec=5.70538988234719, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:11:44,001] [INFO] [timer.py:197:stop] 0/978, RunningAvgSamplesPerSec=6.341817990135774, CurrSamplesPerSec=5.707448712355747, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:11:55,348] [INFO] [logging.py:68:log_dist] [Rank 0] step=490, skipped=5, lr=[9.950987726012135e-06], mom=[[0.9, 0.999]] -[2022-12-16 14:11:55,350] [INFO] [timer.py:197:stop] 0/980, RunningAvgSamplesPerSec=6.341799433135131, CurrSamplesPerSec=5.695586096423975, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:12:06,683] [INFO] [timer.py:197:stop] 0/982, RunningAvgSamplesPerSec=6.341779990663049, CurrSamplesPerSec=5.696961912690353, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:12:18,051] [INFO] [timer.py:197:stop] 0/984, RunningAvgSamplesPerSec=6.341719172449315, CurrSamplesPerSec=5.6837885755779025, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:12:29,381] [INFO] [timer.py:197:stop] 0/986, RunningAvgSamplesPerSec=6.3417043868592184, CurrSamplesPerSec=5.705059579181187, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:12:40,734] [INFO] [timer.py:197:stop] 0/988, RunningAvgSamplesPerSec=6.341692069686432, CurrSamplesPerSec=5.710390568055206, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:12:52,100] [INFO] [timer.py:197:stop] 0/990, RunningAvgSamplesPerSec=6.341653951670791, CurrSamplesPerSec=5.689756353319079, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:13:03,398] [INFO] [timer.py:197:stop] 0/992, RunningAvgSamplesPerSec=6.341680481354011, CurrSamplesPerSec=5.729182646250193, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:13:14,687] [INFO] [timer.py:197:stop] 0/994, RunningAvgSamplesPerSec=6.341738585222543, CurrSamplesPerSec=5.734979308517798, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:13:26,027] [INFO] [timer.py:197:stop] 0/996, RunningAvgSamplesPerSec=6.341711829161802, CurrSamplesPerSec=5.68931595605461, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:13:37,337] [INFO] [timer.py:197:stop] 0/998, RunningAvgSamplesPerSec=6.341744360489826, CurrSamplesPerSec=5.727179957039876, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:13:48,636] [INFO] [logging.py:68:log_dist] [Rank 0] step=500, skipped=5, lr=[9.98382788472848e-06], mom=[[0.9, 0.999]] -[2022-12-16 14:13:48,638] [INFO] [timer.py:197:stop] 0/1000, RunningAvgSamplesPerSec=6.341788114796591, CurrSamplesPerSec=5.73382854195397, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:10:58,668] [INFO] [timer.py:197:stop] 0/952, RunningAvgSamplesPerSec=6.326192318553131, CurrSamplesPerSec=5.658891651344986, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:11:10,010] [INFO] [timer.py:197:stop] 0/954, RunningAvgSamplesPerSec=6.326204475079454, CurrSamplesPerSec=5.68568973408932, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:11:21,376] [INFO] [timer.py:197:stop] 0/956, RunningAvgSamplesPerSec=6.326191030150801, CurrSamplesPerSec=5.6838889469060225, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:11:32,728] [INFO] [timer.py:197:stop] 0/958, RunningAvgSamplesPerSec=6.326197330889036, CurrSamplesPerSec=5.6822468649952, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:11:44,292] [INFO] [logging.py:68:log_dist] [Rank 0] step=480, skipped=5, lr=[9.917463348331534e-06], mom=[[0.9, 0.999]] +[2022-12-16 21:11:44,298] [INFO] [timer.py:197:stop] 0/960, RunningAvgSamplesPerSec=6.3259327540553665, CurrSamplesPerSec=5.469384964136454, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:11:55,877] [INFO] [timer.py:197:stop] 0/962, RunningAvgSamplesPerSec=6.325935854692364, CurrSamplesPerSec=5.712130639515811, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:12:07,434] [INFO] [timer.py:197:stop] 0/964, RunningAvgSamplesPerSec=6.325921725199111, CurrSamplesPerSec=5.700722626182441, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:12:18,923] [INFO] [timer.py:197:stop] 0/966, RunningAvgSamplesPerSec=6.325759514089763, CurrSamplesPerSec=5.568770311968856, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:12:30,325] [INFO] [timer.py:197:stop] 0/968, RunningAvgSamplesPerSec=6.325740953625998, CurrSamplesPerSec=5.680124922543402, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:12:41,706] [INFO] [timer.py:197:stop] 0/970, RunningAvgSamplesPerSec=6.325739440815068, CurrSamplesPerSec=5.713497922489823, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:12:53,334] [INFO] [timer.py:197:stop] 0/972, RunningAvgSamplesPerSec=6.325375565220704, CurrSamplesPerSec=5.402155811404497, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:13:04,701] [INFO] [timer.py:197:stop] 0/974, RunningAvgSamplesPerSec=6.3253512622784145, CurrSamplesPerSec=5.666073017760117, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:13:16,312] [INFO] [timer.py:197:stop] 0/976, RunningAvgSamplesPerSec=6.325380050460924, CurrSamplesPerSec=5.706268694383452, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:13:28,013] [INFO] [timer.py:197:stop] 0/978, RunningAvgSamplesPerSec=6.325435711327527, CurrSamplesPerSec=5.72600691245085, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:13:39,351] [INFO] [logging.py:68:log_dist] [Rank 0] step=490, skipped=5, lr=[9.950987726012135e-06], mom=[[0.9, 0.999]] +[2022-12-16 21:13:39,353] [INFO] [timer.py:197:stop] 0/980, RunningAvgSamplesPerSec=6.325457773977271, CurrSamplesPerSec=5.694198382492763, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:13:50,967] [INFO] [timer.py:197:stop] 0/982, RunningAvgSamplesPerSec=6.325489117445667, CurrSamplesPerSec=5.702591761786533, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:14:02,677] [INFO] [timer.py:197:stop] 0/984, RunningAvgSamplesPerSec=6.325408873295665, CurrSamplesPerSec=5.6763934878966475, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:14:14,003] [INFO] [timer.py:197:stop] 0/986, RunningAvgSamplesPerSec=6.325441453698785, CurrSamplesPerSec=5.705198777174381, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:14:25,351] [INFO] [timer.py:197:stop] 0/988, RunningAvgSamplesPerSec=6.325463222633959, CurrSamplesPerSec=5.6933520230469705, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:14:36,810] [INFO] [timer.py:197:stop] 0/990, RunningAvgSamplesPerSec=6.325347026707095, CurrSamplesPerSec=5.7087785461304295, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:14:48,122] [INFO] [timer.py:197:stop] 0/992, RunningAvgSamplesPerSec=6.3254064143464, CurrSamplesPerSec=5.71673381308937, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:14:59,654] [INFO] [timer.py:197:stop] 0/994, RunningAvgSamplesPerSec=6.325195870913208, CurrSamplesPerSec=5.491621718647289, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:15:11,087] [INFO] [timer.py:197:stop] 0/996, RunningAvgSamplesPerSec=6.325231062084005, CurrSamplesPerSec=5.711203360535411, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:15:22,418] [INFO] [timer.py:197:stop] 0/998, RunningAvgSamplesPerSec=6.3252868808302996, CurrSamplesPerSec=5.712874868002988, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:15:33,989] [INFO] [logging.py:68:log_dist] [Rank 0] step=500, skipped=5, lr=[9.98382788472848e-06], mom=[[0.9, 0.999]] +[2022-12-16 21:15:33,991] [INFO] [timer.py:197:stop] 0/1000, RunningAvgSamplesPerSec=6.325035740808991, CurrSamplesPerSec=5.474089503245498, MemAllocated=3.0GB, MaxMemAllocated=19.53GB {'loss': 0.0323, 'learning_rate': 9.98382788472848e-06, 'epoch': 2.12} -[2022-12-16 14:13:59,943] [INFO] [timer.py:197:stop] 0/1002, RunningAvgSamplesPerSec=6.341826419288528, CurrSamplesPerSec=5.737755101866328, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:14:11,272] [INFO] [timer.py:197:stop] 0/1004, RunningAvgSamplesPerSec=6.341834083046816, CurrSamplesPerSec=5.716180164169838, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:14:22,617] [INFO] [timer.py:197:stop] 0/1006, RunningAvgSamplesPerSec=6.341825232645641, CurrSamplesPerSec=5.695088490706839, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:14:33,917] [INFO] [timer.py:197:stop] 0/1008, RunningAvgSamplesPerSec=6.341866326025775, CurrSamplesPerSec=5.733372724061965, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:14:45,219] [INFO] [timer.py:197:stop] 0/1010, RunningAvgSamplesPerSec=6.34190907891135, CurrSamplesPerSec=5.7319802464981136, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:14:56,525] [INFO] [timer.py:197:stop] 0/1012, RunningAvgSamplesPerSec=6.341945427652831, CurrSamplesPerSec=5.736810167567531, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:15:07,839] [INFO] [timer.py:197:stop] 0/1014, RunningAvgSamplesPerSec=6.341972683683957, CurrSamplesPerSec=5.723454312825036, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:15:19,158] [INFO] [timer.py:197:stop] 0/1016, RunningAvgSamplesPerSec=6.341996054053865, CurrSamplesPerSec=5.714832523661583, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:15:30,481] [INFO] [timer.py:197:stop] 0/1018, RunningAvgSamplesPerSec=6.342007970755142, CurrSamplesPerSec=5.711178572411817, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:15:41,794] [INFO] [logging.py:68:log_dist] [Rank 0] step=510, skipped=5, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-16 14:15:41,795] [INFO] [timer.py:197:stop] 0/1020, RunningAvgSamplesPerSec=6.342031963556685, CurrSamplesPerSec=5.733811640372937, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:15:53,096] [INFO] [timer.py:197:stop] 0/1022, RunningAvgSamplesPerSec=6.342077485529769, CurrSamplesPerSec=5.72531347603008, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:16:04,384] [INFO] [timer.py:197:stop] 0/1024, RunningAvgSamplesPerSec=6.3421136079117195, CurrSamplesPerSec=5.729963125598802, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:16:15,695] [INFO] [timer.py:197:stop] 0/1026, RunningAvgSamplesPerSec=6.342141516295768, CurrSamplesPerSec=5.7137732576155456, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:16:26,993] [INFO] [timer.py:197:stop] 0/1028, RunningAvgSamplesPerSec=6.342184555428346, CurrSamplesPerSec=5.73141874572896, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:16:38,254] [INFO] [timer.py:197:stop] 0/1030, RunningAvgSamplesPerSec=6.342268748281073, CurrSamplesPerSec=5.748393992222705, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:16:49,595] [INFO] [timer.py:197:stop] 0/1032, RunningAvgSamplesPerSec=6.342256928534129, CurrSamplesPerSec=5.707608172317402, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:17:00,885] [INFO] [timer.py:197:stop] 0/1034, RunningAvgSamplesPerSec=6.34230569518051, CurrSamplesPerSec=5.729498138778034, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:17:12,176] [INFO] [timer.py:197:stop] 0/1036, RunningAvgSamplesPerSec=6.342340358339657, CurrSamplesPerSec=5.728463991073313, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:17:23,508] [INFO] [timer.py:197:stop] 0/1038, RunningAvgSamplesPerSec=6.342352635604062, CurrSamplesPerSec=5.71814934344548, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:17:34,822] [INFO] [logging.py:68:log_dist] [Rank 0] step=520, skipped=5, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-16 14:17:34,823] [INFO] [timer.py:197:stop] 0/1040, RunningAvgSamplesPerSec=6.342354697339167, CurrSamplesPerSec=5.694759137565851, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:17:46,162] [INFO] [timer.py:197:stop] 0/1042, RunningAvgSamplesPerSec=6.342361815270279, CurrSamplesPerSec=5.706376654359612, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:17:57,487] [INFO] [timer.py:197:stop] 0/1044, RunningAvgSamplesPerSec=6.34235592630326, CurrSamplesPerSec=5.700416105141542, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:18:08,833] [INFO] [timer.py:197:stop] 0/1046, RunningAvgSamplesPerSec=6.342342159138999, CurrSamplesPerSec=5.691955741941306, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:18:20,144] [INFO] [timer.py:197:stop] 0/1048, RunningAvgSamplesPerSec=6.342370600740833, CurrSamplesPerSec=5.702918629122964, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:18:31,465] [INFO] [timer.py:197:stop] 0/1050, RunningAvgSamplesPerSec=6.342387230037039, CurrSamplesPerSec=5.715598631793174, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -{'loss': 0.0333, 'learning_rate': 1e-05, 'epoch': 2.22} -[2022-12-16 14:18:42,812] [INFO] [timer.py:197:stop] 0/1052, RunningAvgSamplesPerSec=6.3423771167743235, CurrSamplesPerSec=5.701767127426415, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:18:54,153] [INFO] [timer.py:197:stop] 0/1054, RunningAvgSamplesPerSec=6.342372589094107, CurrSamplesPerSec=5.7018460920706975, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:19:05,510] [INFO] [timer.py:197:stop] 0/1056, RunningAvgSamplesPerSec=6.342383222237946, CurrSamplesPerSec=5.714399670907678, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:19:16,839] [INFO] [timer.py:197:stop] 0/1058, RunningAvgSamplesPerSec=6.342371050847959, CurrSamplesPerSec=5.6924518346253254, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:19:28,246] [INFO] [logging.py:68:log_dist] [Rank 0] step=530, skipped=5, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-16 14:19:28,247] [INFO] [timer.py:197:stop] 0/1060, RunningAvgSamplesPerSec=6.342299448823461, CurrSamplesPerSec=5.650171271874156, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:19:39,585] [INFO] [timer.py:197:stop] 0/1062, RunningAvgSamplesPerSec=6.342313857990676, CurrSamplesPerSec=5.7169429810260475, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:19:51,015] [INFO] [timer.py:197:stop] 0/1064, RunningAvgSamplesPerSec=6.342204598759972, CurrSamplesPerSec=5.628198329627124, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:20:02,321] [INFO] [timer.py:197:stop] 0/1066, RunningAvgSamplesPerSec=6.342220668936828, CurrSamplesPerSec=5.71764389040324, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:20:13,641] [INFO] [timer.py:197:stop] 0/1068, RunningAvgSamplesPerSec=6.342239595971297, CurrSamplesPerSec=5.710870196981978, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:20:24,969] [INFO] [timer.py:197:stop] 0/1070, RunningAvgSamplesPerSec=6.342253991058788, CurrSamplesPerSec=5.718209516579019, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:20:36,325] [INFO] [timer.py:197:stop] 0/1072, RunningAvgSamplesPerSec=6.342224287357595, CurrSamplesPerSec=5.677394990102672, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:20:47,669] [INFO] [timer.py:197:stop] 0/1074, RunningAvgSamplesPerSec=6.342204821552335, CurrSamplesPerSec=5.681042376756076, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:20:58,990] [INFO] [timer.py:197:stop] 0/1076, RunningAvgSamplesPerSec=6.3422140360505805, CurrSamplesPerSec=5.7117669838789435, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:21:10,325] [INFO] [timer.py:197:stop] 0/1078, RunningAvgSamplesPerSec=6.342189336420485, CurrSamplesPerSec=5.700909557246923, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:21:21,654] [INFO] [logging.py:68:log_dist] [Rank 0] step=540, skipped=5, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-16 14:21:21,655] [INFO] [timer.py:197:stop] 0/1080, RunningAvgSamplesPerSec=6.3421901246825785, CurrSamplesPerSec=5.694489014269896, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:21:32,959] [INFO] [timer.py:197:stop] 0/1082, RunningAvgSamplesPerSec=6.342219345786515, CurrSamplesPerSec=5.711439344525709, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:21:44,279] [INFO] [timer.py:197:stop] 0/1084, RunningAvgSamplesPerSec=6.342212591177409, CurrSamplesPerSec=5.7033366571490465, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:21:55,574] [INFO] [timer.py:197:stop] 0/1086, RunningAvgSamplesPerSec=6.3422593216078065, CurrSamplesPerSec=5.724233232120478, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:22:06,888] [INFO] [timer.py:197:stop] 0/1088, RunningAvgSamplesPerSec=6.342280129547322, CurrSamplesPerSec=5.740310165045906, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:22:18,193] [INFO] [timer.py:197:stop] 0/1090, RunningAvgSamplesPerSec=6.342288921707194, CurrSamplesPerSec=5.716471339941928, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:22:29,514] [INFO] [timer.py:197:stop] 0/1092, RunningAvgSamplesPerSec=6.342279477932507, CurrSamplesPerSec=5.693720825125829, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:22:40,839] [INFO] [timer.py:197:stop] 0/1094, RunningAvgSamplesPerSec=6.342282469920605, CurrSamplesPerSec=5.709584078983662, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:22:52,177] [INFO] [timer.py:197:stop] 0/1096, RunningAvgSamplesPerSec=6.342254628403949, CurrSamplesPerSec=5.687472141105936, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:23:03,476] [INFO] [timer.py:197:stop] 0/1098, RunningAvgSamplesPerSec=6.34227071720925, CurrSamplesPerSec=5.719418363402194, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:23:14,765] [INFO] [logging.py:68:log_dist] [Rank 0] step=550, skipped=5, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-16 14:23:14,767] [INFO] [timer.py:197:stop] 0/1100, RunningAvgSamplesPerSec=6.342297649309376, CurrSamplesPerSec=5.725382103846932, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -{'loss': 0.0353, 'learning_rate': 1e-05, 'epoch': 2.33} -[2022-12-16 14:23:26,073] [INFO] [timer.py:197:stop] 0/1102, RunningAvgSamplesPerSec=6.3423095286973865, CurrSamplesPerSec=5.718222184768506, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:23:37,407] [INFO] [timer.py:197:stop] 0/1104, RunningAvgSamplesPerSec=6.342307992493998, CurrSamplesPerSec=5.7094963991746575, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:23:48,705] [INFO] [timer.py:197:stop] 0/1106, RunningAvgSamplesPerSec=6.342328757169193, CurrSamplesPerSec=5.738285215559371, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:24:00,071] [INFO] [timer.py:197:stop] 0/1108, RunningAvgSamplesPerSec=6.34232593711831, CurrSamplesPerSec=5.698098894893131, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:24:11,376] [INFO] [timer.py:197:stop] 0/1110, RunningAvgSamplesPerSec=6.342336434178085, CurrSamplesPerSec=5.706018582583476, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:24:22,695] [INFO] [timer.py:197:stop] 0/1112, RunningAvgSamplesPerSec=6.342357512645067, CurrSamplesPerSec=5.722553608490977, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:24:33,978] [INFO] [timer.py:197:stop] 0/1114, RunningAvgSamplesPerSec=6.342394223584167, CurrSamplesPerSec=5.7394996210805616, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:24:45,297] [INFO] [timer.py:197:stop] 0/1116, RunningAvgSamplesPerSec=6.342408601242247, CurrSamplesPerSec=5.7215861125861975, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:24:56,637] [INFO] [timer.py:197:stop] 0/1118, RunningAvgSamplesPerSec=6.342398456890344, CurrSamplesPerSec=5.690485835890004, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:25:07,926] [INFO] [logging.py:68:log_dist] [Rank 0] step=560, skipped=5, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-16 14:25:07,928] [INFO] [timer.py:197:stop] 0/1120, RunningAvgSamplesPerSec=6.342424392095902, CurrSamplesPerSec=5.707812789246779, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:25:19,264] [INFO] [timer.py:197:stop] 0/1122, RunningAvgSamplesPerSec=6.3424174848206185, CurrSamplesPerSec=5.709425237209748, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:25:30,609] [INFO] [timer.py:197:stop] 0/1124, RunningAvgSamplesPerSec=6.3424051767209475, CurrSamplesPerSec=5.706270392598644, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:25:41,981] [INFO] [timer.py:197:stop] 0/1126, RunningAvgSamplesPerSec=6.342360219547285, CurrSamplesPerSec=5.688850308903612, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:25:53,356] [INFO] [timer.py:197:stop] 0/1128, RunningAvgSamplesPerSec=6.342313010480555, CurrSamplesPerSec=5.661790099182979, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:26:04,709] [INFO] [timer.py:197:stop] 0/1130, RunningAvgSamplesPerSec=6.342280714904902, CurrSamplesPerSec=5.705290690373963, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:26:16,073] [INFO] [timer.py:197:stop] 0/1132, RunningAvgSamplesPerSec=6.342249829287662, CurrSamplesPerSec=5.680866604586072, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:26:27,424] [INFO] [timer.py:197:stop] 0/1134, RunningAvgSamplesPerSec=6.342245949095078, CurrSamplesPerSec=5.696907022034271, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:26:38,757] [INFO] [timer.py:197:stop] 0/1136, RunningAvgSamplesPerSec=6.342257240882108, CurrSamplesPerSec=5.7099586308153585, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:26:50,099] [INFO] [timer.py:197:stop] 0/1138, RunningAvgSamplesPerSec=6.342250841337713, CurrSamplesPerSec=5.710703994250225, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:27:01,418] [INFO] [logging.py:68:log_dist] [Rank 0] step=570, skipped=5, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-16 14:27:01,420] [INFO] [timer.py:197:stop] 0/1140, RunningAvgSamplesPerSec=6.342269594601758, CurrSamplesPerSec=5.701177141826042, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:27:12,760] [INFO] [timer.py:197:stop] 0/1142, RunningAvgSamplesPerSec=6.34225031713007, CurrSamplesPerSec=5.689825096173746, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:27:24,106] [INFO] [timer.py:197:stop] 0/1144, RunningAvgSamplesPerSec=6.342225176457225, CurrSamplesPerSec=5.6898470459965065, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:27:35,624] [INFO] [timer.py:197:stop] 0/1146, RunningAvgSamplesPerSec=6.3421971463894735, CurrSamplesPerSec=5.691119701597441, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:27:47,491] [INFO] [timer.py:197:stop] 0/1148, RunningAvgSamplesPerSec=6.3421702855318784, CurrSamplesPerSec=5.682071018284411, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:27:59,341] [INFO] [timer.py:197:stop] 0/1150, RunningAvgSamplesPerSec=6.3421815124282555, CurrSamplesPerSec=5.712243440512417, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -{'loss': 0.0353, 'learning_rate': 1e-05, 'epoch': 2.44} -[2022-12-16 14:28:11,032] [INFO] [timer.py:197:stop] 0/1152, RunningAvgSamplesPerSec=6.342142174405201, CurrSamplesPerSec=5.693715269795763, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:28:22,383] [INFO] [timer.py:197:stop] 0/1154, RunningAvgSamplesPerSec=6.342125125868255, CurrSamplesPerSec=5.702317503207661, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:28:33,748] [INFO] [timer.py:197:stop] 0/1156, RunningAvgSamplesPerSec=6.34210247247005, CurrSamplesPerSec=5.685200117179287, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:28:45,231] [INFO] [timer.py:197:stop] 0/1158, RunningAvgSamplesPerSec=6.34209466504466, CurrSamplesPerSec=5.7129103701889825, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:28:56,537] [INFO] [logging.py:68:log_dist] [Rank 0] step=580, skipped=5, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-16 14:28:56,539] [INFO] [timer.py:197:stop] 0/1160, RunningAvgSamplesPerSec=6.342087597571019, CurrSamplesPerSec=5.701697369069446, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:29:07,897] [INFO] [timer.py:197:stop] 0/1162, RunningAvgSamplesPerSec=6.342061366679014, CurrSamplesPerSec=5.694718061709522, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:29:19,229] [INFO] [timer.py:197:stop] 0/1164, RunningAvgSamplesPerSec=6.342063688900911, CurrSamplesPerSec=5.702904574752718, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:29:30,596] [INFO] [timer.py:197:stop] 0/1166, RunningAvgSamplesPerSec=6.342025215062624, CurrSamplesPerSec=5.688731919840502, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:29:41,953] [INFO] [timer.py:197:stop] 0/1168, RunningAvgSamplesPerSec=6.341998272912349, CurrSamplesPerSec=5.687137402836735, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:29:53,298] [INFO] [timer.py:197:stop] 0/1170, RunningAvgSamplesPerSec=6.341989796889964, CurrSamplesPerSec=5.698433957059607, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:30:04,664] [INFO] [timer.py:197:stop] 0/1172, RunningAvgSamplesPerSec=6.34195403052995, CurrSamplesPerSec=5.681861507890479, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:30:16,028] [INFO] [timer.py:197:stop] 0/1174, RunningAvgSamplesPerSec=6.341920898227156, CurrSamplesPerSec=5.6802061735601095, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:30:27,378] [INFO] [timer.py:197:stop] 0/1176, RunningAvgSamplesPerSec=6.341886173091101, CurrSamplesPerSec=5.679934544650027, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:30:38,734] [INFO] [timer.py:197:stop] 0/1178, RunningAvgSamplesPerSec=6.341867225778261, CurrSamplesPerSec=5.713497436055486, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:30:50,084] [INFO] [logging.py:68:log_dist] [Rank 0] step=590, skipped=5, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-16 14:30:50,086] [INFO] [timer.py:197:stop] 0/1180, RunningAvgSamplesPerSec=6.34185257294048, CurrSamplesPerSec=5.6980594641755085, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:31:01,423] [INFO] [timer.py:197:stop] 0/1182, RunningAvgSamplesPerSec=6.341854212156899, CurrSamplesPerSec=5.709254504302453, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:31:12,772] [INFO] [timer.py:197:stop] 0/1184, RunningAvgSamplesPerSec=6.341848295622397, CurrSamplesPerSec=5.699178250528177, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:31:24,084] [INFO] [timer.py:197:stop] 0/1186, RunningAvgSamplesPerSec=6.3418571902670635, CurrSamplesPerSec=5.715483994542817, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:31:35,424] [INFO] [timer.py:197:stop] 0/1188, RunningAvgSamplesPerSec=6.341833946588137, CurrSamplesPerSec=5.705600161334081, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:31:46,795] [INFO] [timer.py:197:stop] 0/1190, RunningAvgSamplesPerSec=6.34179553395068, CurrSamplesPerSec=5.6929719194984365, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:31:58,106] [INFO] [timer.py:197:stop] 0/1192, RunningAvgSamplesPerSec=6.341823510750819, CurrSamplesPerSec=5.711299841849379, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:32:09,451] [INFO] [timer.py:197:stop] 0/1194, RunningAvgSamplesPerSec=6.3418122267706485, CurrSamplesPerSec=5.715445296381856, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:32:20,735] [INFO] [timer.py:197:stop] 0/1196, RunningAvgSamplesPerSec=6.341849477491008, CurrSamplesPerSec=5.734557609331755, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:32:32,046] [INFO] [timer.py:197:stop] 0/1198, RunningAvgSamplesPerSec=6.341877244248112, CurrSamplesPerSec=5.71894119651206, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:32:43,387] [INFO] [logging.py:68:log_dist] [Rank 0] step=600, skipped=5, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-16 14:32:43,389] [INFO] [timer.py:197:stop] 0/1200, RunningAvgSamplesPerSec=6.341878607049019, CurrSamplesPerSec=5.696733168302691, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -{'loss': 0.0347, 'learning_rate': 1e-05, 'epoch': 2.54} -[2022-12-16 14:32:54,711] [INFO] [timer.py:197:stop] 0/1202, RunningAvgSamplesPerSec=6.341896471273075, CurrSamplesPerSec=5.717042091551064, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:33:06,065] [INFO] [timer.py:197:stop] 0/1204, RunningAvgSamplesPerSec=6.341897101506787, CurrSamplesPerSec=5.679245733300272, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:33:17,347] [INFO] [timer.py:197:stop] 0/1206, RunningAvgSamplesPerSec=6.341951670126494, CurrSamplesPerSec=5.735465773043581, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:33:28,700] [INFO] [timer.py:197:stop] 0/1208, RunningAvgSamplesPerSec=6.341933813423766, CurrSamplesPerSec=5.693047018526851, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:33:40,014] [INFO] [timer.py:197:stop] 0/1210, RunningAvgSamplesPerSec=6.341962570506191, CurrSamplesPerSec=5.726071648268367, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:33:51,341] [INFO] [timer.py:197:stop] 0/1212, RunningAvgSamplesPerSec=6.34198104703568, CurrSamplesPerSec=5.713993885386633, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:34:02,655] [INFO] [timer.py:197:stop] 0/1214, RunningAvgSamplesPerSec=6.341989706884567, CurrSamplesPerSec=5.719417875959229, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:34:13,996] [INFO] [timer.py:197:stop] 0/1216, RunningAvgSamplesPerSec=6.341986548452563, CurrSamplesPerSec=5.713432497815178, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:34:25,329] [INFO] [timer.py:197:stop] 0/1218, RunningAvgSamplesPerSec=6.341989702811427, CurrSamplesPerSec=5.704049748383485, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:34:36,668] [INFO] [logging.py:68:log_dist] [Rank 0] step=610, skipped=5, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-16 14:34:36,669] [INFO] [timer.py:197:stop] 0/1220, RunningAvgSamplesPerSec=6.341983907936107, CurrSamplesPerSec=5.704984162896505, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:34:47,992] [INFO] [timer.py:197:stop] 0/1222, RunningAvgSamplesPerSec=6.3420001667946035, CurrSamplesPerSec=5.7295521917594305, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:34:59,295] [INFO] [timer.py:197:stop] 0/1224, RunningAvgSamplesPerSec=6.34203665776226, CurrSamplesPerSec=5.742030444913072, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:35:10,632] [INFO] [timer.py:197:stop] 0/1226, RunningAvgSamplesPerSec=6.342035936585133, CurrSamplesPerSec=5.711309320038435, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:35:21,990] [INFO] [timer.py:197:stop] 0/1228, RunningAvgSamplesPerSec=6.342023840723309, CurrSamplesPerSec=5.703659731799368, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:35:33,332] [INFO] [timer.py:197:stop] 0/1230, RunningAvgSamplesPerSec=6.34202606042488, CurrSamplesPerSec=5.719129568004103, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:35:44,666] [INFO] [timer.py:197:stop] 0/1232, RunningAvgSamplesPerSec=6.342021991006609, CurrSamplesPerSec=5.695910227336529, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:35:56,016] [INFO] [timer.py:197:stop] 0/1234, RunningAvgSamplesPerSec=6.342006012194455, CurrSamplesPerSec=5.683263669619541, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:36:07,345] [INFO] [timer.py:197:stop] 0/1236, RunningAvgSamplesPerSec=6.3419942199098385, CurrSamplesPerSec=5.70789798994556, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:36:18,750] [INFO] [timer.py:197:stop] 0/1238, RunningAvgSamplesPerSec=6.341966640691746, CurrSamplesPerSec=5.686711144545196, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:36:30,081] [INFO] [logging.py:68:log_dist] [Rank 0] step=620, skipped=5, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-16 14:36:30,083] [INFO] [timer.py:197:stop] 0/1240, RunningAvgSamplesPerSec=6.3419657033705965, CurrSamplesPerSec=5.701581593341636, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:36:41,393] [INFO] [timer.py:197:stop] 0/1242, RunningAvgSamplesPerSec=6.341980020428165, CurrSamplesPerSec=5.731832395172248, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:36:52,764] [INFO] [timer.py:197:stop] 0/1244, RunningAvgSamplesPerSec=6.341953373133708, CurrSamplesPerSec=5.680820439143911, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:37:04,058] [INFO] [timer.py:197:stop] 0/1246, RunningAvgSamplesPerSec=6.34197841556084, CurrSamplesPerSec=5.717911830283779, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:37:15,402] [INFO] [timer.py:197:stop] 0/1248, RunningAvgSamplesPerSec=6.341969066741227, CurrSamplesPerSec=5.705706640688583, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:37:26,914] [INFO] [timer.py:197:stop] 0/1250, RunningAvgSamplesPerSec=6.341964101473373, CurrSamplesPerSec=5.70826430910481, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -{'loss': 0.0348, 'learning_rate': 1e-05, 'epoch': 2.65} -[2022-12-16 14:37:38,762] [INFO] [timer.py:197:stop] 0/1252, RunningAvgSamplesPerSec=6.341913431600818, CurrSamplesPerSec=5.674062660433857, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:37:50,589] [INFO] [timer.py:197:stop] 0/1254, RunningAvgSamplesPerSec=6.341901815390098, CurrSamplesPerSec=5.694548448860545, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:38:02,393] [INFO] [timer.py:197:stop] 0/1256, RunningAvgSamplesPerSec=6.341883576631976, CurrSamplesPerSec=5.688104612379961, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:38:14,000] [INFO] [timer.py:197:stop] 0/1258, RunningAvgSamplesPerSec=6.341872794684727, CurrSamplesPerSec=5.703657065617219, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:38:25,393] [INFO] [logging.py:68:log_dist] [Rank 0] step=630, skipped=5, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-16 14:38:25,394] [INFO] [timer.py:197:stop] 0/1260, RunningAvgSamplesPerSec=6.341865366147259, CurrSamplesPerSec=5.705734049457164, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:38:36,741] [INFO] [timer.py:197:stop] 0/1262, RunningAvgSamplesPerSec=6.341855550405924, CurrSamplesPerSec=5.691122838703123, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:38:48,184] [INFO] [timer.py:197:stop] 0/1264, RunningAvgSamplesPerSec=6.341869424864738, CurrSamplesPerSec=5.712905750016515, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:38:59,484] [INFO] [timer.py:197:stop] 0/1266, RunningAvgSamplesPerSec=6.341905958246278, CurrSamplesPerSec=5.743291164373362, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:39:10,797] [INFO] [timer.py:197:stop] 0/1268, RunningAvgSamplesPerSec=6.341933834782191, CurrSamplesPerSec=5.732381980420735, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:39:22,092] [INFO] [timer.py:197:stop] 0/1270, RunningAvgSamplesPerSec=6.341976379604057, CurrSamplesPerSec=5.735517732822823, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:39:33,396] [INFO] [timer.py:197:stop] 0/1272, RunningAvgSamplesPerSec=6.341977077258627, CurrSamplesPerSec=5.720831078371896, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:39:44,729] [INFO] [timer.py:197:stop] 0/1274, RunningAvgSamplesPerSec=6.3419649563084874, CurrSamplesPerSec=5.691029933577238, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:39:56,055] [INFO] [timer.py:197:stop] 0/1276, RunningAvgSamplesPerSec=6.341979825791403, CurrSamplesPerSec=5.709503199728328, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:40:07,411] [INFO] [timer.py:197:stop] 0/1278, RunningAvgSamplesPerSec=6.341966728201985, CurrSamplesPerSec=5.696792166154243, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:40:18,742] [INFO] [logging.py:68:log_dist] [Rank 0] step=640, skipped=5, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-16 14:40:18,743] [INFO] [timer.py:197:stop] 0/1280, RunningAvgSamplesPerSec=6.341970256692958, CurrSamplesPerSec=5.720286143597552, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:40:30,044] [INFO] [timer.py:197:stop] 0/1282, RunningAvgSamplesPerSec=6.342008220842321, CurrSamplesPerSec=5.737926071842598, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:40:41,373] [INFO] [timer.py:197:stop] 0/1284, RunningAvgSamplesPerSec=6.3419988418237585, CurrSamplesPerSec=5.7099489141936965, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:40:52,693] [INFO] [timer.py:197:stop] 0/1286, RunningAvgSamplesPerSec=6.342022034261011, CurrSamplesPerSec=5.724150472559608, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:41:04,024] [INFO] [timer.py:197:stop] 0/1288, RunningAvgSamplesPerSec=6.3420292694938984, CurrSamplesPerSec=5.6933339102268805, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:41:15,413] [INFO] [timer.py:197:stop] 0/1290, RunningAvgSamplesPerSec=6.341983459677655, CurrSamplesPerSec=5.637183596584856, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:41:26,783] [INFO] [timer.py:197:stop] 0/1292, RunningAvgSamplesPerSec=6.341963235738544, CurrSamplesPerSec=5.684480896928181, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:41:38,215] [INFO] [timer.py:197:stop] 0/1294, RunningAvgSamplesPerSec=6.34183759172952, CurrSamplesPerSec=5.602544304448964, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:41:49,551] [INFO] [timer.py:197:stop] 0/1296, RunningAvgSamplesPerSec=6.341831440484137, CurrSamplesPerSec=5.700941278631235, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:42:00,960] [INFO] [timer.py:197:stop] 0/1298, RunningAvgSamplesPerSec=6.341805815820332, CurrSamplesPerSec=5.6858438857398, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:42:12,912] [INFO] [logging.py:68:log_dist] [Rank 0] step=650, skipped=5, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-16 14:42:12,913] [INFO] [timer.py:197:stop] 0/1300, RunningAvgSamplesPerSec=6.341745443623, CurrSamplesPerSec=5.6577931560620875, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -{'loss': 0.0364, 'learning_rate': 1e-05, 'epoch': 2.75} -[2022-12-16 14:42:24,852] [INFO] [timer.py:197:stop] 0/1302, RunningAvgSamplesPerSec=6.341699753784161, CurrSamplesPerSec=5.693112701616461, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:42:36,739] [INFO] [timer.py:197:stop] 0/1304, RunningAvgSamplesPerSec=6.341670847975745, CurrSamplesPerSec=5.672541564369074, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:42:48,129] [INFO] [timer.py:197:stop] 0/1306, RunningAvgSamplesPerSec=6.341618362263155, CurrSamplesPerSec=5.645468028242119, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:42:59,447] [INFO] [timer.py:197:stop] 0/1308, RunningAvgSamplesPerSec=6.341632092950719, CurrSamplesPerSec=5.696219648453861, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:43:10,887] [INFO] [timer.py:197:stop] 0/1310, RunningAvgSamplesPerSec=6.341590601962275, CurrSamplesPerSec=5.6654717420843825, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:43:22,240] [INFO] [timer.py:197:stop] 0/1312, RunningAvgSamplesPerSec=6.341584614965287, CurrSamplesPerSec=5.689816171568963, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:43:33,568] [INFO] [timer.py:197:stop] 0/1314, RunningAvgSamplesPerSec=6.341594613430739, CurrSamplesPerSec=5.7093586914483065, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:43:44,956] [INFO] [timer.py:197:stop] 0/1316, RunningAvgSamplesPerSec=6.341563466477754, CurrSamplesPerSec=5.670641041289616, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:43:56,265] [INFO] [timer.py:197:stop] 0/1318, RunningAvgSamplesPerSec=6.341569001790131, CurrSamplesPerSec=5.7292819370508505, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:44:07,568] [INFO] [logging.py:68:log_dist] [Rank 0] step=660, skipped=5, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-16 14:44:07,570] [INFO] [timer.py:197:stop] 0/1320, RunningAvgSamplesPerSec=6.341599110928043, CurrSamplesPerSec=5.733176556079528, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:44:18,913] [INFO] [timer.py:197:stop] 0/1322, RunningAvgSamplesPerSec=6.341575713584571, CurrSamplesPerSec=5.6912922475459755, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:44:30,220] [INFO] [timer.py:197:stop] 0/1324, RunningAvgSamplesPerSec=6.341604782515842, CurrSamplesPerSec=5.733804781788765, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:44:41,563] [INFO] [timer.py:197:stop] 0/1326, RunningAvgSamplesPerSec=6.341584816278544, CurrSamplesPerSec=5.698715826869556, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:44:52,882] [INFO] [timer.py:197:stop] 0/1328, RunningAvgSamplesPerSec=6.3416033148122475, CurrSamplesPerSec=5.721217106174578, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:45:04,217] [INFO] [timer.py:197:stop] 0/1330, RunningAvgSamplesPerSec=6.341592936615098, CurrSamplesPerSec=5.684606813545637, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:45:15,593] [INFO] [timer.py:197:stop] 0/1332, RunningAvgSamplesPerSec=6.341554846742973, CurrSamplesPerSec=5.664314992225257, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:45:26,924] [INFO] [timer.py:197:stop] 0/1334, RunningAvgSamplesPerSec=6.341556591018235, CurrSamplesPerSec=5.699659628332756, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:45:38,272] [INFO] [timer.py:197:stop] 0/1336, RunningAvgSamplesPerSec=6.341547460152438, CurrSamplesPerSec=5.697217760627823, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:45:49,609] [INFO] [timer.py:197:stop] 0/1338, RunningAvgSamplesPerSec=6.3415444610859675, CurrSamplesPerSec=5.698977639944709, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:46:00,947] [INFO] [logging.py:68:log_dist] [Rank 0] step=670, skipped=5, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-16 14:46:00,949] [INFO] [timer.py:197:stop] 0/1340, RunningAvgSamplesPerSec=6.341538353802266, CurrSamplesPerSec=5.699747006276326, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:46:12,281] [INFO] [timer.py:197:stop] 0/1342, RunningAvgSamplesPerSec=6.341546638536307, CurrSamplesPerSec=5.7052620732819355, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:46:23,633] [INFO] [timer.py:197:stop] 0/1344, RunningAvgSamplesPerSec=6.341530953943566, CurrSamplesPerSec=5.711561597247238, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:46:34,962] [INFO] [timer.py:197:stop] 0/1346, RunningAvgSamplesPerSec=6.341535474785801, CurrSamplesPerSec=5.688151860544939, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:46:46,287] [INFO] [timer.py:197:stop] 0/1348, RunningAvgSamplesPerSec=6.341530994988656, CurrSamplesPerSec=5.706824549931542, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:46:57,648] [INFO] [timer.py:197:stop] 0/1350, RunningAvgSamplesPerSec=6.341510467487031, CurrSamplesPerSec=5.679943438282595, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -{'loss': 0.0368, 'learning_rate': 1e-05, 'epoch': 2.86} -[2022-12-16 14:47:08,981] [INFO] [timer.py:197:stop] 0/1352, RunningAvgSamplesPerSec=6.341513942550877, CurrSamplesPerSec=5.706243221276868, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:47:20,362] [INFO] [timer.py:197:stop] 0/1354, RunningAvgSamplesPerSec=6.341465545784576, CurrSamplesPerSec=5.655782857749175, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:47:31,746] [INFO] [timer.py:197:stop] 0/1356, RunningAvgSamplesPerSec=6.3414346374436645, CurrSamplesPerSec=5.672131394177037, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:47:43,085] [INFO] [timer.py:197:stop] 0/1358, RunningAvgSamplesPerSec=6.34142760831968, CurrSamplesPerSec=5.706950245070932, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:47:54,415] [INFO] [logging.py:68:log_dist] [Rank 0] step=680, skipped=5, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-16 14:47:54,416] [INFO] [timer.py:197:stop] 0/1360, RunningAvgSamplesPerSec=6.3414369763987555, CurrSamplesPerSec=5.711564270824477, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:48:05,784] [INFO] [timer.py:197:stop] 0/1362, RunningAvgSamplesPerSec=6.341397986420672, CurrSamplesPerSec=5.664015720082998, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:48:17,363] [INFO] [timer.py:197:stop] 0/1364, RunningAvgSamplesPerSec=6.341386306228154, CurrSamplesPerSec=5.696870025845178, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:48:29,358] [INFO] [timer.py:197:stop] 0/1366, RunningAvgSamplesPerSec=6.341305811961416, CurrSamplesPerSec=5.64646078457769, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:48:40,976] [INFO] [timer.py:197:stop] 0/1368, RunningAvgSamplesPerSec=6.341278132620111, CurrSamplesPerSec=5.6836075791694585, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:48:52,791] [INFO] [timer.py:197:stop] 0/1370, RunningAvgSamplesPerSec=6.341239448006165, CurrSamplesPerSec=5.673257046981256, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:49:04,402] [INFO] [timer.py:197:stop] 0/1372, RunningAvgSamplesPerSec=6.341231709995396, CurrSamplesPerSec=5.701490284002487, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:49:15,720] [INFO] [timer.py:197:stop] 0/1374, RunningAvgSamplesPerSec=6.3412303326014925, CurrSamplesPerSec=5.698467586413149, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:49:27,138] [INFO] [timer.py:197:stop] 0/1376, RunningAvgSamplesPerSec=6.341202115120962, CurrSamplesPerSec=5.696106754198011, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:49:38,516] [INFO] [timer.py:197:stop] 0/1378, RunningAvgSamplesPerSec=6.341153200835064, CurrSamplesPerSec=5.654166262600238, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:49:49,936] [INFO] [logging.py:68:log_dist] [Rank 0] step=690, skipped=5, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-16 14:49:49,938] [INFO] [timer.py:197:stop] 0/1380, RunningAvgSamplesPerSec=6.341132224930991, CurrSamplesPerSec=5.68624496005538, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:50:01,253] [INFO] [timer.py:197:stop] 0/1382, RunningAvgSamplesPerSec=6.34112280256516, CurrSamplesPerSec=5.695159054075012, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:50:12,589] [INFO] [timer.py:197:stop] 0/1384, RunningAvgSamplesPerSec=6.341123372534469, CurrSamplesPerSec=5.6996046856792315, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:50:23,995] [INFO] [timer.py:197:stop] 0/1386, RunningAvgSamplesPerSec=6.341059047870384, CurrSamplesPerSec=5.647039024132055, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:50:35,562] [INFO] [timer.py:197:stop] 0/1388, RunningAvgSamplesPerSec=6.341045577125367, CurrSamplesPerSec=5.692134856429986, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:50:46,957] [INFO] [timer.py:197:stop] 0/1390, RunningAvgSamplesPerSec=6.340997144458177, CurrSamplesPerSec=5.681990676077318, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:50:58,305] [INFO] [timer.py:197:stop] 0/1392, RunningAvgSamplesPerSec=6.3409874033737506, CurrSamplesPerSec=5.687776307523125, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:51:09,675] [INFO] [timer.py:197:stop] 0/1394, RunningAvgSamplesPerSec=6.340967599324559, CurrSamplesPerSec=5.682356083068968, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:51:21,023] [INFO] [timer.py:197:stop] 0/1396, RunningAvgSamplesPerSec=6.34094175660239, CurrSamplesPerSec=5.678554205979396, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:51:32,541] [INFO] [timer.py:197:stop] 0/1398, RunningAvgSamplesPerSec=6.340900730423903, CurrSamplesPerSec=5.660423338680644, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:51:44,143] [INFO] [logging.py:68:log_dist] [Rank 0] step=700, skipped=5, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-16 14:51:44,145] [INFO] [timer.py:197:stop] 0/1400, RunningAvgSamplesPerSec=6.3408752815569525, CurrSamplesPerSec=5.673991419538569, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -{'loss': 0.0384, 'learning_rate': 1e-05, 'epoch': 2.97} -[2022-12-16 14:51:55,483] [INFO] [timer.py:197:stop] 0/1402, RunningAvgSamplesPerSec=6.340859458400576, CurrSamplesPerSec=5.685104274845516, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:52:07,020] [INFO] [timer.py:197:stop] 0/1404, RunningAvgSamplesPerSec=6.3408292407124565, CurrSamplesPerSec=5.687765702124439, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:52:18,470] [INFO] [timer.py:197:stop] 0/1406, RunningAvgSamplesPerSec=6.340845264199506, CurrSamplesPerSec=5.7009642829093865, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:52:29,807] [INFO] [timer.py:197:stop] 0/1408, RunningAvgSamplesPerSec=6.3408450665175335, CurrSamplesPerSec=5.693712129831391, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:52:41,150] [INFO] [timer.py:197:stop] 0/1410, RunningAvgSamplesPerSec=6.340834831800001, CurrSamplesPerSec=5.713949855771624, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:52:52,490] [INFO] [timer.py:197:stop] 0/1412, RunningAvgSamplesPerSec=6.340832970670184, CurrSamplesPerSec=5.697333601041515, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:53:03,836] [INFO] [timer.py:197:stop] 0/1414, RunningAvgSamplesPerSec=6.340821690172965, CurrSamplesPerSec=5.699762255333288, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:53:12,376] [INFO] [timer.py:197:stop] 0/1416, RunningAvgSamplesPerSec=6.343001806368919, CurrSamplesPerSec=10.222363241398545, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:53:23,743] [INFO] [timer.py:197:stop] 0/1418, RunningAvgSamplesPerSec=6.342979836865651, CurrSamplesPerSec=5.6932759499770516, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:53:35,103] [INFO] [logging.py:68:log_dist] [Rank 0] step=710, skipped=5, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-16 14:53:35,104] [INFO] [timer.py:197:stop] 0/1420, RunningAvgSamplesPerSec=6.3429412114598085, CurrSamplesPerSec=5.680045356492783, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:53:46,496] [INFO] [timer.py:197:stop] 0/1422, RunningAvgSamplesPerSec=6.3429387545618505, CurrSamplesPerSec=5.716258554728885, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:53:57,827] [INFO] [timer.py:197:stop] 0/1424, RunningAvgSamplesPerSec=6.342909086580767, CurrSamplesPerSec=5.685790895167144, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:54:09,182] [INFO] [timer.py:197:stop] 0/1426, RunningAvgSamplesPerSec=6.342904424723496, CurrSamplesPerSec=5.694281969307971, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:54:20,544] [INFO] [timer.py:197:stop] 0/1428, RunningAvgSamplesPerSec=6.34289700017322, CurrSamplesPerSec=5.698439521601493, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:54:31,873] [INFO] [timer.py:197:stop] 0/1430, RunningAvgSamplesPerSec=6.342893310403501, CurrSamplesPerSec=5.709524815881511, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:54:43,237] [INFO] [timer.py:197:stop] 0/1432, RunningAvgSamplesPerSec=6.342840839202115, CurrSamplesPerSec=5.677154847083064, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:54:54,580] [INFO] [timer.py:197:stop] 0/1434, RunningAvgSamplesPerSec=6.342816101843901, CurrSamplesPerSec=5.694050782866752, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:55:05,948] [INFO] [timer.py:197:stop] 0/1436, RunningAvgSamplesPerSec=6.342783522446399, CurrSamplesPerSec=5.674854826198604, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:55:17,329] [INFO] [timer.py:197:stop] 0/1438, RunningAvgSamplesPerSec=6.342740813957726, CurrSamplesPerSec=5.663693535908116, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:55:28,880] [INFO] [logging.py:68:log_dist] [Rank 0] step=720, skipped=5, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-16 14:55:28,882] [INFO] [timer.py:197:stop] 0/1440, RunningAvgSamplesPerSec=6.342599648821451, CurrSamplesPerSec=5.694897591728672, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:55:40,282] [INFO] [timer.py:197:stop] 0/1442, RunningAvgSamplesPerSec=6.342540221445991, CurrSamplesPerSec=5.6591770197937095, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:55:51,670] [INFO] [timer.py:197:stop] 0/1444, RunningAvgSamplesPerSec=6.342520759184326, CurrSamplesPerSec=5.697625762296395, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:56:03,038] [INFO] [timer.py:197:stop] 0/1446, RunningAvgSamplesPerSec=6.3424922000532105, CurrSamplesPerSec=5.678319730434744, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:56:14,426] [INFO] [timer.py:197:stop] 0/1448, RunningAvgSamplesPerSec=6.342433462125882, CurrSamplesPerSec=5.656962351987449, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:56:25,856] [INFO] [timer.py:197:stop] 0/1450, RunningAvgSamplesPerSec=6.34238288396144, CurrSamplesPerSec=5.660801018168518, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -{'loss': 0.0236, 'learning_rate': 1e-05, 'epoch': 3.07} -[2022-12-16 14:56:37,239] [INFO] [timer.py:197:stop] 0/1452, RunningAvgSamplesPerSec=6.342361993677473, CurrSamplesPerSec=5.68302784248084, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:56:48,968] [INFO] [timer.py:197:stop] 0/1454, RunningAvgSamplesPerSec=6.342314594444791, CurrSamplesPerSec=5.657922424784969, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:57:00,697] [INFO] [timer.py:197:stop] 0/1456, RunningAvgSamplesPerSec=6.342268591669828, CurrSamplesPerSec=5.6867578876713685, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:57:12,340] [INFO] [timer.py:197:stop] 0/1458, RunningAvgSamplesPerSec=6.34223068656754, CurrSamplesPerSec=5.667312325582682, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:57:24,102] [INFO] [logging.py:68:log_dist] [Rank 0] step=730, skipped=5, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-16 14:57:24,103] [INFO] [timer.py:197:stop] 0/1460, RunningAvgSamplesPerSec=6.342212515723276, CurrSamplesPerSec=5.696579150982275, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:57:35,745] [INFO] [timer.py:197:stop] 0/1462, RunningAvgSamplesPerSec=6.342151666537463, CurrSamplesPerSec=5.655240951176899, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:57:47,382] [INFO] [timer.py:197:stop] 0/1464, RunningAvgSamplesPerSec=6.342130155753065, CurrSamplesPerSec=5.695470085375787, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:57:58,732] [INFO] [timer.py:197:stop] 0/1466, RunningAvgSamplesPerSec=6.342123397647371, CurrSamplesPerSec=5.701415204243638, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:58:10,167] [INFO] [timer.py:197:stop] 0/1468, RunningAvgSamplesPerSec=6.342083483163338, CurrSamplesPerSec=5.672903119485949, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:58:21,529] [INFO] [timer.py:197:stop] 0/1470, RunningAvgSamplesPerSec=6.342080925878628, CurrSamplesPerSec=5.703203123854703, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:58:32,862] [INFO] [timer.py:197:stop] 0/1472, RunningAvgSamplesPerSec=6.342080072093458, CurrSamplesPerSec=5.70864427228525, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:58:44,209] [INFO] [timer.py:197:stop] 0/1474, RunningAvgSamplesPerSec=6.34205363377033, CurrSamplesPerSec=5.676758175313035, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:58:55,673] [INFO] [timer.py:197:stop] 0/1476, RunningAvgSamplesPerSec=6.342038899550425, CurrSamplesPerSec=5.692143546907543, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:59:07,023] [INFO] [timer.py:197:stop] 0/1478, RunningAvgSamplesPerSec=6.342022666973257, CurrSamplesPerSec=5.692293944653019, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:59:18,376] [INFO] [logging.py:68:log_dist] [Rank 0] step=740, skipped=5, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-16 14:59:18,376] [INFO] [timer.py:197:stop] 0/1480, RunningAvgSamplesPerSec=6.342011608288597, CurrSamplesPerSec=5.692075713606984, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:59:29,773] [INFO] [timer.py:197:stop] 0/1482, RunningAvgSamplesPerSec=6.341969359985125, CurrSamplesPerSec=5.6732767109147755, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:59:41,274] [INFO] [timer.py:197:stop] 0/1484, RunningAvgSamplesPerSec=6.341953698000013, CurrSamplesPerSec=5.6926032144284955, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 14:59:52,679] [INFO] [timer.py:197:stop] 0/1486, RunningAvgSamplesPerSec=6.341940489551061, CurrSamplesPerSec=5.70655085457492, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:00:04,027] [INFO] [timer.py:197:stop] 0/1488, RunningAvgSamplesPerSec=6.341926388483119, CurrSamplesPerSec=5.696006192633686, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:00:15,606] [INFO] [timer.py:197:stop] 0/1490, RunningAvgSamplesPerSec=6.341889248128019, CurrSamplesPerSec=5.661455750178363, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:00:27,090] [INFO] [timer.py:197:stop] 0/1492, RunningAvgSamplesPerSec=6.341863503136832, CurrSamplesPerSec=5.680092470863853, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:00:38,422] [INFO] [timer.py:197:stop] 0/1494, RunningAvgSamplesPerSec=6.34186409141219, CurrSamplesPerSec=5.709879441312319, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:00:49,791] [INFO] [timer.py:197:stop] 0/1496, RunningAvgSamplesPerSec=6.341834904526626, CurrSamplesPerSec=5.687857054469082, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:01:01,198] [INFO] [timer.py:197:stop] 0/1498, RunningAvgSamplesPerSec=6.341831456889497, CurrSamplesPerSec=5.6952240609171, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:01:12,713] [INFO] [logging.py:68:log_dist] [Rank 0] step=750, skipped=5, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-16 15:01:12,714] [INFO] [timer.py:197:stop] 0/1500, RunningAvgSamplesPerSec=6.341677946328467, CurrSamplesPerSec=5.672521186308156, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -{'loss': 0.0188, 'learning_rate': 1e-05, 'epoch': 3.18} -[2022-12-16 15:01:24,132] [INFO] [timer.py:197:stop] 0/1502, RunningAvgSamplesPerSec=6.341638775258454, CurrSamplesPerSec=5.674385305739847, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:01:35,509] [INFO] [timer.py:197:stop] 0/1504, RunningAvgSamplesPerSec=6.341605500941506, CurrSamplesPerSec=5.663864183791158, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:01:46,957] [INFO] [timer.py:197:stop] 0/1506, RunningAvgSamplesPerSec=6.341589086574736, CurrSamplesPerSec=5.685052983667712, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:01:58,324] [INFO] [timer.py:197:stop] 0/1508, RunningAvgSamplesPerSec=6.341588805379671, CurrSamplesPerSec=5.714745899200174, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:02:09,883] [INFO] [timer.py:197:stop] 0/1510, RunningAvgSamplesPerSec=6.341402469690666, CurrSamplesPerSec=5.49238308723314, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:02:21,434] [INFO] [timer.py:197:stop] 0/1512, RunningAvgSamplesPerSec=6.341350248424772, CurrSamplesPerSec=5.64702999565295, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:02:32,771] [INFO] [timer.py:197:stop] 0/1514, RunningAvgSamplesPerSec=6.341346331910384, CurrSamplesPerSec=5.697509184428209, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:02:44,097] [INFO] [timer.py:197:stop] 0/1516, RunningAvgSamplesPerSec=6.341353366978005, CurrSamplesPerSec=5.702388003556426, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:02:55,459] [INFO] [timer.py:197:stop] 0/1518, RunningAvgSamplesPerSec=6.341341164701243, CurrSamplesPerSec=5.680299927621715, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:03:06,830] [INFO] [logging.py:68:log_dist] [Rank 0] step=760, skipped=5, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-16 15:03:06,831] [INFO] [timer.py:197:stop] 0/1520, RunningAvgSamplesPerSec=6.341327370475824, CurrSamplesPerSec=5.696119324643216, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:03:18,237] [INFO] [timer.py:197:stop] 0/1522, RunningAvgSamplesPerSec=6.3412688547482485, CurrSamplesPerSec=5.640309421300005, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:03:29,836] [INFO] [timer.py:197:stop] 0/1524, RunningAvgSamplesPerSec=6.341192736338762, CurrSamplesPerSec=5.62095345330348, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:03:41,247] [INFO] [timer.py:197:stop] 0/1526, RunningAvgSamplesPerSec=6.3411631021869335, CurrSamplesPerSec=5.681983940905689, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:03:52,544] [INFO] [timer.py:197:stop] 0/1528, RunningAvgSamplesPerSec=6.341152967024458, CurrSamplesPerSec=5.692563859733904, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:04:03,975] [INFO] [timer.py:197:stop] 0/1530, RunningAvgSamplesPerSec=6.341126463250525, CurrSamplesPerSec=5.688522400154612, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:04:15,350] [INFO] [timer.py:197:stop] 0/1532, RunningAvgSamplesPerSec=6.3411357711857494, CurrSamplesPerSec=5.722231561425265, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:04:26,819] [INFO] [timer.py:197:stop] 0/1534, RunningAvgSamplesPerSec=6.341032355356612, CurrSamplesPerSec=5.592960434656473, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:04:38,309] [INFO] [timer.py:197:stop] 0/1536, RunningAvgSamplesPerSec=6.341019027499484, CurrSamplesPerSec=5.7054393583614305, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:04:49,822] [INFO] [timer.py:197:stop] 0/1538, RunningAvgSamplesPerSec=6.340996967931803, CurrSamplesPerSec=5.67102990975972, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:05:01,167] [INFO] [logging.py:68:log_dist] [Rank 0] step=770, skipped=5, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-16 15:05:01,168] [INFO] [timer.py:197:stop] 0/1540, RunningAvgSamplesPerSec=6.3409852431183245, CurrSamplesPerSec=5.704927420125413, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:05:12,589] [INFO] [timer.py:197:stop] 0/1542, RunningAvgSamplesPerSec=6.340993330961346, CurrSamplesPerSec=5.696307162711856, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:05:23,944] [INFO] [timer.py:197:stop] 0/1544, RunningAvgSamplesPerSec=6.340975620108428, CurrSamplesPerSec=5.693175005359446, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:05:35,280] [INFO] [timer.py:197:stop] 0/1546, RunningAvgSamplesPerSec=6.340972755518048, CurrSamplesPerSec=5.672557387435001, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:05:46,641] [INFO] [timer.py:197:stop] 0/1548, RunningAvgSamplesPerSec=6.340963188590646, CurrSamplesPerSec=5.6946083679150945, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:05:58,057] [INFO] [timer.py:197:stop] 0/1550, RunningAvgSamplesPerSec=6.3409774774642935, CurrSamplesPerSec=5.707322024491383, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -{'loss': 0.018, 'learning_rate': 1e-05, 'epoch': 3.28} -[2022-12-16 15:06:09,366] [INFO] [timer.py:197:stop] 0/1552, RunningAvgSamplesPerSec=6.340983462641784, CurrSamplesPerSec=5.684134233724698, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:06:20,722] [INFO] [timer.py:197:stop] 0/1554, RunningAvgSamplesPerSec=6.340964370973567, CurrSamplesPerSec=5.6702874401091625, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:06:32,034] [INFO] [timer.py:197:stop] 0/1556, RunningAvgSamplesPerSec=6.34098163609854, CurrSamplesPerSec=5.711575451265504, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:06:43,575] [INFO] [timer.py:197:stop] 0/1558, RunningAvgSamplesPerSec=6.3409544491119885, CurrSamplesPerSec=5.653666341686975, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:06:54,908] [INFO] [logging.py:68:log_dist] [Rank 0] step=780, skipped=5, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-16 15:06:54,909] [INFO] [timer.py:197:stop] 0/1560, RunningAvgSamplesPerSec=6.340939628127967, CurrSamplesPerSec=5.696208769797546, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:07:06,270] [INFO] [timer.py:197:stop] 0/1562, RunningAvgSamplesPerSec=6.340902908469031, CurrSamplesPerSec=5.646739911729805, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:07:17,608] [INFO] [timer.py:197:stop] 0/1564, RunningAvgSamplesPerSec=6.340920513538756, CurrSamplesPerSec=5.727755537884269, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:07:28,943] [INFO] [timer.py:197:stop] 0/1566, RunningAvgSamplesPerSec=6.340918573203408, CurrSamplesPerSec=5.710421423215805, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:07:40,276] [INFO] [timer.py:197:stop] 0/1568, RunningAvgSamplesPerSec=6.340909584614333, CurrSamplesPerSec=5.693910437455586, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:07:51,731] [INFO] [timer.py:197:stop] 0/1570, RunningAvgSamplesPerSec=6.340812940881451, CurrSamplesPerSec=5.600275587292008, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:08:03,160] [INFO] [timer.py:197:stop] 0/1572, RunningAvgSamplesPerSec=6.340797815746698, CurrSamplesPerSec=5.6831344435491795, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:08:14,551] [INFO] [timer.py:197:stop] 0/1574, RunningAvgSamplesPerSec=6.3407527776718755, CurrSamplesPerSec=5.678339669712356, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:08:26,109] [INFO] [timer.py:197:stop] 0/1576, RunningAvgSamplesPerSec=6.340728823079258, CurrSamplesPerSec=5.681213350273135, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:08:37,451] [INFO] [timer.py:197:stop] 0/1578, RunningAvgSamplesPerSec=6.340723657680421, CurrSamplesPerSec=5.703325266587802, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:08:48,816] [INFO] [logging.py:68:log_dist] [Rank 0] step=790, skipped=5, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-16 15:08:48,818] [INFO] [timer.py:197:stop] 0/1580, RunningAvgSamplesPerSec=6.340702822409906, CurrSamplesPerSec=5.6887936454989125, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:09:00,327] [INFO] [timer.py:197:stop] 0/1582, RunningAvgSamplesPerSec=6.340607479790234, CurrSamplesPerSec=5.687409480116973, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:09:11,689] [INFO] [timer.py:197:stop] 0/1584, RunningAvgSamplesPerSec=6.340585472875107, CurrSamplesPerSec=5.703302970301742, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:09:23,029] [INFO] [timer.py:197:stop] 0/1586, RunningAvgSamplesPerSec=6.340578666142596, CurrSamplesPerSec=5.705081646702652, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:09:34,366] [INFO] [timer.py:197:stop] 0/1588, RunningAvgSamplesPerSec=6.340575615865826, CurrSamplesPerSec=5.719122257106722, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:09:45,710] [INFO] [timer.py:197:stop] 0/1590, RunningAvgSamplesPerSec=6.340566764043695, CurrSamplesPerSec=5.715390779107138, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:09:57,055] [INFO] [timer.py:197:stop] 0/1592, RunningAvgSamplesPerSec=6.340557130296451, CurrSamplesPerSec=5.699683348396006, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:10:08,390] [INFO] [timer.py:197:stop] 0/1594, RunningAvgSamplesPerSec=6.340557571688335, CurrSamplesPerSec=5.70996810455332, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:10:19,713] [INFO] [timer.py:197:stop] 0/1596, RunningAvgSamplesPerSec=6.3405689893037245, CurrSamplesPerSec=5.716324774516089, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:10:31,023] [INFO] [timer.py:197:stop] 0/1598, RunningAvgSamplesPerSec=6.340562882035166, CurrSamplesPerSec=5.696043419337321, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:10:42,347] [INFO] [logging.py:68:log_dist] [Rank 0] step=800, skipped=5, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-16 15:10:42,348] [INFO] [timer.py:197:stop] 0/1600, RunningAvgSamplesPerSec=6.3405693929765485, CurrSamplesPerSec=5.706464238375231, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -{'loss': 0.0174, 'learning_rate': 1e-05, 'epoch': 3.39} -[2022-12-16 15:10:53,716] [INFO] [timer.py:197:stop] 0/1602, RunningAvgSamplesPerSec=6.340545818042899, CurrSamplesPerSec=5.665583903559678, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:11:05,052] [INFO] [timer.py:197:stop] 0/1604, RunningAvgSamplesPerSec=6.340532394750886, CurrSamplesPerSec=5.684705046826125, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:11:16,380] [INFO] [timer.py:197:stop] 0/1606, RunningAvgSamplesPerSec=6.3405379936170565, CurrSamplesPerSec=5.723209769550559, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:11:27,689] [INFO] [timer.py:197:stop] 0/1608, RunningAvgSamplesPerSec=6.340558373519565, CurrSamplesPerSec=5.724374832199237, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:11:39,003] [INFO] [timer.py:197:stop] 0/1610, RunningAvgSamplesPerSec=6.340574932775503, CurrSamplesPerSec=5.714505505743788, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:11:50,368] [INFO] [timer.py:197:stop] 0/1612, RunningAvgSamplesPerSec=6.340552845058058, CurrSamplesPerSec=5.672900481980691, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:12:01,863] [INFO] [timer.py:197:stop] 0/1614, RunningAvgSamplesPerSec=6.340556605308443, CurrSamplesPerSec=5.707677104495976, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:12:13,214] [INFO] [timer.py:197:stop] 0/1616, RunningAvgSamplesPerSec=6.340529980463788, CurrSamplesPerSec=5.693129122625681, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:12:24,544] [INFO] [timer.py:197:stop] 0/1618, RunningAvgSamplesPerSec=6.340522357131446, CurrSamplesPerSec=5.6907620942189405, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:12:35,862] [INFO] [logging.py:68:log_dist] [Rank 0] step=810, skipped=5, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-16 15:12:35,863] [INFO] [timer.py:197:stop] 0/1620, RunningAvgSamplesPerSec=6.340536145377171, CurrSamplesPerSec=5.7158288937529, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:12:47,196] [INFO] [timer.py:197:stop] 0/1622, RunningAvgSamplesPerSec=6.3405388983082975, CurrSamplesPerSec=5.697895699489032, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:12:58,558] [INFO] [timer.py:197:stop] 0/1624, RunningAvgSamplesPerSec=6.3405193550861245, CurrSamplesPerSec=5.697626487899726, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:13:09,878] [INFO] [timer.py:197:stop] 0/1626, RunningAvgSamplesPerSec=6.340530865823653, CurrSamplesPerSec=5.716477426708531, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:13:21,183] [INFO] [timer.py:197:stop] 0/1628, RunningAvgSamplesPerSec=6.340541185528216, CurrSamplesPerSec=5.716781538069418, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:13:32,508] [INFO] [timer.py:197:stop] 0/1630, RunningAvgSamplesPerSec=6.340549925763438, CurrSamplesPerSec=5.704277626137134, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:13:43,844] [INFO] [timer.py:197:stop] 0/1632, RunningAvgSamplesPerSec=6.34055012552681, CurrSamplesPerSec=5.70801232308509, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:13:55,123] [INFO] [timer.py:197:stop] 0/1634, RunningAvgSamplesPerSec=6.3405811966256245, CurrSamplesPerSec=5.737563784561477, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:14:06,435] [INFO] [timer.py:197:stop] 0/1636, RunningAvgSamplesPerSec=6.340600660492391, CurrSamplesPerSec=5.7175949331930624, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:14:17,751] [INFO] [timer.py:197:stop] 0/1638, RunningAvgSamplesPerSec=6.340616583305436, CurrSamplesPerSec=5.731508568669221, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:14:29,026] [INFO] [logging.py:68:log_dist] [Rank 0] step=820, skipped=5, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-16 15:14:29,028] [INFO] [timer.py:197:stop] 0/1640, RunningAvgSamplesPerSec=6.340639941750262, CurrSamplesPerSec=5.7271948644686015, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:14:40,485] [INFO] [timer.py:197:stop] 0/1642, RunningAvgSamplesPerSec=6.340665258371482, CurrSamplesPerSec=5.7339821304526515, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:14:51,789] [INFO] [timer.py:197:stop] 0/1644, RunningAvgSamplesPerSec=6.340689499225285, CurrSamplesPerSec=5.729058170773215, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:15:03,134] [INFO] [timer.py:197:stop] 0/1646, RunningAvgSamplesPerSec=6.3406821277773915, CurrSamplesPerSec=5.714029644744564, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:15:14,464] [INFO] [timer.py:197:stop] 0/1648, RunningAvgSamplesPerSec=6.3406864146534705, CurrSamplesPerSec=5.720165223602495, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:15:25,781] [INFO] [timer.py:197:stop] 0/1650, RunningAvgSamplesPerSec=6.3407000988458115, CurrSamplesPerSec=5.725634404589424, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -{'loss': 0.0192, 'learning_rate': 1e-05, 'epoch': 3.5} -[2022-12-16 15:15:37,132] [INFO] [timer.py:197:stop] 0/1652, RunningAvgSamplesPerSec=6.340688070776378, CurrSamplesPerSec=5.681139525002643, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:15:48,450] [INFO] [timer.py:197:stop] 0/1654, RunningAvgSamplesPerSec=6.3407015706076875, CurrSamplesPerSec=5.712932255317997, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:15:59,771] [INFO] [timer.py:197:stop] 0/1656, RunningAvgSamplesPerSec=6.34071232163291, CurrSamplesPerSec=5.690258334978994, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:16:11,095] [INFO] [timer.py:197:stop] 0/1658, RunningAvgSamplesPerSec=6.340721311658182, CurrSamplesPerSec=5.715881715444265, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:16:22,381] [INFO] [logging.py:68:log_dist] [Rank 0] step=830, skipped=5, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-16 15:16:22,383] [INFO] [timer.py:197:stop] 0/1660, RunningAvgSamplesPerSec=6.340756882047056, CurrSamplesPerSec=5.73620530816962, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:16:33,702] [INFO] [timer.py:197:stop] 0/1662, RunningAvgSamplesPerSec=6.340768352793115, CurrSamplesPerSec=5.712284526465004, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:16:44,994] [INFO] [timer.py:197:stop] 0/1664, RunningAvgSamplesPerSec=6.340788107983439, CurrSamplesPerSec=5.718592754145091, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:16:56,373] [INFO] [timer.py:197:stop] 0/1666, RunningAvgSamplesPerSec=6.340797551500921, CurrSamplesPerSec=5.708101171107673, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:17:07,706] [INFO] [timer.py:197:stop] 0/1668, RunningAvgSamplesPerSec=6.340805651338497, CurrSamplesPerSec=5.721101024112321, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:17:19,005] [INFO] [timer.py:197:stop] 0/1670, RunningAvgSamplesPerSec=6.34083286597194, CurrSamplesPerSec=5.724742536840911, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:17:30,358] [INFO] [timer.py:197:stop] 0/1672, RunningAvgSamplesPerSec=6.340835867117041, CurrSamplesPerSec=5.718272858087763, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:17:41,674] [INFO] [timer.py:197:stop] 0/1674, RunningAvgSamplesPerSec=6.34085070644047, CurrSamplesPerSec=5.728282094031116, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:17:52,982] [INFO] [timer.py:197:stop] 0/1676, RunningAvgSamplesPerSec=6.340870906975045, CurrSamplesPerSec=5.727989958307704, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:18:04,315] [INFO] [timer.py:197:stop] 0/1678, RunningAvgSamplesPerSec=6.340874137995883, CurrSamplesPerSec=5.6954224738759125, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:18:15,671] [INFO] [logging.py:68:log_dist] [Rank 0] step=840, skipped=5, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-16 15:18:15,673] [INFO] [timer.py:197:stop] 0/1680, RunningAvgSamplesPerSec=6.340857783353727, CurrSamplesPerSec=5.6949985974417725, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:18:27,057] [INFO] [timer.py:197:stop] 0/1682, RunningAvgSamplesPerSec=6.340822690011218, CurrSamplesPerSec=5.6497331758688745, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:18:38,349] [INFO] [timer.py:197:stop] 0/1684, RunningAvgSamplesPerSec=6.340832385934641, CurrSamplesPerSec=5.7044698818484925, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:18:49,705] [INFO] [timer.py:197:stop] 0/1686, RunningAvgSamplesPerSec=6.340817948806496, CurrSamplesPerSec=5.689992498002407, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:19:01,049] [INFO] [timer.py:197:stop] 0/1688, RunningAvgSamplesPerSec=6.340812410463375, CurrSamplesPerSec=5.711610451189968, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:19:12,368] [INFO] [timer.py:197:stop] 0/1690, RunningAvgSamplesPerSec=6.340822290105834, CurrSamplesPerSec=5.712652381474077, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:19:23,713] [INFO] [timer.py:197:stop] 0/1692, RunningAvgSamplesPerSec=6.340802834576483, CurrSamplesPerSec=5.702006208078319, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:19:35,035] [INFO] [timer.py:197:stop] 0/1694, RunningAvgSamplesPerSec=6.340800674912201, CurrSamplesPerSec=5.70051779093254, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:19:46,368] [INFO] [timer.py:197:stop] 0/1696, RunningAvgSamplesPerSec=6.340802979386393, CurrSamplesPerSec=5.71140458973723, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:19:57,706] [INFO] [timer.py:197:stop] 0/1698, RunningAvgSamplesPerSec=6.340799458449714, CurrSamplesPerSec=5.697038084295858, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:20:09,092] [INFO] [logging.py:68:log_dist] [Rank 0] step=850, skipped=5, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-16 15:20:09,094] [INFO] [timer.py:197:stop] 0/1700, RunningAvgSamplesPerSec=6.340776519732791, CurrSamplesPerSec=5.693090968073443, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -{'loss': 0.0172, 'learning_rate': 1e-05, 'epoch': 3.6} -[2022-12-16 15:20:20,415] [INFO] [timer.py:197:stop] 0/1702, RunningAvgSamplesPerSec=6.340774932475345, CurrSamplesPerSec=5.696837624348786, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:20:31,736] [INFO] [timer.py:197:stop] 0/1704, RunningAvgSamplesPerSec=6.340782842982672, CurrSamplesPerSec=5.710392511677529, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:20:43,066] [INFO] [timer.py:197:stop] 0/1706, RunningAvgSamplesPerSec=6.340784748235628, CurrSamplesPerSec=5.723442353636006, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:20:54,378] [INFO] [timer.py:197:stop] 0/1708, RunningAvgSamplesPerSec=6.340788744151501, CurrSamplesPerSec=5.710971284076315, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:21:05,705] [INFO] [timer.py:197:stop] 0/1710, RunningAvgSamplesPerSec=6.340796318366405, CurrSamplesPerSec=5.712790734447396, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:21:17,043] [INFO] [timer.py:197:stop] 0/1712, RunningAvgSamplesPerSec=6.340794253176509, CurrSamplesPerSec=5.705561839441021, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:21:28,393] [INFO] [timer.py:197:stop] 0/1714, RunningAvgSamplesPerSec=6.340780816552236, CurrSamplesPerSec=5.709888186057529, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:21:39,736] [INFO] [timer.py:197:stop] 0/1716, RunningAvgSamplesPerSec=6.340773996797511, CurrSamplesPerSec=5.692864949062375, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:21:51,084] [INFO] [timer.py:197:stop] 0/1718, RunningAvgSamplesPerSec=6.3407637629341504, CurrSamplesPerSec=5.68697932560317, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:22:02,431] [INFO] [logging.py:68:log_dist] [Rank 0] step=860, skipped=5, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-16 15:22:02,432] [INFO] [timer.py:197:stop] 0/1720, RunningAvgSamplesPerSec=6.340755202209772, CurrSamplesPerSec=5.697098055754913, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:22:13,758] [INFO] [timer.py:197:stop] 0/1722, RunningAvgSamplesPerSec=6.340751182900368, CurrSamplesPerSec=5.707358185779483, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:22:25,078] [INFO] [timer.py:197:stop] 0/1724, RunningAvgSamplesPerSec=6.3407617876526725, CurrSamplesPerSec=5.698471699382909, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:22:36,432] [INFO] [timer.py:197:stop] 0/1726, RunningAvgSamplesPerSec=6.340748989367053, CurrSamplesPerSec=5.697477984936619, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:22:47,756] [INFO] [timer.py:197:stop] 0/1728, RunningAvgSamplesPerSec=6.340759082484556, CurrSamplesPerSec=5.723073351794558, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:22:59,083] [INFO] [timer.py:197:stop] 0/1730, RunningAvgSamplesPerSec=6.340763571016226, CurrSamplesPerSec=5.715076594904665, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:23:10,419] [INFO] [timer.py:197:stop] 0/1732, RunningAvgSamplesPerSec=6.340760591870521, CurrSamplesPerSec=5.720671366374499, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:23:21,765] [INFO] [timer.py:197:stop] 0/1734, RunningAvgSamplesPerSec=6.3407506244159615, CurrSamplesPerSec=5.702110615341308, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:23:33,078] [INFO] [timer.py:197:stop] 0/1736, RunningAvgSamplesPerSec=6.340764084573826, CurrSamplesPerSec=5.722994042135287, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:23:44,427] [INFO] [timer.py:197:stop] 0/1738, RunningAvgSamplesPerSec=6.340759095569804, CurrSamplesPerSec=5.703108612234952, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:23:55,739] [INFO] [logging.py:68:log_dist] [Rank 0] step=870, skipped=5, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-16 15:23:55,740] [INFO] [timer.py:197:stop] 0/1740, RunningAvgSamplesPerSec=6.340772651542185, CurrSamplesPerSec=5.708757664068845, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:24:07,134] [INFO] [timer.py:197:stop] 0/1742, RunningAvgSamplesPerSec=6.340728237008029, CurrSamplesPerSec=5.6599414049939645, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:24:18,462] [INFO] [timer.py:197:stop] 0/1744, RunningAvgSamplesPerSec=6.340720477023084, CurrSamplesPerSec=5.686679099443914, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:24:29,834] [INFO] [timer.py:197:stop] 0/1746, RunningAvgSamplesPerSec=6.340691354009204, CurrSamplesPerSec=5.671267617798262, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:24:41,199] [INFO] [timer.py:197:stop] 0/1748, RunningAvgSamplesPerSec=6.34066862116125, CurrSamplesPerSec=5.671456935833448, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:24:52,552] [INFO] [timer.py:197:stop] 0/1750, RunningAvgSamplesPerSec=6.340654665427781, CurrSamplesPerSec=5.697200106865696, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -{'loss': 0.0187, 'learning_rate': 1e-05, 'epoch': 3.71} -[2022-12-16 15:25:03,919] [INFO] [timer.py:197:stop] 0/1752, RunningAvgSamplesPerSec=6.340650056369047, CurrSamplesPerSec=5.699635424390096, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:25:15,329] [INFO] [timer.py:197:stop] 0/1754, RunningAvgSamplesPerSec=6.340586232602359, CurrSamplesPerSec=5.617304973847408, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:25:26,838] [INFO] [timer.py:197:stop] 0/1756, RunningAvgSamplesPerSec=6.34054369343057, CurrSamplesPerSec=5.63907597079815, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:25:38,202] [INFO] [timer.py:197:stop] 0/1758, RunningAvgSamplesPerSec=6.340520750611476, CurrSamplesPerSec=5.692025262129367, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:25:49,546] [INFO] [logging.py:68:log_dist] [Rank 0] step=880, skipped=5, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-16 15:25:49,548] [INFO] [timer.py:197:stop] 0/1760, RunningAvgSamplesPerSec=6.340509283801745, CurrSamplesPerSec=5.699267065755165, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:26:00,858] [INFO] [timer.py:197:stop] 0/1762, RunningAvgSamplesPerSec=6.340512797895522, CurrSamplesPerSec=5.690896252043385, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:26:12,227] [INFO] [timer.py:197:stop] 0/1764, RunningAvgSamplesPerSec=6.340485387877073, CurrSamplesPerSec=5.685304873174933, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:26:23,560] [INFO] [timer.py:197:stop] 0/1766, RunningAvgSamplesPerSec=6.340484955568013, CurrSamplesPerSec=5.709429366012479, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:26:34,873] [INFO] [timer.py:197:stop] 0/1768, RunningAvgSamplesPerSec=6.3404757888004575, CurrSamplesPerSec=5.709597923410203, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:26:46,218] [INFO] [timer.py:197:stop] 0/1770, RunningAvgSamplesPerSec=6.340500276138529, CurrSamplesPerSec=5.719488556056539, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:26:57,528] [INFO] [timer.py:197:stop] 0/1772, RunningAvgSamplesPerSec=6.340494914187493, CurrSamplesPerSec=5.700560403199792, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:27:08,873] [INFO] [timer.py:197:stop] 0/1774, RunningAvgSamplesPerSec=6.340475694361281, CurrSamplesPerSec=5.6971311856910365, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:27:20,208] [INFO] [timer.py:197:stop] 0/1776, RunningAvgSamplesPerSec=6.340464658491628, CurrSamplesPerSec=5.688927469397443, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:27:31,527] [INFO] [timer.py:197:stop] 0/1778, RunningAvgSamplesPerSec=6.340466106938646, CurrSamplesPerSec=5.696105062026932, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:27:42,858] [INFO] [logging.py:68:log_dist] [Rank 0] step=890, skipped=5, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-16 15:27:42,859] [INFO] [timer.py:197:stop] 0/1780, RunningAvgSamplesPerSec=6.340468950864724, CurrSamplesPerSec=5.7109836772115425, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:27:54,179] [INFO] [timer.py:197:stop] 0/1782, RunningAvgSamplesPerSec=6.34048081034785, CurrSamplesPerSec=5.718655129518526, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:28:05,546] [INFO] [timer.py:197:stop] 0/1784, RunningAvgSamplesPerSec=6.3404639178617, CurrSamplesPerSec=5.689838603736941, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:28:16,891] [INFO] [timer.py:197:stop] 0/1786, RunningAvgSamplesPerSec=6.340447917585286, CurrSamplesPerSec=5.701282729620904, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:28:28,169] [INFO] [timer.py:197:stop] 0/1788, RunningAvgSamplesPerSec=6.340477045430172, CurrSamplesPerSec=5.719102274082581, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:28:39,479] [INFO] [timer.py:197:stop] 0/1790, RunningAvgSamplesPerSec=6.340496164823028, CurrSamplesPerSec=5.7324916652807145, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:28:50,816] [INFO] [timer.py:197:stop] 0/1792, RunningAvgSamplesPerSec=6.340487880983273, CurrSamplesPerSec=5.698580332321764, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:29:02,190] [INFO] [timer.py:197:stop] 0/1794, RunningAvgSamplesPerSec=6.340450777396085, CurrSamplesPerSec=5.665844833683194, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:29:13,517] [INFO] [timer.py:197:stop] 0/1796, RunningAvgSamplesPerSec=6.340458415597113, CurrSamplesPerSec=5.732360435673601, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:29:24,826] [INFO] [timer.py:197:stop] 0/1798, RunningAvgSamplesPerSec=6.340466973403078, CurrSamplesPerSec=5.70002295421367, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:29:36,125] [INFO] [logging.py:68:log_dist] [Rank 0] step=900, skipped=5, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-16 15:29:36,127] [INFO] [timer.py:197:stop] 0/1800, RunningAvgSamplesPerSec=6.340469889682115, CurrSamplesPerSec=5.709452681716447, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -{'loss': 0.0204, 'learning_rate': 1e-05, 'epoch': 3.81} -[2022-12-16 15:29:47,449] [INFO] [timer.py:197:stop] 0/1802, RunningAvgSamplesPerSec=6.340470448543859, CurrSamplesPerSec=5.705118749623552, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:29:58,796] [INFO] [timer.py:197:stop] 0/1804, RunningAvgSamplesPerSec=6.340465008023673, CurrSamplesPerSec=5.704326113114681, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:30:10,126] [INFO] [timer.py:197:stop] 0/1806, RunningAvgSamplesPerSec=6.340469993364399, CurrSamplesPerSec=5.716778859605554, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:30:21,486] [INFO] [timer.py:197:stop] 0/1808, RunningAvgSamplesPerSec=6.340455654310352, CurrSamplesPerSec=5.696311030807412, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:30:32,902] [INFO] [timer.py:197:stop] 0/1810, RunningAvgSamplesPerSec=6.340440008637558, CurrSamplesPerSec=5.69354861493453, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:30:44,244] [INFO] [timer.py:197:stop] 0/1812, RunningAvgSamplesPerSec=6.3404269337809795, CurrSamplesPerSec=5.687951302311593, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:30:55,551] [INFO] [timer.py:197:stop] 0/1814, RunningAvgSamplesPerSec=6.340436811922203, CurrSamplesPerSec=5.7140992186554405, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:31:06,943] [INFO] [timer.py:197:stop] 0/1816, RunningAvgSamplesPerSec=6.3403996757853776, CurrSamplesPerSec=5.67528506823798, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:31:18,308] [INFO] [timer.py:197:stop] 0/1818, RunningAvgSamplesPerSec=6.340382725721073, CurrSamplesPerSec=5.690074996474909, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:31:29,646] [INFO] [logging.py:68:log_dist] [Rank 0] step=910, skipped=5, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-16 15:31:29,648] [INFO] [timer.py:197:stop] 0/1820, RunningAvgSamplesPerSec=6.340380453070602, CurrSamplesPerSec=5.697219453460077, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:31:40,982] [INFO] [timer.py:197:stop] 0/1822, RunningAvgSamplesPerSec=6.340372173208823, CurrSamplesPerSec=5.696034958680131, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:31:52,366] [INFO] [timer.py:197:stop] 0/1824, RunningAvgSamplesPerSec=6.340341872616558, CurrSamplesPerSec=5.65803381050704, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:32:03,713] [INFO] [timer.py:197:stop] 0/1826, RunningAvgSamplesPerSec=6.340335362702489, CurrSamplesPerSec=5.68163012630602, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:32:15,088] [INFO] [timer.py:197:stop] 0/1828, RunningAvgSamplesPerSec=6.340311460005998, CurrSamplesPerSec=5.672971455704383, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:32:26,454] [INFO] [timer.py:197:stop] 0/1830, RunningAvgSamplesPerSec=6.340292625240316, CurrSamplesPerSec=5.683508180460664, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:32:37,789] [INFO] [timer.py:197:stop] 0/1832, RunningAvgSamplesPerSec=6.340283810735743, CurrSamplesPerSec=5.711948562823965, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:32:49,111] [INFO] [timer.py:197:stop] 0/1834, RunningAvgSamplesPerSec=6.340296033180755, CurrSamplesPerSec=5.712358677165236, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:33:00,434] [INFO] [timer.py:197:stop] 0/1836, RunningAvgSamplesPerSec=6.340306904575268, CurrSamplesPerSec=5.722238392350369, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:33:11,791] [INFO] [timer.py:197:stop] 0/1838, RunningAvgSamplesPerSec=6.340302653827454, CurrSamplesPerSec=5.7009608927935735, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:33:23,121] [INFO] [logging.py:68:log_dist] [Rank 0] step=920, skipped=5, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-16 15:33:23,123] [INFO] [timer.py:197:stop] 0/1840, RunningAvgSamplesPerSec=6.340307186096981, CurrSamplesPerSec=5.721817345090641, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:33:34,450] [INFO] [timer.py:197:stop] 0/1842, RunningAvgSamplesPerSec=6.340310574451815, CurrSamplesPerSec=5.707220095809952, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:33:45,752] [INFO] [timer.py:197:stop] 0/1844, RunningAvgSamplesPerSec=6.340314647155094, CurrSamplesPerSec=5.702791899977914, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:33:57,050] [INFO] [timer.py:197:stop] 0/1846, RunningAvgSamplesPerSec=6.340344920432395, CurrSamplesPerSec=5.73337958161255, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:34:08,381] [INFO] [timer.py:197:stop] 0/1848, RunningAvgSamplesPerSec=6.340351678136677, CurrSamplesPerSec=5.699651398969188, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:34:19,739] [INFO] [timer.py:197:stop] 0/1850, RunningAvgSamplesPerSec=6.340329804278119, CurrSamplesPerSec=5.6642150719709745, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -{'loss': 0.0219, 'learning_rate': 1e-05, 'epoch': 3.92} -[2022-12-16 15:34:31,332] [INFO] [timer.py:197:stop] 0/1852, RunningAvgSamplesPerSec=6.340320173759496, CurrSamplesPerSec=5.685434679637573, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:34:42,641] [INFO] [timer.py:197:stop] 0/1854, RunningAvgSamplesPerSec=6.340341633522805, CurrSamplesPerSec=5.720974461013235, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:34:53,975] [INFO] [timer.py:197:stop] 0/1856, RunningAvgSamplesPerSec=6.340345032511093, CurrSamplesPerSec=5.698232915031294, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:35:05,293] [INFO] [timer.py:197:stop] 0/1858, RunningAvgSamplesPerSec=6.34036156019452, CurrSamplesPerSec=5.730119198060043, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:35:16,627] [INFO] [logging.py:68:log_dist] [Rank 0] step=930, skipped=5, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-16 15:35:16,628] [INFO] [timer.py:197:stop] 0/1860, RunningAvgSamplesPerSec=6.340354586223302, CurrSamplesPerSec=5.699637844775112, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:35:28,009] [INFO] [timer.py:197:stop] 0/1862, RunningAvgSamplesPerSec=6.340331703176067, CurrSamplesPerSec=5.7007633044378485, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:35:39,364] [INFO] [timer.py:197:stop] 0/1864, RunningAvgSamplesPerSec=6.340320629171417, CurrSamplesPerSec=5.700264551624048, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:35:50,747] [INFO] [timer.py:197:stop] 0/1866, RunningAvgSamplesPerSec=6.340291769112233, CurrSamplesPerSec=5.670955150866777, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:36:02,205] [INFO] [timer.py:197:stop] 0/1868, RunningAvgSamplesPerSec=6.3402612229421065, CurrSamplesPerSec=5.660783350621326, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:36:13,563] [INFO] [timer.py:197:stop] 0/1870, RunningAvgSamplesPerSec=6.3402478378844656, CurrSamplesPerSec=5.695013579455546, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:36:24,928] [INFO] [timer.py:197:stop] 0/1872, RunningAvgSamplesPerSec=6.3402215069873495, CurrSamplesPerSec=5.676158230558184, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:36:36,261] [INFO] [timer.py:197:stop] 0/1874, RunningAvgSamplesPerSec=6.340214184418767, CurrSamplesPerSec=5.703797891921867, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:36:47,636] [INFO] [timer.py:197:stop] 0/1876, RunningAvgSamplesPerSec=6.34017937906379, CurrSamplesPerSec=5.6780811906590385, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:36:58,980] [INFO] [timer.py:197:stop] 0/1878, RunningAvgSamplesPerSec=6.340165382597096, CurrSamplesPerSec=5.690802871737469, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:37:10,344] [INFO] [logging.py:68:log_dist] [Rank 0] step=940, skipped=5, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-16 15:37:10,346] [INFO] [timer.py:197:stop] 0/1880, RunningAvgSamplesPerSec=6.340154199169903, CurrSamplesPerSec=5.695819099556473, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:37:21,776] [INFO] [timer.py:197:stop] 0/1882, RunningAvgSamplesPerSec=6.340082275520113, CurrSamplesPerSec=5.59697386324921, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:37:33,153] [INFO] [timer.py:197:stop] 0/1884, RunningAvgSamplesPerSec=6.340077121816984, CurrSamplesPerSec=5.682787222717036, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:37:44,559] [INFO] [timer.py:197:stop] 0/1886, RunningAvgSamplesPerSec=6.340016406206763, CurrSamplesPerSec=5.617691028794858, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:37:53,121] [INFO] [timer.py:197:stop] 0/1888, RunningAvgSamplesPerSec=6.341654344181224, CurrSamplesPerSec=10.16626217811499, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:38:04,512] [INFO] [timer.py:197:stop] 0/1890, RunningAvgSamplesPerSec=6.341630658368521, CurrSamplesPerSec=5.689458727983866, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:38:15,890] [INFO] [timer.py:197:stop] 0/1892, RunningAvgSamplesPerSec=6.341630694542799, CurrSamplesPerSec=5.717106624821156, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:38:27,291] [INFO] [timer.py:197:stop] 0/1894, RunningAvgSamplesPerSec=6.341593704282419, CurrSamplesPerSec=5.663913659259904, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:38:38,668] [INFO] [timer.py:197:stop] 0/1896, RunningAvgSamplesPerSec=6.3415848072494505, CurrSamplesPerSec=5.700920696013412, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:38:50,040] [INFO] [timer.py:197:stop] 0/1898, RunningAvgSamplesPerSec=6.341580824069318, CurrSamplesPerSec=5.683621538588577, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:39:01,468] [INFO] [logging.py:68:log_dist] [Rank 0] step=950, skipped=5, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-16 15:39:01,470] [INFO] [timer.py:197:stop] 0/1900, RunningAvgSamplesPerSec=6.34155899470158, CurrSamplesPerSec=5.697557556408557, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -{'loss': 0.0187, 'learning_rate': 1e-05, 'epoch': 4.03} -[2022-12-16 15:39:12,952] [INFO] [timer.py:197:stop] 0/1902, RunningAvgSamplesPerSec=6.34151319605015, CurrSamplesPerSec=5.6264130146308915, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:39:24,327] [INFO] [timer.py:197:stop] 0/1904, RunningAvgSamplesPerSec=6.341484615721599, CurrSamplesPerSec=5.678025462199816, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:39:35,656] [INFO] [timer.py:197:stop] 0/1906, RunningAvgSamplesPerSec=6.341489394394688, CurrSamplesPerSec=5.706058123533021, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:39:46,994] [INFO] [timer.py:197:stop] 0/1908, RunningAvgSamplesPerSec=6.341491713254405, CurrSamplesPerSec=5.715185861867555, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:39:58,386] [INFO] [timer.py:197:stop] 0/1910, RunningAvgSamplesPerSec=6.341472882637041, CurrSamplesPerSec=5.677095294757984, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:40:09,784] [INFO] [timer.py:197:stop] 0/1912, RunningAvgSamplesPerSec=6.3414513606525675, CurrSamplesPerSec=5.6910752998571565, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:40:21,180] [INFO] [timer.py:197:stop] 0/1914, RunningAvgSamplesPerSec=6.341433961090274, CurrSamplesPerSec=5.682636123920992, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:40:32,536] [INFO] [timer.py:197:stop] 0/1916, RunningAvgSamplesPerSec=6.341419848976018, CurrSamplesPerSec=5.69269279063943, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:40:43,871] [INFO] [timer.py:197:stop] 0/1918, RunningAvgSamplesPerSec=6.341421151163785, CurrSamplesPerSec=5.706983975017767, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:40:55,185] [INFO] [logging.py:68:log_dist] [Rank 0] step=960, skipped=5, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-16 15:40:55,187] [INFO] [timer.py:197:stop] 0/1920, RunningAvgSamplesPerSec=6.341423927935272, CurrSamplesPerSec=5.699510051257391, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:41:06,555] [INFO] [timer.py:197:stop] 0/1922, RunningAvgSamplesPerSec=6.341407820716116, CurrSamplesPerSec=5.692106129762454, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:41:17,882] [INFO] [timer.py:197:stop] 0/1924, RunningAvgSamplesPerSec=6.34140485881814, CurrSamplesPerSec=5.680903393209991, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:41:29,236] [INFO] [timer.py:197:stop] 0/1926, RunningAvgSamplesPerSec=6.341392016956714, CurrSamplesPerSec=5.700544181409319, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:41:40,560] [INFO] [timer.py:197:stop] 0/1928, RunningAvgSamplesPerSec=6.341396073096842, CurrSamplesPerSec=5.723259067011961, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:41:51,832] [INFO] [timer.py:197:stop] 0/1930, RunningAvgSamplesPerSec=6.341426372801827, CurrSamplesPerSec=5.72995089457395, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:42:03,168] [INFO] [timer.py:197:stop] 0/1932, RunningAvgSamplesPerSec=6.341441799114161, CurrSamplesPerSec=5.716127823844544, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:42:14,459] [INFO] [timer.py:197:stop] 0/1934, RunningAvgSamplesPerSec=6.341471385915023, CurrSamplesPerSec=5.733927748894998, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:42:25,807] [INFO] [timer.py:197:stop] 0/1936, RunningAvgSamplesPerSec=6.341461312162032, CurrSamplesPerSec=5.716151924619451, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:42:37,153] [INFO] [timer.py:197:stop] 0/1938, RunningAvgSamplesPerSec=6.341441825644964, CurrSamplesPerSec=5.688323262262418, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:42:48,497] [INFO] [logging.py:68:log_dist] [Rank 0] step=970, skipped=5, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-16 15:42:48,499] [INFO] [timer.py:197:stop] 0/1940, RunningAvgSamplesPerSec=6.341433751250798, CurrSamplesPerSec=5.708959206622437, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:42:59,850] [INFO] [timer.py:197:stop] 0/1942, RunningAvgSamplesPerSec=6.341421168411483, CurrSamplesPerSec=5.700994309825445, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:43:11,167] [INFO] [timer.py:197:stop] 0/1944, RunningAvgSamplesPerSec=6.341437433207248, CurrSamplesPerSec=5.729389057586628, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:43:22,482] [INFO] [timer.py:197:stop] 0/1946, RunningAvgSamplesPerSec=6.341449750940114, CurrSamplesPerSec=5.731650284155366, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:43:33,841] [INFO] [timer.py:197:stop] 0/1948, RunningAvgSamplesPerSec=6.34143681656222, CurrSamplesPerSec=5.707239753200862, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:43:45,211] [INFO] [timer.py:197:stop] 0/1950, RunningAvgSamplesPerSec=6.341424387903559, CurrSamplesPerSec=5.698836567618656, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -{'loss': 0.0111, 'learning_rate': 1e-05, 'epoch': 4.13} -[2022-12-16 15:43:56,545] [INFO] [timer.py:197:stop] 0/1952, RunningAvgSamplesPerSec=6.341417935741718, CurrSamplesPerSec=5.69639999845513, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:44:07,909] [INFO] [timer.py:197:stop] 0/1954, RunningAvgSamplesPerSec=6.34139789710389, CurrSamplesPerSec=5.689112180831886, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:44:19,241] [INFO] [timer.py:197:stop] 0/1956, RunningAvgSamplesPerSec=6.3414133504784935, CurrSamplesPerSec=5.720193015245668, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:44:30,531] [INFO] [timer.py:197:stop] 0/1958, RunningAvgSamplesPerSec=6.341444321124853, CurrSamplesPerSec=5.728681842652188, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:44:41,837] [INFO] [logging.py:68:log_dist] [Rank 0] step=980, skipped=5, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-16 15:44:41,839] [INFO] [timer.py:197:stop] 0/1960, RunningAvgSamplesPerSec=6.341451451493984, CurrSamplesPerSec=5.710415106384128, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:44:53,144] [INFO] [timer.py:197:stop] 0/1962, RunningAvgSamplesPerSec=6.341471134508416, CurrSamplesPerSec=5.711562326404418, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:45:04,445] [INFO] [timer.py:197:stop] 0/1964, RunningAvgSamplesPerSec=6.341493080264879, CurrSamplesPerSec=5.729100721703047, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:45:15,779] [INFO] [timer.py:197:stop] 0/1966, RunningAvgSamplesPerSec=6.34148308742656, CurrSamplesPerSec=5.688469600735318, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:45:27,069] [INFO] [timer.py:197:stop] 0/1968, RunningAvgSamplesPerSec=6.341511108059361, CurrSamplesPerSec=5.731986611130398, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:45:38,375] [INFO] [timer.py:197:stop] 0/1970, RunningAvgSamplesPerSec=6.3415193795566775, CurrSamplesPerSec=5.733448892776753, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:45:49,691] [INFO] [timer.py:197:stop] 0/1972, RunningAvgSamplesPerSec=6.341533700701917, CurrSamplesPerSec=5.739973842519514, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:46:00,986] [INFO] [timer.py:197:stop] 0/1974, RunningAvgSamplesPerSec=6.3415605832917, CurrSamplesPerSec=5.722849338175083, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:46:12,325] [INFO] [timer.py:197:stop] 0/1976, RunningAvgSamplesPerSec=6.341561196651521, CurrSamplesPerSec=5.719616515849108, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:46:23,642] [INFO] [timer.py:197:stop] 0/1978, RunningAvgSamplesPerSec=6.341577111524756, CurrSamplesPerSec=5.718702155602035, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:46:35,049] [INFO] [logging.py:68:log_dist] [Rank 0] step=990, skipped=5, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-16 15:46:35,051] [INFO] [timer.py:197:stop] 0/1980, RunningAvgSamplesPerSec=6.341536046420819, CurrSamplesPerSec=5.6339966705043025, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:46:46,373] [INFO] [timer.py:197:stop] 0/1982, RunningAvgSamplesPerSec=6.341540306208283, CurrSamplesPerSec=5.709136721549093, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:46:57,772] [INFO] [timer.py:197:stop] 0/1984, RunningAvgSamplesPerSec=6.341494762804846, CurrSamplesPerSec=5.629541542374531, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:47:09,086] [INFO] [timer.py:197:stop] 0/1986, RunningAvgSamplesPerSec=6.3414999295135335, CurrSamplesPerSec=5.7012602071255065, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:47:20,422] [INFO] [timer.py:197:stop] 0/1988, RunningAvgSamplesPerSec=6.341500411199719, CurrSamplesPerSec=5.715253517003507, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:47:31,766] [INFO] [timer.py:197:stop] 0/1990, RunningAvgSamplesPerSec=6.341498268714996, CurrSamplesPerSec=5.716839977905756, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:47:43,079] [INFO] [timer.py:197:stop] 0/1992, RunningAvgSamplesPerSec=6.341516519800052, CurrSamplesPerSec=5.747599131276496, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:47:54,350] [INFO] [timer.py:197:stop] 0/1994, RunningAvgSamplesPerSec=6.341548967038678, CurrSamplesPerSec=5.74344624333017, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:48:05,695] [INFO] [timer.py:197:stop] 0/1996, RunningAvgSamplesPerSec=6.341542401618161, CurrSamplesPerSec=5.699316435634214, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:48:16,977] [INFO] [timer.py:197:stop] 0/1998, RunningAvgSamplesPerSec=6.341574552805016, CurrSamplesPerSec=5.7254710049596, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -[2022-12-16 15:48:28,278] [INFO] [logging.py:68:log_dist] [Rank 0] step=1000, skipped=5, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-16 15:48:28,280] [INFO] [timer.py:197:stop] 0/2000, RunningAvgSamplesPerSec=6.341592336361306, CurrSamplesPerSec=5.702190558521644, MemAllocated=3.0GB, MaxMemAllocated=19.53GB -{'loss': 0.012, 'learning_rate': 1e-05, 'epoch': 4.24} -{'eval_loss': 0.167724609375, 'eval_wer': 10.242905287195448, 'eval_runtime': 2152.7201, 'eval_samples_per_second': 3.583, 'eval_steps_per_second': 0.448, 'epoch': 4.24} -[2022-12-16 16:24:24,591] [INFO] [logging.py:68:log_dist] [Rank 0] [Torch] Checkpoint global_step1000 is begin to save! -[2022-12-16 16:24:24,601] [INFO] [logging.py:68:log_dist] [Rank 0] Saving model checkpoint: ./checkpoint-1000/global_step1000/mp_rank_00_model_states.pt -[2022-12-16 16:24:24,601] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving ./checkpoint-1000/global_step1000/mp_rank_00_model_states.pt... -[2022-12-16 16:24:28,419] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved ./checkpoint-1000/global_step1000/mp_rank_00_model_states.pt. -[2022-12-16 16:24:28,420] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving ./checkpoint-1000/global_step1000/zero_pp_rank_0_mp_rank_00_optim_states.pt... -[2022-12-16 16:24:43,492] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved ./checkpoint-1000/global_step1000/zero_pp_rank_0_mp_rank_00_optim_states.pt. -[2022-12-16 16:24:43,493] [INFO] [engine.py:3269:_save_zero_checkpoint] zero checkpoint saved ./checkpoint-1000/global_step1000/zero_pp_rank_0_mp_rank_00_optim_states.pt -[2022-12-16 16:24:43,493] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +[2022-12-16 21:15:45,433] [INFO] [timer.py:197:stop] 0/1002, RunningAvgSamplesPerSec=6.325084596844273, CurrSamplesPerSec=5.713662342040977, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:15:56,836] [INFO] [timer.py:197:stop] 0/1004, RunningAvgSamplesPerSec=6.3251158291307386, CurrSamplesPerSec=5.713022229276716, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:16:08,258] [INFO] [timer.py:197:stop] 0/1006, RunningAvgSamplesPerSec=6.325055351476069, CurrSamplesPerSec=5.623066744820601, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:16:19,582] [INFO] [timer.py:197:stop] 0/1008, RunningAvgSamplesPerSec=6.325102609875333, CurrSamplesPerSec=5.707804050857386, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:16:30,974] [INFO] [timer.py:197:stop] 0/1010, RunningAvgSamplesPerSec=6.325160154746798, CurrSamplesPerSec=5.720153278154641, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:16:42,337] [INFO] [timer.py:197:stop] 0/1012, RunningAvgSamplesPerSec=6.32517427631391, CurrSamplesPerSec=5.688713354181847, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:16:53,934] [INFO] [timer.py:197:stop] 0/1014, RunningAvgSamplesPerSec=6.325238765075366, CurrSamplesPerSec=5.72578779867803, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:17:05,268] [INFO] [timer.py:197:stop] 0/1016, RunningAvgSamplesPerSec=6.325275504519772, CurrSamplesPerSec=5.704129988366727, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:17:16,885] [INFO] [timer.py:197:stop] 0/1018, RunningAvgSamplesPerSec=6.324959984011779, CurrSamplesPerSec=5.429061828725207, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:17:28,234] [INFO] [logging.py:68:log_dist] [Rank 0] step=510, skipped=5, lr=[9.991111111111112e-06], mom=[[0.9, 0.999]] +[2022-12-16 21:17:28,236] [INFO] [timer.py:197:stop] 0/1020, RunningAvgSamplesPerSec=6.3249907983214415, CurrSamplesPerSec=5.71215640831638, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:17:39,550] [INFO] [timer.py:197:stop] 0/1022, RunningAvgSamplesPerSec=6.325041232963377, CurrSamplesPerSec=5.714046186639779, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:17:51,113] [INFO] [timer.py:197:stop] 0/1024, RunningAvgSamplesPerSec=6.32477933640267, CurrSamplesPerSec=5.468682765885077, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:18:02,491] [INFO] [timer.py:197:stop] 0/1026, RunningAvgSamplesPerSec=6.324800913545075, CurrSamplesPerSec=5.684177323585509, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:18:13,861] [INFO] [timer.py:197:stop] 0/1028, RunningAvgSamplesPerSec=6.324787956456297, CurrSamplesPerSec=5.6712122626343895, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:18:25,298] [INFO] [timer.py:197:stop] 0/1030, RunningAvgSamplesPerSec=6.324702766601665, CurrSamplesPerSec=5.589238563916994, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:18:36,685] [INFO] [timer.py:197:stop] 0/1032, RunningAvgSamplesPerSec=6.324675894419773, CurrSamplesPerSec=5.675779219192053, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:18:48,075] [INFO] [timer.py:197:stop] 0/1034, RunningAvgSamplesPerSec=6.324696787274121, CurrSamplesPerSec=5.694624797553552, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:18:59,489] [INFO] [timer.py:197:stop] 0/1036, RunningAvgSamplesPerSec=6.324615631271562, CurrSamplesPerSec=5.6163487641731304, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:19:10,966] [INFO] [timer.py:197:stop] 0/1038, RunningAvgSamplesPerSec=6.32453266339884, CurrSamplesPerSec=5.621838704111991, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:19:22,518] [INFO] [logging.py:68:log_dist] [Rank 0] step=520, skipped=5, lr=[9.96888888888889e-06], mom=[[0.9, 0.999]] +[2022-12-16 21:19:22,519] [INFO] [timer.py:197:stop] 0/1040, RunningAvgSamplesPerSec=6.324513595083433, CurrSamplesPerSec=5.658441467170767, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:19:33,943] [INFO] [timer.py:197:stop] 0/1042, RunningAvgSamplesPerSec=6.324515006176603, CurrSamplesPerSec=5.7085328273627, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:19:45,299] [INFO] [timer.py:197:stop] 0/1044, RunningAvgSamplesPerSec=6.324501904454941, CurrSamplesPerSec=5.677590721690172, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:19:56,828] [INFO] [timer.py:197:stop] 0/1046, RunningAvgSamplesPerSec=6.324538332751554, CurrSamplesPerSec=5.715457708942437, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:20:08,266] [INFO] [timer.py:197:stop] 0/1048, RunningAvgSamplesPerSec=6.324571637922376, CurrSamplesPerSec=5.7061095518455724, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:20:19,629] [INFO] [timer.py:197:stop] 0/1050, RunningAvgSamplesPerSec=6.324574801815149, CurrSamplesPerSec=5.683213133585038, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0333, 'learning_rate': 9.957777777777779e-06, 'epoch': 2.22} +[2022-12-16 21:20:30,987] [INFO] [timer.py:197:stop] 0/1052, RunningAvgSamplesPerSec=6.324583864783749, CurrSamplesPerSec=5.699014179541857, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:20:42,360] [INFO] [timer.py:197:stop] 0/1054, RunningAvgSamplesPerSec=6.324593248570674, CurrSamplesPerSec=5.691380817152285, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:20:53,737] [INFO] [timer.py:197:stop] 0/1056, RunningAvgSamplesPerSec=6.32459481413206, CurrSamplesPerSec=5.6858901328649205, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:21:05,096] [INFO] [timer.py:197:stop] 0/1058, RunningAvgSamplesPerSec=6.324578587579753, CurrSamplesPerSec=5.666338298872446, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:21:16,463] [INFO] [logging.py:68:log_dist] [Rank 0] step=530, skipped=5, lr=[9.946666666666667e-06], mom=[[0.9, 0.999]] +[2022-12-16 21:21:16,465] [INFO] [timer.py:197:stop] 0/1060, RunningAvgSamplesPerSec=6.324570744039958, CurrSamplesPerSec=5.681243890929784, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:21:27,793] [INFO] [timer.py:197:stop] 0/1062, RunningAvgSamplesPerSec=6.324617494273643, CurrSamplesPerSec=5.72019130873348, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:21:39,228] [INFO] [timer.py:197:stop] 0/1064, RunningAvgSamplesPerSec=6.324530287745588, CurrSamplesPerSec=5.622556998055028, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:21:50,614] [INFO] [timer.py:197:stop] 0/1066, RunningAvgSamplesPerSec=6.324571372955135, CurrSamplesPerSec=5.71121672676978, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:22:02,217] [INFO] [timer.py:197:stop] 0/1068, RunningAvgSamplesPerSec=6.324523452438587, CurrSamplesPerSec=5.70700047614419, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:22:13,577] [INFO] [timer.py:197:stop] 0/1070, RunningAvgSamplesPerSec=6.324533424174116, CurrSamplesPerSec=5.6957316002942715, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:22:25,169] [INFO] [timer.py:197:stop] 0/1072, RunningAvgSamplesPerSec=6.324543786478323, CurrSamplesPerSec=5.690434930076675, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:22:36,671] [INFO] [timer.py:197:stop] 0/1074, RunningAvgSamplesPerSec=6.324471864626404, CurrSamplesPerSec=5.684128937809232, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:22:48,004] [INFO] [timer.py:197:stop] 0/1076, RunningAvgSamplesPerSec=6.324501261828952, CurrSamplesPerSec=5.704121018830548, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:22:59,356] [INFO] [timer.py:197:stop] 0/1078, RunningAvgSamplesPerSec=6.3244863543294665, CurrSamplesPerSec=5.678301472905768, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:23:10,753] [INFO] [logging.py:68:log_dist] [Rank 0] step=540, skipped=5, lr=[9.924444444444445e-06], mom=[[0.9, 0.999]] +[2022-12-16 21:23:10,755] [INFO] [timer.py:197:stop] 0/1080, RunningAvgSamplesPerSec=6.324440605807113, CurrSamplesPerSec=5.695234694147735, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:23:22,089] [INFO] [timer.py:197:stop] 0/1082, RunningAvgSamplesPerSec=6.324469069745208, CurrSamplesPerSec=5.685915906328817, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:23:33,429] [INFO] [timer.py:197:stop] 0/1084, RunningAvgSamplesPerSec=6.324481265006702, CurrSamplesPerSec=5.698475086538935, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:23:44,770] [INFO] [timer.py:197:stop] 0/1086, RunningAvgSamplesPerSec=6.3245014505218595, CurrSamplesPerSec=5.715240861964207, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:23:56,096] [INFO] [timer.py:197:stop] 0/1088, RunningAvgSamplesPerSec=6.324539515791824, CurrSamplesPerSec=5.726388508460027, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:24:07,404] [INFO] [timer.py:197:stop] 0/1090, RunningAvgSamplesPerSec=6.324577731653262, CurrSamplesPerSec=5.710771543316395, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:24:19,122] [INFO] [timer.py:197:stop] 0/1092, RunningAvgSamplesPerSec=6.324580708869267, CurrSamplesPerSec=5.683601562199607, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:24:30,678] [INFO] [timer.py:197:stop] 0/1094, RunningAvgSamplesPerSec=6.324609515934535, CurrSamplesPerSec=5.708574831147178, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:24:42,259] [INFO] [timer.py:197:stop] 0/1096, RunningAvgSamplesPerSec=6.32433704712248, CurrSamplesPerSec=5.450139213985013, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:24:53,756] [INFO] [timer.py:197:stop] 0/1098, RunningAvgSamplesPerSec=6.3243561501135, CurrSamplesPerSec=5.696628716087094, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:25:05,322] [INFO] [logging.py:68:log_dist] [Rank 0] step=550, skipped=5, lr=[9.902222222222223e-06], mom=[[0.9, 0.999]] +[2022-12-16 21:25:05,323] [INFO] [timer.py:197:stop] 0/1100, RunningAvgSamplesPerSec=6.324366355272438, CurrSamplesPerSec=5.685196504972429, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0356, 'learning_rate': 9.902222222222223e-06, 'epoch': 2.33} +[2022-12-16 21:25:16,754] [INFO] [timer.py:197:stop] 0/1102, RunningAvgSamplesPerSec=6.324272647895259, CurrSamplesPerSec=5.596251823753526, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:25:28,285] [INFO] [timer.py:197:stop] 0/1104, RunningAvgSamplesPerSec=6.3242917552540545, CurrSamplesPerSec=5.701883879606733, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:25:39,645] [INFO] [timer.py:197:stop] 0/1106, RunningAvgSamplesPerSec=6.324275104668984, CurrSamplesPerSec=5.686045740695403, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:25:51,097] [INFO] [timer.py:197:stop] 0/1108, RunningAvgSamplesPerSec=6.3241656983143075, CurrSamplesPerSec=5.577740987465537, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:26:02,481] [INFO] [timer.py:197:stop] 0/1110, RunningAvgSamplesPerSec=6.324114079876728, CurrSamplesPerSec=5.626380230354328, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:26:13,901] [INFO] [timer.py:197:stop] 0/1112, RunningAvgSamplesPerSec=6.324040600499698, CurrSamplesPerSec=5.613003653490731, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:26:25,478] [INFO] [timer.py:197:stop] 0/1114, RunningAvgSamplesPerSec=6.323775004653686, CurrSamplesPerSec=5.447233828195022, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:26:36,786] [INFO] [timer.py:197:stop] 0/1116, RunningAvgSamplesPerSec=6.3238317786318134, CurrSamplesPerSec=5.725627077052026, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:26:48,119] [INFO] [timer.py:197:stop] 0/1118, RunningAvgSamplesPerSec=6.323858000238239, CurrSamplesPerSec=5.6966565212796345, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:26:59,437] [INFO] [logging.py:68:log_dist] [Rank 0] step=560, skipped=5, lr=[9.88e-06], mom=[[0.9, 0.999]] +[2022-12-16 21:26:59,438] [INFO] [timer.py:197:stop] 0/1120, RunningAvgSamplesPerSec=6.323885458404519, CurrSamplesPerSec=5.6730858326379225, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:27:10,785] [INFO] [timer.py:197:stop] 0/1122, RunningAvgSamplesPerSec=6.323900620387515, CurrSamplesPerSec=5.690677163038752, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:27:22,351] [INFO] [timer.py:197:stop] 0/1124, RunningAvgSamplesPerSec=6.323915903130612, CurrSamplesPerSec=5.696965539863474, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:27:34,000] [INFO] [timer.py:197:stop] 0/1126, RunningAvgSamplesPerSec=6.323864250923531, CurrSamplesPerSec=5.662085075555017, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:27:45,372] [INFO] [timer.py:197:stop] 0/1128, RunningAvgSamplesPerSec=6.323853592054257, CurrSamplesPerSec=5.660074352251678, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:27:56,729] [INFO] [timer.py:197:stop] 0/1130, RunningAvgSamplesPerSec=6.323836945601488, CurrSamplesPerSec=5.681051033395681, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:28:08,190] [INFO] [timer.py:197:stop] 0/1132, RunningAvgSamplesPerSec=6.323840085148342, CurrSamplesPerSec=5.6790268195333224, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:28:19,494] [INFO] [timer.py:197:stop] 0/1134, RunningAvgSamplesPerSec=6.323901462456079, CurrSamplesPerSec=5.720255425533773, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:28:30,806] [INFO] [timer.py:197:stop] 0/1136, RunningAvgSamplesPerSec=6.323955250686382, CurrSamplesPerSec=5.715921149785817, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:28:42,142] [INFO] [timer.py:197:stop] 0/1138, RunningAvgSamplesPerSec=6.323982101549626, CurrSamplesPerSec=5.718288206433784, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:28:53,449] [INFO] [logging.py:68:log_dist] [Rank 0] step=570, skipped=5, lr=[9.857777777777778e-06], mom=[[0.9, 0.999]] +[2022-12-16 21:28:53,450] [INFO] [timer.py:197:stop] 0/1140, RunningAvgSamplesPerSec=6.324037655604661, CurrSamplesPerSec=5.70284278480875, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:29:04,783] [INFO] [timer.py:197:stop] 0/1142, RunningAvgSamplesPerSec=6.324054750381438, CurrSamplesPerSec=5.6922710102938465, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:29:16,157] [INFO] [timer.py:197:stop] 0/1144, RunningAvgSamplesPerSec=6.324025693628083, CurrSamplesPerSec=5.708841678873624, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:29:27,507] [INFO] [timer.py:197:stop] 0/1146, RunningAvgSamplesPerSec=6.324022208813268, CurrSamplesPerSec=5.691424016952267, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:29:38,855] [INFO] [timer.py:197:stop] 0/1148, RunningAvgSamplesPerSec=6.32402470004462, CurrSamplesPerSec=5.684709862270304, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:29:50,263] [INFO] [timer.py:197:stop] 0/1150, RunningAvgSamplesPerSec=6.324047398655615, CurrSamplesPerSec=5.70717083158569, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0348, 'learning_rate': 9.846666666666668e-06, 'epoch': 2.44} +[2022-12-16 21:30:01,599] [INFO] [timer.py:197:stop] 0/1152, RunningAvgSamplesPerSec=6.324083612239293, CurrSamplesPerSec=5.733073456941207, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:30:13,060] [INFO] [timer.py:197:stop] 0/1154, RunningAvgSamplesPerSec=6.32398511164039, CurrSamplesPerSec=5.608629462518138, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:30:24,764] [INFO] [timer.py:197:stop] 0/1156, RunningAvgSamplesPerSec=6.32395490744807, CurrSamplesPerSec=5.665791976052485, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:30:36,281] [INFO] [timer.py:197:stop] 0/1158, RunningAvgSamplesPerSec=6.323960705938503, CurrSamplesPerSec=5.7071327312664195, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:30:47,832] [INFO] [logging.py:68:log_dist] [Rank 0] step=580, skipped=5, lr=[9.835555555555556e-06], mom=[[0.9, 0.999]] +[2022-12-16 21:30:47,832] [INFO] [timer.py:197:stop] 0/1160, RunningAvgSamplesPerSec=6.323719233865647, CurrSamplesPerSec=5.472872328800012, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:30:59,391] [INFO] [timer.py:197:stop] 0/1162, RunningAvgSamplesPerSec=6.32370184523637, CurrSamplesPerSec=5.674197471333234, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:31:10,850] [INFO] [timer.py:197:stop] 0/1164, RunningAvgSamplesPerSec=6.323718795363503, CurrSamplesPerSec=5.697172538195727, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:31:22,211] [INFO] [timer.py:197:stop] 0/1166, RunningAvgSamplesPerSec=6.3237189852523725, CurrSamplesPerSec=5.701931114731109, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:31:33,559] [INFO] [timer.py:197:stop] 0/1168, RunningAvgSamplesPerSec=6.323734374802737, CurrSamplesPerSec=5.700105501726921, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:31:44,892] [INFO] [timer.py:197:stop] 0/1170, RunningAvgSamplesPerSec=6.323763582374604, CurrSamplesPerSec=5.704374116034424, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:31:56,244] [INFO] [timer.py:197:stop] 0/1172, RunningAvgSamplesPerSec=6.32377243788173, CurrSamplesPerSec=5.694551589747411, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:32:07,599] [INFO] [timer.py:197:stop] 0/1174, RunningAvgSamplesPerSec=6.323779297278022, CurrSamplesPerSec=5.6859893740268745, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:32:18,933] [INFO] [timer.py:197:stop] 0/1176, RunningAvgSamplesPerSec=6.323790989360503, CurrSamplesPerSec=5.69161975163729, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:32:30,268] [INFO] [timer.py:197:stop] 0/1178, RunningAvgSamplesPerSec=6.323817303486766, CurrSamplesPerSec=5.726107314664906, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:32:41,713] [INFO] [logging.py:68:log_dist] [Rank 0] step=590, skipped=5, lr=[9.813333333333333e-06], mom=[[0.9, 0.999]] +[2022-12-16 21:32:41,714] [INFO] [timer.py:197:stop] 0/1180, RunningAvgSamplesPerSec=6.323843584883303, CurrSamplesPerSec=5.706581425626131, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:32:53,026] [INFO] [timer.py:197:stop] 0/1182, RunningAvgSamplesPerSec=6.3238942954570305, CurrSamplesPerSec=5.729338187061064, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:33:04,406] [INFO] [timer.py:197:stop] 0/1184, RunningAvgSamplesPerSec=6.323874179833749, CurrSamplesPerSec=5.657513889005977, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:33:15,727] [INFO] [timer.py:197:stop] 0/1186, RunningAvgSamplesPerSec=6.323901512870132, CurrSamplesPerSec=5.709603024005327, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:33:27,073] [INFO] [timer.py:197:stop] 0/1188, RunningAvgSamplesPerSec=6.323900985123022, CurrSamplesPerSec=5.709132107478294, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:33:38,449] [INFO] [timer.py:197:stop] 0/1190, RunningAvgSamplesPerSec=6.323886756213642, CurrSamplesPerSec=5.694773393442737, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:33:49,761] [INFO] [timer.py:197:stop] 0/1192, RunningAvgSamplesPerSec=6.323938229239878, CurrSamplesPerSec=5.711202145426297, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:34:01,125] [INFO] [timer.py:197:stop] 0/1194, RunningAvgSamplesPerSec=6.323934641391922, CurrSamplesPerSec=5.702443242161101, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:34:12,448] [INFO] [timer.py:197:stop] 0/1196, RunningAvgSamplesPerSec=6.323959876909366, CurrSamplesPerSec=5.703602045869347, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:34:23,812] [INFO] [timer.py:197:stop] 0/1198, RunningAvgSamplesPerSec=6.323975224149886, CurrSamplesPerSec=5.688260341444645, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:34:35,164] [INFO] [logging.py:68:log_dist] [Rank 0] step=600, skipped=5, lr=[9.791111111111112e-06], mom=[[0.9, 0.999]] +[2022-12-16 21:34:35,166] [INFO] [timer.py:197:stop] 0/1200, RunningAvgSamplesPerSec=6.323984429999333, CurrSamplesPerSec=5.689274958682965, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.034, 'learning_rate': 9.791111111111112e-06, 'epoch': 2.54} +[2022-12-16 21:34:46,518] [INFO] [timer.py:197:stop] 0/1202, RunningAvgSamplesPerSec=6.323997457450842, CurrSamplesPerSec=5.695443016811219, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:34:58,030] [INFO] [timer.py:197:stop] 0/1204, RunningAvgSamplesPerSec=6.324019030245103, CurrSamplesPerSec=5.679552865802275, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:35:09,353] [INFO] [timer.py:197:stop] 0/1206, RunningAvgSamplesPerSec=6.324070278143984, CurrSamplesPerSec=5.712041909103797, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:35:20,728] [INFO] [timer.py:197:stop] 0/1208, RunningAvgSamplesPerSec=6.324057885317574, CurrSamplesPerSec=5.679879741258521, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:35:32,055] [INFO] [timer.py:197:stop] 0/1210, RunningAvgSamplesPerSec=6.324093491416535, CurrSamplesPerSec=5.708959449453373, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:35:43,396] [INFO] [timer.py:197:stop] 0/1212, RunningAvgSamplesPerSec=6.324121293486445, CurrSamplesPerSec=5.702608722105414, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:35:54,754] [INFO] [timer.py:197:stop] 0/1214, RunningAvgSamplesPerSec=6.32410811693974, CurrSamplesPerSec=5.677839068060557, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:36:06,166] [INFO] [timer.py:197:stop] 0/1216, RunningAvgSamplesPerSec=6.324056865254654, CurrSamplesPerSec=5.649525567777039, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:36:17,527] [INFO] [timer.py:197:stop] 0/1218, RunningAvgSamplesPerSec=6.324057753210374, CurrSamplesPerSec=5.6874906986638285, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:36:28,891] [INFO] [logging.py:68:log_dist] [Rank 0] step=610, skipped=5, lr=[9.76888888888889e-06], mom=[[0.9, 0.999]] +[2022-12-16 21:36:28,892] [INFO] [timer.py:197:stop] 0/1220, RunningAvgSamplesPerSec=6.324054379245348, CurrSamplesPerSec=5.6906122599537055, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:36:40,250] [INFO] [timer.py:197:stop] 0/1222, RunningAvgSamplesPerSec=6.324061368820376, CurrSamplesPerSec=5.700368410913226, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:36:51,599] [INFO] [timer.py:197:stop] 0/1224, RunningAvgSamplesPerSec=6.324082612140597, CurrSamplesPerSec=5.709710138608091, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:37:02,945] [INFO] [timer.py:197:stop] 0/1226, RunningAvgSamplesPerSec=6.324099967940003, CurrSamplesPerSec=5.710087379174428, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:37:14,312] [INFO] [timer.py:197:stop] 0/1228, RunningAvgSamplesPerSec=6.324095213948072, CurrSamplesPerSec=5.6951648538816695, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:37:25,643] [INFO] [timer.py:197:stop] 0/1230, RunningAvgSamplesPerSec=6.3241268422499015, CurrSamplesPerSec=5.732377573527465, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:37:36,998] [INFO] [timer.py:197:stop] 0/1232, RunningAvgSamplesPerSec=6.32411884613214, CurrSamplesPerSec=5.675400258437628, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:37:48,363] [INFO] [timer.py:197:stop] 0/1234, RunningAvgSamplesPerSec=6.3241130252354365, CurrSamplesPerSec=5.674158610594783, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:37:59,689] [INFO] [timer.py:197:stop] 0/1236, RunningAvgSamplesPerSec=6.324133915811186, CurrSamplesPerSec=5.713030983651014, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:38:11,060] [INFO] [timer.py:197:stop] 0/1238, RunningAvgSamplesPerSec=6.324125635080051, CurrSamplesPerSec=5.683048536732076, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:38:22,399] [INFO] [logging.py:68:log_dist] [Rank 0] step=620, skipped=5, lr=[9.746666666666668e-06], mom=[[0.9, 0.999]] +[2022-12-16 21:38:22,401] [INFO] [timer.py:197:stop] 0/1240, RunningAvgSamplesPerSec=6.3241300316642866, CurrSamplesPerSec=5.6885821924612765, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:38:33,727] [INFO] [timer.py:197:stop] 0/1242, RunningAvgSamplesPerSec=6.324153194541566, CurrSamplesPerSec=5.721168331625024, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:38:45,078] [INFO] [timer.py:197:stop] 0/1244, RunningAvgSamplesPerSec=6.324166885839952, CurrSamplesPerSec=5.6995364324043365, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:38:56,402] [INFO] [timer.py:197:stop] 0/1246, RunningAvgSamplesPerSec=6.324190757241607, CurrSamplesPerSec=5.701532184211459, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:39:07,779] [INFO] [timer.py:197:stop] 0/1248, RunningAvgSamplesPerSec=6.324176492543799, CurrSamplesPerSec=5.685249002830177, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:39:19,130] [INFO] [timer.py:197:stop] 0/1250, RunningAvgSamplesPerSec=6.3241874762463635, CurrSamplesPerSec=5.705175011182494, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0339, 'learning_rate': 9.735555555555556e-06, 'epoch': 2.65} +[2022-12-16 21:39:30,528] [INFO] [timer.py:197:stop] 0/1252, RunningAvgSamplesPerSec=6.324153428015963, CurrSamplesPerSec=5.670633614253161, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:39:41,877] [INFO] [timer.py:197:stop] 0/1254, RunningAvgSamplesPerSec=6.324166879634101, CurrSamplesPerSec=5.691319276670348, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:39:53,239] [INFO] [timer.py:197:stop] 0/1256, RunningAvgSamplesPerSec=6.324167247062276, CurrSamplesPerSec=5.681665722246253, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:40:04,580] [INFO] [timer.py:197:stop] 0/1258, RunningAvgSamplesPerSec=6.324174890989224, CurrSamplesPerSec=5.681159003145008, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:40:15,934] [INFO] [logging.py:68:log_dist] [Rank 0] step=630, skipped=5, lr=[9.724444444444445e-06], mom=[[0.9, 0.999]] +[2022-12-16 21:40:15,936] [INFO] [timer.py:197:stop] 0/1260, RunningAvgSamplesPerSec=6.324179951438752, CurrSamplesPerSec=5.6964870347127015, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:40:27,272] [INFO] [timer.py:197:stop] 0/1262, RunningAvgSamplesPerSec=6.324214385657734, CurrSamplesPerSec=5.709893530081676, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:40:38,622] [INFO] [timer.py:197:stop] 0/1264, RunningAvgSamplesPerSec=6.324239302104727, CurrSamplesPerSec=5.69377396361336, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:40:49,947] [INFO] [timer.py:197:stop] 0/1266, RunningAvgSamplesPerSec=6.324277602074355, CurrSamplesPerSec=5.720856194199715, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:41:01,249] [INFO] [timer.py:197:stop] 0/1268, RunningAvgSamplesPerSec=6.324335850701666, CurrSamplesPerSec=5.734853355839019, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:41:12,564] [INFO] [timer.py:197:stop] 0/1270, RunningAvgSamplesPerSec=6.324382124670813, CurrSamplesPerSec=5.71265311090979, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:41:23,900] [INFO] [timer.py:197:stop] 0/1272, RunningAvgSamplesPerSec=6.3243761836890755, CurrSamplesPerSec=5.69152514083604, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:41:35,201] [INFO] [timer.py:197:stop] 0/1274, RunningAvgSamplesPerSec=6.324422547556908, CurrSamplesPerSec=5.7167786161089635, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:41:46,505] [INFO] [timer.py:197:stop] 0/1276, RunningAvgSamplesPerSec=6.324480723648808, CurrSamplesPerSec=5.724640229146444, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:41:57,832] [INFO] [timer.py:197:stop] 0/1278, RunningAvgSamplesPerSec=6.324515620440186, CurrSamplesPerSec=5.7136565044984415, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:42:09,174] [INFO] [logging.py:68:log_dist] [Rank 0] step=640, skipped=5, lr=[9.702222222222223e-06], mom=[[0.9, 0.999]] +[2022-12-16 21:42:09,176] [INFO] [timer.py:197:stop] 0/1280, RunningAvgSamplesPerSec=6.32453233632449, CurrSamplesPerSec=5.711460003166846, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:42:20,519] [INFO] [timer.py:197:stop] 0/1282, RunningAvgSamplesPerSec=6.324589203102045, CurrSamplesPerSec=5.729852069808556, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:42:31,881] [INFO] [timer.py:197:stop] 0/1284, RunningAvgSamplesPerSec=6.324574959448107, CurrSamplesPerSec=5.683294472976667, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:42:43,238] [INFO] [timer.py:197:stop] 0/1286, RunningAvgSamplesPerSec=6.324582465903206, CurrSamplesPerSec=5.690985533237665, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:42:54,569] [INFO] [timer.py:197:stop] 0/1288, RunningAvgSamplesPerSec=6.3246143105854, CurrSamplesPerSec=5.701603391759544, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:43:06,039] [INFO] [timer.py:197:stop] 0/1290, RunningAvgSamplesPerSec=6.3245165663862775, CurrSamplesPerSec=5.573252155643943, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:43:17,387] [INFO] [timer.py:197:stop] 0/1292, RunningAvgSamplesPerSec=6.324514180927693, CurrSamplesPerSec=5.684713233086082, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:43:28,763] [INFO] [timer.py:197:stop] 0/1294, RunningAvgSamplesPerSec=6.324465620148037, CurrSamplesPerSec=5.665202955258279, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:43:40,109] [INFO] [timer.py:197:stop] 0/1296, RunningAvgSamplesPerSec=6.324476536227191, CurrSamplesPerSec=5.703485950413083, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:43:51,455] [INFO] [timer.py:197:stop] 0/1298, RunningAvgSamplesPerSec=6.32448733334034, CurrSamplesPerSec=5.699092583967394, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:44:02,827] [INFO] [logging.py:68:log_dist] [Rank 0] step=650, skipped=5, lr=[9.68e-06], mom=[[0.9, 0.999]] +[2022-12-16 21:44:02,829] [INFO] [timer.py:197:stop] 0/1300, RunningAvgSamplesPerSec=6.324472640902777, CurrSamplesPerSec=5.674295345181151, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0354, 'learning_rate': 9.68e-06, 'epoch': 2.75} +[2022-12-16 21:44:14,174] [INFO] [timer.py:197:stop] 0/1302, RunningAvgSamplesPerSec=6.324469450338527, CurrSamplesPerSec=5.7018911464980055, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:44:25,494] [INFO] [timer.py:197:stop] 0/1304, RunningAvgSamplesPerSec=6.324474296992287, CurrSamplesPerSec=5.684456580977391, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:44:36,890] [INFO] [timer.py:197:stop] 0/1306, RunningAvgSamplesPerSec=6.324445030815587, CurrSamplesPerSec=5.6593521681943075, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:44:48,254] [INFO] [timer.py:197:stop] 0/1308, RunningAvgSamplesPerSec=6.3244409931410575, CurrSamplesPerSec=5.66902528072435, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:44:59,617] [INFO] [timer.py:197:stop] 0/1310, RunningAvgSamplesPerSec=6.324437449468802, CurrSamplesPerSec=5.692400893631308, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:45:10,999] [INFO] [timer.py:197:stop] 0/1312, RunningAvgSamplesPerSec=6.324450667657297, CurrSamplesPerSec=5.703528849436146, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:45:22,350] [INFO] [timer.py:197:stop] 0/1314, RunningAvgSamplesPerSec=6.324459340859516, CurrSamplesPerSec=5.700481716272428, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:45:33,715] [INFO] [timer.py:197:stop] 0/1316, RunningAvgSamplesPerSec=6.324453866475892, CurrSamplesPerSec=5.684826157724787, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:45:45,083] [INFO] [timer.py:197:stop] 0/1318, RunningAvgSamplesPerSec=6.324431190376394, CurrSamplesPerSec=5.692041676866035, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:45:56,680] [INFO] [logging.py:68:log_dist] [Rank 0] step=660, skipped=5, lr=[9.657777777777778e-06], mom=[[0.9, 0.999]] +[2022-12-16 21:45:56,682] [INFO] [timer.py:197:stop] 0/1320, RunningAvgSamplesPerSec=6.32442696063329, CurrSamplesPerSec=5.687795349133654, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:46:08,055] [INFO] [timer.py:197:stop] 0/1322, RunningAvgSamplesPerSec=6.324400513545528, CurrSamplesPerSec=5.6830059452395725, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:46:19,405] [INFO] [timer.py:197:stop] 0/1324, RunningAvgSamplesPerSec=6.324409371583161, CurrSamplesPerSec=5.70627475944235, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:46:30,768] [INFO] [timer.py:197:stop] 0/1326, RunningAvgSamplesPerSec=6.3243939378709815, CurrSamplesPerSec=5.6951795951100905, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:46:42,121] [INFO] [timer.py:197:stop] 0/1328, RunningAvgSamplesPerSec=6.324402324882206, CurrSamplesPerSec=5.699022649051794, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:46:53,470] [INFO] [timer.py:197:stop] 0/1330, RunningAvgSamplesPerSec=6.324400154231076, CurrSamplesPerSec=5.6892438493070046, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:47:04,850] [INFO] [timer.py:197:stop] 0/1332, RunningAvgSamplesPerSec=6.324392944136967, CurrSamplesPerSec=5.685559434291239, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:47:16,202] [INFO] [timer.py:197:stop] 0/1334, RunningAvgSamplesPerSec=6.3243919800862995, CurrSamplesPerSec=5.687262473216299, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:47:27,578] [INFO] [timer.py:197:stop] 0/1336, RunningAvgSamplesPerSec=6.3243769326191455, CurrSamplesPerSec=5.686703434412646, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:47:39,062] [INFO] [timer.py:197:stop] 0/1338, RunningAvgSamplesPerSec=6.324378950957167, CurrSamplesPerSec=5.691266183990615, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:47:50,436] [INFO] [logging.py:68:log_dist] [Rank 0] step=670, skipped=5, lr=[9.635555555555557e-06], mom=[[0.9, 0.999]] +[2022-12-16 21:47:50,438] [INFO] [timer.py:197:stop] 0/1340, RunningAvgSamplesPerSec=6.324378271402667, CurrSamplesPerSec=5.691395056120047, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:48:01,811] [INFO] [timer.py:197:stop] 0/1342, RunningAvgSamplesPerSec=6.324364127126357, CurrSamplesPerSec=5.663703573734233, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:48:13,192] [INFO] [timer.py:197:stop] 0/1344, RunningAvgSamplesPerSec=6.324348771990201, CurrSamplesPerSec=5.698139777624442, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:48:24,528] [INFO] [timer.py:197:stop] 0/1346, RunningAvgSamplesPerSec=6.324374215372536, CurrSamplesPerSec=5.702518106714946, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:48:35,895] [INFO] [timer.py:197:stop] 0/1348, RunningAvgSamplesPerSec=6.3243590936320375, CurrSamplesPerSec=5.687893692645465, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:48:47,295] [INFO] [timer.py:197:stop] 0/1350, RunningAvgSamplesPerSec=6.324326854499699, CurrSamplesPerSec=5.655780951123609, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0374, 'learning_rate': 9.624444444444445e-06, 'epoch': 2.86} +[2022-12-16 21:48:58,904] [INFO] [timer.py:197:stop] 0/1352, RunningAvgSamplesPerSec=6.324318267548198, CurrSamplesPerSec=5.6796292936049895, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:49:10,273] [INFO] [timer.py:197:stop] 0/1354, RunningAvgSamplesPerSec=6.324296695879865, CurrSamplesPerSec=5.667203924328584, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:49:21,654] [INFO] [timer.py:197:stop] 0/1356, RunningAvgSamplesPerSec=6.32428018514822, CurrSamplesPerSec=5.6751492458016735, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:49:33,006] [INFO] [timer.py:197:stop] 0/1358, RunningAvgSamplesPerSec=6.324275196063561, CurrSamplesPerSec=5.69712223815206, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:49:44,386] [INFO] [logging.py:68:log_dist] [Rank 0] step=680, skipped=5, lr=[9.613333333333335e-06], mom=[[0.9, 0.999]] +[2022-12-16 21:49:44,388] [INFO] [timer.py:197:stop] 0/1360, RunningAvgSamplesPerSec=6.324255960679552, CurrSamplesPerSec=5.670191860369816, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:49:55,738] [INFO] [timer.py:197:stop] 0/1362, RunningAvgSamplesPerSec=6.324254309513514, CurrSamplesPerSec=5.6942959812107965, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:50:07,114] [INFO] [timer.py:197:stop] 0/1364, RunningAvgSamplesPerSec=6.324256979747847, CurrSamplesPerSec=5.6949874818050725, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:50:18,496] [INFO] [timer.py:197:stop] 0/1366, RunningAvgSamplesPerSec=6.324236172921695, CurrSamplesPerSec=5.687761122532687, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:50:29,947] [INFO] [timer.py:197:stop] 0/1368, RunningAvgSamplesPerSec=6.3242387362212655, CurrSamplesPerSec=5.692255077058373, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:50:41,282] [INFO] [timer.py:197:stop] 0/1370, RunningAvgSamplesPerSec=6.324249802937796, CurrSamplesPerSec=5.703811950695486, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:50:52,601] [INFO] [timer.py:197:stop] 0/1372, RunningAvgSamplesPerSec=6.32427068603822, CurrSamplesPerSec=5.704097261816972, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:51:03,926] [INFO] [timer.py:197:stop] 0/1374, RunningAvgSamplesPerSec=6.3242886615535285, CurrSamplesPerSec=5.698351457952698, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:51:15,332] [INFO] [timer.py:197:stop] 0/1376, RunningAvgSamplesPerSec=6.324278565827469, CurrSamplesPerSec=5.688882378479673, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:51:26,682] [INFO] [timer.py:197:stop] 0/1378, RunningAvgSamplesPerSec=6.324272743331345, CurrSamplesPerSec=5.677133235207755, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:51:38,045] [INFO] [logging.py:68:log_dist] [Rank 0] step=690, skipped=5, lr=[9.591111111111113e-06], mom=[[0.9, 0.999]] +[2022-12-16 21:51:38,047] [INFO] [timer.py:197:stop] 0/1380, RunningAvgSamplesPerSec=6.324266336132586, CurrSamplesPerSec=5.672529097773243, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:51:49,338] [INFO] [timer.py:197:stop] 0/1382, RunningAvgSamplesPerSec=6.3242997387905024, CurrSamplesPerSec=5.70745550803231, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:52:00,686] [INFO] [timer.py:197:stop] 0/1384, RunningAvgSamplesPerSec=6.324323307663187, CurrSamplesPerSec=5.692134373626455, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:52:12,025] [INFO] [timer.py:197:stop] 0/1386, RunningAvgSamplesPerSec=6.324327041155843, CurrSamplesPerSec=5.6880930415204345, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:52:23,323] [INFO] [timer.py:197:stop] 0/1388, RunningAvgSamplesPerSec=6.324380802604177, CurrSamplesPerSec=5.735462831952154, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:52:34,646] [INFO] [timer.py:197:stop] 0/1390, RunningAvgSamplesPerSec=6.324399578264787, CurrSamplesPerSec=5.719052804513825, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:52:45,967] [INFO] [timer.py:197:stop] 0/1392, RunningAvgSamplesPerSec=6.3244345536156, CurrSamplesPerSec=5.704161503176972, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:52:57,335] [INFO] [timer.py:197:stop] 0/1394, RunningAvgSamplesPerSec=6.32447811176164, CurrSamplesPerSec=5.718143983956547, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:53:08,631] [INFO] [timer.py:197:stop] 0/1396, RunningAvgSamplesPerSec=6.324519518597146, CurrSamplesPerSec=5.714886786951273, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:53:19,938] [INFO] [timer.py:197:stop] 0/1398, RunningAvgSamplesPerSec=6.32455268106817, CurrSamplesPerSec=5.7012866044767625, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:53:31,265] [INFO] [logging.py:68:log_dist] [Rank 0] step=700, skipped=5, lr=[9.56888888888889e-06], mom=[[0.9, 0.999]] +[2022-12-16 21:53:31,267] [INFO] [timer.py:197:stop] 0/1400, RunningAvgSamplesPerSec=6.324579534160159, CurrSamplesPerSec=5.692990030015316, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0371, 'learning_rate': 9.56888888888889e-06, 'epoch': 2.97} +[2022-12-16 21:53:42,558] [INFO] [timer.py:197:stop] 0/1402, RunningAvgSamplesPerSec=6.32462746208894, CurrSamplesPerSec=5.7200996461464815, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:53:53,895] [INFO] [timer.py:197:stop] 0/1404, RunningAvgSamplesPerSec=6.324632008464894, CurrSamplesPerSec=5.690293315377784, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:54:05,179] [INFO] [timer.py:197:stop] 0/1406, RunningAvgSamplesPerSec=6.324698640337143, CurrSamplesPerSec=5.726335492400019, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:54:16,499] [INFO] [timer.py:197:stop] 0/1408, RunningAvgSamplesPerSec=6.324731919832654, CurrSamplesPerSec=5.702019773539062, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:54:27,822] [INFO] [timer.py:197:stop] 0/1410, RunningAvgSamplesPerSec=6.3247630088837274, CurrSamplesPerSec=5.724249100746272, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:54:39,170] [INFO] [timer.py:197:stop] 0/1412, RunningAvgSamplesPerSec=6.3247716831156175, CurrSamplesPerSec=5.682469154661037, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:54:50,513] [INFO] [timer.py:197:stop] 0/1414, RunningAvgSamplesPerSec=6.32478804755692, CurrSamplesPerSec=5.704216776146671, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:54:59,057] [INFO] [timer.py:197:stop] 0/1416, RunningAvgSamplesPerSec=6.32697491966737, CurrSamplesPerSec=10.183929802384258, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:55:10,405] [INFO] [timer.py:197:stop] 0/1418, RunningAvgSamplesPerSec=6.326984384492439, CurrSamplesPerSec=5.698068656563964, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:55:21,716] [INFO] [logging.py:68:log_dist] [Rank 0] step=710, skipped=5, lr=[9.546666666666668e-06], mom=[[0.9, 0.999]] +[2022-12-16 21:55:21,718] [INFO] [timer.py:197:stop] 0/1420, RunningAvgSamplesPerSec=6.327008366408901, CurrSamplesPerSec=5.722761494483829, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:55:33,025] [INFO] [timer.py:197:stop] 0/1422, RunningAvgSamplesPerSec=6.32703881467068, CurrSamplesPerSec=5.725638068365156, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:55:44,329] [INFO] [timer.py:197:stop] 0/1424, RunningAvgSamplesPerSec=6.3270593471696595, CurrSamplesPerSec=5.7123326634216784, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:55:55,642] [INFO] [timer.py:197:stop] 0/1426, RunningAvgSamplesPerSec=6.327096793945078, CurrSamplesPerSec=5.710449849131292, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:56:06,990] [INFO] [timer.py:197:stop] 0/1428, RunningAvgSamplesPerSec=6.327105470684101, CurrSamplesPerSec=5.6794182809613725, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:56:18,294] [INFO] [timer.py:197:stop] 0/1430, RunningAvgSamplesPerSec=6.3271388039928596, CurrSamplesPerSec=5.714867320181734, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:56:29,628] [INFO] [timer.py:197:stop] 0/1432, RunningAvgSamplesPerSec=6.3271327648059374, CurrSamplesPerSec=5.6914160526940245, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:56:40,960] [INFO] [timer.py:197:stop] 0/1434, RunningAvgSamplesPerSec=6.327141902219056, CurrSamplesPerSec=5.689690747652517, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:56:52,302] [INFO] [timer.py:197:stop] 0/1436, RunningAvgSamplesPerSec=6.327156027945619, CurrSamplesPerSec=5.689736092584112, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:57:03,628] [INFO] [timer.py:197:stop] 0/1438, RunningAvgSamplesPerSec=6.3271860924201, CurrSamplesPerSec=5.701005206768365, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:57:14,933] [INFO] [logging.py:68:log_dist] [Rank 0] step=720, skipped=5, lr=[9.524444444444445e-06], mom=[[0.9, 0.999]] +[2022-12-16 21:57:14,935] [INFO] [timer.py:197:stop] 0/1440, RunningAvgSamplesPerSec=6.327240541450005, CurrSamplesPerSec=5.733791309617626, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:57:26,263] [INFO] [timer.py:197:stop] 0/1442, RunningAvgSamplesPerSec=6.327265439071706, CurrSamplesPerSec=5.709909562214138, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:57:37,604] [INFO] [timer.py:197:stop] 0/1444, RunningAvgSamplesPerSec=6.327289199225549, CurrSamplesPerSec=5.7069866443111605, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:57:49,080] [INFO] [timer.py:197:stop] 0/1446, RunningAvgSamplesPerSec=6.327306402988852, CurrSamplesPerSec=5.691615407194863, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:58:00,419] [INFO] [timer.py:197:stop] 0/1448, RunningAvgSamplesPerSec=6.327318797851507, CurrSamplesPerSec=5.702858777371699, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:58:11,758] [INFO] [timer.py:197:stop] 0/1450, RunningAvgSamplesPerSec=6.3273351798091335, CurrSamplesPerSec=5.695963648425355, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0227, 'learning_rate': 9.513333333333334e-06, 'epoch': 3.07} +[2022-12-16 21:58:23,085] [INFO] [timer.py:197:stop] 0/1452, RunningAvgSamplesPerSec=6.32736439156718, CurrSamplesPerSec=5.69986222339413, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:58:34,423] [INFO] [timer.py:197:stop] 0/1454, RunningAvgSamplesPerSec=6.327382444473141, CurrSamplesPerSec=5.691851947598792, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:58:45,728] [INFO] [timer.py:197:stop] 0/1456, RunningAvgSamplesPerSec=6.327411355372915, CurrSamplesPerSec=5.7264754862424025, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:58:57,189] [INFO] [timer.py:197:stop] 0/1458, RunningAvgSamplesPerSec=6.327422689770682, CurrSamplesPerSec=5.687144391209955, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:59:08,503] [INFO] [logging.py:68:log_dist] [Rank 0] step=730, skipped=5, lr=[9.502222222222223e-06], mom=[[0.9, 0.999]] +[2022-12-16 21:59:08,505] [INFO] [timer.py:197:stop] 0/1460, RunningAvgSamplesPerSec=6.327457442468501, CurrSamplesPerSec=5.7197886003305705, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:59:19,811] [INFO] [timer.py:197:stop] 0/1462, RunningAvgSamplesPerSec=6.327485371631291, CurrSamplesPerSec=5.720289800531883, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:59:31,139] [INFO] [timer.py:197:stop] 0/1464, RunningAvgSamplesPerSec=6.327510730275065, CurrSamplesPerSec=5.717306565446505, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:59:42,470] [INFO] [timer.py:197:stop] 0/1466, RunningAvgSamplesPerSec=6.327533540895815, CurrSamplesPerSec=5.704577773769327, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 21:59:53,873] [INFO] [timer.py:197:stop] 0/1468, RunningAvgSamplesPerSec=6.327495467322438, CurrSamplesPerSec=5.645587947967342, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:00:05,302] [INFO] [timer.py:197:stop] 0/1470, RunningAvgSamplesPerSec=6.327436755212707, CurrSamplesPerSec=5.624859129929112, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:00:16,701] [INFO] [timer.py:197:stop] 0/1472, RunningAvgSamplesPerSec=6.3274364325893995, CurrSamplesPerSec=5.687119088560816, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:00:28,151] [INFO] [timer.py:197:stop] 0/1474, RunningAvgSamplesPerSec=6.327424697035128, CurrSamplesPerSec=5.67494696401265, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:00:39,471] [INFO] [timer.py:197:stop] 0/1476, RunningAvgSamplesPerSec=6.327443763191702, CurrSamplesPerSec=5.70665906786341, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:00:50,872] [INFO] [timer.py:197:stop] 0/1478, RunningAvgSamplesPerSec=6.327434800015111, CurrSamplesPerSec=5.677509785642671, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:01:02,243] [INFO] [logging.py:68:log_dist] [Rank 0] step=740, skipped=5, lr=[9.48e-06], mom=[[0.9, 0.999]] +[2022-12-16 22:01:02,245] [INFO] [timer.py:197:stop] 0/1480, RunningAvgSamplesPerSec=6.327453438484531, CurrSamplesPerSec=5.707022801349469, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:01:13,631] [INFO] [timer.py:197:stop] 0/1482, RunningAvgSamplesPerSec=6.327449584064208, CurrSamplesPerSec=5.686699097472274, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:01:25,011] [INFO] [timer.py:197:stop] 0/1484, RunningAvgSamplesPerSec=6.327461812433459, CurrSamplesPerSec=5.6979624620946625, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:01:36,382] [INFO] [timer.py:197:stop] 0/1486, RunningAvgSamplesPerSec=6.327474119248579, CurrSamplesPerSec=5.7187698940143425, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:01:47,707] [INFO] [timer.py:197:stop] 0/1488, RunningAvgSamplesPerSec=6.327500660405285, CurrSamplesPerSec=5.706598409685004, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:01:59,104] [INFO] [timer.py:197:stop] 0/1490, RunningAvgSamplesPerSec=6.327497573482637, CurrSamplesPerSec=5.683719497130948, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:02:10,486] [INFO] [timer.py:197:stop] 0/1492, RunningAvgSamplesPerSec=6.32748801533402, CurrSamplesPerSec=5.683161635883977, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:02:21,823] [INFO] [timer.py:197:stop] 0/1494, RunningAvgSamplesPerSec=6.32750880749429, CurrSamplesPerSec=5.711333623231045, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:02:33,231] [INFO] [timer.py:197:stop] 0/1496, RunningAvgSamplesPerSec=6.327513699728768, CurrSamplesPerSec=5.706028528414905, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:02:44,547] [INFO] [timer.py:197:stop] 0/1498, RunningAvgSamplesPerSec=6.327550204303378, CurrSamplesPerSec=5.724850952803729, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:02:56,026] [INFO] [logging.py:68:log_dist] [Rank 0] step=750, skipped=5, lr=[9.457777777777778e-06], mom=[[0.9, 0.999]] +[2022-12-16 22:02:56,027] [INFO] [timer.py:197:stop] 0/1500, RunningAvgSamplesPerSec=6.327458217506727, CurrSamplesPerSec=5.5676383899106145, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0173, 'learning_rate': 9.457777777777778e-06, 'epoch': 3.18} +[2022-12-16 22:03:07,403] [INFO] [timer.py:197:stop] 0/1502, RunningAvgSamplesPerSec=6.327455446968343, CurrSamplesPerSec=5.696816345954642, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:03:18,794] [INFO] [timer.py:197:stop] 0/1504, RunningAvgSamplesPerSec=6.327460698300773, CurrSamplesPerSec=5.6907456868615665, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:03:30,167] [INFO] [timer.py:197:stop] 0/1506, RunningAvgSamplesPerSec=6.327476845175162, CurrSamplesPerSec=5.700586309832747, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:03:41,557] [INFO] [timer.py:197:stop] 0/1508, RunningAvgSamplesPerSec=6.327479752759413, CurrSamplesPerSec=5.698833422003168, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:03:53,085] [INFO] [timer.py:197:stop] 0/1510, RunningAvgSamplesPerSec=6.327483658107985, CurrSamplesPerSec=5.690109492159518, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:04:04,525] [INFO] [timer.py:197:stop] 0/1512, RunningAvgSamplesPerSec=6.327482981825684, CurrSamplesPerSec=5.691443324337489, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:04:15,856] [INFO] [timer.py:197:stop] 0/1514, RunningAvgSamplesPerSec=6.327504069039711, CurrSamplesPerSec=5.702331554684467, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:04:27,180] [INFO] [timer.py:197:stop] 0/1516, RunningAvgSamplesPerSec=6.32752931475808, CurrSamplesPerSec=5.699813812273017, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:04:38,838] [INFO] [timer.py:197:stop] 0/1518, RunningAvgSamplesPerSec=6.327495958559439, CurrSamplesPerSec=5.70111127252215, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:04:50,193] [INFO] [logging.py:68:log_dist] [Rank 0] step=760, skipped=5, lr=[9.435555555555556e-06], mom=[[0.9, 0.999]] +[2022-12-16 22:04:50,195] [INFO] [timer.py:197:stop] 0/1520, RunningAvgSamplesPerSec=6.327491850184652, CurrSamplesPerSec=5.6779257782447745, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:05:01,599] [INFO] [timer.py:197:stop] 0/1522, RunningAvgSamplesPerSec=6.3275112010285595, CurrSamplesPerSec=5.704700217859312, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:05:13,081] [INFO] [timer.py:197:stop] 0/1524, RunningAvgSamplesPerSec=6.327502103613446, CurrSamplesPerSec=5.698904562156051, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:05:24,417] [INFO] [timer.py:197:stop] 0/1526, RunningAvgSamplesPerSec=6.327493535489943, CurrSamplesPerSec=5.677983426370301, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:05:35,707] [INFO] [timer.py:197:stop] 0/1528, RunningAvgSamplesPerSec=6.327507103762715, CurrSamplesPerSec=5.697476050095679, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:05:47,074] [INFO] [timer.py:197:stop] 0/1530, RunningAvgSamplesPerSec=6.327507935110874, CurrSamplesPerSec=5.715472068638608, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:05:58,447] [INFO] [timer.py:197:stop] 0/1532, RunningAvgSamplesPerSec=6.32751138193243, CurrSamplesPerSec=5.6919378793894335, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:06:09,787] [INFO] [timer.py:197:stop] 0/1534, RunningAvgSamplesPerSec=6.327521338018502, CurrSamplesPerSec=5.7004705792212755, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:06:21,190] [INFO] [timer.py:197:stop] 0/1536, RunningAvgSamplesPerSec=6.3274682129513, CurrSamplesPerSec=5.712174397994211, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:06:32,541] [INFO] [timer.py:197:stop] 0/1538, RunningAvgSamplesPerSec=6.3274698474125985, CurrSamplesPerSec=5.68027300306161, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:06:43,888] [INFO] [logging.py:68:log_dist] [Rank 0] step=770, skipped=5, lr=[9.413333333333334e-06], mom=[[0.9, 0.999]] +[2022-12-16 22:06:43,890] [INFO] [timer.py:197:stop] 0/1540, RunningAvgSamplesPerSec=6.327473245108575, CurrSamplesPerSec=5.6981792094536186, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:06:55,564] [INFO] [timer.py:197:stop] 0/1542, RunningAvgSamplesPerSec=6.327513329572601, CurrSamplesPerSec=5.718835197312315, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:07:07,225] [INFO] [timer.py:197:stop] 0/1544, RunningAvgSamplesPerSec=6.327515501200996, CurrSamplesPerSec=5.701398008835235, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:07:18,958] [INFO] [timer.py:197:stop] 0/1546, RunningAvgSamplesPerSec=6.3272162957930815, CurrSamplesPerSec=5.309439632999244, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:07:30,285] [INFO] [timer.py:197:stop] 0/1548, RunningAvgSamplesPerSec=6.327237769724863, CurrSamplesPerSec=5.715100200108538, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:07:41,816] [INFO] [timer.py:197:stop] 0/1550, RunningAvgSamplesPerSec=6.327274161216004, CurrSamplesPerSec=5.71891780324732, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0172, 'learning_rate': 9.402222222222222e-06, 'epoch': 3.28} +[2022-12-16 22:07:53,410] [INFO] [timer.py:197:stop] 0/1552, RunningAvgSamplesPerSec=6.32706992914816, CurrSamplesPerSec=5.411217698198677, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:08:04,941] [INFO] [timer.py:197:stop] 0/1554, RunningAvgSamplesPerSec=6.327092359809133, CurrSamplesPerSec=5.700676379585338, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:08:16,285] [INFO] [timer.py:197:stop] 0/1556, RunningAvgSamplesPerSec=6.327101079548158, CurrSamplesPerSec=5.68027853219152, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:08:27,892] [INFO] [timer.py:197:stop] 0/1558, RunningAvgSamplesPerSec=6.326898704258696, CurrSamplesPerSec=5.425364903600726, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:08:39,298] [INFO] [logging.py:68:log_dist] [Rank 0] step=780, skipped=5, lr=[9.391111111111111e-06], mom=[[0.9, 0.999]] +[2022-12-16 22:08:39,300] [INFO] [timer.py:197:stop] 0/1560, RunningAvgSamplesPerSec=6.326863672241038, CurrSamplesPerSec=5.64888880789383, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:08:50,744] [INFO] [timer.py:197:stop] 0/1562, RunningAvgSamplesPerSec=6.326881633242561, CurrSamplesPerSec=5.688473217102632, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:09:02,169] [INFO] [timer.py:197:stop] 0/1564, RunningAvgSamplesPerSec=6.326828353757074, CurrSamplesPerSec=5.6164079889420755, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:09:13,635] [INFO] [timer.py:197:stop] 0/1566, RunningAvgSamplesPerSec=6.32684329058874, CurrSamplesPerSec=5.711484793733482, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:09:25,042] [INFO] [timer.py:197:stop] 0/1568, RunningAvgSamplesPerSec=6.326860661533723, CurrSamplesPerSec=5.69864977247261, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:09:36,536] [INFO] [timer.py:197:stop] 0/1570, RunningAvgSamplesPerSec=6.326751882842448, CurrSamplesPerSec=5.55672099720942, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:09:48,053] [INFO] [timer.py:197:stop] 0/1572, RunningAvgSamplesPerSec=6.326774718534809, CurrSamplesPerSec=5.70892423918322, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:09:59,564] [INFO] [timer.py:197:stop] 0/1574, RunningAvgSamplesPerSec=6.32673483292785, CurrSamplesPerSec=5.659623979889483, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:10:10,906] [INFO] [timer.py:197:stop] 0/1576, RunningAvgSamplesPerSec=6.326735484116569, CurrSamplesPerSec=5.687612892361142, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:10:22,483] [INFO] [timer.py:197:stop] 0/1578, RunningAvgSamplesPerSec=6.326743472057318, CurrSamplesPerSec=5.687099328553298, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:10:34,085] [INFO] [logging.py:68:log_dist] [Rank 0] step=790, skipped=5, lr=[9.368888888888889e-06], mom=[[0.9, 0.999]] +[2022-12-16 22:10:34,086] [INFO] [timer.py:197:stop] 0/1580, RunningAvgSamplesPerSec=6.326741989967983, CurrSamplesPerSec=5.6911527620388345, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:10:45,432] [INFO] [timer.py:197:stop] 0/1582, RunningAvgSamplesPerSec=6.326739758130946, CurrSamplesPerSec=5.690173660070088, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:10:56,913] [INFO] [timer.py:197:stop] 0/1584, RunningAvgSamplesPerSec=6.326729646225332, CurrSamplesPerSec=5.6936201062598, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:11:08,620] [INFO] [timer.py:197:stop] 0/1586, RunningAvgSamplesPerSec=6.326719942273098, CurrSamplesPerSec=5.678790863330024, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:11:19,982] [INFO] [timer.py:197:stop] 0/1588, RunningAvgSamplesPerSec=6.326714577519651, CurrSamplesPerSec=5.6897175203704835, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:11:31,391] [INFO] [timer.py:197:stop] 0/1590, RunningAvgSamplesPerSec=6.326709002994419, CurrSamplesPerSec=5.695294385974962, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:11:42,904] [INFO] [timer.py:197:stop] 0/1592, RunningAvgSamplesPerSec=6.326720212022566, CurrSamplesPerSec=5.700918516686109, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:11:54,288] [INFO] [timer.py:197:stop] 0/1594, RunningAvgSamplesPerSec=6.326701126454511, CurrSamplesPerSec=5.660456759611574, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:12:05,611] [INFO] [timer.py:197:stop] 0/1596, RunningAvgSamplesPerSec=6.326729466778808, CurrSamplesPerSec=5.713571618248627, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:12:16,916] [INFO] [timer.py:197:stop] 0/1598, RunningAvgSamplesPerSec=6.3267459876247045, CurrSamplesPerSec=5.7029544923126725, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:12:28,446] [INFO] [logging.py:68:log_dist] [Rank 0] step=800, skipped=5, lr=[9.346666666666666e-06], mom=[[0.9, 0.999]] +[2022-12-16 22:12:28,448] [INFO] [timer.py:197:stop] 0/1600, RunningAvgSamplesPerSec=6.326609933797941, CurrSamplesPerSec=5.5002245491774175, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0166, 'learning_rate': 9.346666666666666e-06, 'epoch': 3.39} +[2022-12-16 22:12:39,770] [INFO] [timer.py:197:stop] 0/1602, RunningAvgSamplesPerSec=6.326638422676115, CurrSamplesPerSec=5.707492641836485, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:12:51,065] [INFO] [timer.py:197:stop] 0/1604, RunningAvgSamplesPerSec=6.3266746590610845, CurrSamplesPerSec=5.729000214488998, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:13:02,571] [INFO] [timer.py:197:stop] 0/1606, RunningAvgSamplesPerSec=6.326559939629545, CurrSamplesPerSec=5.546773415140327, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:13:13,872] [INFO] [timer.py:197:stop] 0/1608, RunningAvgSamplesPerSec=6.326604435719887, CurrSamplesPerSec=5.730451430762789, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:13:25,198] [INFO] [timer.py:197:stop] 0/1610, RunningAvgSamplesPerSec=6.326630818586847, CurrSamplesPerSec=5.708273048903529, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:13:36,873] [INFO] [timer.py:197:stop] 0/1612, RunningAvgSamplesPerSec=6.32665586766633, CurrSamplesPerSec=5.703729295796363, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:13:48,186] [INFO] [timer.py:197:stop] 0/1614, RunningAvgSamplesPerSec=6.3266910772859575, CurrSamplesPerSec=5.71860006368878, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:13:59,799] [INFO] [timer.py:197:stop] 0/1616, RunningAvgSamplesPerSec=6.326687842512467, CurrSamplesPerSec=5.6908894957427, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:14:11,397] [INFO] [timer.py:197:stop] 0/1618, RunningAvgSamplesPerSec=6.326692582176606, CurrSamplesPerSec=5.701328259509893, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:14:22,723] [INFO] [logging.py:68:log_dist] [Rank 0] step=810, skipped=5, lr=[9.324444444444444e-06], mom=[[0.9, 0.999]] +[2022-12-16 22:14:22,725] [INFO] [timer.py:197:stop] 0/1620, RunningAvgSamplesPerSec=6.326714089116822, CurrSamplesPerSec=5.699270695864197, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:14:34,324] [INFO] [timer.py:197:stop] 0/1622, RunningAvgSamplesPerSec=6.326731860344668, CurrSamplesPerSec=5.691813568788928, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:14:46,047] [INFO] [timer.py:197:stop] 0/1624, RunningAvgSamplesPerSec=6.326712534725961, CurrSamplesPerSec=5.690206468282752, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:14:57,368] [INFO] [timer.py:197:stop] 0/1626, RunningAvgSamplesPerSec=6.326740345466834, CurrSamplesPerSec=5.712110948419259, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:15:08,686] [INFO] [timer.py:197:stop] 0/1628, RunningAvgSamplesPerSec=6.326758798477797, CurrSamplesPerSec=5.6973323918270635, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:15:20,061] [INFO] [timer.py:197:stop] 0/1630, RunningAvgSamplesPerSec=6.326747846596995, CurrSamplesPerSec=5.691386609266169, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:15:31,412] [INFO] [timer.py:197:stop] 0/1632, RunningAvgSamplesPerSec=6.3267535500364955, CurrSamplesPerSec=5.690381612642104, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:15:42,701] [INFO] [timer.py:197:stop] 0/1634, RunningAvgSamplesPerSec=6.326795979692171, CurrSamplesPerSec=5.725662737910497, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:15:54,393] [INFO] [timer.py:197:stop] 0/1636, RunningAvgSamplesPerSec=6.326834150424974, CurrSamplesPerSec=5.719259705105001, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:16:05,823] [INFO] [timer.py:197:stop] 0/1638, RunningAvgSamplesPerSec=6.326843525216495, CurrSamplesPerSec=5.697779594336017, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:16:17,363] [INFO] [logging.py:68:log_dist] [Rank 0] step=820, skipped=5, lr=[9.302222222222223e-06], mom=[[0.9, 0.999]] +[2022-12-16 22:16:17,364] [INFO] [timer.py:197:stop] 0/1640, RunningAvgSamplesPerSec=6.326679026146229, CurrSamplesPerSec=5.458330096859955, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:16:28,822] [INFO] [timer.py:197:stop] 0/1642, RunningAvgSamplesPerSec=6.326724094955409, CurrSamplesPerSec=5.730091554512378, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:16:40,240] [INFO] [timer.py:197:stop] 0/1644, RunningAvgSamplesPerSec=6.326761135677585, CurrSamplesPerSec=5.722593622930806, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:16:51,697] [INFO] [timer.py:197:stop] 0/1646, RunningAvgSamplesPerSec=6.326697672667059, CurrSamplesPerSec=5.612376038068795, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:17:03,218] [INFO] [timer.py:197:stop] 0/1648, RunningAvgSamplesPerSec=6.326712992902165, CurrSamplesPerSec=5.70818492382536, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:17:14,675] [INFO] [timer.py:197:stop] 0/1650, RunningAvgSamplesPerSec=6.32673517454783, CurrSamplesPerSec=5.714327900088595, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0196, 'learning_rate': 9.291111111111112e-06, 'epoch': 3.5} +[2022-12-16 22:17:26,082] [INFO] [timer.py:197:stop] 0/1652, RunningAvgSamplesPerSec=6.326699396447215, CurrSamplesPerSec=5.621307520169431, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:17:37,569] [INFO] [timer.py:197:stop] 0/1654, RunningAvgSamplesPerSec=6.3267304307190875, CurrSamplesPerSec=5.707525164650132, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:17:49,057] [INFO] [timer.py:197:stop] 0/1656, RunningAvgSamplesPerSec=6.326770394708087, CurrSamplesPerSec=5.701385415080797, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:18:00,478] [INFO] [timer.py:197:stop] 0/1658, RunningAvgSamplesPerSec=6.326760667852926, CurrSamplesPerSec=5.662375782745366, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:18:11,888] [INFO] [logging.py:68:log_dist] [Rank 0] step=830, skipped=5, lr=[9.280000000000001e-06], mom=[[0.9, 0.999]] +[2022-12-16 22:18:11,890] [INFO] [timer.py:197:stop] 0/1660, RunningAvgSamplesPerSec=6.326821622093543, CurrSamplesPerSec=5.7435494700991665, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:18:23,408] [INFO] [timer.py:197:stop] 0/1662, RunningAvgSamplesPerSec=6.326843356419004, CurrSamplesPerSec=5.699876262773075, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:18:34,855] [INFO] [timer.py:197:stop] 0/1664, RunningAvgSamplesPerSec=6.326798218964038, CurrSamplesPerSec=5.607935810568727, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:18:46,396] [INFO] [timer.py:197:stop] 0/1666, RunningAvgSamplesPerSec=6.326834587271337, CurrSamplesPerSec=5.716924474281732, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:18:57,827] [INFO] [timer.py:197:stop] 0/1668, RunningAvgSamplesPerSec=6.326838624746808, CurrSamplesPerSec=5.694305403046297, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:19:09,201] [INFO] [timer.py:197:stop] 0/1670, RunningAvgSamplesPerSec=6.326862994715842, CurrSamplesPerSec=5.696425141989684, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:19:20,826] [INFO] [timer.py:197:stop] 0/1672, RunningAvgSamplesPerSec=6.326887429548338, CurrSamplesPerSec=5.722593622930806, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:19:32,408] [INFO] [timer.py:197:stop] 0/1674, RunningAvgSamplesPerSec=6.326924458197643, CurrSamplesPerSec=5.73617883162889, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:19:43,753] [INFO] [timer.py:197:stop] 0/1676, RunningAvgSamplesPerSec=6.326932090614424, CurrSamplesPerSec=5.685106201292763, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:19:55,356] [INFO] [timer.py:197:stop] 0/1678, RunningAvgSamplesPerSec=6.326963550525718, CurrSamplesPerSec=5.708342482699776, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:20:06,777] [INFO] [logging.py:68:log_dist] [Rank 0] step=840, skipped=5, lr=[9.257777777777779e-06], mom=[[0.9, 0.999]] +[2022-12-16 22:20:06,779] [INFO] [timer.py:197:stop] 0/1680, RunningAvgSamplesPerSec=6.326964525629489, CurrSamplesPerSec=5.694321106174752, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:20:18,115] [INFO] [timer.py:197:stop] 0/1682, RunningAvgSamplesPerSec=6.326980760880848, CurrSamplesPerSec=5.6901266195429265, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:20:29,491] [INFO] [timer.py:197:stop] 0/1684, RunningAvgSamplesPerSec=6.327030240339728, CurrSamplesPerSec=5.727282599924787, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:20:40,822] [INFO] [timer.py:197:stop] 0/1686, RunningAvgSamplesPerSec=6.327050807508987, CurrSamplesPerSec=5.70611585915527, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:20:52,189] [INFO] [timer.py:197:stop] 0/1688, RunningAvgSamplesPerSec=6.327042771339603, CurrSamplesPerSec=5.677036223927891, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:21:03,656] [INFO] [timer.py:197:stop] 0/1690, RunningAvgSamplesPerSec=6.327078544945673, CurrSamplesPerSec=5.716653948578718, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:21:14,994] [INFO] [timer.py:197:stop] 0/1692, RunningAvgSamplesPerSec=6.327080143901506, CurrSamplesPerSec=5.695177661829884, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:21:26,497] [INFO] [timer.py:197:stop] 0/1694, RunningAvgSamplesPerSec=6.3269586642354865, CurrSamplesPerSec=5.512899898920243, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:21:37,801] [INFO] [timer.py:197:stop] 0/1696, RunningAvgSamplesPerSec=6.326997931656717, CurrSamplesPerSec=5.730388308477891, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:21:49,125] [INFO] [timer.py:197:stop] 0/1698, RunningAvgSamplesPerSec=6.327020896294555, CurrSamplesPerSec=5.703355560734074, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:22:00,877] [INFO] [logging.py:68:log_dist] [Rank 0] step=850, skipped=5, lr=[9.235555555555556e-06], mom=[[0.9, 0.999]] +[2022-12-16 22:22:00,879] [INFO] [timer.py:197:stop] 0/1700, RunningAvgSamplesPerSec=6.327048230282517, CurrSamplesPerSec=5.725344248442001, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0166, 'learning_rate': 9.235555555555556e-06, 'epoch': 3.6} +[2022-12-16 22:22:12,166] [INFO] [timer.py:197:stop] 0/1702, RunningAvgSamplesPerSec=6.327088307183525, CurrSamplesPerSec=5.719386679782303, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:22:23,543] [INFO] [timer.py:197:stop] 0/1704, RunningAvgSamplesPerSec=6.327120891773229, CurrSamplesPerSec=5.709233133050226, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:22:35,131] [INFO] [timer.py:197:stop] 0/1706, RunningAvgSamplesPerSec=6.327090682954414, CurrSamplesPerSec=5.721107852338529, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:22:46,419] [INFO] [timer.py:197:stop] 0/1708, RunningAvgSamplesPerSec=6.327128147702684, CurrSamplesPerSec=5.719666238913165, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:22:57,727] [INFO] [timer.py:197:stop] 0/1710, RunningAvgSamplesPerSec=6.327163186849155, CurrSamplesPerSec=5.71158128455646, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:23:09,157] [INFO] [timer.py:197:stop] 0/1712, RunningAvgSamplesPerSec=6.3271082387375905, CurrSamplesPerSec=5.706069039842615, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:23:20,498] [INFO] [timer.py:197:stop] 0/1714, RunningAvgSamplesPerSec=6.327118251822114, CurrSamplesPerSec=5.709560033560292, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:23:31,806] [INFO] [timer.py:197:stop] 0/1716, RunningAvgSamplesPerSec=6.32716470276775, CurrSamplesPerSec=5.730877909610886, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:23:43,096] [INFO] [timer.py:197:stop] 0/1718, RunningAvgSamplesPerSec=6.327212694616481, CurrSamplesPerSec=5.729023934851759, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:23:54,396] [INFO] [logging.py:68:log_dist] [Rank 0] step=860, skipped=5, lr=[9.213333333333334e-06], mom=[[0.9, 0.999]] +[2022-12-16 22:23:54,398] [INFO] [timer.py:197:stop] 0/1720, RunningAvgSamplesPerSec=6.327252695395273, CurrSamplesPerSec=5.7282911397132175, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:24:05,684] [INFO] [timer.py:197:stop] 0/1722, RunningAvgSamplesPerSec=6.327292967660206, CurrSamplesPerSec=5.7332882305885375, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:24:16,984] [INFO] [timer.py:197:stop] 0/1724, RunningAvgSamplesPerSec=6.327334122464313, CurrSamplesPerSec=5.705825252211353, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:24:28,430] [INFO] [timer.py:197:stop] 0/1726, RunningAvgSamplesPerSec=6.327362763895628, CurrSamplesPerSec=5.7203936594189075, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:24:39,753] [INFO] [timer.py:197:stop] 0/1728, RunningAvgSamplesPerSec=6.327388228071011, CurrSamplesPerSec=5.7061243497864975, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:24:51,089] [INFO] [timer.py:197:stop] 0/1730, RunningAvgSamplesPerSec=6.327403411302091, CurrSamplesPerSec=5.6941735001864995, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:25:02,398] [INFO] [timer.py:197:stop] 0/1732, RunningAvgSamplesPerSec=6.327439575613748, CurrSamplesPerSec=5.738262154434546, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:25:13,746] [INFO] [timer.py:197:stop] 0/1734, RunningAvgSamplesPerSec=6.327447907599327, CurrSamplesPerSec=5.701845849844518, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:25:25,056] [INFO] [timer.py:197:stop] 0/1736, RunningAvgSamplesPerSec=6.3274811248164715, CurrSamplesPerSec=5.718736755597752, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:25:36,352] [INFO] [timer.py:197:stop] 0/1738, RunningAvgSamplesPerSec=6.327525118665114, CurrSamplesPerSec=5.739397030686239, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:25:47,809] [INFO] [logging.py:68:log_dist] [Rank 0] step=870, skipped=5, lr=[9.191111111111111e-06], mom=[[0.9, 0.999]] +[2022-12-16 22:25:47,810] [INFO] [timer.py:197:stop] 0/1740, RunningAvgSamplesPerSec=6.327570720702815, CurrSamplesPerSec=5.728890663197119, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:25:59,188] [INFO] [timer.py:197:stop] 0/1742, RunningAvgSamplesPerSec=6.327554770194363, CurrSamplesPerSec=5.671244133657371, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:26:10,526] [INFO] [timer.py:197:stop] 0/1744, RunningAvgSamplesPerSec=6.327555642826735, CurrSamplesPerSec=5.702799896105481, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:26:21,899] [INFO] [timer.py:197:stop] 0/1746, RunningAvgSamplesPerSec=6.3275406381037325, CurrSamplesPerSec=5.694381745222209, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:26:33,330] [INFO] [timer.py:197:stop] 0/1748, RunningAvgSamplesPerSec=6.3275595001379035, CurrSamplesPerSec=5.700081051921007, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:26:44,667] [INFO] [timer.py:197:stop] 0/1750, RunningAvgSamplesPerSec=6.3275690092916745, CurrSamplesPerSec=5.699173652540018, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0188, 'learning_rate': 9.180000000000002e-06, 'epoch': 3.71} +[2022-12-16 22:26:56,176] [INFO] [timer.py:197:stop] 0/1752, RunningAvgSamplesPerSec=6.327599408724447, CurrSamplesPerSec=5.720589685059201, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:27:07,474] [INFO] [timer.py:197:stop] 0/1754, RunningAvgSamplesPerSec=6.327626901835506, CurrSamplesPerSec=5.717025532305319, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:27:19,040] [INFO] [timer.py:197:stop] 0/1756, RunningAvgSamplesPerSec=6.327463841076069, CurrSamplesPerSec=5.456514256106413, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:27:30,379] [INFO] [timer.py:197:stop] 0/1758, RunningAvgSamplesPerSec=6.327479196720069, CurrSamplesPerSec=5.714399670907678, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:27:41,726] [INFO] [logging.py:68:log_dist] [Rank 0] step=880, skipped=5, lr=[9.168888888888889e-06], mom=[[0.9, 0.999]] +[2022-12-16 22:27:41,728] [INFO] [timer.py:197:stop] 0/1760, RunningAvgSamplesPerSec=6.327481260641632, CurrSamplesPerSec=5.699712393640301, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:27:53,131] [INFO] [timer.py:197:stop] 0/1762, RunningAvgSamplesPerSec=6.3274335041248, CurrSamplesPerSec=5.595196403806098, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:28:04,504] [INFO] [timer.py:197:stop] 0/1764, RunningAvgSamplesPerSec=6.327418345128578, CurrSamplesPerSec=5.674870902112147, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:28:16,055] [INFO] [timer.py:197:stop] 0/1766, RunningAvgSamplesPerSec=6.327419979238297, CurrSamplesPerSec=5.698506538894324, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:28:27,389] [INFO] [timer.py:197:stop] 0/1768, RunningAvgSamplesPerSec=6.327411032748365, CurrSamplesPerSec=5.686275554833261, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:28:38,668] [INFO] [timer.py:197:stop] 0/1770, RunningAvgSamplesPerSec=6.327451486650583, CurrSamplesPerSec=5.7193138087887085, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:28:50,097] [INFO] [timer.py:197:stop] 0/1772, RunningAvgSamplesPerSec=6.327448629806222, CurrSamplesPerSec=5.677691594771782, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:29:01,797] [INFO] [timer.py:197:stop] 0/1774, RunningAvgSamplesPerSec=6.327447834994487, CurrSamplesPerSec=5.693589432396962, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:29:13,137] [INFO] [timer.py:197:stop] 0/1776, RunningAvgSamplesPerSec=6.3274420899994555, CurrSamplesPerSec=5.6716673572904135, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:29:24,741] [INFO] [timer.py:197:stop] 0/1778, RunningAvgSamplesPerSec=6.327465683861914, CurrSamplesPerSec=5.705979769914438, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:29:36,093] [INFO] [logging.py:68:log_dist] [Rank 0] step=890, skipped=5, lr=[9.146666666666667e-06], mom=[[0.9, 0.999]] +[2022-12-16 22:29:36,094] [INFO] [timer.py:197:stop] 0/1780, RunningAvgSamplesPerSec=6.327472717879045, CurrSamplesPerSec=5.712534458483042, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:29:47,438] [INFO] [timer.py:197:stop] 0/1782, RunningAvgSamplesPerSec=6.327477872742945, CurrSamplesPerSec=5.687404901098838, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:29:58,859] [INFO] [timer.py:197:stop] 0/1784, RunningAvgSamplesPerSec=6.327497107616166, CurrSamplesPerSec=5.715082435349324, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:30:10,346] [INFO] [timer.py:197:stop] 0/1786, RunningAvgSamplesPerSec=6.327481575537458, CurrSamplesPerSec=5.696479056278363, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:30:21,630] [INFO] [timer.py:197:stop] 0/1788, RunningAvgSamplesPerSec=6.327516818382441, CurrSamplesPerSec=5.706524893701314, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:30:33,134] [INFO] [timer.py:197:stop] 0/1790, RunningAvgSamplesPerSec=6.327544074013899, CurrSamplesPerSec=5.719948994340342, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:30:44,768] [INFO] [timer.py:197:stop] 0/1792, RunningAvgSamplesPerSec=6.327517602571258, CurrSamplesPerSec=5.67557857248875, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:30:56,109] [INFO] [timer.py:197:stop] 0/1794, RunningAvgSamplesPerSec=6.327513822697181, CurrSamplesPerSec=5.687254038624659, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:31:07,643] [INFO] [timer.py:197:stop] 0/1796, RunningAvgSamplesPerSec=6.327525078782281, CurrSamplesPerSec=5.715674085571417, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:31:19,212] [INFO] [timer.py:197:stop] 0/1798, RunningAvgSamplesPerSec=6.327477564547856, CurrSamplesPerSec=5.673090628417221, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:31:30,491] [INFO] [logging.py:68:log_dist] [Rank 0] step=900, skipped=5, lr=[9.124444444444444e-06], mom=[[0.9, 0.999]] +[2022-12-16 22:31:30,493] [INFO] [timer.py:197:stop] 0/1800, RunningAvgSamplesPerSec=6.327505898154873, CurrSamplesPerSec=5.719943143944835, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0201, 'learning_rate': 9.124444444444444e-06, 'epoch': 3.81} +[2022-12-16 22:31:42,020] [INFO] [timer.py:197:stop] 0/1802, RunningAvgSamplesPerSec=6.327532702887273, CurrSamplesPerSec=5.720189114647878, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:31:53,397] [INFO] [timer.py:197:stop] 0/1804, RunningAvgSamplesPerSec=6.327515890612733, CurrSamplesPerSec=5.675125729502997, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:32:04,723] [INFO] [timer.py:197:stop] 0/1806, RunningAvgSamplesPerSec=6.327534028822243, CurrSamplesPerSec=5.714394561738922, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:32:16,328] [INFO] [timer.py:197:stop] 0/1808, RunningAvgSamplesPerSec=6.327547488856713, CurrSamplesPerSec=5.7109933973552405, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:32:27,941] [INFO] [timer.py:197:stop] 0/1810, RunningAvgSamplesPerSec=6.327525969813436, CurrSamplesPerSec=5.6556872897253445, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:32:39,261] [INFO] [timer.py:197:stop] 0/1812, RunningAvgSamplesPerSec=6.327539814732807, CurrSamplesPerSec=5.705926645929653, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:32:50,811] [INFO] [timer.py:197:stop] 0/1814, RunningAvgSamplesPerSec=6.327557862560616, CurrSamplesPerSec=5.702386549923387, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:33:02,582] [INFO] [timer.py:197:stop] 0/1816, RunningAvgSamplesPerSec=6.327521828081135, CurrSamplesPerSec=5.651765591854346, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:33:13,943] [INFO] [timer.py:197:stop] 0/1818, RunningAvgSamplesPerSec=6.327518212922253, CurrSamplesPerSec=5.679664624133516, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:33:25,525] [INFO] [logging.py:68:log_dist] [Rank 0] step=910, skipped=5, lr=[9.102222222222224e-06], mom=[[0.9, 0.999]] +[2022-12-16 22:33:25,526] [INFO] [timer.py:197:stop] 0/1820, RunningAvgSamplesPerSec=6.32753376849523, CurrSamplesPerSec=5.699907972659344, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:33:36,981] [INFO] [timer.py:197:stop] 0/1822, RunningAvgSamplesPerSec=6.327490357640737, CurrSamplesPerSec=5.683644643984692, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:33:48,325] [INFO] [timer.py:197:stop] 0/1824, RunningAvgSamplesPerSec=6.3274986839712275, CurrSamplesPerSec=5.688544098830273, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:33:59,676] [INFO] [timer.py:197:stop] 0/1826, RunningAvgSamplesPerSec=6.327526659579, CurrSamplesPerSec=5.709614439656007, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:34:11,315] [INFO] [timer.py:197:stop] 0/1828, RunningAvgSamplesPerSec=6.327508865546779, CurrSamplesPerSec=5.671136780061463, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:34:22,675] [INFO] [timer.py:197:stop] 0/1830, RunningAvgSamplesPerSec=6.327503801542838, CurrSamplesPerSec=5.6768350081309285, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:34:34,008] [INFO] [timer.py:197:stop] 0/1832, RunningAvgSamplesPerSec=6.327506325462348, CurrSamplesPerSec=5.701579655712555, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:34:45,563] [INFO] [timer.py:197:stop] 0/1834, RunningAvgSamplesPerSec=6.327367375204487, CurrSamplesPerSec=5.699337974721977, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:34:56,901] [INFO] [timer.py:197:stop] 0/1836, RunningAvgSamplesPerSec=6.327379195232485, CurrSamplesPerSec=5.704727132017407, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:35:08,597] [INFO] [timer.py:197:stop] 0/1838, RunningAvgSamplesPerSec=6.3271557453667935, CurrSamplesPerSec=5.376965988972667, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:35:20,153] [INFO] [logging.py:68:log_dist] [Rank 0] step=920, skipped=5, lr=[9.080000000000001e-06], mom=[[0.9, 0.999]] +[2022-12-16 22:35:20,155] [INFO] [timer.py:197:stop] 0/1840, RunningAvgSamplesPerSec=6.327150200517756, CurrSamplesPerSec=5.699684558608608, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:35:31,485] [INFO] [timer.py:197:stop] 0/1842, RunningAvgSamplesPerSec=6.327162084321393, CurrSamplesPerSec=5.706075347062752, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:35:42,800] [INFO] [timer.py:197:stop] 0/1844, RunningAvgSamplesPerSec=6.327168810082029, CurrSamplesPerSec=5.691259909466644, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:35:54,479] [INFO] [timer.py:197:stop] 0/1846, RunningAvgSamplesPerSec=6.32719038402284, CurrSamplesPerSec=5.714115760953486, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:36:05,815] [INFO] [timer.py:197:stop] 0/1848, RunningAvgSamplesPerSec=6.327205165796917, CurrSamplesPerSec=5.698616624872424, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:36:17,200] [INFO] [timer.py:197:stop] 0/1850, RunningAvgSamplesPerSec=6.327176775421694, CurrSamplesPerSec=5.642583654232158, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0204, 'learning_rate': 9.06888888888889e-06, 'epoch': 3.92} +[2022-12-16 22:36:28,679] [INFO] [timer.py:197:stop] 0/1852, RunningAvgSamplesPerSec=6.327188291303244, CurrSamplesPerSec=5.703051179780791, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:36:40,131] [INFO] [timer.py:197:stop] 0/1854, RunningAvgSamplesPerSec=6.327226112581387, CurrSamplesPerSec=5.734747256027386, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:36:51,510] [INFO] [timer.py:197:stop] 0/1856, RunningAvgSamplesPerSec=6.327212472477824, CurrSamplesPerSec=5.660549146666839, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:37:02,922] [INFO] [timer.py:197:stop] 0/1858, RunningAvgSamplesPerSec=6.327249935928411, CurrSamplesPerSec=5.742332367647951, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:37:14,245] [INFO] [logging.py:68:log_dist] [Rank 0] step=930, skipped=5, lr=[9.057777777777779e-06], mom=[[0.9, 0.999]] +[2022-12-16 22:37:14,247] [INFO] [timer.py:197:stop] 0/1860, RunningAvgSamplesPerSec=6.327266281775782, CurrSamplesPerSec=5.716212055721702, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:37:25,655] [INFO] [timer.py:197:stop] 0/1862, RunningAvgSamplesPerSec=6.3272304696234105, CurrSamplesPerSec=5.653700635485529, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:37:36,996] [INFO] [timer.py:197:stop] 0/1864, RunningAvgSamplesPerSec=6.327240547607681, CurrSamplesPerSec=5.701383235398178, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:37:48,403] [INFO] [timer.py:197:stop] 0/1866, RunningAvgSamplesPerSec=6.327233090336316, CurrSamplesPerSec=5.6837647469343695, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:37:59,744] [INFO] [timer.py:197:stop] 0/1868, RunningAvgSamplesPerSec=6.327240665312632, CurrSamplesPerSec=5.687485878507279, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:38:11,229] [INFO] [timer.py:197:stop] 0/1870, RunningAvgSamplesPerSec=6.327244748394163, CurrSamplesPerSec=5.691447427173725, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:38:22,637] [INFO] [timer.py:197:stop] 0/1872, RunningAvgSamplesPerSec=6.3272248737369186, CurrSamplesPerSec=5.657825353438808, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:38:34,073] [INFO] [timer.py:197:stop] 0/1874, RunningAvgSamplesPerSec=6.327195858796056, CurrSamplesPerSec=5.637339864727594, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:38:45,464] [INFO] [timer.py:197:stop] 0/1876, RunningAvgSamplesPerSec=6.327186040553401, CurrSamplesPerSec=5.683998228114204, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:38:56,954] [INFO] [timer.py:197:stop] 0/1878, RunningAvgSamplesPerSec=6.327190810218982, CurrSamplesPerSec=5.687580837096232, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:39:08,592] [INFO] [logging.py:68:log_dist] [Rank 0] step=940, skipped=5, lr=[9.035555555555556e-06], mom=[[0.9, 0.999]] +[2022-12-16 22:39:08,594] [INFO] [timer.py:197:stop] 0/1880, RunningAvgSamplesPerSec=6.32700099351177, CurrSamplesPerSec=5.406531701958022, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:39:19,900] [INFO] [timer.py:197:stop] 0/1882, RunningAvgSamplesPerSec=6.327025337615598, CurrSamplesPerSec=5.704069383941154, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:39:31,203] [INFO] [timer.py:197:stop] 0/1884, RunningAvgSamplesPerSec=6.327061342294388, CurrSamplesPerSec=5.719577030502328, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:39:42,655] [INFO] [timer.py:197:stop] 0/1886, RunningAvgSamplesPerSec=6.326975385899959, CurrSamplesPerSec=5.544955760650507, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:39:51,166] [INFO] [timer.py:197:stop] 0/1888, RunningAvgSamplesPerSec=6.3286334775112, CurrSamplesPerSec=10.195979896061841, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:40:02,504] [INFO] [timer.py:197:stop] 0/1890, RunningAvgSamplesPerSec=6.328647509479657, CurrSamplesPerSec=5.710262048965369, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:40:14,194] [INFO] [timer.py:197:stop] 0/1892, RunningAvgSamplesPerSec=6.3284261862826945, CurrSamplesPerSec=5.367132041920802, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:40:25,543] [INFO] [timer.py:197:stop] 0/1894, RunningAvgSamplesPerSec=6.32843106698793, CurrSamplesPerSec=5.6931508564600115, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:40:36,849] [INFO] [timer.py:197:stop] 0/1896, RunningAvgSamplesPerSec=6.328464305906655, CurrSamplesPerSec=5.728721698325411, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:40:48,477] [INFO] [timer.py:197:stop] 0/1898, RunningAvgSamplesPerSec=6.328273681309304, CurrSamplesPerSec=5.381708745107983, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:40:59,811] [INFO] [logging.py:68:log_dist] [Rank 0] step=950, skipped=5, lr=[9.013333333333334e-06], mom=[[0.9, 0.999]] +[2022-12-16 22:40:59,813] [INFO] [timer.py:197:stop] 0/1900, RunningAvgSamplesPerSec=6.328286660507801, CurrSamplesPerSec=5.713606642642039, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0177, 'learning_rate': 9.013333333333334e-06, 'epoch': 4.03} +[2022-12-16 22:41:11,142] [INFO] [timer.py:197:stop] 0/1902, RunningAvgSamplesPerSec=6.328296409917468, CurrSamplesPerSec=5.689266276962426, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:41:22,496] [INFO] [timer.py:197:stop] 0/1904, RunningAvgSamplesPerSec=6.328285659790885, CurrSamplesPerSec=5.687977576563542, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:41:33,999] [INFO] [timer.py:197:stop] 0/1906, RunningAvgSamplesPerSec=6.328301415114474, CurrSamplesPerSec=5.708266979596024, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:41:45,322] [INFO] [timer.py:197:stop] 0/1908, RunningAvgSamplesPerSec=6.328300809211748, CurrSamplesPerSec=5.697474840820759, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:41:56,767] [INFO] [timer.py:197:stop] 0/1910, RunningAvgSamplesPerSec=6.328245084111687, CurrSamplesPerSec=5.605507272331195, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:42:08,230] [INFO] [timer.py:197:stop] 0/1912, RunningAvgSamplesPerSec=6.32823748333477, CurrSamplesPerSec=5.698080509950753, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:42:19,577] [INFO] [timer.py:197:stop] 0/1914, RunningAvgSamplesPerSec=6.328233811772137, CurrSamplesPerSec=5.692649088685696, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:42:31,224] [INFO] [timer.py:197:stop] 0/1916, RunningAvgSamplesPerSec=6.328043612491889, CurrSamplesPerSec=5.422718309822167, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:42:42,605] [INFO] [timer.py:197:stop] 0/1918, RunningAvgSamplesPerSec=6.3280308734851065, CurrSamplesPerSec=5.674925368944043, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:42:53,953] [INFO] [logging.py:68:log_dist] [Rank 0] step=960, skipped=5, lr=[8.991111111111112e-06], mom=[[0.9, 0.999]] +[2022-12-16 22:42:53,955] [INFO] [timer.py:197:stop] 0/1920, RunningAvgSamplesPerSec=6.328023141891153, CurrSamplesPerSec=5.678259192763114, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:43:05,610] [INFO] [timer.py:197:stop] 0/1922, RunningAvgSamplesPerSec=6.327827961176475, CurrSamplesPerSec=5.4101643964847765, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:43:16,922] [INFO] [timer.py:197:stop] 0/1924, RunningAvgSamplesPerSec=6.32784426629273, CurrSamplesPerSec=5.692889095536372, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:43:28,308] [INFO] [timer.py:197:stop] 0/1926, RunningAvgSamplesPerSec=6.327825843494196, CurrSamplesPerSec=5.682812727446553, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:43:39,676] [INFO] [timer.py:197:stop] 0/1928, RunningAvgSamplesPerSec=6.327806702706537, CurrSamplesPerSec=5.673969112151267, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:43:50,965] [INFO] [timer.py:197:stop] 0/1930, RunningAvgSamplesPerSec=6.327840292960915, CurrSamplesPerSec=5.721387335868599, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:44:02,315] [INFO] [timer.py:197:stop] 0/1932, RunningAvgSamplesPerSec=6.327859995305297, CurrSamplesPerSec=5.708636502563485, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:44:13,930] [INFO] [timer.py:197:stop] 0/1934, RunningAvgSamplesPerSec=6.327838062306126, CurrSamplesPerSec=5.69510153996506, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:44:25,317] [INFO] [timer.py:197:stop] 0/1936, RunningAvgSamplesPerSec=6.327819423140711, CurrSamplesPerSec=5.685719600318748, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:44:36,691] [INFO] [timer.py:197:stop] 0/1938, RunningAvgSamplesPerSec=6.327798215144098, CurrSamplesPerSec=5.670691833157158, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:44:48,357] [INFO] [logging.py:68:log_dist] [Rank 0] step=970, skipped=5, lr=[8.96888888888889e-06], mom=[[0.9, 0.999]] +[2022-12-16 22:44:48,359] [INFO] [timer.py:197:stop] 0/1940, RunningAvgSamplesPerSec=6.327595320689516, CurrSamplesPerSec=5.7164657401281005, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:44:59,718] [INFO] [timer.py:197:stop] 0/1942, RunningAvgSamplesPerSec=6.327599374628342, CurrSamplesPerSec=5.7112604712469635, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:45:11,048] [INFO] [timer.py:197:stop] 0/1944, RunningAvgSamplesPerSec=6.327614975553183, CurrSamplesPerSec=5.709293604393959, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:45:22,718] [INFO] [timer.py:197:stop] 0/1946, RunningAvgSamplesPerSec=6.327627028381739, CurrSamplesPerSec=5.716932997109624, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:45:34,352] [INFO] [timer.py:197:stop] 0/1948, RunningAvgSamplesPerSec=6.327614865246932, CurrSamplesPerSec=5.685503318103534, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:45:45,765] [INFO] [timer.py:197:stop] 0/1950, RunningAvgSamplesPerSec=6.327580884082605, CurrSamplesPerSec=5.649820694504452, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0101, 'learning_rate': 8.957777777777778e-06, 'epoch': 4.13} +[2022-12-16 22:45:57,261] [INFO] [timer.py:197:stop] 0/1952, RunningAvgSamplesPerSec=6.32759681131148, CurrSamplesPerSec=5.711184404892128, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:46:08,750] [INFO] [timer.py:197:stop] 0/1954, RunningAvgSamplesPerSec=6.327597897269227, CurrSamplesPerSec=5.704721797659587, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:46:20,109] [INFO] [timer.py:197:stop] 0/1956, RunningAvgSamplesPerSec=6.327596837568829, CurrSamplesPerSec=5.676017325551704, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:46:31,499] [INFO] [timer.py:197:stop] 0/1958, RunningAvgSamplesPerSec=6.3276281314148894, CurrSamplesPerSec=5.714223774783396, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:46:42,824] [INFO] [logging.py:68:log_dist] [Rank 0] step=980, skipped=5, lr=[8.946666666666669e-06], mom=[[0.9, 0.999]] +[2022-12-16 22:46:42,826] [INFO] [timer.py:197:stop] 0/1960, RunningAvgSamplesPerSec=6.327636555796214, CurrSamplesPerSec=5.699254723419047, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:46:54,425] [INFO] [timer.py:197:stop] 0/1962, RunningAvgSamplesPerSec=6.327482014444832, CurrSamplesPerSec=5.433102247841417, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:47:05,753] [INFO] [timer.py:197:stop] 0/1964, RunningAvgSamplesPerSec=6.327500616331316, CurrSamplesPerSec=5.705595310433084, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:47:17,115] [INFO] [timer.py:197:stop] 0/1966, RunningAvgSamplesPerSec=6.327492170652153, CurrSamplesPerSec=5.685668538891042, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:47:28,632] [INFO] [timer.py:197:stop] 0/1968, RunningAvgSamplesPerSec=6.327392003434301, CurrSamplesPerSec=5.509042049946703, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:47:40,071] [INFO] [timer.py:197:stop] 0/1970, RunningAvgSamplesPerSec=6.327330482222493, CurrSamplesPerSec=5.6039946277232415, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:47:51,426] [INFO] [timer.py:197:stop] 0/1972, RunningAvgSamplesPerSec=6.3273298008094425, CurrSamplesPerSec=5.7071977690610645, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:48:02,785] [INFO] [timer.py:197:stop] 0/1974, RunningAvgSamplesPerSec=6.327328324697508, CurrSamplesPerSec=5.662362644142834, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:48:14,144] [INFO] [timer.py:197:stop] 0/1976, RunningAvgSamplesPerSec=6.327325920760691, CurrSamplesPerSec=5.696949096715646, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:48:25,463] [INFO] [timer.py:197:stop] 0/1978, RunningAvgSamplesPerSec=6.3273483838352025, CurrSamplesPerSec=5.7194220192270775, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:48:37,007] [INFO] [logging.py:68:log_dist] [Rank 0] step=990, skipped=5, lr=[8.924444444444446e-06], mom=[[0.9, 0.999]] +[2022-12-16 22:48:37,009] [INFO] [timer.py:197:stop] 0/1980, RunningAvgSamplesPerSec=6.327227334854434, CurrSamplesPerSec=5.493117913683248, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:48:48,361] [INFO] [timer.py:197:stop] 0/1982, RunningAvgSamplesPerSec=6.327218692310974, CurrSamplesPerSec=5.67055886580819, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:48:59,691] [INFO] [timer.py:197:stop] 0/1984, RunningAvgSamplesPerSec=6.3272315122953255, CurrSamplesPerSec=5.713750149849061, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:49:11,345] [INFO] [timer.py:197:stop] 0/1986, RunningAvgSamplesPerSec=6.327033256452263, CurrSamplesPerSec=5.3792464182237305, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:49:22,687] [INFO] [timer.py:197:stop] 0/1988, RunningAvgSamplesPerSec=6.32703996328013, CurrSamplesPerSec=5.709569748858481, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:49:34,309] [INFO] [timer.py:197:stop] 0/1990, RunningAvgSamplesPerSec=6.327031926644361, CurrSamplesPerSec=5.697876832079683, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:49:46,057] [INFO] [timer.py:197:stop] 0/1992, RunningAvgSamplesPerSec=6.327036540568657, CurrSamplesPerSec=5.711242487325227, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:49:57,349] [INFO] [timer.py:197:stop] 0/1994, RunningAvgSamplesPerSec=6.32706622307384, CurrSamplesPerSec=5.728870856379531, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:50:08,810] [INFO] [timer.py:197:stop] 0/1996, RunningAvgSamplesPerSec=6.327081375070768, CurrSamplesPerSec=5.718303554862198, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:50:20,250] [INFO] [timer.py:197:stop] 0/1998, RunningAvgSamplesPerSec=6.327057320293952, CurrSamplesPerSec=5.702582070221039, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-16 22:50:31,570] [INFO] [logging.py:68:log_dist] [Rank 0] step=1000, skipped=5, lr=[8.902222222222224e-06], mom=[[0.9, 0.999]] +[2022-12-16 22:50:31,571] [INFO] [timer.py:197:stop] 0/2000, RunningAvgSamplesPerSec=6.327079086755388, CurrSamplesPerSec=5.695145037924359, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0106, 'learning_rate': 8.902222222222224e-06, 'epoch': 4.24} +{'eval_loss': 0.1624755859375, 'eval_wer': 9.988766321062228, 'eval_runtime': 2123.3545, 'eval_samples_per_second': 3.633, 'eval_steps_per_second': 0.454, 'epoch': 4.24} +[2022-12-16 23:25:59,883] [INFO] [logging.py:68:log_dist] [Rank 0] [Torch] Checkpoint global_step1000 is begin to save! +[2022-12-16 23:25:59,893] [INFO] [logging.py:68:log_dist] [Rank 0] Saving model checkpoint: ./checkpoint-1000/global_step1000/mp_rank_00_model_states.pt +[2022-12-16 23:25:59,893] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving ./checkpoint-1000/global_step1000/mp_rank_00_model_states.pt... +[2022-12-16 23:26:04,551] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved ./checkpoint-1000/global_step1000/mp_rank_00_model_states.pt. +[2022-12-16 23:26:04,552] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving ./checkpoint-1000/global_step1000/zero_pp_rank_0_mp_rank_00_optim_states.pt... +[2022-12-16 23:26:26,839] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved ./checkpoint-1000/global_step1000/zero_pp_rank_0_mp_rank_00_optim_states.pt. +[2022-12-16 23:26:26,839] [INFO] [engine.py:3269:_save_zero_checkpoint] zero checkpoint saved ./checkpoint-1000/global_step1000/zero_pp_rank_0_mp_rank_00_optim_states.pt +[2022-12-16 23:26:26,839] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now!