diff --git "a/run.log" "b/run.log" --- "a/run.log" +++ "b/run.log" @@ -1,13 +1,5 @@ -[2022-12-14 16:16:14,483] [WARNING] [runner.py:179:fetch_hostfile] Unable to find hostfile, will proceed with training with local resources only. -[2022-12-14 16:16:14,494] [INFO] [runner.py:508:main] cmd = /home/milan/hf_env/bin/python3 -u -m deepspeed.launcher.launch --world_info=eyJsb2NhbGhvc3QiOiBbMF19 --master_addr=127.0.0.1 --master_port=29500 run_speech_recognition_seq2seq_streaming.py --deepspeed=ds_config.json --model_name_or_path=openai/whisper-small --dataset_name=facebook/voxpopuli --dataset_config_name=hr --language=croatian --train_split_name=train+validation --eval_split_name=test --model_index_name=Whisper Small Croatian --max_steps=5000 --output_dir=./ --per_device_train_batch_size=64 --per_device_eval_batch_size=32 --logging_steps=25 --learning_rate=1e-5 --warmup_steps=500 --evaluation_strategy=steps --eval_steps=1000 --save_strategy=steps --save_steps=1000 --generation_max_length=225 --length_column_name=input_length --max_duration_in_seconds=30 --text_column_name=normalized_text --freeze_feature_encoder=False --report_to=tensorboard --metric_for_best_model=wer --greater_is_better=False --load_best_model_at_end --gradient_checkpointing --fp16 --overwrite_output_dir --do_train --do_eval --predict_with_generate --do_normalize_eval --streaming --use_auth_token --push_to_hub -[2022-12-14 16:16:16,035] [INFO] [launch.py:142:main] WORLD INFO DICT: {'localhost': [0]} -[2022-12-14 16:16:16,035] [INFO] [launch.py:148:main] nnodes=1, num_local_procs=1, node_rank=0 -[2022-12-14 16:16:16,035] [INFO] [launch.py:161:main] global_rank_mapping=defaultdict(, {'localhost': [0]}) -[2022-12-14 16:16:16,035] [INFO] [launch.py:162:main] dist_world_size=1 -[2022-12-14 16:16:16,035] [INFO] [launch.py:164:main] Setting CUDA_VISIBLE_DEVICES=0 -[2022-12-14 16:16:20,163] [INFO] [comm.py:654:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl -12/14/2022 16:16:20 - WARNING - __main__ - Process rank: 0, device: cuda:0, n_gpu: 1distributed training: True, 16-bits training: True -12/14/2022 16:16:20 - INFO - __main__ - Training/evaluation parameters Seq2SeqTrainingArguments( +12/14/2022 23:13:58 - WARNING - __main__ - Process rank: -1, device: cuda:0, n_gpu: 1distributed training: False, 16-bits training: True +12/14/2022 23:13:58 - INFO - __main__ - Training/evaluation parameters Seq2SeqTrainingArguments( _n_gpu=1, adafactor=False, adam_beta1=0.9, @@ -24,7 +16,7 @@ ddp_bucket_cap_mb=None, ddp_find_unused_parameters=None, ddp_timeout=1800, debug=[], -deepspeed=ds_config.json, +deepspeed=None, disable_tqdm=False, do_eval=True, do_predict=False, @@ -60,11 +52,11 @@ label_smoothing_factor=0.0, learning_rate=1e-05, length_column_name=input_length, load_best_model_at_end=True, -local_rank=0, +local_rank=-1, log_level=passive, log_level_replica=passive, log_on_each_node=True, -logging_dir=./runs/Dec14_16-16-20_129-146-123-136, +logging_dir=./runs/Dec14_23-13-58_129-146-123-136, logging_first_step=False, logging_nan_inf_filter=True, logging_steps=25, @@ -117,7 +109,7 @@ warmup_steps=500, weight_decay=0.0, xpu_backend=None, ) -12/14/2022 16:16:20 - INFO - __main__ - Training/evaluation parameters Seq2SeqTrainingArguments( +12/14/2022 23:13:58 - INFO - __main__ - Training/evaluation parameters Seq2SeqTrainingArguments( _n_gpu=1, adafactor=False, adam_beta1=0.9, @@ -134,7 +126,7 @@ ddp_bucket_cap_mb=None, ddp_find_unused_parameters=None, ddp_timeout=1800, debug=[], -deepspeed=ds_config.json, +deepspeed=None, disable_tqdm=False, do_eval=True, do_predict=False, @@ -170,11 +162,11 @@ label_smoothing_factor=0.0, learning_rate=1e-05, length_column_name=input_length, load_best_model_at_end=True, -local_rank=0, +local_rank=-1, log_level=passive, log_level_replica=passive, log_on_each_node=True, -logging_dir=./runs/Dec14_16-16-20_129-146-123-136, +logging_dir=./runs/Dec14_23-13-58_129-146-123-136, logging_first_step=False, logging_nan_inf_filter=True, logging_steps=25, @@ -227,3654 +219,7 @@ warmup_steps=500, weight_decay=0.0, xpu_backend=None, ) -12/14/2022 16:16:21 - INFO - datasets.utils.file_utils - https://huggingface.co/datasets/facebook/voxpopuli/resolve/main/voxpopuli.py not found in cache or force_download set to True, downloading to /home/milan/.cache/huggingface/datasets/downloads/tmpytvg4dlk -12/14/2022 16:16:21 - INFO - datasets.utils.file_utils - storing https://huggingface.co/datasets/facebook/voxpopuli/resolve/main/voxpopuli.py in cache at /home/milan/.cache/huggingface/datasets/downloads/106bf524483c334048ae062c58ec9b0e6b97d2b58ee9189e35d7de119584e588.e2ef81d0abcf78daf2af04b0007d1e3b9b865252392ed14408b8bf57cce986b7.py -12/14/2022 16:16:21 - INFO - datasets.utils.file_utils - creating metadata file for /home/milan/.cache/huggingface/datasets/downloads/106bf524483c334048ae062c58ec9b0e6b97d2b58ee9189e35d7de119584e588.e2ef81d0abcf78daf2af04b0007d1e3b9b865252392ed14408b8bf57cce986b7.py -12/14/2022 16:16:22 - INFO - datasets.utils.file_utils - https://huggingface.co/datasets/facebook/voxpopuli/resolve/main/README.md not found in cache or force_download set to True, downloading to /home/milan/.cache/huggingface/datasets/downloads/tmpecycnlp_ -12/14/2022 16:16:22 - INFO - datasets.utils.file_utils - storing https://huggingface.co/datasets/facebook/voxpopuli/resolve/main/README.md in cache at /home/milan/.cache/huggingface/datasets/downloads/64e94143cbc03db672e215dcfdbc3f01c69ce8fd2fafc243def70ffe62bfcea6.e94f2c74349ad27464fb7b6584da6f25bff8564f6df09f92efdd7436695ce51b -12/14/2022 16:16:22 - INFO - datasets.utils.file_utils - creating metadata file for /home/milan/.cache/huggingface/datasets/downloads/64e94143cbc03db672e215dcfdbc3f01c69ce8fd2fafc243def70ffe62bfcea6.e94f2c74349ad27464fb7b6584da6f25bff8564f6df09f92efdd7436695ce51b -12/14/2022 16:16:22 - INFO - datasets.info - Loading Dataset Infos from /home/milan/.cache/huggingface/modules/datasets_modules/datasets/facebook--voxpopuli/b5ff837284f0778eefe0f642734e142d8c3f574eba8c9c8a4b13602297f73604 -12/14/2022 16:16:24 - INFO - datasets.info - Loading Dataset Infos from /home/milan/.cache/huggingface/modules/datasets_modules/datasets/facebook--voxpopuli/b5ff837284f0778eefe0f642734e142d8c3f574eba8c9c8a4b13602297f73604 -12/14/2022 16:16:25 - INFO - datasets.info - Loading Dataset Infos from /home/milan/.cache/huggingface/modules/datasets_modules/datasets/facebook--voxpopuli/b5ff837284f0778eefe0f642734e142d8c3f574eba8c9c8a4b13602297f73604 -12/14/2022 16:16:50 - WARNING - huggingface_hub.repository - /home/milan/whisper-small-hr-vox/./ is already a clone of https://huggingface.co/mikr/whisper-small-hr-vox. Make sure you pull the latest changes with `repo.git_pull()`. -[2022-12-14 16:16:54,254] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed info: version=0.7.7, git-hash=unknown, git-branch=unknown -[2022-12-14 16:16:54,570] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed Flops Profiler Enabled: False -[2022-12-14 16:16:55,696] [WARNING] [cpu_adam.py:83:__init__] FP16 params for CPUAdam may not work on AMD CPUs -Installed CUDA version 11.6 does not match the version torch was compiled with 11.7 but since the APIs are compatible, accepting this combination -[1/3] /usr/bin/nvcc -DTORCH_EXTENSION_NAME=cpu_adam -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/home/milan/hf_env/lib/python3.8/site-packages/deepspeed/ops/csrc/includes -I/usr/include -isystem /home/milan/hf_env/lib/python3.8/site-packages/torch/include -isystem /home/milan/hf_env/lib/python3.8/site-packages/torch/include/torch/csrc/api/include -isystem /home/milan/hf_env/lib/python3.8/site-packages/torch/include/TH -isystem /home/milan/hf_env/lib/python3.8/site-packages/torch/include/THC -isystem /usr/include/python3.8 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_80,code=sm_80 --compiler-options '-fPIC' -O3 --use_fast_math -std=c++14 -U__CUDA_NO_HALF_OPERATORS__ -U__CUDA_NO_HALF_CONVERSIONS__ -U__CUDA_NO_HALF2_OPERATORS__ -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_80,code=compute_80 -c /home/milan/hf_env/lib/python3.8/site-packages/deepspeed/ops/csrc/common/custom_cuda_kernel.cu -o custom_cuda_kernel.cuda.o -[2/3] c++ -MMD -MF cpu_adam.o.d -DTORCH_EXTENSION_NAME=cpu_adam -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/home/milan/hf_env/lib/python3.8/site-packages/deepspeed/ops/csrc/includes -I/usr/include -isystem /home/milan/hf_env/lib/python3.8/site-packages/torch/include -isystem /home/milan/hf_env/lib/python3.8/site-packages/torch/include/torch/csrc/api/include -isystem /home/milan/hf_env/lib/python3.8/site-packages/torch/include/TH -isystem /home/milan/hf_env/lib/python3.8/site-packages/torch/include/THC -isystem /usr/include/python3.8 -D_GLIBCXX_USE_CXX11_ABI=0 -fPIC -std=c++14 -O3 -std=c++14 -g -Wno-reorder -L/usr/lib64 -lcudart -lcublas -g -march=native -fopenmp -D__AVX256__ -c /home/milan/hf_env/lib/python3.8/site-packages/deepspeed/ops/csrc/adam/cpu_adam.cpp -o cpu_adam.o -[3/3] c++ cpu_adam.o custom_cuda_kernel.cuda.o -shared -lcurand -L/home/milan/hf_env/lib/python3.8/site-packages/torch/lib -lc10 -lc10_cuda -ltorch_cpu -ltorch_cuda_cu -ltorch_cuda_cpp -ltorch -ltorch_python -L/usr/lib64 -lcudart -o cpu_adam.so -Time to load cpu_adam op: 27.422913312911987 seconds -Adam Optimizer #0 is created with AVX2 arithmetic capability. -Config: alpha=0.000010, betas=(0.900000, 0.999000), weight_decay=0.000000, adam_w=1 -[2022-12-14 16:17:24,680] [INFO] [logging.py:68:log_dist] [Rank 0] Using DeepSpeed Optimizer param name adamw as basic optimizer -[2022-12-14 16:17:24,726] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed Basic Optimizer = DeepSpeedCPUAdam -[2022-12-14 16:17:24,726] [INFO] [utils.py:52:is_zero_supported_optimizer] Checking ZeRO support for optimizer=DeepSpeedCPUAdam type= -[2022-12-14 16:17:24,727] [INFO] [logging.py:68:log_dist] [Rank 0] Creating fp16 ZeRO stage 2 optimizer -[2022-12-14 16:17:24,727] [INFO] [stage_1_and_2.py:140:__init__] Reduce bucket size 200000000 -[2022-12-14 16:17:24,727] [INFO] [stage_1_and_2.py:141:__init__] Allgather bucket size 200000000 -[2022-12-14 16:17:24,727] [INFO] [stage_1_and_2.py:142:__init__] CPU Offload: True -[2022-12-14 16:17:24,727] [INFO] [stage_1_and_2.py:143:__init__] Round robin gradient partitioning: False -[1/2] c++ -MMD -MF flatten_unflatten.o.d -DTORCH_EXTENSION_NAME=utils -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -isystem /home/milan/hf_env/lib/python3.8/site-packages/torch/include -isystem /home/milan/hf_env/lib/python3.8/site-packages/torch/include/torch/csrc/api/include -isystem /home/milan/hf_env/lib/python3.8/site-packages/torch/include/TH -isystem /home/milan/hf_env/lib/python3.8/site-packages/torch/include/THC -isystem /usr/include/python3.8 -D_GLIBCXX_USE_CXX11_ABI=0 -fPIC -std=c++14 -c /home/milan/hf_env/lib/python3.8/site-packages/deepspeed/ops/csrc/utils/flatten_unflatten.cpp -o flatten_unflatten.o -[2/2] c++ flatten_unflatten.o -shared -L/home/milan/hf_env/lib/python3.8/site-packages/torch/lib -lc10 -ltorch_cpu -ltorch -ltorch_python -o utils.so -Time to load utils op: 14.912862777709961 seconds -Rank: 0 partition count [1] and sizes[(241734912, False)] -[2022-12-14 16:17:40,429] [INFO] [utils.py:827:see_memory_usage] Before initializing optimizer states -[2022-12-14 16:17:40,430] [INFO] [utils.py:828:see_memory_usage] MA 0.53 GB Max_MA 0.53 GB CA 0.94 GB Max_CA 1 GB -[2022-12-14 16:17:40,430] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 7.45 GB, percent = 3.8% -[2022-12-14 16:17:41,202] [INFO] [utils.py:827:see_memory_usage] After initializing optimizer states -[2022-12-14 16:17:41,202] [INFO] [utils.py:828:see_memory_usage] MA 0.53 GB Max_MA 0.53 GB CA 0.94 GB Max_CA 1 GB -[2022-12-14 16:17:41,203] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 10.27 GB, percent = 5.2% -[2022-12-14 16:17:41,203] [INFO] [stage_1_and_2.py:525:__init__] optimizer state initialized -[2022-12-14 16:17:41,269] [INFO] [utils.py:827:see_memory_usage] After initializing ZeRO optimizer -[2022-12-14 16:17:41,270] [INFO] [utils.py:828:see_memory_usage] MA 0.53 GB Max_MA 0.53 GB CA 0.94 GB Max_CA 1 GB -[2022-12-14 16:17:41,270] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 10.27 GB, percent = 5.2% -[2022-12-14 16:17:41,279] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed Final Optimizer = adamw -[2022-12-14 16:17:41,279] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed using configured LR scheduler = WarmupLR -[2022-12-14 16:17:41,279] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed LR Scheduler = -[2022-12-14 16:17:41,279] [INFO] [logging.py:68:log_dist] [Rank 0] step=0, skipped=0, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 16:17:41,280] [INFO] [config.py:1020:print] DeepSpeedEngine configuration: -[2022-12-14 16:17:41,280] [INFO] [config.py:1024:print] activation_checkpointing_config { - "partition_activations": false, - "contiguous_memory_optimization": false, - "cpu_checkpointing": false, - "number_checkpoints": null, - "synchronize_checkpoint_boundary": false, - "profile": false -} -[2022-12-14 16:17:41,280] [INFO] [config.py:1024:print] aio_config ................... {'block_size': 1048576, 'queue_depth': 8, 'thread_count': 1, 'single_submit': False, 'overlap_events': True} -[2022-12-14 16:17:41,281] [INFO] [config.py:1024:print] amp_enabled .................. False -[2022-12-14 16:17:41,281] [INFO] [config.py:1024:print] amp_params ................... False -[2022-12-14 16:17:41,281] [INFO] [config.py:1024:print] autotuning_config ............ { - "enabled": false, - "start_step": null, - "end_step": null, - "metric_path": null, - "arg_mappings": null, - "metric": "throughput", - "model_info": null, - "results_dir": "autotuning_results", - "exps_dir": "autotuning_exps", - "overwrite": true, - "fast": true, - "start_profile_step": 3, - "end_profile_step": 5, - "tuner_type": "gridsearch", - "tuner_early_stopping": 5, - "tuner_num_trials": 50, - "model_info_path": null, - "mp_size": 1, - "max_train_batch_size": null, - "min_train_batch_size": 1, - "max_train_micro_batch_size_per_gpu": 1.024000e+03, - "min_train_micro_batch_size_per_gpu": 1, - "num_tuning_micro_batch_sizes": 3 -} -[2022-12-14 16:17:41,281] [INFO] [config.py:1024:print] bfloat16_enabled ............. False -[2022-12-14 16:17:41,281] [INFO] [config.py:1024:print] checkpoint_parallel_write_pipeline False -[2022-12-14 16:17:41,281] [INFO] [config.py:1024:print] checkpoint_tag_validation_enabled True -[2022-12-14 16:17:41,281] [INFO] [config.py:1024:print] checkpoint_tag_validation_fail False -[2022-12-14 16:17:41,281] [INFO] [config.py:1024:print] comms_config ................. -[2022-12-14 16:17:41,281] [INFO] [config.py:1024:print] communication_data_type ...... None -[2022-12-14 16:17:41,281] [INFO] [config.py:1024:print] compression_config ........... {'weight_quantization': {'shared_parameters': {'enabled': False, 'quantizer_kernel': False, 'schedule_offset': 0, 'quantize_groups': 1, 'quantize_verbose': False, 'quantization_type': 'symmetric', 'quantize_weight_in_forward': False, 'rounding': 'nearest', 'fp16_mixed_quantize': False, 'quantize_change_ratio': 0.001}, 'different_groups': {}}, 'activation_quantization': {'shared_parameters': {'enabled': False, 'quantization_type': 'symmetric', 'range_calibration': 'dynamic', 'schedule_offset': 1000}, 'different_groups': {}}, 'sparse_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'row_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'head_pruning': {'shared_parameters': {'enabled': False, 'method': 'topk', 'schedule_offset': 1000}, 'different_groups': {}}, 'channel_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'layer_reduction': {'enabled': False}} -[2022-12-14 16:17:41,281] [INFO] [config.py:1024:print] curriculum_enabled ........... False -[2022-12-14 16:17:41,281] [INFO] [config.py:1024:print] curriculum_params ............ False -[2022-12-14 16:17:41,281] [INFO] [config.py:1024:print] dataloader_drop_last ......... False -[2022-12-14 16:17:41,281] [INFO] [config.py:1024:print] disable_allgather ............ False -[2022-12-14 16:17:41,281] [INFO] [config.py:1024:print] dump_state ................... False -[2022-12-14 16:17:41,281] [INFO] [config.py:1024:print] dynamic_loss_scale_args ...... {'init_scale': 65536, 'scale_window': 1000, 'delayed_shift': 2, 'min_scale': 1} -[2022-12-14 16:17:41,281] [INFO] [config.py:1024:print] eigenvalue_enabled ........... False -[2022-12-14 16:17:41,281] [INFO] [config.py:1024:print] eigenvalue_gas_boundary_resolution 1 -[2022-12-14 16:17:41,282] [INFO] [config.py:1024:print] eigenvalue_layer_name ........ bert.encoder.layer -[2022-12-14 16:17:41,282] [INFO] [config.py:1024:print] eigenvalue_layer_num ......... 0 -[2022-12-14 16:17:41,282] [INFO] [config.py:1024:print] eigenvalue_max_iter .......... 100 -[2022-12-14 16:17:41,282] [INFO] [config.py:1024:print] eigenvalue_stability ......... 1e-06 -[2022-12-14 16:17:41,282] [INFO] [config.py:1024:print] eigenvalue_tol ............... 0.01 -[2022-12-14 16:17:41,282] [INFO] [config.py:1024:print] eigenvalue_verbose ........... False -[2022-12-14 16:17:41,282] [INFO] [config.py:1024:print] elasticity_enabled ........... False -[2022-12-14 16:17:41,282] [INFO] [config.py:1024:print] flops_profiler_config ........ { - "enabled": false, - "profile_step": 1, - "module_depth": -1, - "top_modules": 1, - "detailed": true, - "output_file": null -} -[2022-12-14 16:17:41,282] [INFO] [config.py:1024:print] fp16_auto_cast ............... False -[2022-12-14 16:17:41,282] [INFO] [config.py:1024:print] fp16_enabled ................. True -[2022-12-14 16:17:41,282] [INFO] [config.py:1024:print] fp16_master_weights_and_gradients False -[2022-12-14 16:17:41,282] [INFO] [config.py:1024:print] global_rank .................. 0 -[2022-12-14 16:17:41,282] [INFO] [config.py:1024:print] grad_accum_dtype ............. None -[2022-12-14 16:17:41,282] [INFO] [config.py:1024:print] gradient_accumulation_steps .. 1 -[2022-12-14 16:17:41,282] [INFO] [config.py:1024:print] gradient_clipping ............ 1.0 -[2022-12-14 16:17:41,282] [INFO] [config.py:1024:print] gradient_predivide_factor .... 1.0 -[2022-12-14 16:17:41,282] [INFO] [config.py:1024:print] initial_dynamic_scale ........ 65536 -[2022-12-14 16:17:41,282] [INFO] [config.py:1024:print] load_universal_checkpoint .... False -[2022-12-14 16:17:41,282] [INFO] [config.py:1024:print] loss_scale ................... 0 -[2022-12-14 16:17:41,282] [INFO] [config.py:1024:print] memory_breakdown ............. False -[2022-12-14 16:17:41,282] [INFO] [config.py:1024:print] monitor_config ............... -[2022-12-14 16:17:41,282] [INFO] [config.py:1024:print] nebula_config ................ { - "enabled": false, - "persistent_storage_path": null, - "persistent_time_interval": 100, - "num_of_version_in_retention": 2, - "enable_nebula_load": true, - "load_path": null -} -[2022-12-14 16:17:41,282] [INFO] [config.py:1024:print] optimizer_legacy_fusion ...... False -[2022-12-14 16:17:41,282] [INFO] [config.py:1024:print] optimizer_name ............... adamw -[2022-12-14 16:17:41,283] [INFO] [config.py:1024:print] optimizer_params ............. {'lr': 1e-05, 'betas': [0.9, 0.999], 'eps': 1e-08, 'weight_decay': 0.0} -[2022-12-14 16:17:41,283] [INFO] [config.py:1024:print] pipeline ..................... {'stages': 'auto', 'partition': 'best', 'seed_layers': False, 'activation_checkpoint_interval': 0} -[2022-12-14 16:17:41,283] [INFO] [config.py:1024:print] pld_enabled .................. False -[2022-12-14 16:17:41,283] [INFO] [config.py:1024:print] pld_params ................... False -[2022-12-14 16:17:41,283] [INFO] [config.py:1024:print] prescale_gradients ........... False -[2022-12-14 16:17:41,283] [INFO] [config.py:1024:print] scheduler_name ............... WarmupLR -[2022-12-14 16:17:41,283] [INFO] [config.py:1024:print] scheduler_params ............. {'warmup_min_lr': 0, 'warmup_max_lr': 1e-05, 'warmup_num_steps': 500} -[2022-12-14 16:17:41,283] [INFO] [config.py:1024:print] sparse_attention ............. None -[2022-12-14 16:17:41,283] [INFO] [config.py:1024:print] sparse_gradients_enabled ..... False -[2022-12-14 16:17:41,283] [INFO] [config.py:1024:print] steps_per_print .............. 10 -[2022-12-14 16:17:41,283] [INFO] [config.py:1024:print] train_batch_size ............. 64 -[2022-12-14 16:17:41,283] [INFO] [config.py:1024:print] train_micro_batch_size_per_gpu 64 -[2022-12-14 16:17:41,283] [INFO] [config.py:1024:print] use_node_local_storage ....... False -[2022-12-14 16:17:41,283] [INFO] [config.py:1024:print] wall_clock_breakdown ......... False -[2022-12-14 16:17:41,283] [INFO] [config.py:1024:print] world_size ................... 1 -[2022-12-14 16:17:41,283] [INFO] [config.py:1024:print] zero_allow_untested_optimizer False -[2022-12-14 16:17:41,283] [INFO] [config.py:1024:print] zero_config .................. stage=2 contiguous_gradients=True reduce_scatter=True reduce_bucket_size=200000000 allgather_partitions=True allgather_bucket_size=200000000 overlap_comm=True load_from_fp32_weights=True elastic_checkpoint=False offload_param=None offload_optimizer=DeepSpeedZeroOffloadOptimizerConfig(device='cpu', nvme_path=None, buffer_count=4, pin_memory=True, pipeline=False, pipeline_read=False, pipeline_write=False, fast_init=False) sub_group_size=1,000,000,000 cpu_offload_param=None cpu_offload_use_pin_memory=None cpu_offload=None prefetch_bucket_size=50,000,000 param_persistence_threshold=100,000 model_persistence_threshold=sys.maxsize max_live_parameters=1,000,000,000 max_reuse_distance=1,000,000,000 gather_16bit_weights_on_model_save=False stage3_gather_fp16_weights_on_model_save=False ignore_unused_parameters=True legacy_stage1=False round_robin_gradients=False -[2022-12-14 16:17:41,283] [INFO] [config.py:1024:print] zero_enabled ................. True -[2022-12-14 16:17:41,283] [INFO] [config.py:1024:print] zero_optimization_stage ...... 2 -[2022-12-14 16:17:41,284] [INFO] [config.py:1009:print_user_config] json = { - "fp16": { - "enabled": true, - "loss_scale": 0, - "loss_scale_window": 1000, - "initial_scale_power": 16, - "hysteresis": 2, - "min_loss_scale": 1 - }, - "optimizer": { - "type": "AdamW", - "params": { - "lr": 1e-05, - "betas": [0.9, 0.999], - "eps": 1e-08, - "weight_decay": 0.0 - } - }, - "scheduler": { - "type": "WarmupLR", - "params": { - "warmup_min_lr": 0, - "warmup_max_lr": 1e-05, - "warmup_num_steps": 500 - } - }, - "zero_optimization": { - "stage": 2, - "offload_optimizer": { - "device": "cpu", - "pin_memory": true - }, - "allgather_partitions": true, - "allgather_bucket_size": 2.000000e+08, - "overlap_comm": true, - "reduce_scatter": true, - "reduce_bucket_size": 2.000000e+08, - "contiguous_gradients": true - }, - "gradient_accumulation_steps": 1, - "gradient_clipping": 1.0, - "train_batch_size": 64, - "train_micro_batch_size_per_gpu": 64 -} -Time to load utils op: 0.0003387928009033203 seconds -[2022-12-14 16:19:01,227] [INFO] [stage_1_and_2.py:1765:step] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 65536 -[2022-12-14 16:19:08,720] [INFO] [stage_1_and_2.py:1765:step] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768.0 -[2022-12-14 16:19:15,883] [INFO] [timer.py:197:stop] 0/3, RunningAvgSamplesPerSec=29.600569261531366, CurrSamplesPerSec=29.600569261531366, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:19:22,453] [INFO] [timer.py:197:stop] 0/4, RunningAvgSamplesPerSec=29.84915059859713, CurrSamplesPerSec=30.10194239536303, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:19:29,055] [INFO] [timer.py:197:stop] 0/5, RunningAvgSamplesPerSec=29.860358523922898, CurrSamplesPerSec=29.882799644036442, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:19:35,856] [INFO] [timer.py:197:stop] 0/6, RunningAvgSamplesPerSec=29.738395511956575, CurrSamplesPerSec=29.378411930087303, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:19:42,753] [INFO] [timer.py:197:stop] 0/7, RunningAvgSamplesPerSec=29.8687180635254, CurrSamplesPerSec=30.40163431370184, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:19:49,351] [INFO] [timer.py:197:stop] 0/8, RunningAvgSamplesPerSec=29.89304192333273, CurrSamplesPerSec=30.01525790256393, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:19:56,629] [INFO] [timer.py:197:stop] 0/9, RunningAvgSamplesPerSec=29.940243610774065, CurrSamplesPerSec=30.226614036065587, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:20:03,623] [INFO] [logging.py:68:log_dist] [Rank 0] step=10, skipped=2, lr=[3.3460541819326935e-06], mom=[[0.9, 0.999]] -[2022-12-14 16:20:03,623] [INFO] [timer.py:197:stop] 0/10, RunningAvgSamplesPerSec=29.907359011353645, CurrSamplesPerSec=29.679174019602694, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:20:11,000] [INFO] [timer.py:197:stop] 0/11, RunningAvgSamplesPerSec=29.902437357989612, CurrSamplesPerSec=29.86312236889635, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:20:17,763] [INFO] [timer.py:197:stop] 0/12, RunningAvgSamplesPerSec=29.86817169313274, CurrSamplesPerSec=29.56327853542334, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:20:24,057] [INFO] [timer.py:197:stop] 0/13, RunningAvgSamplesPerSec=29.837106570960852, CurrSamplesPerSec=29.52997286879566, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:20:30,994] [INFO] [timer.py:197:stop] 0/14, RunningAvgSamplesPerSec=29.79637836635386, CurrSamplesPerSec=29.355598064273707, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:20:37,212] [INFO] [timer.py:197:stop] 0/15, RunningAvgSamplesPerSec=29.825892070138377, CurrSamplesPerSec=30.184671832841286, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:20:44,782] [INFO] [timer.py:197:stop] 0/16, RunningAvgSamplesPerSec=29.840228129760057, CurrSamplesPerSec=30.027858906049715, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:20:51,255] [INFO] [timer.py:197:stop] 0/17, RunningAvgSamplesPerSec=29.884671093684663, CurrSamplesPerSec=30.521068864924246, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:20:57,498] [INFO] [timer.py:197:stop] 0/18, RunningAvgSamplesPerSec=29.890886340636435, CurrSamplesPerSec=29.98442624269254, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:21:04,165] [INFO] [timer.py:197:stop] 0/19, RunningAvgSamplesPerSec=29.882107919579553, CurrSamplesPerSec=29.74235113697945, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:21:10,795] [INFO] [logging.py:68:log_dist] [Rank 0] step=20, skipped=2, lr=[4.650931663140581e-06], mom=[[0.9, 0.999]] -[2022-12-14 16:21:10,796] [INFO] [timer.py:197:stop] 0/20, RunningAvgSamplesPerSec=29.867278880670913, CurrSamplesPerSec=29.61741821420407, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:21:17,272] [INFO] [timer.py:197:stop] 0/21, RunningAvgSamplesPerSec=29.844038356753472, CurrSamplesPerSec=29.431808249247034, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:21:24,087] [INFO] [timer.py:197:stop] 0/22, RunningAvgSamplesPerSec=29.8036097174558, CurrSamplesPerSec=29.055754893608178, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:21:31,311] [INFO] [timer.py:197:stop] 0/23, RunningAvgSamplesPerSec=29.828601725834293, CurrSamplesPerSec=30.337394059176777, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:21:38,722] [INFO] [timer.py:197:stop] 0/24, RunningAvgSamplesPerSec=29.819506304936226, CurrSamplesPerSec=29.629775624935704, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:21:45,429] [INFO] [timer.py:197:stop] 0/25, RunningAvgSamplesPerSec=29.838995717230414, CurrSamplesPerSec=30.27430218153701, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 1.1454, 'learning_rate': 5.0453611334320685e-06, 'epoch': 0.01} -[2022-12-14 16:21:52,173] [INFO] [timer.py:197:stop] 0/26, RunningAvgSamplesPerSec=29.848569209233407, CurrSamplesPerSec=30.070467621386197, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:21:59,549] [INFO] [timer.py:197:stop] 0/27, RunningAvgSamplesPerSec=29.84642259063844, CurrSamplesPerSec=29.794996211740298, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:22:06,836] [INFO] [timer.py:197:stop] 0/28, RunningAvgSamplesPerSec=29.83080926488274, CurrSamplesPerSec=29.44571657612171, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:22:13,897] [INFO] [timer.py:197:stop] 0/29, RunningAvgSamplesPerSec=29.798796059547932, CurrSamplesPerSec=28.9899154554482, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:22:20,873] [INFO] [logging.py:68:log_dist] [Rank 0] step=30, skipped=2, lr=[5.361890013661856e-06], mom=[[0.9, 0.999]] -[2022-12-14 16:22:20,874] [INFO] [timer.py:197:stop] 0/30, RunningAvgSamplesPerSec=29.782128428164537, CurrSamplesPerSec=29.33904560362086, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:22:27,598] [INFO] [timer.py:197:stop] 0/31, RunningAvgSamplesPerSec=29.79391391839002, CurrSamplesPerSec=30.127737078406284, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:22:34,420] [INFO] [timer.py:197:stop] 0/32, RunningAvgSamplesPerSec=29.77782222578812, CurrSamplesPerSec=29.318607812202202, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:22:36,804] [INFO] [timer.py:197:stop] 0/33, RunningAvgSamplesPerSec=29.815271188935967, CurrSamplesPerSec=30.98425690543709, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:22:38,921] [INFO] [timer.py:197:stop] 0/34, RunningAvgSamplesPerSec=29.838762334848617, CurrSamplesPerSec=30.585807894767726, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:22:41,059] [INFO] [timer.py:197:stop] 0/35, RunningAvgSamplesPerSec=29.85245103221372, CurrSamplesPerSec=30.297219581472785, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:22:43,229] [INFO] [timer.py:197:stop] 0/36, RunningAvgSamplesPerSec=29.853716367740542, CurrSamplesPerSec=29.895532700546468, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:22:45,414] [INFO] [timer.py:197:stop] 0/37, RunningAvgSamplesPerSec=29.847766064629422, CurrSamplesPerSec=29.646857583115096, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:22:47,558] [INFO] [timer.py:197:stop] 0/38, RunningAvgSamplesPerSec=29.858230162057044, CurrSamplesPerSec=30.22915333852326, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:22:49,715] [INFO] [timer.py:197:stop] 0/39, RunningAvgSamplesPerSec=29.86308115526223, CurrSamplesPerSec=30.038772874542357, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:22:51,612] [INFO] [logging.py:68:log_dist] [Rank 0] step=40, skipped=2, lr=[5.853283267612517e-06], mom=[[0.9, 0.999]] -[2022-12-14 16:22:51,613] [INFO] [timer.py:197:stop] 0/40, RunningAvgSamplesPerSec=29.962941114579873, CurrSamplesPerSec=34.1935478775732, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:24:12,666] [INFO] [timer.py:197:stop] 0/41, RunningAvgSamplesPerSec=29.970648309388704, CurrSamplesPerSec=30.266488744867694, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:24:19,348] [INFO] [timer.py:197:stop] 0/42, RunningAvgSamplesPerSec=29.967836807384465, CurrSamplesPerSec=29.85859816825456, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:24:26,128] [INFO] [timer.py:197:stop] 0/43, RunningAvgSamplesPerSec=29.915549602132852, CurrSamplesPerSec=27.96391714398192, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:24:32,349] [INFO] [timer.py:197:stop] 0/44, RunningAvgSamplesPerSec=29.924257849065697, CurrSamplesPerSec=30.285713834421983, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:24:38,469] [INFO] [timer.py:197:stop] 0/45, RunningAvgSamplesPerSec=29.933067535113366, CurrSamplesPerSec=30.30781697264223, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:24:44,757] [INFO] [timer.py:197:stop] 0/46, RunningAvgSamplesPerSec=29.92950799097278, CurrSamplesPerSec=29.777244382422875, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:24:51,275] [INFO] [timer.py:197:stop] 0/47, RunningAvgSamplesPerSec=29.921765216164907, CurrSamplesPerSec=29.585004544070873, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:24:57,333] [INFO] [timer.py:197:stop] 0/48, RunningAvgSamplesPerSec=29.93684697840389, CurrSamplesPerSec=30.631627222624974, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:25:04,379] [INFO] [timer.py:197:stop] 0/49, RunningAvgSamplesPerSec=29.945457187529467, CurrSamplesPerSec=30.346952574679083, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:25:10,893] [INFO] [logging.py:68:log_dist] [Rank 0] step=50, skipped=2, lr=[6.229195710491767e-06], mom=[[0.9, 0.999]] -[2022-12-14 16:25:10,894] [INFO] [timer.py:197:stop] 0/50, RunningAvgSamplesPerSec=29.936708270411458, CurrSamplesPerSec=29.531197613525528, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.6809, 'learning_rate': 6.229195710491767e-06, 'epoch': 1.0} -[2022-12-14 16:25:17,390] [INFO] [timer.py:197:stop] 0/51, RunningAvgSamplesPerSec=29.923998528458622, CurrSamplesPerSec=29.326368767303713, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:25:23,613] [INFO] [timer.py:197:stop] 0/52, RunningAvgSamplesPerSec=29.933519342684296, CurrSamplesPerSec=30.407578325703934, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:25:29,915] [INFO] [timer.py:197:stop] 0/53, RunningAvgSamplesPerSec=29.929955276297893, CurrSamplesPerSec=29.752827669312406, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:25:36,916] [INFO] [timer.py:197:stop] 0/54, RunningAvgSamplesPerSec=29.92547656948279, CurrSamplesPerSec=29.698826409611765, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:25:42,876] [INFO] [timer.py:197:stop] 0/55, RunningAvgSamplesPerSec=29.93065553177582, CurrSamplesPerSec=30.202454154083505, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:25:49,272] [INFO] [timer.py:197:stop] 0/56, RunningAvgSamplesPerSec=29.899238839639505, CurrSamplesPerSec=28.323559197599526, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:25:55,606] [INFO] [timer.py:197:stop] 0/57, RunningAvgSamplesPerSec=29.89069645566325, CurrSamplesPerSec=29.436546193811203, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:26:02,276] [INFO] [timer.py:197:stop] 0/58, RunningAvgSamplesPerSec=29.893624214223227, CurrSamplesPerSec=30.055538972204985, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:26:08,719] [INFO] [timer.py:197:stop] 0/59, RunningAvgSamplesPerSec=29.886494804178632, CurrSamplesPerSec=29.492603706809728, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:26:14,917] [INFO] [logging.py:68:log_dist] [Rank 0] step=60, skipped=2, lr=[6.533707268809618e-06], mom=[[0.9, 0.999]] -[2022-12-14 16:26:14,918] [INFO] [timer.py:197:stop] 0/60, RunningAvgSamplesPerSec=29.889899928001835, CurrSamplesPerSec=30.085282977251627, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:26:21,235] [INFO] [timer.py:197:stop] 0/61, RunningAvgSamplesPerSec=29.895606343684157, CurrSamplesPerSec=30.23034825175896, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:26:27,350] [INFO] [timer.py:197:stop] 0/62, RunningAvgSamplesPerSec=29.883619954645205, CurrSamplesPerSec=29.19304253334299, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:26:35,474] [INFO] [timer.py:197:stop] 0/63, RunningAvgSamplesPerSec=29.879221451621035, CurrSamplesPerSec=29.617660032848754, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:26:41,644] [INFO] [timer.py:197:stop] 0/64, RunningAvgSamplesPerSec=29.87529914928358, CurrSamplesPerSec=29.637970544145965, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:26:48,110] [INFO] [timer.py:197:stop] 0/65, RunningAvgSamplesPerSec=29.870454393485574, CurrSamplesPerSec=29.573117752775172, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:26:54,077] [INFO] [timer.py:197:stop] 0/66, RunningAvgSamplesPerSec=29.870881651018696, CurrSamplesPerSec=29.897823538819935, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:27:00,909] [INFO] [timer.py:197:stop] 0/67, RunningAvgSamplesPerSec=29.87632228435029, CurrSamplesPerSec=30.228693781661896, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:27:07,172] [INFO] [timer.py:197:stop] 0/68, RunningAvgSamplesPerSec=29.873972757703843, CurrSamplesPerSec=29.722042161636438, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:27:13,579] [INFO] [timer.py:197:stop] 0/69, RunningAvgSamplesPerSec=29.87591488697557, CurrSamplesPerSec=30.00465614332373, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:27:20,026] [INFO] [logging.py:68:log_dist] [Rank 0] step=70, skipped=2, lr=[6.7896601657751925e-06], mom=[[0.9, 0.999]] -[2022-12-14 16:27:20,027] [INFO] [timer.py:197:stop] 0/70, RunningAvgSamplesPerSec=29.874000743173745, CurrSamplesPerSec=29.74630946259818, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:27:26,280] [INFO] [timer.py:197:stop] 0/71, RunningAvgSamplesPerSec=29.87049695465629, CurrSamplesPerSec=29.634152227445416, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:27:32,641] [INFO] [timer.py:197:stop] 0/72, RunningAvgSamplesPerSec=29.84548165815693, CurrSamplesPerSec=28.215083637108194, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:27:35,527] [INFO] [timer.py:197:stop] 0/73, RunningAvgSamplesPerSec=29.84350267426596, CurrSamplesPerSec=29.70562296105138, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:27:37,710] [INFO] [timer.py:197:stop] 0/74, RunningAvgSamplesPerSec=29.841297256115247, CurrSamplesPerSec=29.685541368316066, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:27:39,889] [INFO] [timer.py:197:stop] 0/75, RunningAvgSamplesPerSec=29.839675017515923, CurrSamplesPerSec=29.723335549739872, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.5112, 'learning_rate': 6.903829450223392e-06, 'epoch': 1.01} -[2022-12-14 16:27:42,044] [INFO] [timer.py:197:stop] 0/76, RunningAvgSamplesPerSec=29.84288306778642, CurrSamplesPerSec=30.07894860395078, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:27:44,204] [INFO] [timer.py:197:stop] 0/77, RunningAvgSamplesPerSec=29.844626360714454, CurrSamplesPerSec=29.974197679182442, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:27:46,327] [INFO] [timer.py:197:stop] 0/78, RunningAvgSamplesPerSec=29.853271035650735, CurrSamplesPerSec=30.516211301170568, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:27:48,474] [INFO] [timer.py:197:stop] 0/79, RunningAvgSamplesPerSec=29.85746118631038, CurrSamplesPerSec=30.179391434929336, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:27:50,396] [INFO] [logging.py:68:log_dist] [Rank 0] step=80, skipped=2, lr=[7.010432126517687e-06], mom=[[0.9, 0.999]] -[2022-12-14 16:27:50,397] [INFO] [timer.py:197:stop] 0/80, RunningAvgSamplesPerSec=29.901887840184944, CurrSamplesPerSec=33.771141414456565, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:28:32,721] [INFO] [timer.py:197:stop] 0/81, RunningAvgSamplesPerSec=29.910223767557383, CurrSamplesPerSec=30.575063998118356, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:28:39,414] [INFO] [timer.py:197:stop] 0/82, RunningAvgSamplesPerSec=29.90696612188145, CurrSamplesPerSec=29.651835343147045, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:28:46,210] [INFO] [timer.py:197:stop] 0/83, RunningAvgSamplesPerSec=29.909513766649606, CurrSamplesPerSec=30.114741305460907, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:28:52,560] [INFO] [timer.py:197:stop] 0/84, RunningAvgSamplesPerSec=29.900797653094696, CurrSamplesPerSec=29.21127419310433, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:29:00,548] [INFO] [timer.py:197:stop] 0/85, RunningAvgSamplesPerSec=29.880049346772864, CurrSamplesPerSec=28.27140119897457, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:29:07,189] [INFO] [timer.py:197:stop] 0/86, RunningAvgSamplesPerSec=29.875095643265876, CurrSamplesPerSec=29.469586331958048, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:29:13,780] [INFO] [timer.py:197:stop] 0/87, RunningAvgSamplesPerSec=29.873480942036643, CurrSamplesPerSec=29.738466344800997, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:29:20,389] [INFO] [timer.py:197:stop] 0/88, RunningAvgSamplesPerSec=29.870455495306782, CurrSamplesPerSec=29.615513213515204, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:29:27,129] [INFO] [timer.py:197:stop] 0/89, RunningAvgSamplesPerSec=29.875796502723066, CurrSamplesPerSec=30.342380061133404, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:29:33,382] [INFO] [logging.py:68:log_dist] [Rank 0] step=90, skipped=2, lr=[7.204536060149867e-06], mom=[[0.9, 0.999]] -[2022-12-14 16:29:33,383] [INFO] [timer.py:197:stop] 0/90, RunningAvgSamplesPerSec=29.869865350806034, CurrSamplesPerSec=29.36271696580997, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:29:40,351] [INFO] [timer.py:197:stop] 0/91, RunningAvgSamplesPerSec=29.868044681571746, CurrSamplesPerSec=29.708690315305468, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:29:46,887] [INFO] [timer.py:197:stop] 0/92, RunningAvgSamplesPerSec=29.86866381701748, CurrSamplesPerSec=29.923869862628116, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:29:53,102] [INFO] [timer.py:197:stop] 0/93, RunningAvgSamplesPerSec=29.865531586317697, CurrSamplesPerSec=29.58629581550289, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:29:59,546] [INFO] [timer.py:197:stop] 0/94, RunningAvgSamplesPerSec=29.870006835274786, CurrSamplesPerSec=30.28294637614371, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:30:05,944] [INFO] [timer.py:197:stop] 0/95, RunningAvgSamplesPerSec=29.874263124768145, CurrSamplesPerSec=30.27109985730257, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:30:14,446] [INFO] [timer.py:197:stop] 0/96, RunningAvgSamplesPerSec=29.86604516278834, CurrSamplesPerSec=29.121044226897173, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:30:21,070] [INFO] [timer.py:197:stop] 0/97, RunningAvgSamplesPerSec=29.867700867887265, CurrSamplesPerSec=30.024161112259424, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:30:27,501] [INFO] [timer.py:197:stop] 0/98, RunningAvgSamplesPerSec=29.871347653688392, CurrSamplesPerSec=30.221900778527722, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:30:34,630] [INFO] [timer.py:197:stop] 0/99, RunningAvgSamplesPerSec=29.876555553069753, CurrSamplesPerSec=30.38511280850852, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:30:41,121] [INFO] [logging.py:68:log_dist] [Rank 0] step=100, skipped=2, lr=[7.377725845391017e-06], mom=[[0.9, 0.999]] -[2022-12-14 16:30:41,122] [INFO] [timer.py:197:stop] 0/100, RunningAvgSamplesPerSec=29.880088935430027, CurrSamplesPerSec=30.22684548296153, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.3992, 'learning_rate': 7.377725845391017e-06, 'epoch': 2.0} -[2022-12-14 16:30:47,576] [INFO] [timer.py:197:stop] 0/101, RunningAvgSamplesPerSec=29.882057356650414, CurrSamplesPerSec=30.07622891216638, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:30:53,800] [INFO] [timer.py:197:stop] 0/102, RunningAvgSamplesPerSec=29.877705999429168, CurrSamplesPerSec=29.45310547131088, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:31:00,954] [INFO] [timer.py:197:stop] 0/103, RunningAvgSamplesPerSec=29.876934423208343, CurrSamplesPerSec=29.799977530854, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:31:08,257] [INFO] [timer.py:197:stop] 0/104, RunningAvgSamplesPerSec=29.875741221222004, CurrSamplesPerSec=29.755716771416825, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:31:15,420] [INFO] [timer.py:197:stop] 0/105, RunningAvgSamplesPerSec=29.880548017566653, CurrSamplesPerSec=30.379101938528244, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:31:21,810] [INFO] [timer.py:197:stop] 0/106, RunningAvgSamplesPerSec=29.886416017391934, CurrSamplesPerSec=30.50341902252864, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:31:29,165] [INFO] [timer.py:197:stop] 0/107, RunningAvgSamplesPerSec=29.881744953860476, CurrSamplesPerSec=29.4037990737055, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:31:36,262] [INFO] [timer.py:197:stop] 0/108, RunningAvgSamplesPerSec=29.871422054696275, CurrSamplesPerSec=28.825819368241685, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:31:43,322] [INFO] [timer.py:197:stop] 0/109, RunningAvgSamplesPerSec=29.86719284968771, CurrSamplesPerSec=29.4255879791084, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:31:51,521] [INFO] [logging.py:68:log_dist] [Rank 0] step=110, skipped=2, lr=[7.5340731916996546e-06], mom=[[0.9, 0.999]] -[2022-12-14 16:31:51,521] [INFO] [timer.py:197:stop] 0/110, RunningAvgSamplesPerSec=29.870941853730013, CurrSamplesPerSec=30.27759739069556, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:31:58,443] [INFO] [timer.py:197:stop] 0/111, RunningAvgSamplesPerSec=29.87520709214369, CurrSamplesPerSec=30.34313461958872, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:32:05,126] [INFO] [timer.py:197:stop] 0/112, RunningAvgSamplesPerSec=29.87780408333542, CurrSamplesPerSec=30.163608772135834, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:32:07,505] [INFO] [timer.py:197:stop] 0/113, RunningAvgSamplesPerSec=29.881818444378382, CurrSamplesPerSec=30.33008262048034, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:32:09,766] [INFO] [timer.py:197:stop] 0/114, RunningAvgSamplesPerSec=29.870171326592605, CurrSamplesPerSec=28.631438649619344, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:32:11,916] [INFO] [timer.py:197:stop] 0/115, RunningAvgSamplesPerSec=29.872571740612898, CurrSamplesPerSec=30.143881639579124, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:32:14,102] [INFO] [timer.py:197:stop] 0/116, RunningAvgSamplesPerSec=29.87038578645115, CurrSamplesPerSec=29.625416662757967, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:32:16,274] [INFO] [timer.py:197:stop] 0/117, RunningAvgSamplesPerSec=29.873335758051173, CurrSamplesPerSec=30.21349543216306, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:32:18,500] [INFO] [timer.py:197:stop] 0/118, RunningAvgSamplesPerSec=29.868307046180544, CurrSamplesPerSec=29.3010831127219, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:32:20,661] [INFO] [timer.py:197:stop] 0/119, RunningAvgSamplesPerSec=29.87283275972825, CurrSamplesPerSec=30.407288992055616, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:32:22,522] [INFO] [logging.py:68:log_dist] [Rank 0] step=120, skipped=2, lr=[7.676565519355727e-06], mom=[[0.9, 0.999]] -[2022-12-14 16:32:22,522] [INFO] [timer.py:197:stop] 0/120, RunningAvgSamplesPerSec=29.909364193458057, CurrSamplesPerSec=34.90329605345792, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:33:04,127] [INFO] [timer.py:197:stop] 0/121, RunningAvgSamplesPerSec=29.911876844300526, CurrSamplesPerSec=30.21136337443817, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:33:10,170] [INFO] [timer.py:197:stop] 0/122, RunningAvgSamplesPerSec=29.908932172072607, CurrSamplesPerSec=29.562607844113302, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:33:18,146] [INFO] [timer.py:197:stop] 0/123, RunningAvgSamplesPerSec=29.909881954328096, CurrSamplesPerSec=30.024295439415305, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:33:24,693] [INFO] [timer.py:197:stop] 0/124, RunningAvgSamplesPerSec=29.90567085183338, CurrSamplesPerSec=29.404733142381172, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:33:31,608] [INFO] [timer.py:197:stop] 0/125, RunningAvgSamplesPerSec=29.90631773337061, CurrSamplesPerSec=29.985447808251635, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.3358, 'learning_rate': 7.743343231239583e-06, 'epoch': 3.0} -[2022-12-14 16:33:38,458] [INFO] [timer.py:197:stop] 0/126, RunningAvgSamplesPerSec=29.90886748799945, CurrSamplesPerSec=30.22583803410272, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:33:45,022] [INFO] [timer.py:197:stop] 0/127, RunningAvgSamplesPerSec=29.908483296224187, CurrSamplesPerSec=29.860919888528024, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:33:51,120] [INFO] [timer.py:197:stop] 0/128, RunningAvgSamplesPerSec=29.905950933985928, CurrSamplesPerSec=29.592747339099116, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:33:57,483] [INFO] [timer.py:197:stop] 0/129, RunningAvgSamplesPerSec=29.90300921506578, CurrSamplesPerSec=29.53692634932309, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:34:04,064] [INFO] [logging.py:68:log_dist] [Rank 0] step=130, skipped=2, lr=[7.807459757842952e-06], mom=[[0.9, 0.999]] -[2022-12-14 16:34:04,065] [INFO] [timer.py:197:stop] 0/130, RunningAvgSamplesPerSec=29.906086703096868, CurrSamplesPerSec=30.302144496625832, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:34:11,245] [INFO] [timer.py:197:stop] 0/131, RunningAvgSamplesPerSec=29.907799224541503, CurrSamplesPerSec=30.12863316903949, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:34:17,251] [INFO] [timer.py:197:stop] 0/132, RunningAvgSamplesPerSec=29.91030660270893, CurrSamplesPerSec=30.237322165174902, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:34:23,115] [INFO] [timer.py:197:stop] 0/133, RunningAvgSamplesPerSec=29.904425082389203, CurrSamplesPerSec=29.159032278282943, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:34:29,397] [INFO] [timer.py:197:stop] 0/134, RunningAvgSamplesPerSec=29.90399273353205, CurrSamplesPerSec=29.847462917146313, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:34:35,609] [INFO] [timer.py:197:stop] 0/135, RunningAvgSamplesPerSec=29.895897067159932, CurrSamplesPerSec=28.864418593198145, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:34:42,768] [INFO] [timer.py:197:stop] 0/136, RunningAvgSamplesPerSec=29.881730721352636, CurrSamplesPerSec=28.110149586142146, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:34:49,054] [INFO] [timer.py:197:stop] 0/137, RunningAvgSamplesPerSec=29.88144481168244, CurrSamplesPerSec=29.843182339438705, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:34:55,248] [INFO] [timer.py:197:stop] 0/138, RunningAvgSamplesPerSec=29.882595254226466, CurrSamplesPerSec=30.038722453127765, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:35:01,701] [INFO] [timer.py:197:stop] 0/139, RunningAvgSamplesPerSec=29.882899321443116, CurrSamplesPerSec=29.924310190278536, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:35:07,703] [INFO] [logging.py:68:log_dist] [Rank 0] step=140, skipped=2, lr=[7.928502661991142e-06], mom=[[0.9, 0.999]] -[2022-12-14 16:35:07,704] [INFO] [timer.py:197:stop] 0/140, RunningAvgSamplesPerSec=29.881293437822908, CurrSamplesPerSec=29.662907025147042, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:35:14,151] [INFO] [timer.py:197:stop] 0/141, RunningAvgSamplesPerSec=29.882948600597054, CurrSamplesPerSec=30.113133245802892, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:35:20,851] [INFO] [timer.py:197:stop] 0/142, RunningAvgSamplesPerSec=29.882069588442416, CurrSamplesPerSec=29.760388013558035, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:35:28,341] [INFO] [timer.py:197:stop] 0/143, RunningAvgSamplesPerSec=29.87650998317363, CurrSamplesPerSec=29.118065427374287, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:35:35,007] [INFO] [timer.py:197:stop] 0/144, RunningAvgSamplesPerSec=29.878907635319162, CurrSamplesPerSec=30.22087324536063, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:35:41,232] [INFO] [timer.py:197:stop] 0/145, RunningAvgSamplesPerSec=29.882533726480787, CurrSamplesPerSec=30.40653123954701, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:35:47,312] [INFO] [timer.py:197:stop] 0/146, RunningAvgSamplesPerSec=29.881781988278192, CurrSamplesPerSec=29.774671445957235, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:35:54,533] [INFO] [timer.py:197:stop] 0/147, RunningAvgSamplesPerSec=29.8771309268439, CurrSamplesPerSec=29.22216240912961, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:36:01,232] [INFO] [timer.py:197:stop] 0/148, RunningAvgSamplesPerSec=29.875274842520092, CurrSamplesPerSec=29.608561876864286, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:36:07,750] [INFO] [timer.py:197:stop] 0/149, RunningAvgSamplesPerSec=29.87885509226357, CurrSamplesPerSec=30.410943956090765, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:36:14,287] [INFO] [logging.py:68:log_dist] [Rank 0] step=150, skipped=2, lr=[8.041073861170494e-06], mom=[[0.9, 0.999]] -[2022-12-14 16:36:14,288] [INFO] [timer.py:197:stop] 0/150, RunningAvgSamplesPerSec=29.876929387415014, CurrSamplesPerSec=29.596525623964286, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.2592, 'learning_rate': 8.041073861170494e-06, 'epoch': 3.01} -[2022-12-14 16:36:20,713] [INFO] [timer.py:197:stop] 0/151, RunningAvgSamplesPerSec=29.875216058247887, CurrSamplesPerSec=29.623791781520946, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:36:27,616] [INFO] [timer.py:197:stop] 0/152, RunningAvgSamplesPerSec=29.87231476962854, CurrSamplesPerSec=29.446230157271902, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:36:30,183] [INFO] [timer.py:197:stop] 0/153, RunningAvgSamplesPerSec=29.874785772461607, CurrSamplesPerSec=30.250123987715384, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:36:32,333] [INFO] [timer.py:197:stop] 0/154, RunningAvgSamplesPerSec=29.87641699052338, CurrSamplesPerSec=30.124792189955944, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:36:34,523] [INFO] [timer.py:197:stop] 0/155, RunningAvgSamplesPerSec=29.874405362716786, CurrSamplesPerSec=29.571755961243202, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:36:36,630] [INFO] [timer.py:197:stop] 0/156, RunningAvgSamplesPerSec=29.880095328286824, CurrSamplesPerSec=30.776961281986928, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:36:38,758] [INFO] [timer.py:197:stop] 0/157, RunningAvgSamplesPerSec=29.883637352381953, CurrSamplesPerSec=30.439317868331734, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:36:40,880] [INFO] [timer.py:197:stop] 0/158, RunningAvgSamplesPerSec=29.887815316898752, CurrSamplesPerSec=30.549836487779118, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:36:43,077] [INFO] [timer.py:197:stop] 0/159, RunningAvgSamplesPerSec=29.885222634072555, CurrSamplesPerSec=29.486199008190678, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:36:44,982] [INFO] [logging.py:68:log_dist] [Rank 0] step=160, skipped=2, lr=[8.146282038785833e-06], mom=[[0.9, 0.999]] -[2022-12-14 16:36:44,983] [INFO] [timer.py:197:stop] 0/160, RunningAvgSamplesPerSec=29.90864738746714, CurrSamplesPerSec=34.10570813994707, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:37:28,021] [INFO] [timer.py:197:stop] 0/161, RunningAvgSamplesPerSec=29.90887691921785, CurrSamplesPerSec=29.94518724258098, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:37:34,791] [INFO] [timer.py:197:stop] 0/162, RunningAvgSamplesPerSec=29.910841113769926, CurrSamplesPerSec=30.22646427819808, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:37:41,259] [INFO] [timer.py:197:stop] 0/163, RunningAvgSamplesPerSec=29.909696215366104, CurrSamplesPerSec=29.72763448787347, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:37:48,767] [INFO] [timer.py:197:stop] 0/164, RunningAvgSamplesPerSec=29.901290616209685, CurrSamplesPerSec=28.606934240449807, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:37:55,281] [INFO] [timer.py:197:stop] 0/165, RunningAvgSamplesPerSec=29.895942698523754, CurrSamplesPerSec=29.054125860698672, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:38:01,495] [INFO] [timer.py:197:stop] 0/166, RunningAvgSamplesPerSec=29.899972330883493, CurrSamplesPerSec=30.571648017843103, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:38:07,907] [INFO] [timer.py:197:stop] 0/167, RunningAvgSamplesPerSec=29.90355808626835, CurrSamplesPerSec=30.503491813496467, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:38:14,322] [INFO] [timer.py:197:stop] 0/168, RunningAvgSamplesPerSec=29.906009140721142, CurrSamplesPerSec=30.31601125609953, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:38:21,526] [INFO] [timer.py:197:stop] 0/169, RunningAvgSamplesPerSec=29.903646064132417, CurrSamplesPerSec=29.516484659557914, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:38:28,351] [INFO] [logging.py:68:log_dist] [Rank 0] step=170, skipped=2, lr=[8.245031542220927e-06], mom=[[0.9, 0.999]] -[2022-12-14 16:38:28,352] [INFO] [timer.py:197:stop] 0/170, RunningAvgSamplesPerSec=29.90449434601322, CurrSamplesPerSec=30.046835755175707, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:38:35,152] [INFO] [timer.py:197:stop] 0/171, RunningAvgSamplesPerSec=29.898398923967992, CurrSamplesPerSec=28.908475084433583, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:38:41,924] [INFO] [timer.py:197:stop] 0/172, RunningAvgSamplesPerSec=29.896456789041316, CurrSamplesPerSec=29.57182111587593, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:38:48,250] [INFO] [timer.py:197:stop] 0/173, RunningAvgSamplesPerSec=29.896608794692224, CurrSamplesPerSec=29.92247224176661, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:38:54,779] [INFO] [timer.py:197:stop] 0/174, RunningAvgSamplesPerSec=29.898726314889203, CurrSamplesPerSec=30.265287560614187, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:39:01,225] [INFO] [timer.py:197:stop] 0/175, RunningAvgSamplesPerSec=29.896820195289397, CurrSamplesPerSec=29.572544351580362, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.21, 'learning_rate': 8.292222957399574e-06, 'epoch': 4.0} -[2022-12-14 16:39:07,847] [INFO] [timer.py:197:stop] 0/176, RunningAvgSamplesPerSec=29.889302012131214, CurrSamplesPerSec=28.64319465591145, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:39:14,184] [INFO] [timer.py:197:stop] 0/177, RunningAvgSamplesPerSec=29.888261139806485, CurrSamplesPerSec=29.708246446714497, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:39:20,655] [INFO] [timer.py:197:stop] 0/178, RunningAvgSamplesPerSec=29.886673310134526, CurrSamplesPerSec=29.611377297625964, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:39:27,290] [INFO] [timer.py:197:stop] 0/179, RunningAvgSamplesPerSec=29.887984821092203, CurrSamplesPerSec=30.12061758858853, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:39:33,945] [INFO] [logging.py:68:log_dist] [Rank 0] step=180, skipped=2, lr=[8.338069703233054e-06], mom=[[0.9, 0.999]] -[2022-12-14 16:39:33,946] [INFO] [timer.py:197:stop] 0/180, RunningAvgSamplesPerSec=29.885175803115224, CurrSamplesPerSec=29.396161263298527, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:39:40,842] [INFO] [timer.py:197:stop] 0/181, RunningAvgSamplesPerSec=29.884914796797343, CurrSamplesPerSec=29.83852819001653, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:39:46,981] [INFO] [timer.py:197:stop] 0/182, RunningAvgSamplesPerSec=29.88567300775043, CurrSamplesPerSec=30.022015398901747, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:39:53,691] [INFO] [timer.py:197:stop] 0/183, RunningAvgSamplesPerSec=29.88566176371313, CurrSamplesPerSec=29.883637974816008, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:40:00,231] [INFO] [timer.py:197:stop] 0/184, RunningAvgSamplesPerSec=29.886014268661206, CurrSamplesPerSec=29.949954924800284, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:40:06,660] [INFO] [timer.py:197:stop] 0/185, RunningAvgSamplesPerSec=29.888595078639423, CurrSamplesPerSec=30.365843797645397, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:40:13,513] [INFO] [timer.py:197:stop] 0/186, RunningAvgSamplesPerSec=29.884039474679355, CurrSamplesPerSec=29.073110080473356, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:40:20,556] [INFO] [timer.py:197:stop] 0/187, RunningAvgSamplesPerSec=29.880472326870574, CurrSamplesPerSec=29.238299771276466, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:40:27,253] [INFO] [timer.py:197:stop] 0/188, RunningAvgSamplesPerSec=29.876584153321808, CurrSamplesPerSec=29.174272386068537, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:40:33,431] [INFO] [timer.py:197:stop] 0/189, RunningAvgSamplesPerSec=29.879861588062557, CurrSamplesPerSec=30.502230152562593, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:40:39,511] [INFO] [logging.py:68:log_dist] [Rank 0] step=190, skipped=2, lr=[8.426021206646023e-06], mom=[[0.9, 0.999]] -[2022-12-14 16:40:39,511] [INFO] [timer.py:197:stop] 0/190, RunningAvgSamplesPerSec=29.882009257413785, CurrSamplesPerSec=30.289124311969722, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:40:45,859] [INFO] [timer.py:197:stop] 0/191, RunningAvgSamplesPerSec=29.87874432775277, CurrSamplesPerSec=29.277357707549758, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:40:52,573] [INFO] [timer.py:197:stop] 0/192, RunningAvgSamplesPerSec=29.878534970613565, CurrSamplesPerSec=29.8390190795545, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:40:55,042] [INFO] [timer.py:197:stop] 0/193, RunningAvgSamplesPerSec=29.88132102762991, CurrSamplesPerSec=30.420269643923607, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:40:57,215] [INFO] [timer.py:197:stop] 0/194, RunningAvgSamplesPerSec=29.880930133440202, CurrSamplesPerSec=29.806456398270033, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:40:59,358] [INFO] [timer.py:197:stop] 0/195, RunningAvgSamplesPerSec=29.88271622032109, CurrSamplesPerSec=30.229646952197744, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:41:01,487] [INFO] [timer.py:197:stop] 0/196, RunningAvgSamplesPerSec=29.885456800568246, CurrSamplesPerSec=30.423969107656138, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:41:03,626] [INFO] [timer.py:197:stop] 0/197, RunningAvgSamplesPerSec=29.887509851203806, CurrSamplesPerSec=30.291209250643238, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:41:05,748] [INFO] [timer.py:197:stop] 0/198, RunningAvgSamplesPerSec=29.89072355476984, CurrSamplesPerSec=30.530885862211292, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:41:07,897] [INFO] [timer.py:197:stop] 0/199, RunningAvgSamplesPerSec=29.892240725512746, CurrSamplesPerSec=30.1926094790247, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:41:09,791] [INFO] [logging.py:68:log_dist] [Rank 0] step=200, skipped=2, lr=[8.509413541357755e-06], mom=[[0.9, 0.999]] -[2022-12-14 16:41:09,792] [INFO] [timer.py:197:stop] 0/200, RunningAvgSamplesPerSec=29.911202818710223, CurrSamplesPerSec=34.182927857316585, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.185, 'learning_rate': 8.509413541357755e-06, 'epoch': 4.01} -[2022-12-14 16:41:50,835] [INFO] [timer.py:197:stop] 0/201, RunningAvgSamplesPerSec=29.91464103479585, CurrSamplesPerSec=30.611342718507494, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:41:57,110] [INFO] [timer.py:197:stop] 0/202, RunningAvgSamplesPerSec=29.9141631965936, CurrSamplesPerSec=29.819376213057296, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:42:04,041] [INFO] [timer.py:197:stop] 0/203, RunningAvgSamplesPerSec=29.910821638508406, CurrSamplesPerSec=29.25718750006812, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:42:10,458] [INFO] [timer.py:197:stop] 0/204, RunningAvgSamplesPerSec=29.905148149922873, CurrSamplesPerSec=28.806866031640013, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:42:16,569] [INFO] [timer.py:197:stop] 0/205, RunningAvgSamplesPerSec=29.90836666698895, CurrSamplesPerSec=30.57302685378593, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:42:23,314] [INFO] [timer.py:197:stop] 0/206, RunningAvgSamplesPerSec=29.905273932183338, CurrSamplesPerSec=29.290420460606313, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:42:30,184] [INFO] [timer.py:197:stop] 0/207, RunningAvgSamplesPerSec=29.90471382678926, CurrSamplesPerSec=29.79088936476989, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:42:37,095] [INFO] [timer.py:197:stop] 0/208, RunningAvgSamplesPerSec=29.90266419580228, CurrSamplesPerSec=29.488340077369205, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:42:43,713] [INFO] [timer.py:197:stop] 0/209, RunningAvgSamplesPerSec=29.901245007746052, CurrSamplesPerSec=29.61173661258017, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:42:50,193] [INFO] [logging.py:68:log_dist] [Rank 0] step=210, skipped=2, lr=[8.588696173868873e-06], mom=[[0.9, 0.999]] -[2022-12-14 16:42:50,194] [INFO] [timer.py:197:stop] 0/210, RunningAvgSamplesPerSec=29.900280404110475, CurrSamplesPerSec=29.7019383708647, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:42:56,920] [INFO] [timer.py:197:stop] 0/211, RunningAvgSamplesPerSec=29.90117772847084, CurrSamplesPerSec=30.08899921559238, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:43:03,393] [INFO] [timer.py:197:stop] 0/212, RunningAvgSamplesPerSec=29.902888460119943, CurrSamplesPerSec=30.264779133499882, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:43:09,588] [INFO] [timer.py:197:stop] 0/213, RunningAvgSamplesPerSec=29.903968234020752, CurrSamplesPerSec=30.132461597972455, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:43:15,765] [INFO] [timer.py:197:stop] 0/214, RunningAvgSamplesPerSec=29.89919358249735, CurrSamplesPerSec=28.924732142145203, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:43:22,142] [INFO] [timer.py:197:stop] 0/215, RunningAvgSamplesPerSec=29.89956791530157, CurrSamplesPerSec=29.97913866033063, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:43:28,787] [INFO] [timer.py:197:stop] 0/216, RunningAvgSamplesPerSec=29.898774537732756, CurrSamplesPerSec=29.730739318228185, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:43:34,944] [INFO] [timer.py:197:stop] 0/217, RunningAvgSamplesPerSec=29.899603873458314, CurrSamplesPerSec=30.078146462794248, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:43:40,949] [INFO] [timer.py:197:stop] 0/218, RunningAvgSamplesPerSec=29.90012109619336, CurrSamplesPerSec=30.01174104585376, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:43:47,669] [INFO] [timer.py:197:stop] 0/219, RunningAvgSamplesPerSec=29.898399157609145, CurrSamplesPerSec=29.53105141849687, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:43:54,203] [INFO] [logging.py:68:log_dist] [Rank 0] step=220, skipped=2, lr=[8.664255215314613e-06], mom=[[0.9, 0.999]] -[2022-12-14 16:43:54,203] [INFO] [timer.py:197:stop] 0/220, RunningAvgSamplesPerSec=29.894954379871145, CurrSamplesPerSec=29.165755103686102, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:44:00,866] [INFO] [timer.py:197:stop] 0/221, RunningAvgSamplesPerSec=29.895256373815528, CurrSamplesPerSec=29.961237021349028, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:44:07,380] [INFO] [timer.py:197:stop] 0/222, RunningAvgSamplesPerSec=29.89420330961516, CurrSamplesPerSec=29.665355772586512, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:44:15,243] [INFO] [timer.py:197:stop] 0/223, RunningAvgSamplesPerSec=29.896603421247487, CurrSamplesPerSec=30.434165389201446, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:44:22,682] [INFO] [timer.py:197:stop] 0/224, RunningAvgSamplesPerSec=29.894522666023157, CurrSamplesPerSec=29.441673154166466, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:44:29,909] [INFO] [timer.py:197:stop] 0/225, RunningAvgSamplesPerSec=29.891065244111157, CurrSamplesPerSec=29.14281774910588, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.1341, 'learning_rate': 8.700744577655557e-06, 'epoch': 5.0} -[2022-12-14 16:44:36,000] [INFO] [timer.py:197:stop] 0/226, RunningAvgSamplesPerSec=29.891384110688907, CurrSamplesPerSec=29.962661676823494, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:44:43,660] [INFO] [timer.py:197:stop] 0/227, RunningAvgSamplesPerSec=29.89109564404624, CurrSamplesPerSec=29.82661911927094, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:44:50,479] [INFO] [timer.py:197:stop] 0/228, RunningAvgSamplesPerSec=29.892177299288036, CurrSamplesPerSec=30.137556405024, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:44:57,338] [INFO] [timer.py:197:stop] 0/229, RunningAvgSamplesPerSec=29.89248940754049, CurrSamplesPerSec=29.9631934491519, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:45:03,748] [INFO] [logging.py:68:log_dist] [Rank 0] step=230, skipped=2, lr=[8.73642479617159e-06], mom=[[0.9, 0.999]] -[2022-12-14 16:45:03,749] [INFO] [timer.py:197:stop] 0/230, RunningAvgSamplesPerSec=29.893747111293134, CurrSamplesPerSec=30.18201104151401, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:45:10,011] [INFO] [timer.py:197:stop] 0/231, RunningAvgSamplesPerSec=29.894146637027717, CurrSamplesPerSec=29.985518148113808, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:45:16,607] [INFO] [timer.py:197:stop] 0/232, RunningAvgSamplesPerSec=29.88797323962098, CurrSamplesPerSec=28.538380061238847, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:45:19,093] [INFO] [timer.py:197:stop] 0/233, RunningAvgSamplesPerSec=29.890426280544805, CurrSamplesPerSec=30.465528279545815, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:45:21,220] [INFO] [timer.py:197:stop] 0/234, RunningAvgSamplesPerSec=29.893281457558572, CurrSamplesPerSec=30.56777331911125, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:45:23,340] [INFO] [timer.py:197:stop] 0/235, RunningAvgSamplesPerSec=29.89609505979747, CurrSamplesPerSec=30.56348548955633, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:45:25,510] [INFO] [timer.py:197:stop] 0/236, RunningAvgSamplesPerSec=29.895865435989688, CurrSamplesPerSec=29.8424590762622, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:45:27,644] [INFO] [timer.py:197:stop] 0/237, RunningAvgSamplesPerSec=29.897799563113182, CurrSamplesPerSec=30.357371937140496, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:45:29,816] [INFO] [timer.py:197:stop] 0/238, RunningAvgSamplesPerSec=29.897440781526907, CurrSamplesPerSec=29.813365218906597, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:45:31,986] [INFO] [timer.py:197:stop] 0/239, RunningAvgSamplesPerSec=29.897258762652665, CurrSamplesPerSec=29.85436420049251, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:45:33,884] [INFO] [logging.py:68:log_dist] [Rank 0] step=240, skipped=2, lr=[8.805495997504354e-06], mom=[[0.9, 0.999]] -[2022-12-14 16:45:33,885] [INFO] [timer.py:197:stop] 0/240, RunningAvgSamplesPerSec=29.912886875648738, CurrSamplesPerSec=34.14270222363772, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:46:16,834] [INFO] [timer.py:197:stop] 0/241, RunningAvgSamplesPerSec=29.91298844738415, CurrSamplesPerSec=29.937182154621404, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:46:23,171] [INFO] [timer.py:197:stop] 0/242, RunningAvgSamplesPerSec=29.9037725477038, CurrSamplesPerSec=27.85286664015944, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:46:29,529] [INFO] [timer.py:197:stop] 0/243, RunningAvgSamplesPerSec=29.904032567935364, CurrSamplesPerSec=29.96656846953241, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:46:35,647] [INFO] [timer.py:197:stop] 0/244, RunningAvgSamplesPerSec=29.902895421982265, CurrSamplesPerSec=29.631342286983593, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:46:41,721] [INFO] [timer.py:197:stop] 0/245, RunningAvgSamplesPerSec=29.904249114961235, CurrSamplesPerSec=30.2354864336176, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:46:48,420] [INFO] [timer.py:197:stop] 0/246, RunningAvgSamplesPerSec=29.906507298344664, CurrSamplesPerSec=30.465545567667025, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:46:54,997] [INFO] [timer.py:197:stop] 0/247, RunningAvgSamplesPerSec=29.904651479006407, CurrSamplesPerSec=29.45861320715065, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:47:01,096] [INFO] [timer.py:197:stop] 0/248, RunningAvgSamplesPerSec=29.9003623901369, CurrSamplesPerSec=28.885352992865485, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:47:09,485] [INFO] [timer.py:197:stop] 0/249, RunningAvgSamplesPerSec=29.898604913913687, CurrSamplesPerSec=29.472453044638062, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:47:16,980] [INFO] [logging.py:68:log_dist] [Rank 0] step=250, skipped=2, lr=[8.871723942761204e-06], mom=[[0.9, 0.999]] -[2022-12-14 16:47:16,981] [INFO] [timer.py:197:stop] 0/250, RunningAvgSamplesPerSec=29.891859182639145, CurrSamplesPerSec=28.313972291623664, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.1139, 'learning_rate': 8.871723942761204e-06, 'epoch': 6.0} -[2022-12-14 16:47:25,337] [INFO] [timer.py:197:stop] 0/251, RunningAvgSamplesPerSec=29.893462887658067, CurrSamplesPerSec=30.29656646644681, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:47:31,969] [INFO] [timer.py:197:stop] 0/252, RunningAvgSamplesPerSec=29.895590220264072, CurrSamplesPerSec=30.434890012926317, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:47:37,992] [INFO] [timer.py:197:stop] 0/253, RunningAvgSamplesPerSec=29.89430894502289, CurrSamplesPerSec=29.577399416793977, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:47:44,079] [INFO] [timer.py:197:stop] 0/254, RunningAvgSamplesPerSec=29.893720359990652, CurrSamplesPerSec=29.746714912627244, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:47:50,410] [INFO] [timer.py:197:stop] 0/255, RunningAvgSamplesPerSec=29.892986584361992, CurrSamplesPerSec=29.70921639895934, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:47:56,427] [INFO] [timer.py:197:stop] 0/256, RunningAvgSamplesPerSec=29.893559606344997, CurrSamplesPerSec=30.03924348257456, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:48:03,483] [INFO] [timer.py:197:stop] 0/257, RunningAvgSamplesPerSec=29.895903266581495, CurrSamplesPerSec=30.503335833276516, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:48:09,653] [INFO] [timer.py:197:stop] 0/258, RunningAvgSamplesPerSec=29.895849131860434, CurrSamplesPerSec=29.882051174157176, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:48:15,795] [INFO] [timer.py:197:stop] 0/259, RunningAvgSamplesPerSec=29.894191691255383, CurrSamplesPerSec=29.475847875960405, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:48:21,884] [INFO] [logging.py:68:log_dist] [Rank 0] step=260, skipped=2, lr=[8.935333486807386e-06], mom=[[0.9, 0.999]] -[2022-12-14 16:48:21,884] [INFO] [timer.py:197:stop] 0/260, RunningAvgSamplesPerSec=29.896080018219457, CurrSamplesPerSec=30.389419539129022, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:48:28,516] [INFO] [timer.py:197:stop] 0/261, RunningAvgSamplesPerSec=29.898679389251754, CurrSamplesPerSec=30.584765921859635, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:48:34,747] [INFO] [timer.py:197:stop] 0/262, RunningAvgSamplesPerSec=29.89745708099995, CurrSamplesPerSec=29.58420896829772, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:48:41,539] [INFO] [timer.py:197:stop] 0/263, RunningAvgSamplesPerSec=29.895875918651516, CurrSamplesPerSec=29.490371303286864, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:48:48,203] [INFO] [timer.py:197:stop] 0/264, RunningAvgSamplesPerSec=29.895695793736387, CurrSamplesPerSec=29.84875728704407, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:48:54,408] [INFO] [timer.py:197:stop] 0/265, RunningAvgSamplesPerSec=29.896197225763203, CurrSamplesPerSec=30.028154500177248, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:49:00,447] [INFO] [timer.py:197:stop] 0/266, RunningAvgSamplesPerSec=29.89719937318894, CurrSamplesPerSec=30.163117312735828, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:49:08,292] [INFO] [timer.py:197:stop] 0/267, RunningAvgSamplesPerSec=29.897553374537637, CurrSamplesPerSec=29.9913038941201, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:49:14,785] [INFO] [timer.py:197:stop] 0/268, RunningAvgSamplesPerSec=29.897578563288715, CurrSamplesPerSec=29.904255078567953, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:49:21,282] [INFO] [timer.py:197:stop] 0/269, RunningAvgSamplesPerSec=29.8958208068856, CurrSamplesPerSec=29.435484219389963, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:49:27,474] [INFO] [logging.py:68:log_dist] [Rank 0] step=270, skipped=2, lr=[8.996523822524443e-06], mom=[[0.9, 0.999]] -[2022-12-14 16:49:27,474] [INFO] [timer.py:197:stop] 0/270, RunningAvgSamplesPerSec=29.895295255381498, CurrSamplesPerSec=29.755631013731563, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:49:33,857] [INFO] [timer.py:197:stop] 0/271, RunningAvgSamplesPerSec=29.894348242444387, CurrSamplesPerSec=29.642693268242866, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:49:40,199] [INFO] [timer.py:197:stop] 0/272, RunningAvgSamplesPerSec=29.892225441201937, CurrSamplesPerSec=29.331934969068403, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:49:42,699] [INFO] [timer.py:197:stop] 0/273, RunningAvgSamplesPerSec=29.891306209120906, CurrSamplesPerSec=29.645164870613847, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:49:44,812] [INFO] [timer.py:197:stop] 0/274, RunningAvgSamplesPerSec=29.89405258775281, CurrSamplesPerSec=30.657396199190565, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:49:46,966] [INFO] [timer.py:197:stop] 0/275, RunningAvgSamplesPerSec=29.89472603154315, CurrSamplesPerSec=30.07903623540886, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0887, 'learning_rate': 9.026267958246849e-06, 'epoch': 6.01} -[2022-12-14 16:49:49,092] [INFO] [timer.py:197:stop] 0/276, RunningAvgSamplesPerSec=29.896864601304152, CurrSamplesPerSec=30.492365761005505, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:49:51,222] [INFO] [timer.py:197:stop] 0/277, RunningAvgSamplesPerSec=29.89877526012867, CurrSamplesPerSec=30.431660527289253, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:49:53,386] [INFO] [timer.py:197:stop] 0/278, RunningAvgSamplesPerSec=29.898893049854365, CurrSamplesPerSec=29.931320483744, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:49:55,562] [INFO] [timer.py:197:stop] 0/279, RunningAvgSamplesPerSec=29.898438320139068, CurrSamplesPerSec=29.773459446395496, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:49:57,439] [INFO] [logging.py:68:log_dist] [Rank 0] step=280, skipped=2, lr=[9.055472243083868e-06], mom=[[0.9, 0.999]] -[2022-12-14 16:49:57,440] [INFO] [timer.py:197:stop] 0/280, RunningAvgSamplesPerSec=29.912678965172393, CurrSamplesPerSec=34.459042160024424, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:50:43,017] [INFO] [timer.py:197:stop] 0/281, RunningAvgSamplesPerSec=29.9048444724259, CurrSamplesPerSec=27.875207011495448, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:50:50,331] [INFO] [timer.py:197:stop] 0/282, RunningAvgSamplesPerSec=29.90575769638925, CurrSamplesPerSec=30.16274449214962, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:50:56,769] [INFO] [timer.py:197:stop] 0/283, RunningAvgSamplesPerSec=29.90477983580079, CurrSamplesPerSec=29.633471773872163, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:51:03,807] [INFO] [timer.py:197:stop] 0/284, RunningAvgSamplesPerSec=29.903308059184262, CurrSamplesPerSec=29.495400358161355, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:51:10,377] [INFO] [timer.py:197:stop] 0/285, RunningAvgSamplesPerSec=29.901247990848436, CurrSamplesPerSec=29.331418957088907, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:51:17,026] [INFO] [timer.py:197:stop] 0/286, RunningAvgSamplesPerSec=29.901487696379526, CurrSamplesPerSec=29.969479157048916, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:51:23,751] [INFO] [timer.py:197:stop] 0/287, RunningAvgSamplesPerSec=29.90259543474367, CurrSamplesPerSec=30.22055002917519, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:51:30,123] [INFO] [timer.py:197:stop] 0/288, RunningAvgSamplesPerSec=29.904432389028543, CurrSamplesPerSec=30.437326381900572, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:51:37,048] [INFO] [timer.py:197:stop] 0/289, RunningAvgSamplesPerSec=29.903441184102462, CurrSamplesPerSec=29.622627992246166, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:51:43,348] [INFO] [logging.py:68:log_dist] [Rank 0] step=290, skipped=2, lr=[9.11233723905084e-06], mom=[[0.9, 0.999]] -[2022-12-14 16:51:43,348] [INFO] [timer.py:197:stop] 0/290, RunningAvgSamplesPerSec=29.904026856633884, CurrSamplesPerSec=30.073068351850548, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:51:50,721] [INFO] [timer.py:197:stop] 0/291, RunningAvgSamplesPerSec=29.905649695422376, CurrSamplesPerSec=30.380473775460235, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:51:57,009] [INFO] [timer.py:197:stop] 0/292, RunningAvgSamplesPerSec=29.90447533133414, CurrSamplesPerSec=29.568905727084207, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:52:03,521] [INFO] [timer.py:197:stop] 0/293, RunningAvgSamplesPerSec=29.904385369870248, CurrSamplesPerSec=29.87831936394744, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:52:10,176] [INFO] [timer.py:197:stop] 0/294, RunningAvgSamplesPerSec=29.90432655541064, CurrSamplesPerSec=29.88722137103881, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:52:16,398] [INFO] [timer.py:197:stop] 0/295, RunningAvgSamplesPerSec=29.903681896366468, CurrSamplesPerSec=29.71662300353973, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:52:23,921] [INFO] [timer.py:197:stop] 0/296, RunningAvgSamplesPerSec=29.90237215457338, CurrSamplesPerSec=29.52349673046265, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:52:30,261] [INFO] [timer.py:197:stop] 0/297, RunningAvgSamplesPerSec=29.90029200070283, CurrSamplesPerSec=29.301025542338483, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:52:36,498] [INFO] [timer.py:197:stop] 0/298, RunningAvgSamplesPerSec=29.90018203540275, CurrSamplesPerSec=29.86777754782775, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:52:43,293] [INFO] [timer.py:197:stop] 0/299, RunningAvgSamplesPerSec=29.898101699743936, CurrSamplesPerSec=29.294790090283477, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:52:49,826] [INFO] [logging.py:68:log_dist] [Rank 0] step=300, skipped=2, lr=[9.16726106663399e-06], mom=[[0.9, 0.999]] -[2022-12-14 16:52:49,826] [INFO] [timer.py:197:stop] 0/300, RunningAvgSamplesPerSec=29.898291975704918, CurrSamplesPerSec=29.95491131493284, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0656, 'learning_rate': 9.16726106663399e-06, 'epoch': 7.0} -[2022-12-14 16:52:56,340] [INFO] [timer.py:197:stop] 0/301, RunningAvgSamplesPerSec=29.896398750531528, CurrSamplesPerSec=29.342701644581112, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:53:02,764] [INFO] [timer.py:197:stop] 0/302, RunningAvgSamplesPerSec=29.894165090958168, CurrSamplesPerSec=29.24094328583615, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:53:09,804] [INFO] [timer.py:197:stop] 0/303, RunningAvgSamplesPerSec=29.89530749415042, CurrSamplesPerSec=30.242016384861973, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:53:16,258] [INFO] [timer.py:197:stop] 0/304, RunningAvgSamplesPerSec=29.89465175580126, CurrSamplesPerSec=29.698573406581612, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:53:23,381] [INFO] [timer.py:197:stop] 0/305, RunningAvgSamplesPerSec=29.894623645037804, CurrSamplesPerSec=29.886136612595564, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:53:29,606] [INFO] [timer.py:197:stop] 0/306, RunningAvgSamplesPerSec=29.89623407037442, CurrSamplesPerSec=30.39231660535038, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:53:36,664] [INFO] [timer.py:197:stop] 0/307, RunningAvgSamplesPerSec=29.893678791257845, CurrSamplesPerSec=29.136611492788177, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:53:43,317] [INFO] [timer.py:197:stop] 0/308, RunningAvgSamplesPerSec=29.894424240974, CurrSamplesPerSec=30.123534617401965, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:53:50,202] [INFO] [timer.py:197:stop] 0/309, RunningAvgSamplesPerSec=29.89414779957358, CurrSamplesPerSec=29.809796199664163, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:53:56,919] [INFO] [logging.py:68:log_dist] [Rank 0] step=310, skipped=2, lr=[9.220371891879027e-06], mom=[[0.9, 0.999]] -[2022-12-14 16:53:56,920] [INFO] [timer.py:197:stop] 0/310, RunningAvgSamplesPerSec=29.89206282018668, CurrSamplesPerSec=29.26543602356625, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:54:03,836] [INFO] [timer.py:197:stop] 0/311, RunningAvgSamplesPerSec=29.893254527161083, CurrSamplesPerSec=30.264878087773916, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:54:10,533] [INFO] [timer.py:197:stop] 0/312, RunningAvgSamplesPerSec=29.894660173393245, CurrSamplesPerSec=30.335429599287323, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:54:12,950] [INFO] [timer.py:197:stop] 0/313, RunningAvgSamplesPerSec=29.894290673977526, CurrSamplesPerSec=29.780184481847193, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:54:15,093] [INFO] [timer.py:197:stop] 0/314, RunningAvgSamplesPerSec=29.895338078289655, CurrSamplesPerSec=30.224680912200906, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:54:17,249] [INFO] [timer.py:197:stop] 0/315, RunningAvgSamplesPerSec=29.895872010624874, CurrSamplesPerSec=30.063395371027823, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:54:19,387] [INFO] [timer.py:197:stop] 0/316, RunningAvgSamplesPerSec=29.89718989442782, CurrSamplesPerSec=30.315477157809276, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:54:21,508] [INFO] [timer.py:197:stop] 0/317, RunningAvgSamplesPerSec=29.899231787911315, CurrSamplesPerSec=30.55448217960817, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:54:23,633] [INFO] [timer.py:197:stop] 0/318, RunningAvgSamplesPerSec=29.901080301258954, CurrSamplesPerSec=30.49496376986114, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:54:25,808] [INFO] [timer.py:197:stop] 0/319, RunningAvgSamplesPerSec=29.900744841635667, CurrSamplesPerSec=29.795115267668866, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:54:27,735] [INFO] [logging.py:68:log_dist] [Rank 0] step=320, skipped=2, lr=[9.271785592148743e-06], mom=[[0.9, 0.999]] -[2022-12-14 16:54:27,736] [INFO] [timer.py:197:stop] 0/320, RunningAvgSamplesPerSec=29.911293719653127, CurrSamplesPerSec=33.67768812884054, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:55:10,865] [INFO] [timer.py:197:stop] 0/321, RunningAvgSamplesPerSec=29.91291173277871, CurrSamplesPerSec=30.436473953493223, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:55:17,550] [INFO] [timer.py:197:stop] 0/322, RunningAvgSamplesPerSec=29.91233759174174, CurrSamplesPerSec=29.730304668791224, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:55:24,060] [INFO] [timer.py:197:stop] 0/323, RunningAvgSamplesPerSec=29.911987843807836, CurrSamplesPerSec=29.80048700264239, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:55:30,812] [INFO] [timer.py:197:stop] 0/324, RunningAvgSamplesPerSec=29.91190763617, CurrSamplesPerSec=29.8861831956958, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:55:36,907] [INFO] [timer.py:197:stop] 0/325, RunningAvgSamplesPerSec=29.913488367401925, CurrSamplesPerSec=30.431322437018373, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0581, 'learning_rate': 9.296889251455016e-06, 'epoch': 8.0} -[2022-12-14 16:55:43,524] [INFO] [timer.py:197:stop] 0/326, RunningAvgSamplesPerSec=29.914541973559068, CurrSamplesPerSec=30.25878507636347, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:55:50,284] [INFO] [timer.py:197:stop] 0/327, RunningAvgSamplesPerSec=29.90928590033612, CurrSamplesPerSec=28.298325738387017, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:55:57,195] [INFO] [timer.py:197:stop] 0/328, RunningAvgSamplesPerSec=29.910322053496838, CurrSamplesPerSec=30.250918282596864, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:56:05,130] [INFO] [timer.py:197:stop] 0/329, RunningAvgSamplesPerSec=29.90859380540244, CurrSamplesPerSec=29.35563337732087, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:56:12,058] [INFO] [logging.py:68:log_dist] [Rank 0] step=330, skipped=2, lr=[9.321607278590771e-06], mom=[[0.9, 0.999]] -[2022-12-14 16:56:12,058] [INFO] [timer.py:197:stop] 0/330, RunningAvgSamplesPerSec=29.907732778590503, CurrSamplesPerSec=29.62881085286702, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:56:18,739] [INFO] [timer.py:197:stop] 0/331, RunningAvgSamplesPerSec=29.90681446695206, CurrSamplesPerSec=29.608620662079996, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:56:25,623] [INFO] [timer.py:197:stop] 0/332, RunningAvgSamplesPerSec=29.9050179571439, CurrSamplesPerSec=29.3254556889605, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:56:33,074] [INFO] [timer.py:197:stop] 0/333, RunningAvgSamplesPerSec=29.90644487108301, CurrSamplesPerSec=30.384882370912152, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:56:39,664] [INFO] [timer.py:197:stop] 0/334, RunningAvgSamplesPerSec=29.90749419731803, CurrSamplesPerSec=30.25891468936011, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:56:46,242] [INFO] [timer.py:197:stop] 0/335, RunningAvgSamplesPerSec=29.9074117732332, CurrSamplesPerSec=29.88007206780411, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:56:52,955] [INFO] [timer.py:197:stop] 0/336, RunningAvgSamplesPerSec=29.907204431777707, CurrSamplesPerSec=29.83831923518171, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:56:59,427] [INFO] [timer.py:197:stop] 0/337, RunningAvgSamplesPerSec=29.90783380265536, CurrSamplesPerSec=30.11953609922964, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:57:06,061] [INFO] [timer.py:197:stop] 0/338, RunningAvgSamplesPerSec=29.90741746181621, CurrSamplesPerSec=29.768592627258062, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:57:12,421] [INFO] [timer.py:197:stop] 0/339, RunningAvgSamplesPerSec=29.90760980362913, CurrSamplesPerSec=29.97237702369169, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:57:18,770] [INFO] [logging.py:68:log_dist] [Rank 0] step=340, skipped=2, lr=[9.369932589894792e-06], mom=[[0.9, 0.999]] -[2022-12-14 16:57:18,771] [INFO] [timer.py:197:stop] 0/340, RunningAvgSamplesPerSec=29.908087945193127, CurrSamplesPerSec=30.070097087596157, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:57:25,561] [INFO] [timer.py:197:stop] 0/341, RunningAvgSamplesPerSec=29.90789551759885, CurrSamplesPerSec=29.84299654371694, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:57:31,888] [INFO] [timer.py:197:stop] 0/342, RunningAvgSamplesPerSec=29.90827921001602, CurrSamplesPerSec=30.038920778334997, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:57:38,720] [INFO] [timer.py:197:stop] 0/343, RunningAvgSamplesPerSec=29.908441918781286, CurrSamplesPerSec=29.96386571665629, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:57:45,378] [INFO] [timer.py:197:stop] 0/344, RunningAvgSamplesPerSec=29.91022627616752, CurrSamplesPerSec=30.531365071067608, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:57:51,830] [INFO] [timer.py:197:stop] 0/345, RunningAvgSamplesPerSec=29.91096045525199, CurrSamplesPerSec=30.164181596750968, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:57:58,953] [INFO] [timer.py:197:stop] 0/346, RunningAvgSamplesPerSec=29.909288355696958, CurrSamplesPerSec=29.346579967941675, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:58:05,990] [INFO] [timer.py:197:stop] 0/347, RunningAvgSamplesPerSec=29.90674383756232, CurrSamplesPerSec=29.05639020305504, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:58:12,277] [INFO] [timer.py:197:stop] 0/348, RunningAvgSamplesPerSec=29.90587822989526, CurrSamplesPerSec=29.610204684942264, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:58:18,735] [INFO] [timer.py:197:stop] 0/349, RunningAvgSamplesPerSec=29.906074783669883, CurrSamplesPerSec=29.97423784319403, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:58:25,228] [INFO] [logging.py:68:log_dist] [Rank 0] step=350, skipped=2, lr=[9.416848797368692e-06], mom=[[0.9, 0.999]] -[2022-12-14 16:58:25,229] [INFO] [timer.py:197:stop] 0/350, RunningAvgSamplesPerSec=29.9041242453122, CurrSamplesPerSec=29.242309819913388, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0417, 'learning_rate': 9.416848797368692e-06, 'epoch': 8.01} -[2022-12-14 16:58:31,496] [INFO] [timer.py:197:stop] 0/351, RunningAvgSamplesPerSec=29.906699262066486, CurrSamplesPerSec=30.83056683537779, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:58:38,803] [INFO] [timer.py:197:stop] 0/352, RunningAvgSamplesPerSec=29.903709786780272, CurrSamplesPerSec=28.8956542865573, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:58:41,143] [INFO] [timer.py:197:stop] 0/353, RunningAvgSamplesPerSec=29.90592156166608, CurrSamplesPerSec=30.700673884565617, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:58:43,289] [INFO] [timer.py:197:stop] 0/354, RunningAvgSamplesPerSec=29.906715970376048, CurrSamplesPerSec=30.188185203220396, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:58:45,473] [INFO] [timer.py:197:stop] 0/355, RunningAvgSamplesPerSec=29.905995404204994, CurrSamplesPerSec=29.65449520000884, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:58:47,676] [INFO] [timer.py:197:stop] 0/356, RunningAvgSamplesPerSec=29.905534106537285, CurrSamplesPerSec=29.74358037927585, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:58:49,843] [INFO] [timer.py:197:stop] 0/357, RunningAvgSamplesPerSec=29.906605904883662, CurrSamplesPerSec=30.29091187298816, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:58:51,963] [INFO] [timer.py:197:stop] 0/358, RunningAvgSamplesPerSec=29.908384659901674, CurrSamplesPerSec=30.553501459351626, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:58:54,105] [INFO] [timer.py:197:stop] 0/359, RunningAvgSamplesPerSec=29.909309229228302, CurrSamplesPerSec=30.242128818733313, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:58:55,988] [INFO] [logging.py:68:log_dist] [Rank 0] step=360, skipped=2, lr=[9.462435753420545e-06], mom=[[0.9, 0.999]] -[2022-12-14 16:58:55,988] [INFO] [timer.py:197:stop] 0/360, RunningAvgSamplesPerSec=29.920161536970852, CurrSamplesPerSec=34.372581064208184, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:59:37,241] [INFO] [timer.py:197:stop] 0/361, RunningAvgSamplesPerSec=29.91911723401767, CurrSamplesPerSec=29.549883500331237, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:59:43,813] [INFO] [timer.py:197:stop] 0/362, RunningAvgSamplesPerSec=29.9191702094041, CurrSamplesPerSec=29.9382005034619, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:59:50,536] [INFO] [timer.py:197:stop] 0/363, RunningAvgSamplesPerSec=29.919841076456315, CurrSamplesPerSec=30.16332406266985, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 16:59:57,251] [INFO] [timer.py:197:stop] 0/364, RunningAvgSamplesPerSec=29.918559758093185, CurrSamplesPerSec=29.463065505831796, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:00:03,904] [INFO] [timer.py:197:stop] 0/365, RunningAvgSamplesPerSec=29.918710009373058, CurrSamplesPerSec=29.97320030741921, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:00:11,146] [INFO] [timer.py:197:stop] 0/366, RunningAvgSamplesPerSec=29.914147915879507, CurrSamplesPerSec=28.34520362504587, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:00:18,617] [INFO] [timer.py:197:stop] 0/367, RunningAvgSamplesPerSec=29.914952535081028, CurrSamplesPerSec=30.210737755913275, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:00:24,915] [INFO] [timer.py:197:stop] 0/368, RunningAvgSamplesPerSec=29.913759154102458, CurrSamplesPerSec=29.484443628225232, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:00:31,425] [INFO] [timer.py:197:stop] 0/369, RunningAvgSamplesPerSec=29.914507278671447, CurrSamplesPerSec=30.190857269980047, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:00:37,975] [INFO] [logging.py:68:log_dist] [Rank 0] step=370, skipped=2, lr=[9.506766709342328e-06], mom=[[0.9, 0.999]] -[2022-12-14 17:00:37,975] [INFO] [timer.py:197:stop] 0/370, RunningAvgSamplesPerSec=29.91217844652727, CurrSamplesPerSec=29.081302379920864, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:00:44,783] [INFO] [timer.py:197:stop] 0/371, RunningAvgSamplesPerSec=29.911494985747904, CurrSamplesPerSec=29.66208431052056, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:00:51,367] [INFO] [timer.py:197:stop] 0/372, RunningAvgSamplesPerSec=29.910344162979843, CurrSamplesPerSec=29.49165108689956, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:00:57,593] [INFO] [timer.py:197:stop] 0/373, RunningAvgSamplesPerSec=29.911814208493062, CurrSamplesPerSec=30.465832553345333, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:01:03,946] [INFO] [timer.py:197:stop] 0/374, RunningAvgSamplesPerSec=29.91133592389149, CurrSamplesPerSec=29.73494158476053, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:01:10,190] [INFO] [timer.py:197:stop] 0/375, RunningAvgSamplesPerSec=29.909455629123215, CurrSamplesPerSec=29.226012113692462, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0344, 'learning_rate': 9.528482449516371e-06, 'epoch': 9.0} -[2022-12-14 17:01:16,915] [INFO] [timer.py:197:stop] 0/376, RunningAvgSamplesPerSec=29.9101051810725, CurrSamplesPerSec=30.154372014238085, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:01:23,045] [INFO] [timer.py:197:stop] 0/377, RunningAvgSamplesPerSec=29.91008860016499, CurrSamplesPerSec=29.903888629634782, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:01:29,521] [INFO] [timer.py:197:stop] 0/378, RunningAvgSamplesPerSec=29.907999007179324, CurrSamplesPerSec=29.14445990744925, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:01:36,033] [INFO] [timer.py:197:stop] 0/379, RunningAvgSamplesPerSec=29.907106299706733, CurrSamplesPerSec=29.575183484595996, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:01:42,146] [INFO] [logging.py:68:log_dist] [Rank 0] step=380, skipped=2, lr=[9.549909023428816e-06], mom=[[0.9, 0.999]] -[2022-12-14 17:01:42,146] [INFO] [timer.py:197:stop] 0/380, RunningAvgSamplesPerSec=29.908239058447485, CurrSamplesPerSec=30.34149178813811, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:01:48,943] [INFO] [timer.py:197:stop] 0/381, RunningAvgSamplesPerSec=29.909724541270474, CurrSamplesPerSec=30.48200930305233, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:01:55,168] [INFO] [timer.py:197:stop] 0/382, RunningAvgSamplesPerSec=29.91093039527844, CurrSamplesPerSec=30.375059350276846, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:02:01,996] [INFO] [timer.py:197:stop] 0/383, RunningAvgSamplesPerSec=29.910329917193575, CurrSamplesPerSec=29.683880342876517, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:02:08,807] [INFO] [timer.py:197:stop] 0/384, RunningAvgSamplesPerSec=29.9104266095955, CurrSamplesPerSec=29.947311964590977, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:02:15,112] [INFO] [timer.py:197:stop] 0/385, RunningAvgSamplesPerSec=29.910031136382674, CurrSamplesPerSec=29.759721546108228, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:02:21,627] [INFO] [timer.py:197:stop] 0/386, RunningAvgSamplesPerSec=29.90925423821233, CurrSamplesPerSec=29.614640850759056, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:02:28,953] [INFO] [timer.py:197:stop] 0/387, RunningAvgSamplesPerSec=29.908659900389534, CurrSamplesPerSec=29.68216698967376, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:02:35,421] [INFO] [timer.py:197:stop] 0/388, RunningAvgSamplesPerSec=29.909814630426162, CurrSamplesPerSec=30.36111106084269, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:02:41,686] [INFO] [timer.py:197:stop] 0/389, RunningAvgSamplesPerSec=29.911149013348545, CurrSamplesPerSec=30.435269591505826, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:02:48,287] [INFO] [logging.py:68:log_dist] [Rank 0] step=390, skipped=2, lr=[9.591924776618972e-06], mom=[[0.9, 0.999]] -[2022-12-14 17:02:48,287] [INFO] [timer.py:197:stop] 0/390, RunningAvgSamplesPerSec=29.909647329276165, CurrSamplesPerSec=29.339600365231963, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:02:54,383] [INFO] [timer.py:197:stop] 0/391, RunningAvgSamplesPerSec=29.909537479726403, CurrSamplesPerSec=29.866976660661052, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:03:00,919] [INFO] [timer.py:197:stop] 0/392, RunningAvgSamplesPerSec=29.91063872707509, CurrSamplesPerSec=30.345264734811877, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:03:03,320] [INFO] [timer.py:197:stop] 0/393, RunningAvgSamplesPerSec=29.91263156233573, CurrSamplesPerSec=30.710624346670286, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:03:05,454] [INFO] [timer.py:197:stop] 0/394, RunningAvgSamplesPerSec=29.913726487749567, CurrSamplesPerSec=30.34807447455813, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:03:07,641] [INFO] [timer.py:197:stop] 0/395, RunningAvgSamplesPerSec=29.912948501963996, CurrSamplesPerSec=29.611063720791755, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:03:09,832] [INFO] [timer.py:197:stop] 0/396, RunningAvgSamplesPerSec=29.912030444337542, CurrSamplesPerSec=29.555544643126503, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:03:11,957] [INFO] [timer.py:197:stop] 0/397, RunningAvgSamplesPerSec=29.91370389986625, CurrSamplesPerSec=30.587944335471246, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:03:14,081] [INFO] [timer.py:197:stop] 0/398, RunningAvgSamplesPerSec=29.915155058106357, CurrSamplesPerSec=30.49958932114541, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:03:16,277] [INFO] [timer.py:197:stop] 0/399, RunningAvgSamplesPerSec=29.914107168157873, CurrSamplesPerSec=29.504834460612074, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:03:18,182] [INFO] [logging.py:68:log_dist] [Rank 0] step=400, skipped=2, lr=[9.632871309784314e-06], mom=[[0.9, 0.999]] -[2022-12-14 17:03:18,182] [INFO] [timer.py:197:stop] 0/400, RunningAvgSamplesPerSec=29.92319330402866, CurrSamplesPerSec=34.02625278090482, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0277, 'learning_rate': 9.632871309784314e-06, 'epoch': 9.01} -[2022-12-14 17:04:00,219] [INFO] [timer.py:197:stop] 0/401, RunningAvgSamplesPerSec=29.923624113130717, CurrSamplesPerSec=30.09607677028784, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:04:06,728] [INFO] [timer.py:197:stop] 0/402, RunningAvgSamplesPerSec=29.922805824325504, CurrSamplesPerSec=29.59984139050597, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:04:13,407] [INFO] [timer.py:197:stop] 0/403, RunningAvgSamplesPerSec=29.920893640128966, CurrSamplesPerSec=29.17513168104055, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:04:19,462] [INFO] [timer.py:197:stop] 0/404, RunningAvgSamplesPerSec=29.917980863159624, CurrSamplesPerSec=28.793949864383823, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:04:25,649] [INFO] [timer.py:197:stop] 0/405, RunningAvgSamplesPerSec=29.91689952783603, CurrSamplesPerSec=29.488443737625744, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:04:32,134] [INFO] [timer.py:197:stop] 0/406, RunningAvgSamplesPerSec=29.918150975053766, CurrSamplesPerSec=30.43115339470001, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:04:38,625] [INFO] [timer.py:197:stop] 0/407, RunningAvgSamplesPerSec=29.918880453970214, CurrSamplesPerSec=30.216529117446065, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:04:45,258] [INFO] [timer.py:197:stop] 0/408, RunningAvgSamplesPerSec=29.91474986404059, CurrSamplesPerSec=28.33066468533, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:04:51,742] [INFO] [timer.py:197:stop] 0/409, RunningAvgSamplesPerSec=29.915414740248504, CurrSamplesPerSec=30.187818552440135, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:04:57,894] [INFO] [logging.py:68:log_dist] [Rank 0] step=410, skipped=2, lr=[9.672801694334265e-06], mom=[[0.9, 0.999]] -[2022-12-14 17:04:57,895] [INFO] [timer.py:197:stop] 0/410, RunningAvgSamplesPerSec=29.914526715351123, CurrSamplesPerSec=29.557425663643926, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:05:04,173] [INFO] [timer.py:197:stop] 0/411, RunningAvgSamplesPerSec=29.91348045431862, CurrSamplesPerSec=29.492626388986245, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:05:10,790] [INFO] [timer.py:197:stop] 0/412, RunningAvgSamplesPerSec=29.91303565367492, CurrSamplesPerSec=29.732214586842154, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:05:16,931] [INFO] [timer.py:197:stop] 0/413, RunningAvgSamplesPerSec=29.913353007247164, CurrSamplesPerSec=30.044037801689207, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:05:22,987] [INFO] [timer.py:197:stop] 0/414, RunningAvgSamplesPerSec=29.912661814384254, CurrSamplesPerSec=29.63126051555557, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:05:29,355] [INFO] [timer.py:197:stop] 0/415, RunningAvgSamplesPerSec=29.91309886807259, CurrSamplesPerSec=30.094258148395994, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:05:36,674] [INFO] [timer.py:197:stop] 0/416, RunningAvgSamplesPerSec=29.912853490135078, CurrSamplesPerSec=29.81185539961434, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:05:42,967] [INFO] [timer.py:197:stop] 0/417, RunningAvgSamplesPerSec=29.913983024887486, CurrSamplesPerSec=30.38905486447072, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:05:49,474] [INFO] [timer.py:197:stop] 0/418, RunningAvgSamplesPerSec=29.91317250928748, CurrSamplesPerSec=29.58055769397335, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:05:55,541] [INFO] [timer.py:197:stop] 0/419, RunningAvgSamplesPerSec=29.913339866874793, CurrSamplesPerSec=29.983123428928216, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:06:01,879] [INFO] [logging.py:68:log_dist] [Rank 0] step=420, skipped=2, lr=[9.71176514582969e-06], mom=[[0.9, 0.999]] -[2022-12-14 17:06:01,880] [INFO] [timer.py:197:stop] 0/420, RunningAvgSamplesPerSec=29.913919604412595, CurrSamplesPerSec=30.157644554169018, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:06:08,258] [INFO] [timer.py:197:stop] 0/421, RunningAvgSamplesPerSec=29.913501817917656, CurrSamplesPerSec=29.73988307255285, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:06:14,629] [INFO] [timer.py:197:stop] 0/422, RunningAvgSamplesPerSec=29.913065095791428, CurrSamplesPerSec=29.731193738046514, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:06:21,032] [INFO] [timer.py:197:stop] 0/423, RunningAvgSamplesPerSec=29.91225092536343, CurrSamplesPerSec=29.57417339039528, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:06:27,596] [INFO] [timer.py:197:stop] 0/424, RunningAvgSamplesPerSec=29.910256400964272, CurrSamplesPerSec=29.093544308091587, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:06:33,738] [INFO] [timer.py:197:stop] 0/425, RunningAvgSamplesPerSec=29.91287926190205, CurrSamplesPerSec=31.062360908993288, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0201, 'learning_rate': 9.73089868785391e-06, 'epoch': 10.01} -[2022-12-14 17:06:40,148] [INFO] [timer.py:197:stop] 0/426, RunningAvgSamplesPerSec=29.91272795042511, CurrSamplesPerSec=29.84886017767289, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:06:46,889] [INFO] [timer.py:197:stop] 0/427, RunningAvgSamplesPerSec=29.912709036054824, CurrSamplesPerSec=29.904691497650607, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:06:53,615] [INFO] [timer.py:197:stop] 0/428, RunningAvgSamplesPerSec=29.913341011284587, CurrSamplesPerSec=30.184369755118354, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:06:59,514] [INFO] [timer.py:197:stop] 0/429, RunningAvgSamplesPerSec=29.91478897562596, CurrSamplesPerSec=30.544639577620245, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:07:06,840] [INFO] [logging.py:68:log_dist] [Rank 0] step=430, skipped=2, lr=[9.74980738869138e-06], mom=[[0.9, 0.999]] -[2022-12-14 17:07:06,841] [INFO] [timer.py:197:stop] 0/430, RunningAvgSamplesPerSec=29.915653499972326, CurrSamplesPerSec=30.289428490057038, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:07:13,024] [INFO] [timer.py:197:stop] 0/431, RunningAvgSamplesPerSec=29.91436405162306, CurrSamplesPerSec=29.37250024783877, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:07:19,549] [INFO] [timer.py:197:stop] 0/432, RunningAvgSamplesPerSec=29.915314757222646, CurrSamplesPerSec=30.3288181299895, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:07:21,901] [INFO] [timer.py:197:stop] 0/433, RunningAvgSamplesPerSec=29.91688368249784, CurrSamplesPerSec=30.60712292393048, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:07:24,073] [INFO] [timer.py:197:stop] 0/434, RunningAvgSamplesPerSec=29.91664758696752, CurrSamplesPerSec=29.81523615013076, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:07:26,257] [INFO] [timer.py:197:stop] 0/435, RunningAvgSamplesPerSec=29.916357469082858, CurrSamplesPerSec=29.79155061630621, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:07:28,380] [INFO] [timer.py:197:stop] 0/436, RunningAvgSamplesPerSec=29.917709063700702, CurrSamplesPerSec=30.51465373852896, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:07:30,494] [INFO] [timer.py:197:stop] 0/437, RunningAvgSamplesPerSec=29.919365952286498, CurrSamplesPerSec=30.656205799767186, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:07:32,662] [INFO] [timer.py:197:stop] 0/438, RunningAvgSamplesPerSec=29.919260336993254, CurrSamplesPerSec=29.873388285391616, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:07:34,812] [INFO] [timer.py:197:stop] 0/439, RunningAvgSamplesPerSec=29.919722331839022, CurrSamplesPerSec=30.122520522464683, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:07:36,684] [INFO] [logging.py:68:log_dist] [Rank 0] step=440, skipped=2, lr=[9.786970978782465e-06], mom=[[0.9, 0.999]] -[2022-12-14 17:07:36,685] [INFO] [timer.py:197:stop] 0/440, RunningAvgSamplesPerSec=29.928891947749587, CurrSamplesPerSec=34.557093097308496, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:08:15,939] [INFO] [timer.py:197:stop] 0/441, RunningAvgSamplesPerSec=29.92737256900503, CurrSamplesPerSec=29.276393397889215, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:08:22,268] [INFO] [timer.py:197:stop] 0/442, RunningAvgSamplesPerSec=29.92620809160595, CurrSamplesPerSec=29.423607594409464, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:08:28,552] [INFO] [timer.py:197:stop] 0/443, RunningAvgSamplesPerSec=29.92769489472171, CurrSamplesPerSec=30.596541907622502, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:08:34,735] [INFO] [timer.py:197:stop] 0/444, RunningAvgSamplesPerSec=29.92815181074884, CurrSamplesPerSec=30.131020749943314, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:08:40,814] [INFO] [timer.py:197:stop] 0/445, RunningAvgSamplesPerSec=29.928205059311185, CurrSamplesPerSec=29.951759489235975, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:08:46,831] [INFO] [timer.py:197:stop] 0/446, RunningAvgSamplesPerSec=29.929105655464873, CurrSamplesPerSec=30.333472249216904, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:08:52,752] [INFO] [timer.py:197:stop] 0/447, RunningAvgSamplesPerSec=29.93099027703326, CurrSamplesPerSec=30.791884221289614, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:08:58,645] [INFO] [timer.py:197:stop] 0/448, RunningAvgSamplesPerSec=29.932156225931976, CurrSamplesPerSec=30.460176820438846, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:09:04,880] [INFO] [timer.py:197:stop] 0/449, RunningAvgSamplesPerSec=29.931726632562764, CurrSamplesPerSec=29.741349363177118, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:09:10,763] [INFO] [logging.py:68:log_dist] [Rank 0] step=450, skipped=2, lr=[9.823295589572114e-06], mom=[[0.9, 0.999]] -[2022-12-14 17:09:10,764] [INFO] [timer.py:197:stop] 0/450, RunningAvgSamplesPerSec=29.932036557922796, CurrSamplesPerSec=30.071218822455034, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0181, 'learning_rate': 9.823295589572114e-06, 'epoch': 11.0} -[2022-12-14 17:09:17,267] [INFO] [timer.py:197:stop] 0/451, RunningAvgSamplesPerSec=29.932303563186675, CurrSamplesPerSec=30.05240294546299, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:09:23,285] [INFO] [timer.py:197:stop] 0/452, RunningAvgSamplesPerSec=29.93004199944191, CurrSamplesPerSec=28.94799226789532, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:09:28,954] [INFO] [timer.py:197:stop] 0/453, RunningAvgSamplesPerSec=29.930919146785687, CurrSamplesPerSec=30.330922245583842, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:09:35,276] [INFO] [timer.py:197:stop] 0/454, RunningAvgSamplesPerSec=29.92960850337359, CurrSamplesPerSec=29.349981161200425, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:09:41,289] [INFO] [timer.py:197:stop] 0/455, RunningAvgSamplesPerSec=29.928111312718112, CurrSamplesPerSec=29.2663772759807, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:09:47,400] [INFO] [timer.py:197:stop] 0/456, RunningAvgSamplesPerSec=29.92765817843899, CurrSamplesPerSec=29.72378974325869, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:09:53,344] [INFO] [timer.py:197:stop] 0/457, RunningAvgSamplesPerSec=29.927217697095426, CurrSamplesPerSec=29.728569490489278, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:09:59,370] [INFO] [timer.py:197:stop] 0/458, RunningAvgSamplesPerSec=29.9280542449503, CurrSamplesPerSec=30.313597692315163, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:10:05,163] [INFO] [timer.py:197:stop] 0/459, RunningAvgSamplesPerSec=29.928461450309317, CurrSamplesPerSec=30.115308896933534, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:10:11,063] [INFO] [logging.py:68:log_dist] [Rank 0] step=460, skipped=2, lr=[9.858818266705698e-06], mom=[[0.9, 0.999]] -[2022-12-14 17:10:11,063] [INFO] [timer.py:197:stop] 0/460, RunningAvgSamplesPerSec=29.928808172618155, CurrSamplesPerSec=30.088105480972292, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:10:17,081] [INFO] [timer.py:197:stop] 0/461, RunningAvgSamplesPerSec=29.92835484408868, CurrSamplesPerSec=29.722163926200004, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:10:22,940] [INFO] [timer.py:197:stop] 0/462, RunningAvgSamplesPerSec=29.929744626066572, CurrSamplesPerSec=30.581577716675866, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:10:29,204] [INFO] [timer.py:197:stop] 0/463, RunningAvgSamplesPerSec=29.92965260123662, CurrSamplesPerSec=29.887381096724233, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:10:35,195] [INFO] [timer.py:197:stop] 0/464, RunningAvgSamplesPerSec=29.93083831266102, CurrSamplesPerSec=30.487641983343043, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:10:41,573] [INFO] [timer.py:197:stop] 0/465, RunningAvgSamplesPerSec=29.92979909767766, CurrSamplesPerSec=29.4572781031843, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:10:47,693] [INFO] [timer.py:197:stop] 0/466, RunningAvgSamplesPerSec=29.930972539365367, CurrSamplesPerSec=30.484342441125804, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:10:53,816] [INFO] [timer.py:197:stop] 0/467, RunningAvgSamplesPerSec=29.929620702712477, CurrSamplesPerSec=29.31527151212639, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:10:59,756] [INFO] [timer.py:197:stop] 0/468, RunningAvgSamplesPerSec=29.92980825417258, CurrSamplesPerSec=30.017275097820022, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:11:05,629] [INFO] [timer.py:197:stop] 0/469, RunningAvgSamplesPerSec=29.92970441981856, CurrSamplesPerSec=29.881395877980676, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:11:11,480] [INFO] [logging.py:68:log_dist] [Rank 0] step=470, skipped=2, lr=[9.893573655076761e-06], mom=[[0.9, 0.999]] -[2022-12-14 17:11:11,481] [INFO] [timer.py:197:stop] 0/470, RunningAvgSamplesPerSec=29.929717990925777, CurrSamplesPerSec=29.936057043183304, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:11:17,581] [INFO] [timer.py:197:stop] 0/471, RunningAvgSamplesPerSec=29.930056363614536, CurrSamplesPerSec=30.089258914474957, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:11:23,593] [INFO] [timer.py:197:stop] 0/472, RunningAvgSamplesPerSec=29.931275847489342, CurrSamplesPerSec=30.514379707207503, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:11:26,000] [INFO] [timer.py:197:stop] 0/473, RunningAvgSamplesPerSec=29.932750033979097, CurrSamplesPerSec=30.642071623573287, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:11:28,123] [INFO] [timer.py:197:stop] 0/474, RunningAvgSamplesPerSec=29.93396511980529, CurrSamplesPerSec=30.51744983305158, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:11:30,246] [INFO] [timer.py:197:stop] 0/475, RunningAvgSamplesPerSec=29.935145340108186, CurrSamplesPerSec=30.502795114197653, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0133, 'learning_rate': 9.910673836465484e-06, 'epoch': 11.01} -[2022-12-14 17:11:32,396] [INFO] [timer.py:197:stop] 0/476, RunningAvgSamplesPerSec=29.93557766144186, CurrSamplesPerSec=30.141475097828955, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:11:34,567] [INFO] [timer.py:197:stop] 0/477, RunningAvgSamplesPerSec=29.935370371403945, CurrSamplesPerSec=29.837437013698178, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:11:36,687] [INFO] [timer.py:197:stop] 0/478, RunningAvgSamplesPerSec=29.936667388038625, CurrSamplesPerSec=30.565723224535706, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:11:38,859] [INFO] [timer.py:197:stop] 0/479, RunningAvgSamplesPerSec=29.93640749713423, CurrSamplesPerSec=29.81320959449839, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:11:40,752] [INFO] [logging.py:68:log_dist] [Rank 0] step=480, skipped=2, lr=[9.927594201889966e-06], mom=[[0.9, 0.999]] -[2022-12-14 17:11:40,753] [INFO] [timer.py:197:stop] 0/480, RunningAvgSamplesPerSec=29.944240423546947, CurrSamplesPerSec=34.21448501446722, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:12:21,340] [INFO] [timer.py:197:stop] 0/481, RunningAvgSamplesPerSec=29.945001994759394, CurrSamplesPerSec=30.313522381614455, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:12:27,776] [INFO] [timer.py:197:stop] 0/482, RunningAvgSamplesPerSec=29.942993718022933, CurrSamplesPerSec=29.01103235480864, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:12:34,217] [INFO] [timer.py:197:stop] 0/483, RunningAvgSamplesPerSec=29.94243941978199, CurrSamplesPerSec=29.6787244698835, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:12:40,642] [INFO] [timer.py:197:stop] 0/484, RunningAvgSamplesPerSec=29.94232694187008, CurrSamplesPerSec=29.888322847450954, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:12:46,689] [INFO] [timer.py:197:stop] 0/485, RunningAvgSamplesPerSec=29.943146399514973, CurrSamplesPerSec=30.343415873704867, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:12:53,308] [INFO] [timer.py:197:stop] 0/486, RunningAvgSamplesPerSec=29.94162294462178, CurrSamplesPerSec=29.223479466630902, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:12:59,603] [INFO] [timer.py:197:stop] 0/487, RunningAvgSamplesPerSec=29.939712076346126, CurrSamplesPerSec=29.042620908292943, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:13:06,807] [INFO] [timer.py:197:stop] 0/488, RunningAvgSamplesPerSec=29.939920573178014, CurrSamplesPerSec=30.041384935317712, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:13:13,282] [INFO] [timer.py:197:stop] 0/489, RunningAvgSamplesPerSec=29.941185675630535, CurrSamplesPerSec=30.568942935982353, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:13:19,587] [INFO] [logging.py:68:log_dist] [Rank 0] step=490, skipped=2, lr=[9.96091033869825e-06], mom=[[0.9, 0.999]] -[2022-12-14 17:13:19,588] [INFO] [timer.py:197:stop] 0/490, RunningAvgSamplesPerSec=29.939995857533276, CurrSamplesPerSec=29.371577867128234, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:13:26,486] [INFO] [timer.py:197:stop] 0/491, RunningAvgSamplesPerSec=29.93944419906448, CurrSamplesPerSec=29.67263884137239, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:13:32,973] [INFO] [timer.py:197:stop] 0/492, RunningAvgSamplesPerSec=29.935721808624344, CurrSamplesPerSec=28.22001047492714, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:13:39,418] [INFO] [timer.py:197:stop] 0/493, RunningAvgSamplesPerSec=29.936618150213135, CurrSamplesPerSec=30.38237873695493, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:13:45,853] [INFO] [timer.py:197:stop] 0/494, RunningAvgSamplesPerSec=29.93721248149583, CurrSamplesPerSec=30.231907568353492, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:13:52,163] [INFO] [timer.py:197:stop] 0/495, RunningAvgSamplesPerSec=29.936497871787843, CurrSamplesPerSec=29.588999372474202, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:14:00,339] [INFO] [timer.py:197:stop] 0/496, RunningAvgSamplesPerSec=29.935169532812726, CurrSamplesPerSec=29.294345715798567, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:14:06,672] [INFO] [timer.py:197:stop] 0/497, RunningAvgSamplesPerSec=29.93406445150269, CurrSamplesPerSec=29.397951220302847, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:14:12,893] [INFO] [timer.py:197:stop] 0/498, RunningAvgSamplesPerSec=29.933623338039, CurrSamplesPerSec=29.71685657554701, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:14:19,557] [INFO] [timer.py:197:stop] 0/499, RunningAvgSamplesPerSec=29.93394986241016, CurrSamplesPerSec=30.096788758203907, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:14:25,984] [INFO] [logging.py:68:log_dist] [Rank 0] step=500, skipped=2, lr=[9.993550644973805e-06], mom=[[0.9, 0.999]] -[2022-12-14 17:14:25,985] [INFO] [timer.py:197:stop] 0/500, RunningAvgSamplesPerSec=29.93467180396412, CurrSamplesPerSec=30.29783852798139, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0092, 'learning_rate': 9.993550644973805e-06, 'epoch': 12.0} -[2022-12-14 17:14:32,128] [INFO] [timer.py:197:stop] 0/501, RunningAvgSamplesPerSec=29.93590029581805, CurrSamplesPerSec=30.560479157807972, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:14:38,648] [INFO] [timer.py:197:stop] 0/502, RunningAvgSamplesPerSec=29.934503480095145, CurrSamplesPerSec=29.253383773861543, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:14:47,339] [INFO] [timer.py:197:stop] 0/503, RunningAvgSamplesPerSec=29.934596817472947, CurrSamplesPerSec=29.981338523455513, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:14:53,647] [INFO] [timer.py:197:stop] 0/504, RunningAvgSamplesPerSec=29.93502506818842, CurrSamplesPerSec=30.15113066407518, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:15:00,118] [INFO] [timer.py:197:stop] 0/505, RunningAvgSamplesPerSec=29.936565291221, CurrSamplesPerSec=30.730298373077677, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:15:06,055] [INFO] [timer.py:197:stop] 0/506, RunningAvgSamplesPerSec=29.937643728614617, CurrSamplesPerSec=30.490128364204864, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:15:12,533] [INFO] [timer.py:197:stop] 0/507, RunningAvgSamplesPerSec=29.938508332497605, CurrSamplesPerSec=30.380717899779057, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:15:18,646] [INFO] [timer.py:197:stop] 0/508, RunningAvgSamplesPerSec=29.938156067615463, CurrSamplesPerSec=29.761315178815934, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:15:25,052] [INFO] [timer.py:197:stop] 0/509, RunningAvgSamplesPerSec=29.935042408863985, CurrSamplesPerSec=28.43845371182981, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:15:31,534] [INFO] [logging.py:68:log_dist] [Rank 0] step=510, skipped=2, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 17:15:31,534] [INFO] [timer.py:197:stop] 0/510, RunningAvgSamplesPerSec=29.935583597103015, CurrSamplesPerSec=30.212509275557178, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:15:37,409] [INFO] [timer.py:197:stop] 0/511, RunningAvgSamplesPerSec=29.936109258740398, CurrSamplesPerSec=30.205553599505432, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:15:43,906] [INFO] [timer.py:197:stop] 0/512, RunningAvgSamplesPerSec=29.934394845316323, CurrSamplesPerSec=29.08652379600514, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:15:46,438] [INFO] [timer.py:197:stop] 0/513, RunningAvgSamplesPerSec=29.935059830032312, CurrSamplesPerSec=30.278096002950708, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:15:48,659] [INFO] [timer.py:197:stop] 0/514, RunningAvgSamplesPerSec=29.93466336266809, CurrSamplesPerSec=29.733433111048786, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:15:50,832] [INFO] [timer.py:197:stop] 0/515, RunningAvgSamplesPerSec=29.934432558581378, CurrSamplesPerSec=29.816726440344212, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:15:52,988] [INFO] [timer.py:197:stop] 0/516, RunningAvgSamplesPerSec=29.93465054874488, CurrSamplesPerSec=30.04689965683451, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:15:55,118] [INFO] [timer.py:197:stop] 0/517, RunningAvgSamplesPerSec=29.93556163978349, CurrSamplesPerSec=30.41131949227055, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:15:57,234] [INFO] [timer.py:197:stop] 0/518, RunningAvgSamplesPerSec=29.936841827505667, CurrSamplesPerSec=30.611014586890274, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:15:59,388] [INFO] [timer.py:197:stop] 0/519, RunningAvgSamplesPerSec=29.93710048822155, CurrSamplesPerSec=30.07116829205778, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:16:01,269] [INFO] [logging.py:68:log_dist] [Rank 0] step=520, skipped=2, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 17:16:01,269] [INFO] [timer.py:197:stop] 0/520, RunningAvgSamplesPerSec=29.94468723686717, CurrSamplesPerSec=34.459568565162925, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:16:40,251] [INFO] [timer.py:197:stop] 0/521, RunningAvgSamplesPerSec=29.945293212773635, CurrSamplesPerSec=30.262520422248446, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:16:46,395] [INFO] [timer.py:197:stop] 0/522, RunningAvgSamplesPerSec=29.945163621262726, CurrSamplesPerSec=29.878056642381523, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:16:52,424] [INFO] [timer.py:197:stop] 0/523, RunningAvgSamplesPerSec=29.944881706657558, CurrSamplesPerSec=29.799001643280018, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:16:58,883] [INFO] [timer.py:197:stop] 0/524, RunningAvgSamplesPerSec=29.941114511350243, CurrSamplesPerSec=28.099368182694626, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:17:05,247] [INFO] [timer.py:197:stop] 0/525, RunningAvgSamplesPerSec=29.94193316883897, CurrSamplesPerSec=30.37547180998963, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0092, 'learning_rate': 1e-05, 'epoch': 13.0} -[2022-12-14 17:17:11,682] [INFO] [timer.py:197:stop] 0/526, RunningAvgSamplesPerSec=29.941617837060992, CurrSamplesPerSec=29.777604430819256, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:17:17,851] [INFO] [timer.py:197:stop] 0/527, RunningAvgSamplesPerSec=29.940848585597934, CurrSamplesPerSec=29.54312550516034, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:17:23,771] [INFO] [timer.py:197:stop] 0/528, RunningAvgSamplesPerSec=29.942090296743146, CurrSamplesPerSec=30.608525902692566, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:17:30,362] [INFO] [timer.py:197:stop] 0/529, RunningAvgSamplesPerSec=29.938771305819966, CurrSamplesPerSec=28.2893461893045, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:17:36,369] [INFO] [logging.py:68:log_dist] [Rank 0] step=530, skipped=2, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 17:17:36,370] [INFO] [timer.py:197:stop] 0/530, RunningAvgSamplesPerSec=29.938377798347336, CurrSamplesPerSec=29.732428644673853, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:17:42,570] [INFO] [timer.py:197:stop] 0/531, RunningAvgSamplesPerSec=29.938816513641605, CurrSamplesPerSec=30.172267862427685, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:17:48,936] [INFO] [timer.py:197:stop] 0/532, RunningAvgSamplesPerSec=29.937821309254, CurrSamplesPerSec=29.420473067821234, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:17:54,790] [INFO] [timer.py:197:stop] 0/533, RunningAvgSamplesPerSec=29.93730378944752, CurrSamplesPerSec=29.665513135643888, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:18:00,885] [INFO] [timer.py:197:stop] 0/534, RunningAvgSamplesPerSec=29.937570827341574, CurrSamplesPerSec=30.080044033877936, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:18:06,530] [INFO] [timer.py:197:stop] 0/535, RunningAvgSamplesPerSec=29.9373927532391, CurrSamplesPerSec=29.84295673064897, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:18:12,414] [INFO] [timer.py:197:stop] 0/536, RunningAvgSamplesPerSec=29.9373822237188, CurrSamplesPerSec=29.931771043275468, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:18:18,622] [INFO] [timer.py:197:stop] 0/537, RunningAvgSamplesPerSec=29.936924294162537, CurrSamplesPerSec=29.694374842850227, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:18:24,989] [INFO] [timer.py:197:stop] 0/538, RunningAvgSamplesPerSec=29.936540938120686, CurrSamplesPerSec=29.732843596326063, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:18:31,273] [INFO] [timer.py:197:stop] 0/539, RunningAvgSamplesPerSec=29.934823608046056, CurrSamplesPerSec=29.041844809369366, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:18:37,454] [INFO] [logging.py:68:log_dist] [Rank 0] step=540, skipped=2, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 17:18:37,455] [INFO] [timer.py:197:stop] 0/540, RunningAvgSamplesPerSec=29.9340974316372, CurrSamplesPerSec=29.549164625339333, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:18:43,699] [INFO] [timer.py:197:stop] 0/541, RunningAvgSamplesPerSec=29.93523497487942, CurrSamplesPerSec=30.56003034651909, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:18:49,979] [INFO] [timer.py:197:stop] 0/542, RunningAvgSamplesPerSec=29.93270794768381, CurrSamplesPerSec=28.63002784446392, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:18:56,197] [INFO] [timer.py:197:stop] 0/543, RunningAvgSamplesPerSec=29.933072421243782, CurrSamplesPerSec=30.131193238573296, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:19:02,459] [INFO] [timer.py:197:stop] 0/544, RunningAvgSamplesPerSec=29.933049131013156, CurrSamplesPerSec=29.920454427659077, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:19:08,601] [INFO] [timer.py:197:stop] 0/545, RunningAvgSamplesPerSec=29.933497148926012, CurrSamplesPerSec=30.178312506415136, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:19:14,432] [INFO] [timer.py:197:stop] 0/546, RunningAvgSamplesPerSec=29.932962232425073, CurrSamplesPerSec=29.645299101879335, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:19:20,879] [INFO] [timer.py:197:stop] 0/547, RunningAvgSamplesPerSec=29.93385955084332, CurrSamplesPerSec=30.43010813391152, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:19:27,148] [INFO] [timer.py:197:stop] 0/548, RunningAvgSamplesPerSec=29.932970315349, CurrSamplesPerSec=29.456072411107687, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:19:33,271] [INFO] [timer.py:197:stop] 0/549, RunningAvgSamplesPerSec=29.93353806676922, CurrSamplesPerSec=30.24678022509466, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:19:39,134] [INFO] [logging.py:68:log_dist] [Rank 0] step=550, skipped=2, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 17:19:39,134] [INFO] [timer.py:197:stop] 0/550, RunningAvgSamplesPerSec=29.932414700657624, CurrSamplesPerSec=29.33031648301219, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.007, 'learning_rate': 1e-05, 'epoch': 13.01} -[2022-12-14 17:19:45,132] [INFO] [timer.py:197:stop] 0/551, RunningAvgSamplesPerSec=29.932154784913248, CurrSamplesPerSec=29.790396751455614, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:19:51,488] [INFO] [timer.py:197:stop] 0/552, RunningAvgSamplesPerSec=29.93110911076577, CurrSamplesPerSec=29.367856793937193, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:19:53,850] [INFO] [timer.py:197:stop] 0/553, RunningAvgSamplesPerSec=29.932402473328587, CurrSamplesPerSec=30.661101033601796, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:19:56,002] [INFO] [timer.py:197:stop] 0/554, RunningAvgSamplesPerSec=29.932702508678116, CurrSamplesPerSec=30.098941798645164, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:19:58,157] [INFO] [timer.py:197:stop] 0/555, RunningAvgSamplesPerSec=29.933580666341967, CurrSamplesPerSec=30.426317509959787, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:20:00,266] [INFO] [timer.py:197:stop] 0/556, RunningAvgSamplesPerSec=29.9349376925135, CurrSamplesPerSec=30.70470527490693, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:20:02,409] [INFO] [timer.py:197:stop] 0/557, RunningAvgSamplesPerSec=29.935445114557695, CurrSamplesPerSec=30.219226616064777, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:20:04,542] [INFO] [timer.py:197:stop] 0/558, RunningAvgSamplesPerSec=29.936215321793203, CurrSamplesPerSec=30.3698839331272, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:20:06,719] [INFO] [timer.py:197:stop] 0/559, RunningAvgSamplesPerSec=29.935903215974935, CurrSamplesPerSec=29.763374286200246, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:20:08,601] [INFO] [logging.py:68:log_dist] [Rank 0] step=560, skipped=2, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 17:20:08,602] [INFO] [timer.py:197:stop] 0/560, RunningAvgSamplesPerSec=29.94283087015208, CurrSamplesPerSec=34.37354058199113, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:20:49,072] [INFO] [timer.py:197:stop] 0/561, RunningAvgSamplesPerSec=29.94246897467693, CurrSamplesPerSec=29.741886490081107, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:20:55,346] [INFO] [timer.py:197:stop] 0/562, RunningAvgSamplesPerSec=29.94006094572516, CurrSamplesPerSec=28.651987415711147, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:21:01,879] [INFO] [timer.py:197:stop] 0/563, RunningAvgSamplesPerSec=29.939146846456882, CurrSamplesPerSec=29.435871556806084, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:21:08,301] [INFO] [timer.py:197:stop] 0/564, RunningAvgSamplesPerSec=29.93940314258978, CurrSamplesPerSec=30.083880352800097, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:21:15,149] [INFO] [timer.py:197:stop] 0/565, RunningAvgSamplesPerSec=29.939253054901457, CurrSamplesPerSec=29.855141168138317, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:21:22,788] [INFO] [timer.py:197:stop] 0/566, RunningAvgSamplesPerSec=29.938512066217378, CurrSamplesPerSec=29.5270787166701, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:21:29,223] [INFO] [timer.py:197:stop] 0/567, RunningAvgSamplesPerSec=29.939151115508917, CurrSamplesPerSec=30.303974647367706, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:21:35,730] [INFO] [timer.py:197:stop] 0/568, RunningAvgSamplesPerSec=29.938168411703824, CurrSamplesPerSec=29.393067984610315, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:21:42,268] [INFO] [timer.py:197:stop] 0/569, RunningAvgSamplesPerSec=29.93778068343115, CurrSamplesPerSec=29.719926250289106, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:21:48,739] [INFO] [logging.py:68:log_dist] [Rank 0] step=570, skipped=2, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 17:21:48,740] [INFO] [timer.py:197:stop] 0/570, RunningAvgSamplesPerSec=29.937520957774137, CurrSamplesPerSec=29.79097863201337, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:21:55,234] [INFO] [timer.py:197:stop] 0/571, RunningAvgSamplesPerSec=29.936789292168935, CurrSamplesPerSec=29.52690333174352, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:22:01,783] [INFO] [timer.py:197:stop] 0/572, RunningAvgSamplesPerSec=29.936838649581954, CurrSamplesPerSec=29.964949435245355, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:22:08,995] [INFO] [timer.py:197:stop] 0/573, RunningAvgSamplesPerSec=29.93641579128206, CurrSamplesPerSec=29.697315026980718, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:22:15,170] [INFO] [timer.py:197:stop] 0/574, RunningAvgSamplesPerSec=29.93731802191041, CurrSamplesPerSec=30.46152833580602, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:22:21,451] [INFO] [timer.py:197:stop] 0/575, RunningAvgSamplesPerSec=29.937378482854818, CurrSamplesPerSec=29.97200221031068, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0055, 'learning_rate': 1e-05, 'epoch': 14.0} -[2022-12-14 17:22:28,158] [INFO] [timer.py:197:stop] 0/576, RunningAvgSamplesPerSec=29.938017047807552, CurrSamplesPerSec=30.308450040454854, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:22:34,152] [INFO] [timer.py:197:stop] 0/577, RunningAvgSamplesPerSec=29.938265447869927, CurrSamplesPerSec=30.08153057561878, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:22:40,469] [INFO] [timer.py:197:stop] 0/578, RunningAvgSamplesPerSec=29.938827464718557, CurrSamplesPerSec=30.265519600142017, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:22:46,759] [INFO] [timer.py:197:stop] 0/579, RunningAvgSamplesPerSec=29.937391477994442, CurrSamplesPerSec=29.132538690205195, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:22:53,054] [INFO] [logging.py:68:log_dist] [Rank 0] step=580, skipped=2, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 17:22:53,055] [INFO] [timer.py:197:stop] 0/580, RunningAvgSamplesPerSec=29.936298461430226, CurrSamplesPerSec=29.318662249497258, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:22:59,504] [INFO] [timer.py:197:stop] 0/581, RunningAvgSamplesPerSec=29.936155076159384, CurrSamplesPerSec=29.85350759073089, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:23:06,719] [INFO] [timer.py:197:stop] 0/582, RunningAvgSamplesPerSec=29.9358291127924, CurrSamplesPerSec=29.748280778699144, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:23:13,841] [INFO] [timer.py:197:stop] 0/583, RunningAvgSamplesPerSec=29.935918999511994, CurrSamplesPerSec=29.988144405793694, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:23:20,423] [INFO] [timer.py:197:stop] 0/584, RunningAvgSamplesPerSec=29.936315841039608, CurrSamplesPerSec=30.168673431394044, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:23:27,006] [INFO] [timer.py:197:stop] 0/585, RunningAvgSamplesPerSec=29.937301393180775, CurrSamplesPerSec=30.522116913602776, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:23:33,432] [INFO] [timer.py:197:stop] 0/586, RunningAvgSamplesPerSec=29.937878937409387, CurrSamplesPerSec=30.27842386653355, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:23:39,954] [INFO] [timer.py:197:stop] 0/587, RunningAvgSamplesPerSec=29.93758136993092, CurrSamplesPerSec=29.764806589794016, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:23:46,200] [INFO] [timer.py:197:stop] 0/588, RunningAvgSamplesPerSec=29.937691081078565, CurrSamplesPerSec=30.002010226830617, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:23:52,291] [INFO] [timer.py:197:stop] 0/589, RunningAvgSamplesPerSec=29.937609012585035, CurrSamplesPerSec=29.88959413867207, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:23:58,590] [INFO] [logging.py:68:log_dist] [Rank 0] step=590, skipped=2, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 17:23:58,591] [INFO] [timer.py:197:stop] 0/590, RunningAvgSamplesPerSec=29.93745058451544, CurrSamplesPerSec=29.844741787581757, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:24:04,951] [INFO] [timer.py:197:stop] 0/591, RunningAvgSamplesPerSec=29.935937226657522, CurrSamplesPerSec=29.07181283864351, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:24:11,173] [INFO] [timer.py:197:stop] 0/592, RunningAvgSamplesPerSec=29.937051837001732, CurrSamplesPerSec=30.608302534281275, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:24:13,610] [INFO] [timer.py:197:stop] 0/593, RunningAvgSamplesPerSec=29.937715644281155, CurrSamplesPerSec=30.334562300927477, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:24:15,729] [INFO] [timer.py:197:stop] 0/594, RunningAvgSamplesPerSec=29.938760246952178, CurrSamplesPerSec=30.56914136201017, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:24:17,872] [INFO] [timer.py:197:stop] 0/595, RunningAvgSamplesPerSec=29.939262663341207, CurrSamplesPerSec=30.239682719031034, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:24:20,018] [INFO] [timer.py:197:stop] 0/596, RunningAvgSamplesPerSec=29.939884486298144, CurrSamplesPerSec=30.313231411968463, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:24:22,178] [INFO] [timer.py:197:stop] 0/597, RunningAvgSamplesPerSec=29.939979467948685, CurrSamplesPerSec=29.996505265289297, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:24:24,291] [INFO] [timer.py:197:stop] 0/598, RunningAvgSamplesPerSec=29.941159771949764, CurrSamplesPerSec=30.660337582381636, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:24:26,482] [INFO] [timer.py:197:stop] 0/599, RunningAvgSamplesPerSec=29.940508060812352, CurrSamplesPerSec=29.557070920003078, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:24:28,404] [INFO] [logging.py:68:log_dist] [Rank 0] step=600, skipped=2, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 17:24:28,405] [INFO] [timer.py:197:stop] 0/600, RunningAvgSamplesPerSec=29.946012709155607, CurrSamplesPerSec=33.63814051266577, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0054, 'learning_rate': 1e-05, 'epoch': 14.01} -[2022-12-14 17:25:11,118] [INFO] [timer.py:197:stop] 0/601, RunningAvgSamplesPerSec=29.94497381326137, CurrSamplesPerSec=29.3363618798321, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:25:17,788] [INFO] [timer.py:197:stop] 0/602, RunningAvgSamplesPerSec=29.94249628844026, CurrSamplesPerSec=28.52865023637711, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:25:24,256] [INFO] [timer.py:197:stop] 0/603, RunningAvgSamplesPerSec=29.943150736113477, CurrSamplesPerSec=30.341045956431085, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:25:30,845] [INFO] [timer.py:197:stop] 0/604, RunningAvgSamplesPerSec=29.94239256398053, CurrSamplesPerSec=29.493572590861497, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:25:37,306] [INFO] [timer.py:197:stop] 0/605, RunningAvgSamplesPerSec=29.942193305133994, CurrSamplesPerSec=29.822718910561324, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:25:44,147] [INFO] [timer.py:197:stop] 0/606, RunningAvgSamplesPerSec=29.941210554363934, CurrSamplesPerSec=29.360131670016457, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:25:50,876] [INFO] [timer.py:197:stop] 0/607, RunningAvgSamplesPerSec=29.94208792998009, CurrSamplesPerSec=30.481587022463803, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:25:57,052] [INFO] [timer.py:197:stop] 0/608, RunningAvgSamplesPerSec=29.94168968481071, CurrSamplesPerSec=29.70267784508315, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:26:03,878] [INFO] [timer.py:197:stop] 0/609, RunningAvgSamplesPerSec=29.94198498864028, CurrSamplesPerSec=30.12201688068914, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:26:10,026] [INFO] [logging.py:68:log_dist] [Rank 0] step=610, skipped=2, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 17:26:10,026] [INFO] [timer.py:197:stop] 0/610, RunningAvgSamplesPerSec=29.940197143817983, CurrSamplesPerSec=28.892995084534213, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:26:17,234] [INFO] [timer.py:197:stop] 0/611, RunningAvgSamplesPerSec=29.93937412067569, CurrSamplesPerSec=29.447215378806376, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:26:23,741] [INFO] [timer.py:197:stop] 0/612, RunningAvgSamplesPerSec=29.939336417720835, CurrSamplesPerSec=29.916392942937335, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:26:30,214] [INFO] [timer.py:197:stop] 0/613, RunningAvgSamplesPerSec=29.93554349915285, CurrSamplesPerSec=27.78810784668125, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:26:37,025] [INFO] [timer.py:197:stop] 0/614, RunningAvgSamplesPerSec=29.935314297987233, CurrSamplesPerSec=29.795925534676865, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:26:43,158] [INFO] [timer.py:197:stop] 0/615, RunningAvgSamplesPerSec=29.935948796085846, CurrSamplesPerSec=30.32937325917252, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:26:50,467] [INFO] [timer.py:197:stop] 0/616, RunningAvgSamplesPerSec=29.936661652696223, CurrSamplesPerSec=30.38012650678087, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:26:56,609] [INFO] [timer.py:197:stop] 0/617, RunningAvgSamplesPerSec=29.936631539472497, CurrSamplesPerSec=29.918153451161555, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:27:03,124] [INFO] [timer.py:197:stop] 0/618, RunningAvgSamplesPerSec=29.934643034320832, CurrSamplesPerSec=28.759787214256114, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:27:09,596] [INFO] [timer.py:197:stop] 0/619, RunningAvgSamplesPerSec=29.93468814872888, CurrSamplesPerSec=29.96250448987537, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:27:16,227] [INFO] [logging.py:68:log_dist] [Rank 0] step=620, skipped=2, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 17:27:16,227] [INFO] [timer.py:197:stop] 0/620, RunningAvgSamplesPerSec=29.93387322884523, CurrSamplesPerSec=29.439387109927548, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:27:22,416] [INFO] [timer.py:197:stop] 0/621, RunningAvgSamplesPerSec=29.934121295211654, CurrSamplesPerSec=30.088216773332718, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:27:28,855] [INFO] [timer.py:197:stop] 0/622, RunningAvgSamplesPerSec=29.934363897489884, CurrSamplesPerSec=30.08529309280681, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:27:35,674] [INFO] [timer.py:197:stop] 0/623, RunningAvgSamplesPerSec=29.934408659300466, CurrSamplesPerSec=29.96218677661173, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:27:42,249] [INFO] [timer.py:197:stop] 0/624, RunningAvgSamplesPerSec=29.934695121699956, CurrSamplesPerSec=30.113653481605166, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:27:48,834] [INFO] [timer.py:197:stop] 0/625, RunningAvgSamplesPerSec=29.934818345346983, CurrSamplesPerSec=30.011660516803193, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0043, 'learning_rate': 1e-05, 'epoch': 15.01} -[2022-12-14 17:27:55,049] [INFO] [timer.py:197:stop] 0/626, RunningAvgSamplesPerSec=29.935537516656403, CurrSamplesPerSec=30.390400075716897, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:28:01,960] [INFO] [timer.py:197:stop] 0/627, RunningAvgSamplesPerSec=29.935972630517654, CurrSamplesPerSec=30.209972768473957, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:28:08,807] [INFO] [timer.py:197:stop] 0/628, RunningAvgSamplesPerSec=29.93518989971446, CurrSamplesPerSec=29.453861693322878, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:28:15,317] [INFO] [timer.py:197:stop] 0/629, RunningAvgSamplesPerSec=29.935582772968477, CurrSamplesPerSec=30.18356198147127, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:28:22,496] [INFO] [logging.py:68:log_dist] [Rank 0] step=630, skipped=2, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 17:28:22,497] [INFO] [timer.py:197:stop] 0/630, RunningAvgSamplesPerSec=29.935366301237053, CurrSamplesPerSec=29.8002521141341, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:28:29,053] [INFO] [timer.py:197:stop] 0/631, RunningAvgSamplesPerSec=29.935780965298868, CurrSamplesPerSec=30.198478823188875, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:28:35,566] [INFO] [timer.py:197:stop] 0/632, RunningAvgSamplesPerSec=29.935405623718616, CurrSamplesPerSec=29.701166070435132, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:28:37,918] [INFO] [timer.py:197:stop] 0/633, RunningAvgSamplesPerSec=29.936705686082373, CurrSamplesPerSec=30.778821005811658, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:28:40,032] [INFO] [timer.py:197:stop] 0/634, RunningAvgSamplesPerSec=29.937787222776993, CurrSamplesPerSec=30.63618244153188, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:28:42,193] [INFO] [timer.py:197:stop] 0/635, RunningAvgSamplesPerSec=29.93784448359478, CurrSamplesPerSec=29.974077187793498, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:28:44,375] [INFO] [timer.py:197:stop] 0/636, RunningAvgSamplesPerSec=29.937424562443457, CurrSamplesPerSec=29.673957455774342, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:28:46,523] [INFO] [timer.py:197:stop] 0/637, RunningAvgSamplesPerSec=29.93775304714489, CurrSamplesPerSec=30.147473550883088, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:28:48,723] [INFO] [timer.py:197:stop] 0/638, RunningAvgSamplesPerSec=29.937559809170597, CurrSamplesPerSec=29.815355367865557, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:28:50,805] [INFO] [timer.py:197:stop] 0/639, RunningAvgSamplesPerSec=29.939344076185485, CurrSamplesPerSec=31.118917686959534, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:28:52,715] [INFO] [logging.py:68:log_dist] [Rank 0] step=640, skipped=2, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 17:28:52,716] [INFO] [timer.py:197:stop] 0/640, RunningAvgSamplesPerSec=29.944845073483936, CurrSamplesPerSec=33.91420233694085, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:29:32,521] [INFO] [timer.py:197:stop] 0/641, RunningAvgSamplesPerSec=29.944714830333318, CurrSamplesPerSec=29.861850006518903, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:29:38,726] [INFO] [timer.py:197:stop] 0/642, RunningAvgSamplesPerSec=29.94567385476609, CurrSamplesPerSec=30.57131377331381, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:29:45,900] [INFO] [timer.py:197:stop] 0/643, RunningAvgSamplesPerSec=29.946052248800296, CurrSamplesPerSec=30.19020194172486, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:29:52,087] [INFO] [timer.py:197:stop] 0/644, RunningAvgSamplesPerSec=29.946184634610937, CurrSamplesPerSec=30.031285467952383, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:29:58,236] [INFO] [timer.py:197:stop] 0/645, RunningAvgSamplesPerSec=29.945897122688283, CurrSamplesPerSec=29.762446992643635, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:30:04,897] [INFO] [timer.py:197:stop] 0/646, RunningAvgSamplesPerSec=29.94412605273401, CurrSamplesPerSec=28.84711319266241, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:30:11,958] [INFO] [timer.py:197:stop] 0/647, RunningAvgSamplesPerSec=29.942835379522048, CurrSamplesPerSec=29.13412593592142, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:30:18,196] [INFO] [timer.py:197:stop] 0/648, RunningAvgSamplesPerSec=29.94174123519367, CurrSamplesPerSec=29.25229353349256, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:30:24,733] [INFO] [timer.py:197:stop] 0/649, RunningAvgSamplesPerSec=29.94113689007374, CurrSamplesPerSec=29.55576267302949, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:30:31,172] [INFO] [logging.py:68:log_dist] [Rank 0] step=650, skipped=2, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 17:30:31,172] [INFO] [timer.py:197:stop] 0/650, RunningAvgSamplesPerSec=29.941332313872845, CurrSamplesPerSec=30.068308548364097, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0048, 'learning_rate': 1e-05, 'epoch': 16.0} -[2022-12-14 17:30:37,874] [INFO] [timer.py:197:stop] 0/651, RunningAvgSamplesPerSec=29.94140766555329, CurrSamplesPerSec=29.990315435442305, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:30:44,759] [INFO] [timer.py:197:stop] 0/652, RunningAvgSamplesPerSec=29.94150999144909, CurrSamplesPerSec=30.0080673478105, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:30:51,093] [INFO] [timer.py:197:stop] 0/653, RunningAvgSamplesPerSec=29.941519066289764, CurrSamplesPerSec=29.94741887680998, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:30:57,529] [INFO] [timer.py:197:stop] 0/654, RunningAvgSamplesPerSec=29.941778439563436, CurrSamplesPerSec=30.111589534979853, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:31:03,313] [INFO] [timer.py:197:stop] 0/655, RunningAvgSamplesPerSec=29.94239728262093, CurrSamplesPerSec=30.3514029224857, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:31:09,476] [INFO] [timer.py:197:stop] 0/656, RunningAvgSamplesPerSec=29.94315026104793, CurrSamplesPerSec=30.44306684954288, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:31:15,800] [INFO] [timer.py:197:stop] 0/657, RunningAvgSamplesPerSec=29.943477860492052, CurrSamplesPerSec=30.15927431360199, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:31:21,824] [INFO] [timer.py:197:stop] 0/658, RunningAvgSamplesPerSec=29.94284842413404, CurrSamplesPerSec=29.5361756056812, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:31:28,316] [INFO] [timer.py:197:stop] 0/659, RunningAvgSamplesPerSec=29.943811372207737, CurrSamplesPerSec=30.589139896632847, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:31:34,475] [INFO] [logging.py:68:log_dist] [Rank 0] step=660, skipped=2, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 17:31:34,476] [INFO] [timer.py:197:stop] 0/660, RunningAvgSamplesPerSec=29.94447126626885, CurrSamplesPerSec=30.3844008707988, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:31:41,217] [INFO] [timer.py:197:stop] 0/661, RunningAvgSamplesPerSec=29.94371674046536, CurrSamplesPerSec=29.455348396810333, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:31:47,405] [INFO] [timer.py:197:stop] 0/662, RunningAvgSamplesPerSec=29.94290469189936, CurrSamplesPerSec=29.417174796580912, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:31:54,022] [INFO] [timer.py:197:stop] 0/663, RunningAvgSamplesPerSec=29.94218795743871, CurrSamplesPerSec=29.47651139955624, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:32:00,499] [INFO] [timer.py:197:stop] 0/664, RunningAvgSamplesPerSec=29.941906019797887, CurrSamplesPerSec=29.756699721727358, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:32:07,196] [INFO] [timer.py:197:stop] 0/665, RunningAvgSamplesPerSec=29.94233806955962, CurrSamplesPerSec=30.231117676088203, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:32:13,226] [INFO] [timer.py:197:stop] 0/666, RunningAvgSamplesPerSec=29.94323547428685, CurrSamplesPerSec=30.55029543036369, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:32:20,063] [INFO] [timer.py:197:stop] 0/667, RunningAvgSamplesPerSec=29.942134021481632, CurrSamplesPerSec=29.228233313716128, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:32:26,125] [INFO] [timer.py:197:stop] 0/668, RunningAvgSamplesPerSec=29.941827406720932, CurrSamplesPerSec=29.739309775761544, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:32:32,712] [INFO] [timer.py:197:stop] 0/669, RunningAvgSamplesPerSec=29.941779963491797, CurrSamplesPerSec=29.910216131822896, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:32:38,930] [INFO] [logging.py:68:log_dist] [Rank 0] step=670, skipped=2, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 17:32:38,931] [INFO] [timer.py:197:stop] 0/670, RunningAvgSamplesPerSec=29.941666222575776, CurrSamplesPerSec=29.865993057185044, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:32:45,165] [INFO] [timer.py:197:stop] 0/671, RunningAvgSamplesPerSec=29.94161838884207, CurrSamplesPerSec=29.90969956867308, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:32:51,730] [INFO] [timer.py:197:stop] 0/672, RunningAvgSamplesPerSec=29.941266747280356, CurrSamplesPerSec=29.707855195769824, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:32:54,293] [INFO] [timer.py:197:stop] 0/673, RunningAvgSamplesPerSec=29.941105518249742, CurrSamplesPerSec=29.83347097816743, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:32:56,411] [INFO] [timer.py:197:stop] 0/674, RunningAvgSamplesPerSec=29.942047184018204, CurrSamplesPerSec=30.58754699764311, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:32:58,533] [INFO] [timer.py:197:stop] 0/675, RunningAvgSamplesPerSec=29.942911344485417, CurrSamplesPerSec=30.53512981460549, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0039, 'learning_rate': 1e-05, 'epoch': 16.01} -[2022-12-14 17:33:00,671] [INFO] [timer.py:197:stop] 0/676, RunningAvgSamplesPerSec=29.943457745673662, CurrSamplesPerSec=30.315764746857838, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:33:02,884] [INFO] [timer.py:197:stop] 0/677, RunningAvgSamplesPerSec=29.943007273454043, CurrSamplesPerSec=29.64244122069648, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:33:05,076] [INFO] [timer.py:197:stop] 0/678, RunningAvgSamplesPerSec=29.942416946994346, CurrSamplesPerSec=29.54918739459907, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:33:07,212] [INFO] [timer.py:197:stop] 0/679, RunningAvgSamplesPerSec=29.943039846286545, CurrSamplesPerSec=30.370134759289076, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:33:09,129] [INFO] [logging.py:68:log_dist] [Rank 0] step=680, skipped=2, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 17:33:09,130] [INFO] [timer.py:197:stop] 0/680, RunningAvgSamplesPerSec=29.948531896284823, CurrSamplesPerSec=34.19457583116939, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:33:51,673] [INFO] [timer.py:197:stop] 0/681, RunningAvgSamplesPerSec=29.946870915473475, CurrSamplesPerSec=28.86159756657047, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:33:58,134] [INFO] [timer.py:197:stop] 0/682, RunningAvgSamplesPerSec=29.94563833267967, CurrSamplesPerSec=29.131501698769487, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:34:04,306] [INFO] [timer.py:197:stop] 0/683, RunningAvgSamplesPerSec=29.946452534257954, CurrSamplesPerSec=30.510554188975416, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:34:10,443] [INFO] [timer.py:197:stop] 0/684, RunningAvgSamplesPerSec=29.94658118632476, CurrSamplesPerSec=30.034450693336062, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:34:16,488] [INFO] [timer.py:197:stop] 0/685, RunningAvgSamplesPerSec=29.94684454077219, CurrSamplesPerSec=30.12753757868994, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:34:22,999] [INFO] [timer.py:197:stop] 0/686, RunningAvgSamplesPerSec=29.946778391810298, CurrSamplesPerSec=29.901666808877618, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:34:29,675] [INFO] [timer.py:197:stop] 0/687, RunningAvgSamplesPerSec=29.94723168825824, CurrSamplesPerSec=30.260534945512305, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:34:36,085] [INFO] [timer.py:197:stop] 0/688, RunningAvgSamplesPerSec=29.946516385743518, CurrSamplesPerSec=29.464433475649116, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:34:42,601] [INFO] [timer.py:197:stop] 0/689, RunningAvgSamplesPerSec=29.9468376626777, CurrSamplesPerSec=30.168870085660856, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:34:48,804] [INFO] [logging.py:68:log_dist] [Rank 0] step=690, skipped=2, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 17:34:48,805] [INFO] [timer.py:197:stop] 0/690, RunningAvgSamplesPerSec=29.945510737648256, CurrSamplesPerSec=29.060882193066302, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:34:55,904] [INFO] [timer.py:197:stop] 0/691, RunningAvgSamplesPerSec=29.945228065837796, CurrSamplesPerSec=29.75200655433036, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:35:02,116] [INFO] [timer.py:197:stop] 0/692, RunningAvgSamplesPerSec=29.944197227234145, CurrSamplesPerSec=29.250428837026238, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:35:08,366] [INFO] [timer.py:197:stop] 0/693, RunningAvgSamplesPerSec=29.943576173332435, CurrSamplesPerSec=29.521103811480522, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:35:14,501] [INFO] [timer.py:197:stop] 0/694, RunningAvgSamplesPerSec=29.94298703405994, CurrSamplesPerSec=29.54136008989776, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:35:20,383] [INFO] [timer.py:197:stop] 0/695, RunningAvgSamplesPerSec=29.943743628724324, CurrSamplesPerSec=30.476638212315773, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:35:26,718] [INFO] [timer.py:197:stop] 0/696, RunningAvgSamplesPerSec=29.94378636394865, CurrSamplesPerSec=29.973431236586876, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:35:33,119] [INFO] [timer.py:197:stop] 0/697, RunningAvgSamplesPerSec=29.944249080683605, CurrSamplesPerSec=30.26886068127514, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:35:39,352] [INFO] [timer.py:197:stop] 0/698, RunningAvgSamplesPerSec=29.94379795273833, CurrSamplesPerSec=29.633517572651037, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:35:46,379] [INFO] [timer.py:197:stop] 0/699, RunningAvgSamplesPerSec=29.94387702493779, CurrSamplesPerSec=29.999012756054526, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:35:52,632] [INFO] [logging.py:68:log_dist] [Rank 0] step=700, skipped=2, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 17:35:52,633] [INFO] [timer.py:197:stop] 0/700, RunningAvgSamplesPerSec=29.944082284042878, CurrSamplesPerSec=30.087835684727857, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0033, 'learning_rate': 1e-05, 'epoch': 17.0} -[2022-12-14 17:35:58,907] [INFO] [timer.py:197:stop] 0/701, RunningAvgSamplesPerSec=29.944393987861464, CurrSamplesPerSec=30.163557930765574, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:36:05,262] [INFO] [timer.py:197:stop] 0/702, RunningAvgSamplesPerSec=29.945362931147855, CurrSamplesPerSec=30.638350404386504, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:36:12,420] [INFO] [timer.py:197:stop] 0/703, RunningAvgSamplesPerSec=29.945381542068255, CurrSamplesPerSec=29.95841486455373, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:36:18,733] [INFO] [timer.py:197:stop] 0/704, RunningAvgSamplesPerSec=29.94560556538671, CurrSamplesPerSec=30.103474989379947, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:36:25,521] [INFO] [timer.py:197:stop] 0/705, RunningAvgSamplesPerSec=29.946610913853082, CurrSamplesPerSec=30.669424417566415, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:36:31,593] [INFO] [timer.py:197:stop] 0/706, RunningAvgSamplesPerSec=29.946690680560625, CurrSamplesPerSec=30.002872026710207, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:36:38,453] [INFO] [timer.py:197:stop] 0/707, RunningAvgSamplesPerSec=29.94644158725778, CurrSamplesPerSec=29.772102256103416, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:36:45,054] [INFO] [timer.py:197:stop] 0/708, RunningAvgSamplesPerSec=29.947529518888548, CurrSamplesPerSec=30.734710556539646, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:36:51,347] [INFO] [timer.py:197:stop] 0/709, RunningAvgSamplesPerSec=29.94781216761311, CurrSamplesPerSec=30.14870264827659, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:36:57,758] [INFO] [logging.py:68:log_dist] [Rank 0] step=710, skipped=2, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 17:36:57,759] [INFO] [timer.py:197:stop] 0/710, RunningAvgSamplesPerSec=29.946692340827518, CurrSamplesPerSec=29.17539487006926, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:37:03,887] [INFO] [timer.py:197:stop] 0/711, RunningAvgSamplesPerSec=29.94665207373273, CurrSamplesPerSec=29.91817012363132, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:37:10,009] [INFO] [timer.py:197:stop] 0/712, RunningAvgSamplesPerSec=29.94557329884982, CurrSamplesPerSec=29.199796933751866, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:37:12,391] [INFO] [timer.py:197:stop] 0/713, RunningAvgSamplesPerSec=29.946180824441708, CurrSamplesPerSec=30.383836847217218, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:37:14,542] [INFO] [timer.py:197:stop] 0/714, RunningAvgSamplesPerSec=29.946431577311444, CurrSamplesPerSec=30.1257861522287, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:37:16,660] [INFO] [timer.py:197:stop] 0/715, RunningAvgSamplesPerSec=29.947301346509917, CurrSamplesPerSec=30.579672079753646, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:37:18,773] [INFO] [timer.py:197:stop] 0/716, RunningAvgSamplesPerSec=29.948292550630356, CurrSamplesPerSec=30.67212628482552, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:37:20,948] [INFO] [timer.py:197:stop] 0/717, RunningAvgSamplesPerSec=29.94805854370298, CurrSamplesPerSec=29.781905865661404, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:37:23,122] [INFO] [timer.py:197:stop] 0/718, RunningAvgSamplesPerSec=29.947866062051528, CurrSamplesPerSec=29.810872112703084, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:37:25,264] [INFO] [timer.py:197:stop] 0/719, RunningAvgSamplesPerSec=29.948262221225615, CurrSamplesPerSec=30.2346282459778, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:37:27,159] [INFO] [logging.py:68:log_dist] [Rank 0] step=720, skipped=2, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 17:37:27,159] [INFO] [timer.py:197:stop] 0/720, RunningAvgSamplesPerSec=29.95349718608037, CurrSamplesPerSec=34.245554374923216, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:38:08,754] [INFO] [timer.py:197:stop] 0/721, RunningAvgSamplesPerSec=29.9527151375341, CurrSamplesPerSec=29.401551110320554, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:38:15,356] [INFO] [timer.py:197:stop] 0/722, RunningAvgSamplesPerSec=29.95225841448229, CurrSamplesPerSec=29.627440657474544, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:38:21,666] [INFO] [timer.py:197:stop] 0/723, RunningAvgSamplesPerSec=29.952156398368338, CurrSamplesPerSec=29.87888472982159, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:38:28,401] [INFO] [timer.py:197:stop] 0/724, RunningAvgSamplesPerSec=29.952301604573805, CurrSamplesPerSec=30.057363014082096, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:38:34,788] [INFO] [timer.py:197:stop] 0/725, RunningAvgSamplesPerSec=29.95252055258793, CurrSamplesPerSec=30.111440914719488, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.003, 'learning_rate': 1e-05, 'epoch': 18.0} -[2022-12-14 17:38:41,253] [INFO] [timer.py:197:stop] 0/726, RunningAvgSamplesPerSec=29.95304727602677, CurrSamplesPerSec=30.338779276332595, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:38:47,984] [INFO] [timer.py:197:stop] 0/727, RunningAvgSamplesPerSec=29.9530281659767, CurrSamplesPerSec=29.939198886462442, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:38:54,513] [INFO] [timer.py:197:stop] 0/728, RunningAvgSamplesPerSec=29.953204153657197, CurrSamplesPerSec=30.081341800269463, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:39:03,690] [INFO] [timer.py:197:stop] 0/729, RunningAvgSamplesPerSec=29.9541638766691, CurrSamplesPerSec=30.66753934985864, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:39:10,089] [INFO] [logging.py:68:log_dist] [Rank 0] step=730, skipped=2, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 17:39:10,090] [INFO] [timer.py:197:stop] 0/730, RunningAvgSamplesPerSec=29.95451769072158, CurrSamplesPerSec=30.213971530807484, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:39:16,679] [INFO] [timer.py:197:stop] 0/731, RunningAvgSamplesPerSec=29.954606372018862, CurrSamplesPerSec=30.01930599253374, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:39:22,817] [INFO] [timer.py:197:stop] 0/732, RunningAvgSamplesPerSec=29.95368503722233, CurrSamplesPerSec=29.29678194875574, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:39:28,801] [INFO] [timer.py:197:stop] 0/733, RunningAvgSamplesPerSec=29.95448835392155, CurrSamplesPerSec=30.552635556567267, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:39:35,194] [INFO] [timer.py:197:stop] 0/734, RunningAvgSamplesPerSec=29.95282915513884, CurrSamplesPerSec=28.787218275881965, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:39:41,324] [INFO] [timer.py:197:stop] 0/735, RunningAvgSamplesPerSec=29.95254771290634, CurrSamplesPerSec=29.747941219005803, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:39:48,285] [INFO] [timer.py:197:stop] 0/736, RunningAvgSamplesPerSec=29.950949985254304, CurrSamplesPerSec=28.82394364911677, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:39:54,708] [INFO] [timer.py:197:stop] 0/737, RunningAvgSamplesPerSec=29.9502971979077, CurrSamplesPerSec=29.478706095268215, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:40:00,613] [INFO] [timer.py:197:stop] 0/738, RunningAvgSamplesPerSec=29.948894295267273, CurrSamplesPerSec=28.95212604461684, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:40:07,244] [INFO] [timer.py:197:stop] 0/739, RunningAvgSamplesPerSec=29.948381909972525, CurrSamplesPerSec=29.575962281746328, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:40:13,616] [INFO] [logging.py:68:log_dist] [Rank 0] step=740, skipped=2, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 17:40:13,617] [INFO] [timer.py:197:stop] 0/740, RunningAvgSamplesPerSec=29.9474119593635, CurrSamplesPerSec=29.249246392025206, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:40:20,057] [INFO] [timer.py:197:stop] 0/741, RunningAvgSamplesPerSec=29.946815245098577, CurrSamplesPerSec=29.51283061356041, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:40:26,527] [INFO] [timer.py:197:stop] 0/742, RunningAvgSamplesPerSec=29.94679403806896, CurrSamplesPerSec=29.931130251570035, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:40:33,190] [INFO] [timer.py:197:stop] 0/743, RunningAvgSamplesPerSec=29.947691174570096, CurrSamplesPerSec=30.62664355443937, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:40:39,702] [INFO] [timer.py:197:stop] 0/744, RunningAvgSamplesPerSec=29.94750432068207, CurrSamplesPerSec=29.809683647095078, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:40:46,181] [INFO] [timer.py:197:stop] 0/745, RunningAvgSamplesPerSec=29.94815980539915, CurrSamplesPerSec=30.442569693956496, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:40:52,290] [INFO] [timer.py:197:stop] 0/746, RunningAvgSamplesPerSec=29.94904397544926, CurrSamplesPerSec=30.620735850046653, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:40:59,066] [INFO] [timer.py:197:stop] 0/747, RunningAvgSamplesPerSec=29.949681787507362, CurrSamplesPerSec=30.431864075054744, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:41:05,509] [INFO] [timer.py:197:stop] 0/748, RunningAvgSamplesPerSec=29.948930562571036, CurrSamplesPerSec=29.399548200457406, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:41:12,098] [INFO] [timer.py:197:stop] 0/749, RunningAvgSamplesPerSec=29.9490099526876, CurrSamplesPerSec=30.008352488519744, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:41:18,380] [INFO] [logging.py:68:log_dist] [Rank 0] step=750, skipped=2, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 17:41:18,380] [INFO] [timer.py:197:stop] 0/750, RunningAvgSamplesPerSec=29.94826059600099, CurrSamplesPerSec=29.39877544645957, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0029, 'learning_rate': 1e-05, 'epoch': 18.01} -[2022-12-14 17:41:24,572] [INFO] [timer.py:197:stop] 0/751, RunningAvgSamplesPerSec=29.949044591492033, CurrSamplesPerSec=30.547201311671962, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:41:30,854] [INFO] [timer.py:197:stop] 0/752, RunningAvgSamplesPerSec=29.94845207573533, CurrSamplesPerSec=29.511146682028613, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:41:33,166] [INFO] [timer.py:197:stop] 0/753, RunningAvgSamplesPerSec=29.949362421503853, CurrSamplesPerSec=30.648071544432522, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:41:35,348] [INFO] [timer.py:197:stop] 0/754, RunningAvgSamplesPerSec=29.949001848092923, CurrSamplesPerSec=29.680640893326785, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:41:37,493] [INFO] [timer.py:197:stop] 0/755, RunningAvgSamplesPerSec=29.94933358489095, CurrSamplesPerSec=30.200897874256878, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:41:39,631] [INFO] [timer.py:197:stop] 0/756, RunningAvgSamplesPerSec=29.949777502774428, CurrSamplesPerSec=30.287825642795063, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:41:41,762] [INFO] [timer.py:197:stop] 0/757, RunningAvgSamplesPerSec=29.95036625517746, CurrSamplesPerSec=30.400973246260317, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:41:43,896] [INFO] [timer.py:197:stop] 0/758, RunningAvgSamplesPerSec=29.950907492365626, CurrSamplesPerSec=30.36520146018776, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:41:46,029] [INFO] [timer.py:197:stop] 0/759, RunningAvgSamplesPerSec=29.951521207875523, CurrSamplesPerSec=30.42280021433681, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:41:47,909] [INFO] [logging.py:68:log_dist] [Rank 0] step=760, skipped=2, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 17:41:47,910] [INFO] [timer.py:197:stop] 0/760, RunningAvgSamplesPerSec=29.95670289263911, CurrSamplesPerSec=34.47114026873665, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:42:29,053] [INFO] [timer.py:197:stop] 0/761, RunningAvgSamplesPerSec=29.955623668517134, CurrSamplesPerSec=29.159345856726166, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:42:35,528] [INFO] [timer.py:197:stop] 0/762, RunningAvgSamplesPerSec=29.956153457536757, CurrSamplesPerSec=30.363741703389046, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:42:42,308] [INFO] [timer.py:197:stop] 0/763, RunningAvgSamplesPerSec=29.956826087527663, CurrSamplesPerSec=30.476911566185997, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:42:48,827] [INFO] [timer.py:197:stop] 0/764, RunningAvgSamplesPerSec=29.955522368631975, CurrSamplesPerSec=28.99523877676561, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:42:55,214] [INFO] [timer.py:197:stop] 0/765, RunningAvgSamplesPerSec=29.953454182943766, CurrSamplesPerSec=28.456367119215795, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:43:01,561] [INFO] [timer.py:197:stop] 0/766, RunningAvgSamplesPerSec=29.954140503284144, CurrSamplesPerSec=30.487132983231906, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:43:08,764] [INFO] [timer.py:197:stop] 0/767, RunningAvgSamplesPerSec=29.953917209852587, CurrSamplesPerSec=29.784288377684128, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:43:15,801] [INFO] [timer.py:197:stop] 0/768, RunningAvgSamplesPerSec=29.953624211751144, CurrSamplesPerSec=29.731147636852096, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:43:22,520] [INFO] [timer.py:197:stop] 0/769, RunningAvgSamplesPerSec=29.953060119084114, CurrSamplesPerSec=29.52711769138118, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:43:28,722] [INFO] [logging.py:68:log_dist] [Rank 0] step=770, skipped=2, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 17:43:28,723] [INFO] [timer.py:197:stop] 0/770, RunningAvgSamplesPerSec=29.952298716417154, CurrSamplesPerSec=29.379485867805617, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:43:35,081] [INFO] [timer.py:197:stop] 0/771, RunningAvgSamplesPerSec=29.951978236725544, CurrSamplesPerSec=29.707858483549952, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:43:41,819] [INFO] [timer.py:197:stop] 0/772, RunningAvgSamplesPerSec=29.95188148963192, CurrSamplesPerSec=29.877667556905042, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:43:47,781] [INFO] [timer.py:197:stop] 0/773, RunningAvgSamplesPerSec=29.952292383150183, CurrSamplesPerSec=30.27206253174538, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:43:54,232] [INFO] [timer.py:197:stop] 0/774, RunningAvgSamplesPerSec=29.951912052601106, CurrSamplesPerSec=29.661523841831258, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:44:00,576] [INFO] [timer.py:197:stop] 0/775, RunningAvgSamplesPerSec=29.951526131275514, CurrSamplesPerSec=29.656532999362092, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0029, 'learning_rate': 1e-05, 'epoch': 19.0} -[2022-12-14 17:44:07,652] [INFO] [timer.py:197:stop] 0/776, RunningAvgSamplesPerSec=29.951469300464325, CurrSamplesPerSec=29.907603505189464, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:44:13,962] [INFO] [timer.py:197:stop] 0/777, RunningAvgSamplesPerSec=29.951367977302183, CurrSamplesPerSec=29.87314892179299, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:44:19,941] [INFO] [timer.py:197:stop] 0/778, RunningAvgSamplesPerSec=29.95189599453701, CurrSamplesPerSec=30.36678502567713, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:44:26,274] [INFO] [timer.py:197:stop] 0/779, RunningAvgSamplesPerSec=29.95153057130426, CurrSamplesPerSec=29.67062506445379, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:44:32,842] [INFO] [logging.py:68:log_dist] [Rank 0] step=780, skipped=2, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 17:44:32,843] [INFO] [timer.py:197:stop] 0/780, RunningAvgSamplesPerSec=29.951923174484907, CurrSamplesPerSec=30.2601187789513, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:44:39,030] [INFO] [timer.py:197:stop] 0/781, RunningAvgSamplesPerSec=29.952499880001987, CurrSamplesPerSec=30.408008891921735, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:44:44,852] [INFO] [timer.py:197:stop] 0/782, RunningAvgSamplesPerSec=29.95231266691134, CurrSamplesPerSec=29.807181227154132, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:44:51,717] [INFO] [timer.py:197:stop] 0/783, RunningAvgSamplesPerSec=29.952279684377267, CurrSamplesPerSec=29.92657541381627, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:44:58,303] [INFO] [timer.py:197:stop] 0/784, RunningAvgSamplesPerSec=29.95153565040869, CurrSamplesPerSec=29.38151820433898, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:45:04,639] [INFO] [timer.py:197:stop] 0/785, RunningAvgSamplesPerSec=29.951720644552235, CurrSamplesPerSec=30.09708908548328, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:45:10,620] [INFO] [timer.py:197:stop] 0/786, RunningAvgSamplesPerSec=29.951473287414363, CurrSamplesPerSec=29.759038612906444, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:45:17,571] [INFO] [timer.py:197:stop] 0/787, RunningAvgSamplesPerSec=29.9505776856815, CurrSamplesPerSec=29.264529925897694, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:45:24,283] [INFO] [timer.py:197:stop] 0/788, RunningAvgSamplesPerSec=29.95060081262462, CurrSamplesPerSec=29.96876648818031, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:45:30,894] [INFO] [timer.py:197:stop] 0/789, RunningAvgSamplesPerSec=29.950743865270418, CurrSamplesPerSec=30.063607490135702, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:45:37,924] [INFO] [logging.py:68:log_dist] [Rank 0] step=790, skipped=2, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 17:45:37,925] [INFO] [timer.py:197:stop] 0/790, RunningAvgSamplesPerSec=29.95096170431202, CurrSamplesPerSec=30.123389259602273, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:45:44,122] [INFO] [timer.py:197:stop] 0/791, RunningAvgSamplesPerSec=29.951263697313006, CurrSamplesPerSec=30.191142499453672, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:45:50,226] [INFO] [timer.py:197:stop] 0/792, RunningAvgSamplesPerSec=29.950752865715643, CurrSamplesPerSec=29.55306518606724, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:45:52,655] [INFO] [timer.py:197:stop] 0/793, RunningAvgSamplesPerSec=29.950939786817106, CurrSamplesPerSec=30.099340043642762, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:45:54,840] [INFO] [timer.py:197:stop] 0/794, RunningAvgSamplesPerSec=29.95092876680191, CurrSamplesPerSec=29.942214474166928, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:45:56,981] [INFO] [timer.py:197:stop] 0/795, RunningAvgSamplesPerSec=29.951314396237077, CurrSamplesPerSec=30.259883411128012, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:45:59,098] [INFO] [timer.py:197:stop] 0/796, RunningAvgSamplesPerSec=29.952103802071427, CurrSamplesPerSec=30.591482491481614, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:46:01,241] [INFO] [timer.py:197:stop] 0/797, RunningAvgSamplesPerSec=29.952449034868724, CurrSamplesPerSec=30.22909887179499, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:46:03,370] [INFO] [timer.py:197:stop] 0/798, RunningAvgSamplesPerSec=29.953054183232208, CurrSamplesPerSec=30.44201041333066, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:46:05,543] [INFO] [timer.py:197:stop] 0/799, RunningAvgSamplesPerSec=29.9528890271097, CurrSamplesPerSec=29.8219999520067, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:46:07,465] [INFO] [logging.py:68:log_dist] [Rank 0] step=800, skipped=2, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 17:46:07,465] [INFO] [timer.py:197:stop] 0/800, RunningAvgSamplesPerSec=29.957094129557106, CurrSamplesPerSec=33.731335991607985, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0036, 'learning_rate': 1e-05, 'epoch': 19.01} -[2022-12-14 17:46:44,688] [INFO] [timer.py:197:stop] 0/801, RunningAvgSamplesPerSec=29.9559954778868, CurrSamplesPerSec=29.10423131988796, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:46:50,962] [INFO] [timer.py:197:stop] 0/802, RunningAvgSamplesPerSec=29.95631554067597, CurrSamplesPerSec=30.214250395552924, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:46:57,157] [INFO] [timer.py:197:stop] 0/803, RunningAvgSamplesPerSec=29.95649393934405, CurrSamplesPerSec=30.099896929318987, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:47:02,810] [INFO] [timer.py:197:stop] 0/804, RunningAvgSamplesPerSec=29.95693756854211, CurrSamplesPerSec=30.316555644876942, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:47:08,593] [INFO] [timer.py:197:stop] 0/805, RunningAvgSamplesPerSec=29.957274017680557, CurrSamplesPerSec=30.229561845241708, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:47:14,378] [INFO] [timer.py:197:stop] 0/806, RunningAvgSamplesPerSec=29.95815186780218, CurrSamplesPerSec=30.68007344415484, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:47:21,522] [INFO] [timer.py:197:stop] 0/807, RunningAvgSamplesPerSec=29.958575393637748, CurrSamplesPerSec=30.303009942228687, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:47:27,613] [INFO] [timer.py:197:stop] 0/808, RunningAvgSamplesPerSec=29.95766418662826, CurrSamplesPerSec=29.241695023043313, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:47:33,562] [INFO] [timer.py:197:stop] 0/809, RunningAvgSamplesPerSec=29.95581908801141, CurrSamplesPerSec=28.539090040099357, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:47:39,889] [INFO] [logging.py:68:log_dist] [Rank 0] step=810, skipped=2, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 17:47:39,890] [INFO] [timer.py:197:stop] 0/810, RunningAvgSamplesPerSec=29.955662202209513, CurrSamplesPerSec=29.829588866098618, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:47:46,112] [INFO] [timer.py:197:stop] 0/811, RunningAvgSamplesPerSec=29.95491253383742, CurrSamplesPerSec=29.361201060144996, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:47:52,382] [INFO] [timer.py:197:stop] 0/812, RunningAvgSamplesPerSec=29.952767668722668, CurrSamplesPerSec=28.312700102108437, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:47:58,929] [INFO] [timer.py:197:stop] 0/813, RunningAvgSamplesPerSec=29.95219977266448, CurrSamplesPerSec=29.499170033329438, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:48:05,292] [INFO] [timer.py:197:stop] 0/814, RunningAvgSamplesPerSec=29.952355309110107, CurrSamplesPerSec=30.079029494509342, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:48:10,999] [INFO] [timer.py:197:stop] 0/815, RunningAvgSamplesPerSec=29.952290595114402, CurrSamplesPerSec=29.89983497114668, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:48:17,034] [INFO] [timer.py:197:stop] 0/816, RunningAvgSamplesPerSec=29.952799059922476, CurrSamplesPerSec=30.371973132274196, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:48:22,887] [INFO] [timer.py:197:stop] 0/817, RunningAvgSamplesPerSec=29.952219132315776, CurrSamplesPerSec=29.48749138642878, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:48:29,128] [INFO] [timer.py:197:stop] 0/818, RunningAvgSamplesPerSec=29.95220312414883, CurrSamplesPerSec=29.93916215547325, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:48:34,888] [INFO] [timer.py:197:stop] 0/819, RunningAvgSamplesPerSec=29.952414888975042, CurrSamplesPerSec=30.12621891820308, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:48:40,895] [INFO] [logging.py:68:log_dist] [Rank 0] step=820, skipped=2, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 17:48:40,895] [INFO] [timer.py:197:stop] 0/820, RunningAvgSamplesPerSec=29.95192287163559, CurrSamplesPerSec=29.555274550841734, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:48:46,760] [INFO] [timer.py:197:stop] 0/821, RunningAvgSamplesPerSec=29.95225777600475, CurrSamplesPerSec=30.2287414387611, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:48:52,507] [INFO] [timer.py:197:stop] 0/822, RunningAvgSamplesPerSec=29.95225783267183, CurrSamplesPerSec=29.952304243080608, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:48:58,838] [INFO] [timer.py:197:stop] 0/823, RunningAvgSamplesPerSec=29.952800792626054, CurrSamplesPerSec=30.404754113642355, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:49:04,840] [INFO] [timer.py:197:stop] 0/824, RunningAvgSamplesPerSec=29.951745011363496, CurrSamplesPerSec=29.109356803971885, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:49:10,925] [INFO] [timer.py:197:stop] 0/825, RunningAvgSamplesPerSec=29.95175123781871, CurrSamplesPerSec=29.95687025980358, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0039, 'learning_rate': 1e-05, 'epoch': 20.0} -[2022-12-14 17:49:16,785] [INFO] [timer.py:197:stop] 0/826, RunningAvgSamplesPerSec=29.951944877397377, CurrSamplesPerSec=30.11216376340664, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:49:22,736] [INFO] [timer.py:197:stop] 0/827, RunningAvgSamplesPerSec=29.95234393118289, CurrSamplesPerSec=30.28481862590745, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:49:28,879] [INFO] [timer.py:197:stop] 0/828, RunningAvgSamplesPerSec=29.95245133301024, CurrSamplesPerSec=30.041321057113386, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:49:34,855] [INFO] [timer.py:197:stop] 0/829, RunningAvgSamplesPerSec=29.952159587444935, CurrSamplesPerSec=29.713103419267465, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:49:41,851] [INFO] [logging.py:68:log_dist] [Rank 0] step=830, skipped=2, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 17:49:41,851] [INFO] [timer.py:197:stop] 0/830, RunningAvgSamplesPerSec=29.951570381573674, CurrSamplesPerSec=29.472106809320344, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:49:47,889] [INFO] [timer.py:197:stop] 0/831, RunningAvgSamplesPerSec=29.949127887298303, CurrSamplesPerSec=28.05481504505161, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:49:54,217] [INFO] [timer.py:197:stop] 0/832, RunningAvgSamplesPerSec=29.949466147514606, CurrSamplesPerSec=30.232537468922544, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:49:56,573] [INFO] [timer.py:197:stop] 0/833, RunningAvgSamplesPerSec=29.94960797904154, CurrSamplesPerSec=30.067793246034746, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:49:58,678] [INFO] [timer.py:197:stop] 0/834, RunningAvgSamplesPerSec=29.950590321323443, CurrSamplesPerSec=30.789818083050186, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:50:00,816] [INFO] [timer.py:197:stop] 0/835, RunningAvgSamplesPerSec=29.951012792333742, CurrSamplesPerSec=30.306687782203632, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:50:02,989] [INFO] [timer.py:197:stop] 0/836, RunningAvgSamplesPerSec=29.950874008984027, CurrSamplesPerSec=29.835712520863623, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:50:05,178] [INFO] [timer.py:197:stop] 0/837, RunningAvgSamplesPerSec=29.950456250927598, CurrSamplesPerSec=29.606057190376877, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:50:07,372] [INFO] [timer.py:197:stop] 0/838, RunningAvgSamplesPerSec=29.949965033496984, CurrSamplesPerSec=29.54534639159077, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:50:09,539] [INFO] [timer.py:197:stop] 0/839, RunningAvgSamplesPerSec=29.95027161216249, CurrSamplesPerSec=30.20878626460527, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:50:11,477] [INFO] [logging.py:68:log_dist] [Rank 0] step=840, skipped=2, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 17:50:11,478] [INFO] [timer.py:197:stop] 0/840, RunningAvgSamplesPerSec=29.954046707856005, CurrSamplesPerSec=33.486918187070096, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:50:49,669] [INFO] [timer.py:197:stop] 0/841, RunningAvgSamplesPerSec=29.954186845075274, CurrSamplesPerSec=30.072084602975444, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:50:55,696] [INFO] [timer.py:197:stop] 0/842, RunningAvgSamplesPerSec=29.95285551021903, CurrSamplesPerSec=28.876068536966276, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:51:01,745] [INFO] [timer.py:197:stop] 0/843, RunningAvgSamplesPerSec=29.950942393634865, CurrSamplesPerSec=28.42585068000712, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:51:07,788] [INFO] [timer.py:197:stop] 0/844, RunningAvgSamplesPerSec=29.951350359366927, CurrSamplesPerSec=30.298430141375878, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:51:13,813] [INFO] [timer.py:197:stop] 0/845, RunningAvgSamplesPerSec=29.951228804527606, CurrSamplesPerSec=29.84922859832237, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:51:20,297] [INFO] [timer.py:197:stop] 0/846, RunningAvgSamplesPerSec=29.950233577544317, CurrSamplesPerSec=29.134144908031725, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:51:26,631] [INFO] [timer.py:197:stop] 0/847, RunningAvgSamplesPerSec=29.950584014063285, CurrSamplesPerSec=30.249305873506312, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:51:32,646] [INFO] [timer.py:197:stop] 0/848, RunningAvgSamplesPerSec=29.95042453540347, CurrSamplesPerSec=29.8162694021739, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:51:39,044] [INFO] [timer.py:197:stop] 0/849, RunningAvgSamplesPerSec=29.950200121006198, CurrSamplesPerSec=29.76154285456433, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:51:44,803] [INFO] [logging.py:68:log_dist] [Rank 0] step=850, skipped=2, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 17:51:44,803] [INFO] [timer.py:197:stop] 0/850, RunningAvgSamplesPerSec=29.950414852915838, CurrSamplesPerSec=30.133405326644393, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0053, 'learning_rate': 1e-05, 'epoch': 21.0} -[2022-12-14 17:51:50,648] [INFO] [timer.py:197:stop] 0/851, RunningAvgSamplesPerSec=29.949528569857904, CurrSamplesPerSec=29.216380213806485, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:51:57,058] [INFO] [timer.py:197:stop] 0/852, RunningAvgSamplesPerSec=29.949594961834055, CurrSamplesPerSec=30.006068160445086, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:52:03,273] [INFO] [timer.py:197:stop] 0/853, RunningAvgSamplesPerSec=29.948827953975787, CurrSamplesPerSec=29.31077735257529, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:52:09,485] [INFO] [timer.py:197:stop] 0/854, RunningAvgSamplesPerSec=29.94901054509995, CurrSamplesPerSec=30.105206940913103, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:52:15,331] [INFO] [timer.py:197:stop] 0/855, RunningAvgSamplesPerSec=29.949170009878827, CurrSamplesPerSec=30.085653885389696, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:52:21,613] [INFO] [timer.py:197:stop] 0/856, RunningAvgSamplesPerSec=29.950471130268827, CurrSamplesPerSec=31.1030887173733, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:52:27,406] [INFO] [timer.py:197:stop] 0/857, RunningAvgSamplesPerSec=29.950295395109798, CurrSamplesPerSec=29.800966716028636, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:52:33,345] [INFO] [timer.py:197:stop] 0/858, RunningAvgSamplesPerSec=29.949980279485672, CurrSamplesPerSec=29.682961278251067, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:52:39,272] [INFO] [timer.py:197:stop] 0/859, RunningAvgSamplesPerSec=29.949551530391194, CurrSamplesPerSec=29.586990409636414, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:52:45,232] [INFO] [logging.py:68:log_dist] [Rank 0] step=860, skipped=2, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 17:52:45,233] [INFO] [timer.py:197:stop] 0/860, RunningAvgSamplesPerSec=29.949359192083797, CurrSamplesPerSec=29.78542854949032, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:52:51,066] [INFO] [timer.py:197:stop] 0/861, RunningAvgSamplesPerSec=29.94974395909321, CurrSamplesPerSec=30.283557906721363, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:52:57,316] [INFO] [timer.py:197:stop] 0/862, RunningAvgSamplesPerSec=29.94979723780498, CurrSamplesPerSec=29.995633775762638, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:53:04,554] [INFO] [timer.py:197:stop] 0/863, RunningAvgSamplesPerSec=29.950096432458448, CurrSamplesPerSec=30.20963618627385, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:53:11,041] [INFO] [timer.py:197:stop] 0/864, RunningAvgSamplesPerSec=29.94945794792266, CurrSamplesPerSec=29.409642809875777, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:53:16,902] [INFO] [timer.py:197:stop] 0/865, RunningAvgSamplesPerSec=29.949901970650913, CurrSamplesPerSec=30.337610062438173, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:53:22,806] [INFO] [timer.py:197:stop] 0/866, RunningAvgSamplesPerSec=29.950423063374863, CurrSamplesPerSec=30.406989330867265, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:53:29,127] [INFO] [timer.py:197:stop] 0/867, RunningAvgSamplesPerSec=29.950203196940272, CurrSamplesPerSec=29.761437265378305, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:53:35,173] [INFO] [timer.py:197:stop] 0/868, RunningAvgSamplesPerSec=29.950589897225772, CurrSamplesPerSec=30.288867986864222, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:53:41,067] [INFO] [timer.py:197:stop] 0/869, RunningAvgSamplesPerSec=29.95005472173191, CurrSamplesPerSec=29.493663325738215, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:53:47,185] [INFO] [logging.py:68:log_dist] [Rank 0] step=870, skipped=2, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 17:53:47,186] [INFO] [timer.py:197:stop] 0/870, RunningAvgSamplesPerSec=29.949660479420707, CurrSamplesPerSec=29.611713746823284, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:53:53,366] [INFO] [timer.py:197:stop] 0/871, RunningAvgSamplesPerSec=29.949622342764865, CurrSamplesPerSec=29.916556314713592, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:53:59,519] [INFO] [timer.py:197:stop] 0/872, RunningAvgSamplesPerSec=29.949249417672515, CurrSamplesPerSec=29.628650608675766, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:54:02,183] [INFO] [timer.py:197:stop] 0/873, RunningAvgSamplesPerSec=29.949354886432694, CurrSamplesPerSec=30.041395021374807, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:54:04,348] [INFO] [timer.py:197:stop] 0/874, RunningAvgSamplesPerSec=29.94976765544327, CurrSamplesPerSec=30.313662733676008, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:54:06,487] [INFO] [timer.py:197:stop] 0/875, RunningAvgSamplesPerSec=29.950135298731468, CurrSamplesPerSec=30.274192922639127, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0057, 'learning_rate': 1e-05, 'epoch': 21.01} -[2022-12-14 17:54:08,616] [INFO] [timer.py:197:stop] 0/876, RunningAvgSamplesPerSec=29.950693765863818, CurrSamplesPerSec=30.446312569739955, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:54:10,737] [INFO] [timer.py:197:stop] 0/877, RunningAvgSamplesPerSec=29.951347173881484, CurrSamplesPerSec=30.533539062468897, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:54:12,986] [INFO] [timer.py:197:stop] 0/878, RunningAvgSamplesPerSec=29.949957572129005, CurrSamplesPerSec=28.78154518899172, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:54:15,119] [INFO] [timer.py:197:stop] 0/879, RunningAvgSamplesPerSec=29.950440072072663, CurrSamplesPerSec=30.379167261287105, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:54:17,051] [INFO] [logging.py:68:log_dist] [Rank 0] step=880, skipped=2, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 17:54:17,051] [INFO] [timer.py:197:stop] 0/880, RunningAvgSamplesPerSec=29.954165091141252, CurrSamplesPerSec=33.6214184736314, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:55:11,289] [INFO] [timer.py:197:stop] 0/881, RunningAvgSamplesPerSec=29.953546479296797, CurrSamplesPerSec=29.420089359937826, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:55:19,115] [INFO] [timer.py:197:stop] 0/882, RunningAvgSamplesPerSec=29.95361357527779, CurrSamplesPerSec=30.012707428170206, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:55:26,279] [INFO] [timer.py:197:stop] 0/883, RunningAvgSamplesPerSec=29.95379130120383, CurrSamplesPerSec=30.111011950832655, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:55:33,202] [INFO] [timer.py:197:stop] 0/884, RunningAvgSamplesPerSec=29.953826965604925, CurrSamplesPerSec=29.985280333717558, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:55:39,777] [INFO] [timer.py:197:stop] 0/885, RunningAvgSamplesPerSec=29.954319772565878, CurrSamplesPerSec=30.39538286726253, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:55:46,156] [INFO] [timer.py:197:stop] 0/886, RunningAvgSamplesPerSec=29.95399446259059, CurrSamplesPerSec=29.66947726832376, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:55:53,223] [INFO] [timer.py:197:stop] 0/887, RunningAvgSamplesPerSec=29.953944133876423, CurrSamplesPerSec=29.90951960892786, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:55:59,873] [INFO] [timer.py:197:stop] 0/888, RunningAvgSamplesPerSec=29.953101911801618, CurrSamplesPerSec=29.22585301490071, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:56:06,625] [INFO] [timer.py:197:stop] 0/889, RunningAvgSamplesPerSec=29.953683075308145, CurrSamplesPerSec=30.477610543951222, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:56:12,957] [INFO] [logging.py:68:log_dist] [Rank 0] step=890, skipped=2, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 17:56:12,958] [INFO] [timer.py:197:stop] 0/890, RunningAvgSamplesPerSec=29.953470104868682, CurrSamplesPerSec=29.7657505334174, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:56:19,935] [INFO] [timer.py:197:stop] 0/891, RunningAvgSamplesPerSec=29.952973067015495, CurrSamplesPerSec=29.51801988838296, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:56:27,052] [INFO] [timer.py:197:stop] 0/892, RunningAvgSamplesPerSec=29.95435270914646, CurrSamplesPerSec=31.233280039764814, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:56:33,358] [INFO] [timer.py:197:stop] 0/893, RunningAvgSamplesPerSec=29.95329224207347, CurrSamplesPerSec=29.038338741784827, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:56:39,964] [INFO] [timer.py:197:stop] 0/894, RunningAvgSamplesPerSec=29.952973054472352, CurrSamplesPerSec=29.671254750338427, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:56:46,201] [INFO] [timer.py:197:stop] 0/895, RunningAvgSamplesPerSec=29.952907414192456, CurrSamplesPerSec=29.894470643197483, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:56:53,290] [INFO] [timer.py:197:stop] 0/896, RunningAvgSamplesPerSec=29.95252215839143, CurrSamplesPerSec=29.612399734891508, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:56:59,761] [INFO] [timer.py:197:stop] 0/897, RunningAvgSamplesPerSec=29.952765349182773, CurrSamplesPerSec=30.171769338291462, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:57:06,171] [INFO] [timer.py:197:stop] 0/898, RunningAvgSamplesPerSec=29.953207270086363, CurrSamplesPerSec=30.35402501818287, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:57:12,999] [INFO] [timer.py:197:stop] 0/899, RunningAvgSamplesPerSec=29.953641884103057, CurrSamplesPerSec=30.348191129459316, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:57:19,676] [INFO] [logging.py:68:log_dist] [Rank 0] step=900, skipped=2, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 17:57:19,677] [INFO] [timer.py:197:stop] 0/900, RunningAvgSamplesPerSec=29.952889294766806, CurrSamplesPerSec=29.292712195976844, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0065, 'learning_rate': 1e-05, 'epoch': 22.0} -[2022-12-14 17:57:26,632] [INFO] [timer.py:197:stop] 0/901, RunningAvgSamplesPerSec=29.953470398237442, CurrSamplesPerSec=30.484564003961562, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:57:33,096] [INFO] [timer.py:197:stop] 0/902, RunningAvgSamplesPerSec=29.95314963482249, CurrSamplesPerSec=29.667536055276006, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:57:40,008] [INFO] [timer.py:197:stop] 0/903, RunningAvgSamplesPerSec=29.953133546752454, CurrSamplesPerSec=29.938661287341844, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:57:46,454] [INFO] [timer.py:197:stop] 0/904, RunningAvgSamplesPerSec=29.9537271561121, CurrSamplesPerSec=30.498303727527514, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:57:52,974] [INFO] [timer.py:197:stop] 0/905, RunningAvgSamplesPerSec=29.953721120006442, CurrSamplesPerSec=29.948277543253592, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:57:59,718] [INFO] [timer.py:197:stop] 0/906, RunningAvgSamplesPerSec=29.95353723256386, CurrSamplesPerSec=29.788403321656283, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:58:08,224] [INFO] [timer.py:197:stop] 0/907, RunningAvgSamplesPerSec=29.953934793520702, CurrSamplesPerSec=30.317699267366336, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:58:15,207] [INFO] [timer.py:197:stop] 0/908, RunningAvgSamplesPerSec=29.953055657284487, CurrSamplesPerSec=29.17804605087527, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:58:21,510] [INFO] [timer.py:197:stop] 0/909, RunningAvgSamplesPerSec=29.953695509358724, CurrSamplesPerSec=30.54485506651049, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:58:28,118] [INFO] [logging.py:68:log_dist] [Rank 0] step=910, skipped=2, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 17:58:28,119] [INFO] [timer.py:197:stop] 0/910, RunningAvgSamplesPerSec=29.954260947460142, CurrSamplesPerSec=30.47605691912094, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:58:34,402] [INFO] [timer.py:197:stop] 0/911, RunningAvgSamplesPerSec=29.954027693899967, CurrSamplesPerSec=29.743722094705287, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:58:40,855] [INFO] [timer.py:197:stop] 0/912, RunningAvgSamplesPerSec=29.954401647753116, CurrSamplesPerSec=30.298231794426336, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:58:43,451] [INFO] [timer.py:197:stop] 0/913, RunningAvgSamplesPerSec=29.95406443827387, CurrSamplesPerSec=29.650318914217074, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:58:45,604] [INFO] [timer.py:197:stop] 0/914, RunningAvgSamplesPerSec=29.95421671477354, CurrSamplesPerSec=30.09358676469632, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:58:47,816] [INFO] [timer.py:197:stop] 0/915, RunningAvgSamplesPerSec=29.953870262213606, CurrSamplesPerSec=29.64120723158761, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:58:49,989] [INFO] [timer.py:197:stop] 0/916, RunningAvgSamplesPerSec=29.954001900258636, CurrSamplesPerSec=30.074672135261682, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:58:52,105] [INFO] [timer.py:197:stop] 0/917, RunningAvgSamplesPerSec=29.954699780727903, CurrSamplesPerSec=30.606456379588508, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:58:54,244] [INFO] [timer.py:197:stop] 0/918, RunningAvgSamplesPerSec=29.955185626090998, CurrSamplesPerSec=30.406438245250133, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:58:56,407] [INFO] [timer.py:197:stop] 0/919, RunningAvgSamplesPerSec=29.955163305629718, CurrSamplesPerSec=29.934731723658185, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:58:58,289] [INFO] [logging.py:68:log_dist] [Rank 0] step=920, skipped=2, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 17:58:58,290] [INFO] [timer.py:197:stop] 0/920, RunningAvgSamplesPerSec=29.959410156205593, CurrSamplesPerSec=34.43635567800256, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:59:40,810] [INFO] [timer.py:197:stop] 0/921, RunningAvgSamplesPerSec=29.95969549844748, CurrSamplesPerSec=30.223952651491906, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:59:47,291] [INFO] [timer.py:197:stop] 0/922, RunningAvgSamplesPerSec=29.95953106272203, CurrSamplesPerSec=29.809173860807547, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 17:59:54,292] [INFO] [timer.py:197:stop] 0/923, RunningAvgSamplesPerSec=29.959756535717027, CurrSamplesPerSec=30.168639525745096, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:00:00,707] [INFO] [timer.py:197:stop] 0/924, RunningAvgSamplesPerSec=29.959917196263245, CurrSamplesPerSec=30.10862078557418, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:00:07,341] [INFO] [timer.py:197:stop] 0/925, RunningAvgSamplesPerSec=29.959160002966673, CurrSamplesPerSec=29.276942599577808, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.008, 'learning_rate': 1e-05, 'epoch': 23.0} -[2022-12-14 18:00:14,728] [INFO] [timer.py:197:stop] 0/926, RunningAvgSamplesPerSec=29.95880628755727, CurrSamplesPerSec=29.635850226241597, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:00:21,440] [INFO] [timer.py:197:stop] 0/927, RunningAvgSamplesPerSec=29.95896295108542, CurrSamplesPerSec=30.104423655497463, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:00:27,905] [INFO] [timer.py:197:stop] 0/928, RunningAvgSamplesPerSec=29.959170190690852, CurrSamplesPerSec=30.15210265726923, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:00:35,152] [INFO] [timer.py:197:stop] 0/929, RunningAvgSamplesPerSec=29.95755155933347, CurrSamplesPerSec=28.53019055203622, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:00:41,642] [INFO] [logging.py:68:log_dist] [Rank 0] step=930, skipped=2, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 18:00:41,643] [INFO] [timer.py:197:stop] 0/930, RunningAvgSamplesPerSec=29.958005442460372, CurrSamplesPerSec=30.384755115827144, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:00:48,797] [INFO] [timer.py:197:stop] 0/931, RunningAvgSamplesPerSec=29.957743687341846, CurrSamplesPerSec=29.71679078024391, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:00:54,871] [INFO] [timer.py:197:stop] 0/932, RunningAvgSamplesPerSec=29.957646938889162, CurrSamplesPerSec=29.86803676532276, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:01:01,253] [INFO] [timer.py:197:stop] 0/933, RunningAvgSamplesPerSec=29.956905982119917, CurrSamplesPerSec=29.283326984359707, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:01:08,148] [INFO] [timer.py:197:stop] 0/934, RunningAvgSamplesPerSec=29.956792503584186, CurrSamplesPerSec=29.851515665082655, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:01:14,275] [INFO] [timer.py:197:stop] 0/935, RunningAvgSamplesPerSec=29.957045139023403, CurrSamplesPerSec=30.194368684161713, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:01:21,235] [INFO] [timer.py:197:stop] 0/936, RunningAvgSamplesPerSec=29.957034958082044, CurrSamplesPerSec=29.947539153968812, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:01:27,367] [INFO] [timer.py:197:stop] 0/937, RunningAvgSamplesPerSec=29.95740706492968, CurrSamplesPerSec=30.30903864614626, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:01:33,732] [INFO] [timer.py:197:stop] 0/938, RunningAvgSamplesPerSec=29.956604779936853, CurrSamplesPerSec=29.22481255150955, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:01:40,366] [INFO] [timer.py:197:stop] 0/939, RunningAvgSamplesPerSec=29.956501799544046, CurrSamplesPerSec=29.86042163486685, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:01:46,898] [INFO] [logging.py:68:log_dist] [Rank 0] step=940, skipped=2, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 18:01:46,899] [INFO] [timer.py:197:stop] 0/940, RunningAvgSamplesPerSec=29.956460860532417, CurrSamplesPerSec=29.918150116669832, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:01:53,564] [INFO] [timer.py:197:stop] 0/941, RunningAvgSamplesPerSec=29.956592551338247, CurrSamplesPerSec=30.08063054300245, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:01:59,924] [INFO] [timer.py:197:stop] 0/942, RunningAvgSamplesPerSec=29.95622625561409, CurrSamplesPerSec=29.61618304002401, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:02:07,011] [INFO] [timer.py:197:stop] 0/943, RunningAvgSamplesPerSec=29.9559598304527, CurrSamplesPerSec=29.70759875116189, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:02:13,678] [INFO] [timer.py:197:stop] 0/944, RunningAvgSamplesPerSec=29.95595621192119, CurrSamplesPerSec=29.952551561182112, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:02:20,082] [INFO] [timer.py:197:stop] 0/945, RunningAvgSamplesPerSec=29.956747738033258, CurrSamplesPerSec=30.721418034203392, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:02:26,288] [INFO] [timer.py:197:stop] 0/946, RunningAvgSamplesPerSec=29.95653459793609, CurrSamplesPerSec=29.756884444470558, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:02:33,248] [INFO] [timer.py:197:stop] 0/947, RunningAvgSamplesPerSec=29.956903376264254, CurrSamplesPerSec=30.309127623414344, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:02:40,140] [INFO] [timer.py:197:stop] 0/948, RunningAvgSamplesPerSec=29.95686131181474, CurrSamplesPerSec=29.917163139790546, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:02:46,424] [INFO] [timer.py:197:stop] 0/949, RunningAvgSamplesPerSec=29.956682417435267, CurrSamplesPerSec=29.78840001602413, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:02:53,332] [INFO] [logging.py:68:log_dist] [Rank 0] step=950, skipped=2, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 18:02:53,333] [INFO] [timer.py:197:stop] 0/950, RunningAvgSamplesPerSec=29.954972354423592, CurrSamplesPerSec=28.41868533723887, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0066, 'learning_rate': 1e-05, 'epoch': 23.01} -[2022-12-14 18:02:59,815] [INFO] [timer.py:197:stop] 0/951, RunningAvgSamplesPerSec=29.95480874504778, CurrSamplesPerSec=29.800506852545166, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:03:06,413] [INFO] [timer.py:197:stop] 0/952, RunningAvgSamplesPerSec=29.954411995632682, CurrSamplesPerSec=29.582575563633974, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:03:09,068] [INFO] [timer.py:197:stop] 0/953, RunningAvgSamplesPerSec=29.95431972308028, CurrSamplesPerSec=29.866916845086056, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:03:11,228] [INFO] [timer.py:197:stop] 0/954, RunningAvgSamplesPerSec=29.954343384286354, CurrSamplesPerSec=29.976862125206104, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:03:13,368] [INFO] [timer.py:197:stop] 0/955, RunningAvgSamplesPerSec=29.954671898814677, CurrSamplesPerSec=30.27072094904895, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:03:15,508] [INFO] [timer.py:197:stop] 0/956, RunningAvgSamplesPerSec=29.955008666737065, CurrSamplesPerSec=30.279427992927463, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:03:17,636] [INFO] [timer.py:197:stop] 0/957, RunningAvgSamplesPerSec=29.955512913341597, CurrSamplesPerSec=30.44442375263249, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:03:19,775] [INFO] [timer.py:197:stop] 0/958, RunningAvgSamplesPerSec=29.955950197003858, CurrSamplesPerSec=30.37946637435008, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:03:21,908] [INFO] [timer.py:197:stop] 0/959, RunningAvgSamplesPerSec=29.956491352308486, CurrSamplesPerSec=30.48293697690314, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:03:23,776] [INFO] [logging.py:68:log_dist] [Rank 0] step=960, skipped=2, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 18:03:23,777] [INFO] [timer.py:197:stop] 0/960, RunningAvgSamplesPerSec=29.960755862270865, CurrSamplesPerSec=34.68625311056088, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:04:04,169] [INFO] [timer.py:197:stop] 0/961, RunningAvgSamplesPerSec=29.95987344970405, CurrSamplesPerSec=29.137743731789282, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:04:10,707] [INFO] [timer.py:197:stop] 0/962, RunningAvgSamplesPerSec=29.959653623653036, CurrSamplesPerSec=29.75031500247425, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:04:18,419] [INFO] [timer.py:197:stop] 0/963, RunningAvgSamplesPerSec=29.959806095616543, CurrSamplesPerSec=30.106898571322503, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:04:25,072] [INFO] [timer.py:197:stop] 0/964, RunningAvgSamplesPerSec=29.959178638153322, CurrSamplesPerSec=29.368100980539637, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:04:31,342] [INFO] [timer.py:197:stop] 0/965, RunningAvgSamplesPerSec=29.958862764659536, CurrSamplesPerSec=29.65804678723361, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:04:37,789] [INFO] [timer.py:197:stop] 0/966, RunningAvgSamplesPerSec=29.959215911279077, CurrSamplesPerSec=30.30320493086781, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:04:44,209] [INFO] [timer.py:197:stop] 0/967, RunningAvgSamplesPerSec=29.9580056082966, CurrSamplesPerSec=28.83505294948248, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:04:50,659] [INFO] [timer.py:197:stop] 0/968, RunningAvgSamplesPerSec=29.958086020662837, CurrSamplesPerSec=30.035885680884235, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:04:57,338] [INFO] [timer.py:197:stop] 0/969, RunningAvgSamplesPerSec=29.958378542797558, CurrSamplesPerSec=30.24364846154635, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:05:03,574] [INFO] [logging.py:68:log_dist] [Rank 0] step=970, skipped=2, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 18:05:03,574] [INFO] [timer.py:197:stop] 0/970, RunningAvgSamplesPerSec=29.958545909914733, CurrSamplesPerSec=30.121269899405554, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:05:10,253] [INFO] [timer.py:197:stop] 0/971, RunningAvgSamplesPerSec=29.95881101092534, CurrSamplesPerSec=30.217648197762117, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:05:16,394] [INFO] [timer.py:197:stop] 0/972, RunningAvgSamplesPerSec=29.9586092215385, CurrSamplesPerSec=29.76434454131486, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:05:23,295] [INFO] [timer.py:197:stop] 0/973, RunningAvgSamplesPerSec=29.95902700459004, CurrSamplesPerSec=30.36983926588914, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:05:29,602] [INFO] [timer.py:197:stop] 0/974, RunningAvgSamplesPerSec=29.959059005988113, CurrSamplesPerSec=29.990164659337623, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:05:35,723] [INFO] [timer.py:197:stop] 0/975, RunningAvgSamplesPerSec=29.95842694641888, CurrSamplesPerSec=29.356423127662467, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0054, 'learning_rate': 1e-05, 'epoch': 24.0} -[2022-12-14 18:05:42,583] [INFO] [timer.py:197:stop] 0/976, RunningAvgSamplesPerSec=29.958490123773252, CurrSamplesPerSec=30.020088212062344, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:05:48,761] [INFO] [timer.py:197:stop] 0/977, RunningAvgSamplesPerSec=29.95878100843842, CurrSamplesPerSec=30.244810443942836, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:05:55,158] [INFO] [timer.py:197:stop] 0/978, RunningAvgSamplesPerSec=29.958751867098993, CurrSamplesPerSec=29.930366009839144, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:06:01,449] [INFO] [timer.py:197:stop] 0/979, RunningAvgSamplesPerSec=29.958584479165886, CurrSamplesPerSec=29.796100822725517, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:06:07,466] [INFO] [logging.py:68:log_dist] [Rank 0] step=980, skipped=2, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 18:06:07,467] [INFO] [timer.py:197:stop] 0/980, RunningAvgSamplesPerSec=29.958040742150846, CurrSamplesPerSec=29.4360749130303, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:06:14,213] [INFO] [timer.py:197:stop] 0/981, RunningAvgSamplesPerSec=29.958042418619012, CurrSamplesPerSec=29.959682094311425, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:06:20,484] [INFO] [timer.py:197:stop] 0/982, RunningAvgSamplesPerSec=29.95754112803186, CurrSamplesPerSec=29.47469568157215, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:06:26,974] [INFO] [timer.py:197:stop] 0/983, RunningAvgSamplesPerSec=29.95819711097356, CurrSamplesPerSec=30.61517260445392, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:06:33,902] [INFO] [timer.py:197:stop] 0/984, RunningAvgSamplesPerSec=29.95826873303776, CurrSamplesPerSec=30.028695318133483, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:06:40,199] [INFO] [timer.py:197:stop] 0/985, RunningAvgSamplesPerSec=29.95871501821471, CurrSamplesPerSec=30.40347994581103, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:06:46,017] [INFO] [timer.py:197:stop] 0/986, RunningAvgSamplesPerSec=29.958173504465343, CurrSamplesPerSec=29.435167901395126, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:06:52,699] [INFO] [timer.py:197:stop] 0/987, RunningAvgSamplesPerSec=29.95803624905819, CurrSamplesPerSec=29.82358369465395, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:06:59,232] [INFO] [timer.py:197:stop] 0/988, RunningAvgSamplesPerSec=29.95784531175417, CurrSamplesPerSec=29.770946595492006, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:07:05,096] [INFO] [timer.py:197:stop] 0/989, RunningAvgSamplesPerSec=29.957517834637336, CurrSamplesPerSec=29.638071986711886, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:07:11,928] [INFO] [logging.py:68:log_dist] [Rank 0] step=990, skipped=2, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 18:07:11,929] [INFO] [timer.py:197:stop] 0/990, RunningAvgSamplesPerSec=29.95667237703617, CurrSamplesPerSec=29.144842787924468, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:07:18,510] [INFO] [timer.py:197:stop] 0/991, RunningAvgSamplesPerSec=29.956306177584285, CurrSamplesPerSec=29.598823083944364, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:07:24,958] [INFO] [timer.py:197:stop] 0/992, RunningAvgSamplesPerSec=29.955894695923767, CurrSamplesPerSec=29.554399225433507, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:07:27,471] [INFO] [timer.py:197:stop] 0/993, RunningAvgSamplesPerSec=29.955745975347195, CurrSamplesPerSec=29.809233445317886, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:07:29,577] [INFO] [timer.py:197:stop] 0/994, RunningAvgSamplesPerSec=29.956542970479035, CurrSamplesPerSec=30.767775356135235, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:07:31,703] [INFO] [timer.py:197:stop] 0/995, RunningAvgSamplesPerSec=29.957044204467838, CurrSamplesPerSec=30.462669094279722, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:07:33,846] [INFO] [timer.py:197:stop] 0/996, RunningAvgSamplesPerSec=29.957340558516023, CurrSamplesPerSec=30.254542569247434, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:07:36,017] [INFO] [timer.py:197:stop] 0/997, RunningAvgSamplesPerSec=29.957220845470836, CurrSamplesPerSec=29.838697346073946, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:07:38,189] [INFO] [timer.py:197:stop] 0/998, RunningAvgSamplesPerSec=29.95708638317248, CurrSamplesPerSec=29.82389184823155, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:07:40,392] [INFO] [timer.py:197:stop] 0/999, RunningAvgSamplesPerSec=29.956824777237212, CurrSamplesPerSec=29.698514263637744, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:07:42,328] [INFO] [logging.py:68:log_dist] [Rank 0] step=1000, skipped=2, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 18:07:42,328] [INFO] [timer.py:197:stop] 0/1000, RunningAvgSamplesPerSec=29.95998316265552, CurrSamplesPerSec=33.479141595444545, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0048, 'learning_rate': 1e-05, 'epoch': 24.01} -{'eval_loss': 0.5263671875, 'eval_wer': 27.20744979243801, 'eval_runtime': 236.2022, 'eval_samples_per_second': 2.82, 'eval_steps_per_second': 0.089, 'epoch': 24.01} -[2022-12-14 18:11:39,119] [INFO] [logging.py:68:log_dist] [Rank 0] [Torch] Checkpoint global_step1000 is begin to save! -[2022-12-14 18:11:39,124] [INFO] [logging.py:68:log_dist] [Rank 0] Saving model checkpoint: ./checkpoint-1000/global_step1000/mp_rank_00_model_states.pt -[2022-12-14 18:11:39,125] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving ./checkpoint-1000/global_step1000/mp_rank_00_model_states.pt... -[2022-12-14 18:11:39,688] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved ./checkpoint-1000/global_step1000/mp_rank_00_model_states.pt. -[2022-12-14 18:11:39,689] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving ./checkpoint-1000/global_step1000/zero_pp_rank_0_mp_rank_00_optim_states.pt... -[2022-12-14 18:11:41,932] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved ./checkpoint-1000/global_step1000/zero_pp_rank_0_mp_rank_00_optim_states.pt. -[2022-12-14 18:11:41,932] [INFO] [engine.py:3269:_save_zero_checkpoint] zero checkpoint saved ./checkpoint-1000/global_step1000/zero_pp_rank_0_mp_rank_00_optim_states.pt -[2022-12-14 18:11:41,932] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! -[2022-12-14 18:12:31,899] [INFO] [timer.py:197:stop] 0/1001, RunningAvgSamplesPerSec=29.958827385821763, CurrSamplesPerSec=28.84816723880731, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:12:38,354] [INFO] [timer.py:197:stop] 0/1002, RunningAvgSamplesPerSec=29.95827266733437, CurrSamplesPerSec=29.414183456049738, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:12:44,941] [INFO] [stage_1_and_2.py:1765:step] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536.0, reducing to 65536.0 -[2022-12-14 18:12:44,942] [INFO] [timer.py:197:stop] 0/1003, RunningAvgSamplesPerSec=29.960384359569556, CurrSamplesPerSec=32.23237287785544, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:12:51,030] [INFO] [stage_1_and_2.py:1765:step] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536.0, reducing to 32768.0 -[2022-12-14 18:12:51,031] [INFO] [timer.py:197:stop] 0/1004, RunningAvgSamplesPerSec=29.961131679148025, CurrSamplesPerSec=30.728374157141822, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:12:57,126] [INFO] [timer.py:197:stop] 0/1005, RunningAvgSamplesPerSec=29.961111833071634, CurrSamplesPerSec=29.941239267500233, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:13:03,414] [INFO] [timer.py:197:stop] 0/1006, RunningAvgSamplesPerSec=29.960573837851562, CurrSamplesPerSec=29.430520747256686, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:13:09,851] [INFO] [timer.py:197:stop] 0/1007, RunningAvgSamplesPerSec=29.9603349560184, CurrSamplesPerSec=29.72240416731902, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:13:16,075] [INFO] [timer.py:197:stop] 0/1008, RunningAvgSamplesPerSec=29.95977166468369, CurrSamplesPerSec=29.404172694054502, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:13:22,446] [INFO] [timer.py:197:stop] 0/1009, RunningAvgSamplesPerSec=29.959891580236373, CurrSamplesPerSec=30.081014820002803, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:13:28,423] [INFO] [logging.py:68:log_dist] [Rank 0] step=1010, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 18:13:28,424] [INFO] [timer.py:197:stop] 0/1010, RunningAvgSamplesPerSec=29.95911303971919, CurrSamplesPerSec=29.195134888396108, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:13:35,310] [INFO] [timer.py:197:stop] 0/1011, RunningAvgSamplesPerSec=29.958338087483238, CurrSamplesPerSec=29.197056055076388, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:13:41,348] [INFO] [timer.py:197:stop] 0/1012, RunningAvgSamplesPerSec=29.958981736249847, CurrSamplesPerSec=30.622828265243996, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:13:47,773] [INFO] [timer.py:197:stop] 0/1013, RunningAvgSamplesPerSec=29.95944183202123, CurrSamplesPerSec=30.43146733192888, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:13:54,554] [INFO] [timer.py:197:stop] 0/1014, RunningAvgSamplesPerSec=29.95986768711168, CurrSamplesPerSec=30.396690776604206, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:14:00,394] [INFO] [timer.py:197:stop] 0/1015, RunningAvgSamplesPerSec=29.96025817538801, CurrSamplesPerSec=30.360719594260637, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:14:07,142] [INFO] [timer.py:197:stop] 0/1016, RunningAvgSamplesPerSec=29.959844059584043, CurrSamplesPerSec=29.546143130614567, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:14:13,504] [INFO] [timer.py:197:stop] 0/1017, RunningAvgSamplesPerSec=29.959619287842163, CurrSamplesPerSec=29.733423230721158, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:14:19,390] [INFO] [timer.py:197:stop] 0/1018, RunningAvgSamplesPerSec=29.96039752369676, CurrSamplesPerSec=30.771718564920494, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:14:25,824] [INFO] [timer.py:197:stop] 0/1019, RunningAvgSamplesPerSec=29.960574752403208, CurrSamplesPerSec=30.141728934315093, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:14:32,090] [INFO] [logging.py:68:log_dist] [Rank 0] step=1020, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 18:14:32,091] [INFO] [timer.py:197:stop] 0/1020, RunningAvgSamplesPerSec=29.96064029815163, CurrSamplesPerSec=30.027449114726732, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:14:38,588] [INFO] [timer.py:197:stop] 0/1021, RunningAvgSamplesPerSec=29.960981148813218, CurrSamplesPerSec=30.312036783382705, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:14:44,835] [INFO] [timer.py:197:stop] 0/1022, RunningAvgSamplesPerSec=29.960817339064796, CurrSamplesPerSec=29.79482093668804, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:14:51,317] [INFO] [timer.py:197:stop] 0/1023, RunningAvgSamplesPerSec=29.96084410917753, CurrSamplesPerSec=29.988174556830753, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:14:57,474] [INFO] [timer.py:197:stop] 0/1024, RunningAvgSamplesPerSec=29.961077997350092, CurrSamplesPerSec=30.20179832228893, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:15:03,560] [INFO] [timer.py:197:stop] 0/1025, RunningAvgSamplesPerSec=29.96097132608584, CurrSamplesPerSec=29.85234892163737, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0051, 'learning_rate': 1e-05, 'epoch': 25.0} -[2022-12-14 18:15:09,892] [INFO] [timer.py:197:stop] 0/1026, RunningAvgSamplesPerSec=29.961568208739852, CurrSamplesPerSec=30.58489485780187, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:15:16,676] [INFO] [timer.py:197:stop] 0/1027, RunningAvgSamplesPerSec=29.96155873400489, CurrSamplesPerSec=29.951859749191886, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:15:23,061] [INFO] [timer.py:197:stop] 0/1028, RunningAvgSamplesPerSec=29.96063953077279, CurrSamplesPerSec=29.04720922298049, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:15:29,412] [INFO] [timer.py:197:stop] 0/1029, RunningAvgSamplesPerSec=29.959071339039852, CurrSamplesPerSec=28.432188442803984, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:15:35,812] [INFO] [logging.py:68:log_dist] [Rank 0] step=1030, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 18:15:35,813] [INFO] [timer.py:197:stop] 0/1030, RunningAvgSamplesPerSec=29.958809591426064, CurrSamplesPerSec=29.69238767384455, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:15:42,014] [INFO] [timer.py:197:stop] 0/1031, RunningAvgSamplesPerSec=29.95916216721573, CurrSamplesPerSec=30.326053066972296, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:15:48,224] [INFO] [timer.py:197:stop] 0/1032, RunningAvgSamplesPerSec=29.958425018326718, CurrSamplesPerSec=29.218647655720048, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:15:50,854] [INFO] [timer.py:197:stop] 0/1033, RunningAvgSamplesPerSec=29.95871626008501, CurrSamplesPerSec=30.26173234102091, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:15:53,027] [INFO] [timer.py:197:stop] 0/1034, RunningAvgSamplesPerSec=29.95856584967382, CurrSamplesPerSec=29.804292050588657, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:15:55,189] [INFO] [timer.py:197:stop] 0/1035, RunningAvgSamplesPerSec=29.95856614046519, CurrSamplesPerSec=29.958866240170067, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:15:57,377] [INFO] [timer.py:197:stop] 0/1036, RunningAvgSamplesPerSec=29.958214917166345, CurrSamplesPerSec=29.599746737262958, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:15:59,519] [INFO] [timer.py:197:stop] 0/1037, RunningAvgSamplesPerSec=29.95848549760024, CurrSamplesPerSec=30.240905719417686, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:16:01,631] [INFO] [timer.py:197:stop] 0/1038, RunningAvgSamplesPerSec=29.959159096175412, CurrSamplesPerSec=30.67296042042458, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:16:03,754] [INFO] [timer.py:197:stop] 0/1039, RunningAvgSamplesPerSec=29.959689507325848, CurrSamplesPerSec=30.51947263768814, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:16:05,668] [INFO] [logging.py:68:log_dist] [Rank 0] step=1040, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 18:16:05,669] [INFO] [timer.py:197:stop] 0/1040, RunningAvgSamplesPerSec=29.96306949549213, CurrSamplesPerSec=33.932958392488004, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:16:48,092] [INFO] [timer.py:197:stop] 0/1041, RunningAvgSamplesPerSec=29.962856095472553, CurrSamplesPerSec=29.742973984318525, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:16:54,636] [INFO] [timer.py:197:stop] 0/1042, RunningAvgSamplesPerSec=29.96204524523984, CurrSamplesPerSec=29.1426342438697, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:17:01,398] [INFO] [timer.py:197:stop] 0/1043, RunningAvgSamplesPerSec=29.962162463688614, CurrSamplesPerSec=30.084568161909807, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:17:08,278] [INFO] [timer.py:197:stop] 0/1044, RunningAvgSamplesPerSec=29.96188094825389, CurrSamplesPerSec=29.671664716450852, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:17:14,626] [INFO] [timer.py:197:stop] 0/1045, RunningAvgSamplesPerSec=29.962025958584366, CurrSamplesPerSec=30.11389333663301, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:17:21,651] [INFO] [timer.py:197:stop] 0/1046, RunningAvgSamplesPerSec=29.96230333796394, CurrSamplesPerSec=30.25443345292625, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:17:28,490] [INFO] [timer.py:197:stop] 0/1047, RunningAvgSamplesPerSec=29.960722166389953, CurrSamplesPerSec=28.39625870525509, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:17:34,699] [INFO] [timer.py:197:stop] 0/1048, RunningAvgSamplesPerSec=29.96047727391697, CurrSamplesPerSec=29.70673410517849, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:17:41,493] [INFO] [timer.py:197:stop] 0/1049, RunningAvgSamplesPerSec=29.960812441310818, CurrSamplesPerSec=30.315552478223175, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:17:47,954] [INFO] [logging.py:68:log_dist] [Rank 0] step=1050, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 18:17:47,955] [INFO] [timer.py:197:stop] 0/1050, RunningAvgSamplesPerSec=29.96018443181509, CurrSamplesPerSec=29.31679228410554, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0045, 'learning_rate': 1e-05, 'epoch': 26.0} -[2022-12-14 18:17:55,229] [INFO] [timer.py:197:stop] 0/1051, RunningAvgSamplesPerSec=29.96001069122972, CurrSamplesPerSec=29.779031497261446, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:18:01,355] [INFO] [timer.py:197:stop] 0/1052, RunningAvgSamplesPerSec=29.95952232096877, CurrSamplesPerSec=29.455842920580242, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:18:11,453] [INFO] [timer.py:197:stop] 0/1053, RunningAvgSamplesPerSec=29.958905679118452, CurrSamplesPerSec=29.325141731298544, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:18:18,824] [INFO] [timer.py:197:stop] 0/1054, RunningAvgSamplesPerSec=29.95923045573236, CurrSamplesPerSec=30.304508340428665, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:18:25,444] [INFO] [timer.py:197:stop] 0/1055, RunningAvgSamplesPerSec=29.95939608368555, CurrSamplesPerSec=30.13465695764361, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:18:32,512] [INFO] [timer.py:197:stop] 0/1056, RunningAvgSamplesPerSec=29.959169512331865, CurrSamplesPerSec=29.722476569513727, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:18:39,134] [INFO] [timer.py:197:stop] 0/1057, RunningAvgSamplesPerSec=29.958419783103963, CurrSamplesPerSec=29.188531813961387, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:18:45,869] [INFO] [timer.py:197:stop] 0/1058, RunningAvgSamplesPerSec=29.9577166997663, CurrSamplesPerSec=29.233902404334234, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:18:52,919] [INFO] [timer.py:197:stop] 0/1059, RunningAvgSamplesPerSec=29.957908344910376, CurrSamplesPerSec=30.16166336456556, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:18:59,287] [INFO] [logging.py:68:log_dist] [Rank 0] step=1060, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 18:18:59,288] [INFO] [timer.py:197:stop] 0/1060, RunningAvgSamplesPerSec=29.95781380996905, CurrSamplesPerSec=29.8582228739838, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:19:05,840] [INFO] [timer.py:197:stop] 0/1061, RunningAvgSamplesPerSec=29.957850251371706, CurrSamplesPerSec=29.996454985670873, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:19:12,328] [INFO] [timer.py:197:stop] 0/1062, RunningAvgSamplesPerSec=29.9578893397929, CurrSamplesPerSec=29.99934130863222, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:19:19,345] [INFO] [timer.py:197:stop] 0/1063, RunningAvgSamplesPerSec=29.95772908074387, CurrSamplesPerSec=29.788813225728987, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:19:26,072] [INFO] [timer.py:197:stop] 0/1064, RunningAvgSamplesPerSec=29.957322542735515, CurrSamplesPerSec=29.53211380199727, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:19:33,268] [INFO] [timer.py:197:stop] 0/1065, RunningAvgSamplesPerSec=29.956884100925787, CurrSamplesPerSec=29.498392033497133, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:19:39,762] [INFO] [timer.py:197:stop] 0/1066, RunningAvgSamplesPerSec=29.955998256654446, CurrSamplesPerSec=29.04307024768173, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:19:46,564] [INFO] [timer.py:197:stop] 0/1067, RunningAvgSamplesPerSec=29.955695722178856, CurrSamplesPerSec=29.637224471770242, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:19:52,831] [INFO] [timer.py:197:stop] 0/1068, RunningAvgSamplesPerSec=29.955575283356126, CurrSamplesPerSec=29.827855337119697, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:19:59,135] [INFO] [timer.py:197:stop] 0/1069, RunningAvgSamplesPerSec=29.955236937902992, CurrSamplesPerSec=29.59885572088834, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:20:06,899] [INFO] [logging.py:68:log_dist] [Rank 0] step=1070, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 18:20:06,900] [INFO] [timer.py:197:stop] 0/1070, RunningAvgSamplesPerSec=29.955405521758376, CurrSamplesPerSec=30.136372199253696, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:20:13,255] [INFO] [timer.py:197:stop] 0/1071, RunningAvgSamplesPerSec=29.95533186510246, CurrSamplesPerSec=29.876872789859767, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:20:19,837] [INFO] [timer.py:197:stop] 0/1072, RunningAvgSamplesPerSec=29.955188835811235, CurrSamplesPerSec=29.803067711753137, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:20:22,385] [INFO] [timer.py:197:stop] 0/1073, RunningAvgSamplesPerSec=29.954902469757535, CurrSamplesPerSec=29.65159624146069, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:20:24,519] [INFO] [timer.py:197:stop] 0/1074, RunningAvgSamplesPerSec=29.955398258095855, CurrSamplesPerSec=30.495978845584286, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:20:26,687] [INFO] [timer.py:197:stop] 0/1075, RunningAvgSamplesPerSec=29.95530992598268, CurrSamplesPerSec=29.860916566781892, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0029, 'learning_rate': 1e-05, 'epoch': 26.01} -[2022-12-14 18:20:28,841] [INFO] [timer.py:197:stop] 0/1076, RunningAvgSamplesPerSec=29.955437318654337, CurrSamplesPerSec=30.092756854390473, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:20:31,026] [INFO] [timer.py:197:stop] 0/1077, RunningAvgSamplesPerSec=29.955132385027458, CurrSamplesPerSec=29.631178744578868, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:20:33,140] [INFO] [timer.py:197:stop] 0/1078, RunningAvgSamplesPerSec=29.955952353854908, CurrSamplesPerSec=30.86416834065034, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:20:35,253] [INFO] [timer.py:197:stop] 0/1079, RunningAvgSamplesPerSec=29.956587282371437, CurrSamplesPerSec=30.655729665881836, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:20:37,139] [INFO] [logging.py:68:log_dist] [Rank 0] step=1080, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 18:20:37,139] [INFO] [timer.py:197:stop] 0/1080, RunningAvgSamplesPerSec=29.96010802596609, CurrSamplesPerSec=34.30197932313653, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:21:14,820] [INFO] [timer.py:197:stop] 0/1081, RunningAvgSamplesPerSec=29.960583556410754, CurrSamplesPerSec=30.482137374067715, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:21:20,652] [INFO] [timer.py:197:stop] 0/1082, RunningAvgSamplesPerSec=29.95919198206845, CurrSamplesPerSec=28.529408244951497, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:21:26,447] [INFO] [timer.py:197:stop] 0/1083, RunningAvgSamplesPerSec=29.959297870482665, CurrSamplesPerSec=30.074095966491754, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:21:32,308] [INFO] [timer.py:197:stop] 0/1084, RunningAvgSamplesPerSec=29.95928211293507, CurrSamplesPerSec=29.942257892379164, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:21:38,071] [INFO] [timer.py:197:stop] 0/1085, RunningAvgSamplesPerSec=29.959846874751218, CurrSamplesPerSec=30.583654330040403, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:21:44,092] [INFO] [timer.py:197:stop] 0/1086, RunningAvgSamplesPerSec=29.95975209433213, CurrSamplesPerSec=29.85745570885574, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:21:49,994] [INFO] [timer.py:197:stop] 0/1087, RunningAvgSamplesPerSec=29.960358395698517, CurrSamplesPerSec=30.6323438019577, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:21:55,873] [INFO] [timer.py:197:stop] 0/1088, RunningAvgSamplesPerSec=29.96057203696724, CurrSamplesPerSec=30.19418188624371, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:22:01,978] [INFO] [timer.py:197:stop] 0/1089, RunningAvgSamplesPerSec=29.95984565007082, CurrSamplesPerSec=29.191245696926146, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:22:08,058] [INFO] [logging.py:68:log_dist] [Rank 0] step=1090, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 18:22:08,059] [INFO] [timer.py:197:stop] 0/1090, RunningAvgSamplesPerSec=29.95981207415321, CurrSamplesPerSec=29.92335949906619, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:22:14,267] [INFO] [timer.py:197:stop] 0/1091, RunningAvgSamplesPerSec=29.959829840522605, CurrSamplesPerSec=29.979172141368196, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:22:20,107] [INFO] [timer.py:197:stop] 0/1092, RunningAvgSamplesPerSec=29.959727498899678, CurrSamplesPerSec=29.84869090636966, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:22:26,142] [INFO] [timer.py:197:stop] 0/1093, RunningAvgSamplesPerSec=29.960010818819843, CurrSamplesPerSec=30.272048876360653, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:22:32,431] [INFO] [timer.py:197:stop] 0/1094, RunningAvgSamplesPerSec=29.959412734562875, CurrSamplesPerSec=29.320823893708358, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:22:38,521] [INFO] [timer.py:197:stop] 0/1095, RunningAvgSamplesPerSec=29.95913193272373, CurrSamplesPerSec=29.65560579869486, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:22:44,803] [INFO] [timer.py:197:stop] 0/1096, RunningAvgSamplesPerSec=29.958888474644453, CurrSamplesPerSec=29.695133650275583, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:22:50,586] [INFO] [timer.py:197:stop] 0/1097, RunningAvgSamplesPerSec=29.958410073770317, CurrSamplesPerSec=29.444033835106453, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:22:56,605] [INFO] [timer.py:197:stop] 0/1098, RunningAvgSamplesPerSec=29.958477646317004, CurrSamplesPerSec=30.032652951627412, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:23:02,823] [INFO] [timer.py:197:stop] 0/1099, RunningAvgSamplesPerSec=29.95782899413772, CurrSamplesPerSec=29.263400572850248, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:23:08,684] [INFO] [logging.py:68:log_dist] [Rank 0] step=1100, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 18:23:08,685] [INFO] [timer.py:197:stop] 0/1100, RunningAvgSamplesPerSec=29.95768043914358, CurrSamplesPerSec=29.79559811535545, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0028, 'learning_rate': 1e-05, 'epoch': 27.0} -[2022-12-14 18:23:14,403] [INFO] [timer.py:197:stop] 0/1101, RunningAvgSamplesPerSec=29.957832944280256, CurrSamplesPerSec=30.126225680270085, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:23:20,620] [INFO] [timer.py:197:stop] 0/1102, RunningAvgSamplesPerSec=29.957533703041946, CurrSamplesPerSec=29.63224180247973, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:23:26,976] [INFO] [timer.py:197:stop] 0/1103, RunningAvgSamplesPerSec=29.958055204133018, CurrSamplesPerSec=30.54291577594929, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:23:32,653] [INFO] [timer.py:197:stop] 0/1104, RunningAvgSamplesPerSec=29.958315528549214, CurrSamplesPerSec=30.24770386135761, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:23:39,544] [INFO] [timer.py:197:stop] 0/1105, RunningAvgSamplesPerSec=29.957974757248163, CurrSamplesPerSec=29.58709802572988, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:23:45,354] [INFO] [timer.py:197:stop] 0/1106, RunningAvgSamplesPerSec=29.957940710692263, CurrSamplesPerSec=29.920434417642344, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:23:52,027] [INFO] [timer.py:197:stop] 0/1107, RunningAvgSamplesPerSec=29.956871230974716, CurrSamplesPerSec=28.820975806891468, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:23:58,292] [INFO] [timer.py:197:stop] 0/1108, RunningAvgSamplesPerSec=29.95671729283327, CurrSamplesPerSec=29.78757693645276, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:24:04,202] [INFO] [timer.py:197:stop] 0/1109, RunningAvgSamplesPerSec=29.956402941898123, CurrSamplesPerSec=29.612723140290278, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:24:10,273] [INFO] [logging.py:68:log_dist] [Rank 0] step=1110, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 18:24:10,274] [INFO] [timer.py:197:stop] 0/1110, RunningAvgSamplesPerSec=29.95658520353032, CurrSamplesPerSec=30.159718207917948, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:24:16,395] [INFO] [timer.py:197:stop] 0/1111, RunningAvgSamplesPerSec=29.95673492553112, CurrSamplesPerSec=30.123551519562742, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:24:22,396] [INFO] [timer.py:197:stop] 0/1112, RunningAvgSamplesPerSec=29.95714635953953, CurrSamplesPerSec=30.420490276361527, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:24:24,906] [INFO] [timer.py:197:stop] 0/1113, RunningAvgSamplesPerSec=29.956428455862017, CurrSamplesPerSec=29.180221901277346, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:24:27,042] [INFO] [timer.py:197:stop] 0/1114, RunningAvgSamplesPerSec=29.95674571076681, CurrSamplesPerSec=30.313416262625864, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:24:29,173] [INFO] [timer.py:197:stop] 0/1115, RunningAvgSamplesPerSec=29.957178737871555, CurrSamplesPerSec=30.446578473233178, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:24:31,344] [INFO] [timer.py:197:stop] 0/1116, RunningAvgSamplesPerSec=29.95734480242576, CurrSamplesPerSec=30.143323125184722, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:24:33,513] [INFO] [timer.py:197:stop] 0/1117, RunningAvgSamplesPerSec=29.95725379794703, CurrSamplesPerSec=29.856217036260947, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:24:35,661] [INFO] [timer.py:197:stop] 0/1118, RunningAvgSamplesPerSec=29.957426409647983, CurrSamplesPerSec=30.151134050702254, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:24:37,796] [INFO] [timer.py:197:stop] 0/1119, RunningAvgSamplesPerSec=29.95775447889611, CurrSamplesPerSec=30.328413789425685, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:24:39,711] [INFO] [logging.py:68:log_dist] [Rank 0] step=1120, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 18:24:39,711] [INFO] [timer.py:197:stop] 0/1120, RunningAvgSamplesPerSec=29.96079274086623, CurrSamplesPerSec=33.78849334049293, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:25:17,346] [INFO] [timer.py:197:stop] 0/1121, RunningAvgSamplesPerSec=29.961174203752385, CurrSamplesPerSec=30.393813524615453, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:25:23,338] [INFO] [timer.py:197:stop] 0/1122, RunningAvgSamplesPerSec=29.960547853471986, CurrSamplesPerSec=29.275697346286083, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:25:29,783] [INFO] [timer.py:197:stop] 0/1123, RunningAvgSamplesPerSec=29.960634146361183, CurrSamplesPerSec=30.05759524194232, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:25:35,705] [INFO] [timer.py:197:stop] 0/1124, RunningAvgSamplesPerSec=29.96073329231406, CurrSamplesPerSec=30.072290106839993, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:25:41,696] [INFO] [timer.py:197:stop] 0/1125, RunningAvgSamplesPerSec=29.959638262295634, CurrSamplesPerSec=28.779456155132944, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0024, 'learning_rate': 1e-05, 'epoch': 28.0} -[2022-12-14 18:25:47,716] [INFO] [timer.py:197:stop] 0/1126, RunningAvgSamplesPerSec=29.959596085073432, CurrSamplesPerSec=29.912305895166313, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:25:53,627] [INFO] [timer.py:197:stop] 0/1127, RunningAvgSamplesPerSec=29.959309149832716, CurrSamplesPerSec=29.640231896450445, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:25:59,529] [INFO] [timer.py:197:stop] 0/1128, RunningAvgSamplesPerSec=29.95965571548853, CurrSamplesPerSec=30.35468747791406, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:26:05,908] [INFO] [timer.py:197:stop] 0/1129, RunningAvgSamplesPerSec=29.959451492663216, CurrSamplesPerSec=29.731249718260518, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:26:12,067] [INFO] [logging.py:68:log_dist] [Rank 0] step=1130, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 18:26:12,068] [INFO] [timer.py:197:stop] 0/1130, RunningAvgSamplesPerSec=29.957577247281634, CurrSamplesPerSec=27.984542563203977, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:26:18,488] [INFO] [timer.py:197:stop] 0/1131, RunningAvgSamplesPerSec=29.95717628794688, CurrSamplesPerSec=29.511626858717566, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:26:24,487] [INFO] [timer.py:197:stop] 0/1132, RunningAvgSamplesPerSec=29.95730064785336, CurrSamplesPerSec=30.09836469959543, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:26:30,510] [INFO] [timer.py:197:stop] 0/1133, RunningAvgSamplesPerSec=29.95786691457804, CurrSamplesPerSec=30.611726711247755, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:26:36,450] [INFO] [timer.py:197:stop] 0/1134, RunningAvgSamplesPerSec=29.9570363184965, CurrSamplesPerSec=29.046219155723655, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:26:42,438] [INFO] [timer.py:197:stop] 0/1135, RunningAvgSamplesPerSec=29.956508660472736, CurrSamplesPerSec=29.370886923015988, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:26:48,682] [INFO] [timer.py:197:stop] 0/1136, RunningAvgSamplesPerSec=29.956580120970397, CurrSamplesPerSec=30.037764478410633, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:26:54,371] [INFO] [timer.py:197:stop] 0/1137, RunningAvgSamplesPerSec=29.95641673528105, CurrSamplesPerSec=29.772277263960817, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:27:00,300] [INFO] [timer.py:197:stop] 0/1138, RunningAvgSamplesPerSec=29.95637030571689, CurrSamplesPerSec=29.903765371557565, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:27:06,681] [INFO] [timer.py:197:stop] 0/1139, RunningAvgSamplesPerSec=29.956133760654954, CurrSamplesPerSec=29.689809677400444, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:27:12,335] [INFO] [logging.py:68:log_dist] [Rank 0] step=1140, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 18:27:12,336] [INFO] [timer.py:197:stop] 0/1140, RunningAvgSamplesPerSec=29.956647535107436, CurrSamplesPerSec=30.552437344761138, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:27:18,344] [INFO] [timer.py:197:stop] 0/1141, RunningAvgSamplesPerSec=29.956361828275586, CurrSamplesPerSec=29.634721477036123, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:27:24,370] [INFO] [timer.py:197:stop] 0/1142, RunningAvgSamplesPerSec=29.95640435818121, CurrSamplesPerSec=30.00492444988923, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:27:30,648] [INFO] [timer.py:197:stop] 0/1143, RunningAvgSamplesPerSec=29.956211900746275, CurrSamplesPerSec=29.73840704286174, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:27:36,736] [INFO] [timer.py:197:stop] 0/1144, RunningAvgSamplesPerSec=29.95587845508021, CurrSamplesPerSec=29.58019261611061, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:27:43,022] [INFO] [timer.py:197:stop] 0/1145, RunningAvgSamplesPerSec=29.956195179611456, CurrSamplesPerSec=30.322319147698423, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:27:49,169] [INFO] [timer.py:197:stop] 0/1146, RunningAvgSamplesPerSec=29.956126908617968, CurrSamplesPerSec=29.87829608463479, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:27:55,408] [INFO] [timer.py:197:stop] 0/1147, RunningAvgSamplesPerSec=29.95600771967113, CurrSamplesPerSec=29.820273930358418, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:28:01,508] [INFO] [timer.py:197:stop] 0/1148, RunningAvgSamplesPerSec=29.956066798766155, CurrSamplesPerSec=30.023865596747292, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:28:07,873] [INFO] [timer.py:197:stop] 0/1149, RunningAvgSamplesPerSec=29.955385911679297, CurrSamplesPerSec=29.19491579619159, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:28:13,712] [INFO] [logging.py:68:log_dist] [Rank 0] step=1150, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 18:28:13,712] [INFO] [timer.py:197:stop] 0/1150, RunningAvgSamplesPerSec=29.955285942655784, CurrSamplesPerSec=29.841059097613954, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.002, 'learning_rate': 1e-05, 'epoch': 28.01} -[2022-12-14 18:28:19,534] [INFO] [timer.py:197:stop] 0/1151, RunningAvgSamplesPerSec=29.955780247721048, CurrSamplesPerSec=30.534209381045038, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:28:25,228] [INFO] [timer.py:197:stop] 0/1152, RunningAvgSamplesPerSec=29.955426377204265, CurrSamplesPerSec=29.554278831571118, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:28:27,696] [INFO] [timer.py:197:stop] 0/1153, RunningAvgSamplesPerSec=29.955794339874025, CurrSamplesPerSec=30.385019945176094, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:28:29,866] [INFO] [timer.py:197:stop] 0/1154, RunningAvgSamplesPerSec=29.955688397523218, CurrSamplesPerSec=29.834243542861945, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:28:32,042] [INFO] [timer.py:197:stop] 0/1155, RunningAvgSamplesPerSec=29.955532792886427, CurrSamplesPerSec=29.77734347744337, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:28:34,157] [INFO] [timer.py:197:stop] 0/1156, RunningAvgSamplesPerSec=29.956094045585093, CurrSamplesPerSec=30.617519188455482, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:28:36,330] [INFO] [timer.py:197:stop] 0/1157, RunningAvgSamplesPerSec=29.95595980756276, CurrSamplesPerSec=29.801846782119878, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:28:38,468] [INFO] [timer.py:197:stop] 0/1158, RunningAvgSamplesPerSec=29.956248158834114, CurrSamplesPerSec=30.29304149925784, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:28:40,585] [INFO] [timer.py:197:stop] 0/1159, RunningAvgSamplesPerSec=29.95679763290876, CurrSamplesPerSec=30.60576194803887, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:28:42,464] [INFO] [logging.py:68:log_dist] [Rank 0] step=1160, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 18:28:42,465] [INFO] [timer.py:197:stop] 0/1160, RunningAvgSamplesPerSec=29.96014793752608, CurrSamplesPerSec=34.41307698549983, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:29:25,051] [INFO] [timer.py:197:stop] 0/1161, RunningAvgSamplesPerSec=29.96026258325655, CurrSamplesPerSec=30.093613754417873, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:29:32,361] [INFO] [timer.py:197:stop] 0/1162, RunningAvgSamplesPerSec=29.959355098763552, CurrSamplesPerSec=28.94328232555943, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:29:39,728] [INFO] [timer.py:197:stop] 0/1163, RunningAvgSamplesPerSec=29.957560258719294, CurrSamplesPerSec=28.01094954072389, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:29:46,499] [INFO] [timer.py:197:stop] 0/1164, RunningAvgSamplesPerSec=29.95677042662521, CurrSamplesPerSec=29.067034194083234, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:29:52,782] [INFO] [timer.py:197:stop] 0/1165, RunningAvgSamplesPerSec=29.956582704692394, CurrSamplesPerSec=29.740028047656644, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:29:59,690] [INFO] [timer.py:197:stop] 0/1166, RunningAvgSamplesPerSec=29.956001881642543, CurrSamplesPerSec=29.29541351606135, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:30:06,272] [INFO] [timer.py:197:stop] 0/1167, RunningAvgSamplesPerSec=29.95577521738078, CurrSamplesPerSec=29.694243451564013, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:30:13,837] [INFO] [timer.py:197:stop] 0/1168, RunningAvgSamplesPerSec=29.95559065912784, CurrSamplesPerSec=29.74211386866425, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:30:21,387] [INFO] [timer.py:197:stop] 0/1169, RunningAvgSamplesPerSec=29.955282965456668, CurrSamplesPerSec=29.600761843261953, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:30:27,598] [INFO] [logging.py:68:log_dist] [Rank 0] step=1170, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 18:30:27,599] [INFO] [timer.py:197:stop] 0/1170, RunningAvgSamplesPerSec=29.95506089822832, CurrSamplesPerSec=29.698133125872143, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:30:34,793] [INFO] [timer.py:197:stop] 0/1171, RunningAvgSamplesPerSec=29.954830412885315, CurrSamplesPerSec=29.688023404874894, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:30:41,178] [INFO] [timer.py:197:stop] 0/1172, RunningAvgSamplesPerSec=29.954756840562453, CurrSamplesPerSec=29.868997238595394, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:30:47,438] [INFO] [timer.py:197:stop] 0/1173, RunningAvgSamplesPerSec=29.955270759836946, CurrSamplesPerSec=30.56888375678855, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:30:54,342] [INFO] [timer.py:197:stop] 0/1174, RunningAvgSamplesPerSec=29.95514421273114, CurrSamplesPerSec=29.807687635374432, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:31:00,832] [INFO] [timer.py:197:stop] 0/1175, RunningAvgSamplesPerSec=29.95521383292439, CurrSamplesPerSec=30.03703175302795, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0023, 'learning_rate': 1e-05, 'epoch': 29.0} -[2022-12-14 18:31:08,077] [INFO] [timer.py:197:stop] 0/1176, RunningAvgSamplesPerSec=29.95435109442923, CurrSamplesPerSec=28.975458416156325, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:31:14,458] [INFO] [timer.py:197:stop] 0/1177, RunningAvgSamplesPerSec=29.953924257996718, CurrSamplesPerSec=29.461070373354378, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:31:20,942] [INFO] [timer.py:197:stop] 0/1178, RunningAvgSamplesPerSec=29.953528243860333, CurrSamplesPerSec=29.49533553984388, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:31:27,194] [INFO] [timer.py:197:stop] 0/1179, RunningAvgSamplesPerSec=29.953500680674736, CurrSamplesPerSec=29.921121443532403, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:31:33,381] [INFO] [logging.py:68:log_dist] [Rank 0] step=1180, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 18:31:33,382] [INFO] [timer.py:197:stop] 0/1180, RunningAvgSamplesPerSec=29.953200587765938, CurrSamplesPerSec=29.60411120764629, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:31:39,766] [INFO] [timer.py:197:stop] 0/1181, RunningAvgSamplesPerSec=29.953650243674755, CurrSamplesPerSec=30.492888791197306, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:31:48,311] [INFO] [timer.py:197:stop] 0/1182, RunningAvgSamplesPerSec=29.953780018314685, CurrSamplesPerSec=30.107570549129257, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:31:55,409] [INFO] [timer.py:197:stop] 0/1183, RunningAvgSamplesPerSec=29.954019949714013, CurrSamplesPerSec=30.23984282766017, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:32:01,607] [INFO] [timer.py:197:stop] 0/1184, RunningAvgSamplesPerSec=29.95447405664433, CurrSamplesPerSec=30.500559652248647, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:32:08,514] [INFO] [timer.py:197:stop] 0/1185, RunningAvgSamplesPerSec=29.954276296272152, CurrSamplesPerSec=29.72233505646221, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:32:15,067] [INFO] [timer.py:197:stop] 0/1186, RunningAvgSamplesPerSec=29.954357607318503, CurrSamplesPerSec=30.050858726444005, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:32:21,705] [INFO] [timer.py:197:stop] 0/1187, RunningAvgSamplesPerSec=29.954545459784633, CurrSamplesPerSec=30.178628034131073, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:32:28,524] [INFO] [timer.py:197:stop] 0/1188, RunningAvgSamplesPerSec=29.954867978391192, CurrSamplesPerSec=30.34199593669895, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:32:34,929] [INFO] [timer.py:197:stop] 0/1189, RunningAvgSamplesPerSec=29.95456678801479, CurrSamplesPerSec=29.60156810245818, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:32:41,801] [INFO] [logging.py:68:log_dist] [Rank 0] step=1190, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 18:32:41,801] [INFO] [timer.py:197:stop] 0/1190, RunningAvgSamplesPerSec=29.95415137291121, CurrSamplesPerSec=29.469046054671914, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:32:48,176] [INFO] [timer.py:197:stop] 0/1191, RunningAvgSamplesPerSec=29.954074170008912, CurrSamplesPerSec=29.86263733024126, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:32:54,422] [INFO] [timer.py:197:stop] 0/1192, RunningAvgSamplesPerSec=29.954217284484205, CurrSamplesPerSec=30.125353398687587, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:32:56,812] [INFO] [timer.py:197:stop] 0/1193, RunningAvgSamplesPerSec=29.954508656619936, CurrSamplesPerSec=30.30530549391744, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:32:58,928] [INFO] [timer.py:197:stop] 0/1194, RunningAvgSamplesPerSec=29.955049939090408, CurrSamplesPerSec=30.61390867106541, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:33:01,116] [INFO] [timer.py:197:stop] 0/1195, RunningAvgSamplesPerSec=29.954741603591035, CurrSamplesPerSec=29.591664278186638, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:33:03,325] [INFO] [timer.py:197:stop] 0/1196, RunningAvgSamplesPerSec=29.954528399766218, CurrSamplesPerSec=29.702319606308144, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:33:05,501] [INFO] [timer.py:197:stop] 0/1197, RunningAvgSamplesPerSec=29.95485992989227, CurrSamplesPerSec=30.356012484127113, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:33:07,614] [INFO] [timer.py:197:stop] 0/1198, RunningAvgSamplesPerSec=29.955441434259573, CurrSamplesPerSec=30.666856157626015, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:33:09,862] [INFO] [timer.py:197:stop] 0/1199, RunningAvgSamplesPerSec=29.954437154636636, CurrSamplesPerSec=28.79966183105822, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:33:11,756] [INFO] [logging.py:68:log_dist] [Rank 0] step=1200, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 18:33:11,757] [INFO] [timer.py:197:stop] 0/1200, RunningAvgSamplesPerSec=29.95758491370553, CurrSamplesPerSec=34.26804694357651, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0019, 'learning_rate': 1e-05, 'epoch': 29.01} -[2022-12-14 18:33:48,916] [INFO] [timer.py:197:stop] 0/1201, RunningAvgSamplesPerSec=29.958157896201556, CurrSamplesPerSec=30.660701792900152, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:33:54,942] [INFO] [timer.py:197:stop] 0/1202, RunningAvgSamplesPerSec=29.95746264035883, CurrSamplesPerSec=29.146437704080906, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:34:01,076] [INFO] [timer.py:197:stop] 0/1203, RunningAvgSamplesPerSec=29.95770763237542, CurrSamplesPerSec=30.254614177260958, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:34:07,129] [INFO] [timer.py:197:stop] 0/1204, RunningAvgSamplesPerSec=29.956738582304453, CurrSamplesPerSec=28.836468544900313, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:34:13,148] [INFO] [timer.py:197:stop] 0/1205, RunningAvgSamplesPerSec=29.955992285094727, CurrSamplesPerSec=29.08504572759298, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:34:19,910] [INFO] [timer.py:197:stop] 0/1206, RunningAvgSamplesPerSec=29.955647682826843, CurrSamplesPerSec=29.546754535003878, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:34:25,905] [INFO] [timer.py:197:stop] 0/1207, RunningAvgSamplesPerSec=29.95574067749741, CurrSamplesPerSec=30.068126674936924, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:34:31,757] [INFO] [timer.py:197:stop] 0/1208, RunningAvgSamplesPerSec=29.955667865887662, CurrSamplesPerSec=29.868186315924284, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:34:37,827] [INFO] [timer.py:197:stop] 0/1209, RunningAvgSamplesPerSec=29.95563962103786, CurrSamplesPerSec=29.92161505442529, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:34:43,936] [INFO] [logging.py:68:log_dist] [Rank 0] step=1210, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 18:34:43,937] [INFO] [timer.py:197:stop] 0/1210, RunningAvgSamplesPerSec=29.955603633176516, CurrSamplesPerSec=29.91222923202138, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:34:50,193] [INFO] [timer.py:197:stop] 0/1211, RunningAvgSamplesPerSec=29.95595909020552, CurrSamplesPerSec=30.39160088916524, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:34:56,282] [INFO] [timer.py:197:stop] 0/1212, RunningAvgSamplesPerSec=29.95628288373988, CurrSamplesPerSec=30.352936999402406, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:35:02,295] [INFO] [timer.py:197:stop] 0/1213, RunningAvgSamplesPerSec=29.95544345259622, CurrSamplesPerSec=28.973069074265414, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:35:08,331] [INFO] [timer.py:197:stop] 0/1214, RunningAvgSamplesPerSec=29.955269602159962, CurrSamplesPerSec=29.74620727774933, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:35:14,100] [INFO] [timer.py:197:stop] 0/1215, RunningAvgSamplesPerSec=29.954330866418758, CurrSamplesPerSec=28.858249670872347, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:35:20,239] [INFO] [timer.py:197:stop] 0/1216, RunningAvgSamplesPerSec=29.95447472017895, CurrSamplesPerSec=30.129992623392063, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:35:26,001] [INFO] [timer.py:197:stop] 0/1217, RunningAvgSamplesPerSec=29.95422791946676, CurrSamplesPerSec=29.65758149399226, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:35:31,953] [INFO] [timer.py:197:stop] 0/1218, RunningAvgSamplesPerSec=29.954248063906398, CurrSamplesPerSec=29.978743589735, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:35:37,773] [INFO] [timer.py:197:stop] 0/1219, RunningAvgSamplesPerSec=29.954513031898035, CurrSamplesPerSec=30.280220411790445, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:35:43,962] [INFO] [logging.py:68:log_dist] [Rank 0] step=1220, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 18:35:43,963] [INFO] [timer.py:197:stop] 0/1220, RunningAvgSamplesPerSec=29.954160663654793, CurrSamplesPerSec=29.53138604480873, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:35:50,194] [INFO] [timer.py:197:stop] 0/1221, RunningAvgSamplesPerSec=29.954509621578115, CurrSamplesPerSec=30.385663121386237, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:35:56,075] [INFO] [timer.py:197:stop] 0/1222, RunningAvgSamplesPerSec=29.95488709140423, CurrSamplesPerSec=30.422207181865865, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:36:02,941] [INFO] [timer.py:197:stop] 0/1223, RunningAvgSamplesPerSec=29.954489367084225, CurrSamplesPerSec=29.477006634195728, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:36:09,073] [INFO] [timer.py:197:stop] 0/1224, RunningAvgSamplesPerSec=29.95428396629421, CurrSamplesPerSec=29.70557365184775, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:36:14,826] [INFO] [timer.py:197:stop] 0/1225, RunningAvgSamplesPerSec=29.954209915673204, CurrSamplesPerSec=29.86399282023817, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0018, 'learning_rate': 1e-05, 'epoch': 30.0} -[2022-12-14 18:36:20,868] [INFO] [timer.py:197:stop] 0/1226, RunningAvgSamplesPerSec=29.954263854647902, CurrSamplesPerSec=30.020376938583222, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:36:26,668] [INFO] [timer.py:197:stop] 0/1227, RunningAvgSamplesPerSec=29.954857291690526, CurrSamplesPerSec=30.69929053292144, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:36:32,927] [INFO] [timer.py:197:stop] 0/1228, RunningAvgSamplesPerSec=29.955173426713447, CurrSamplesPerSec=30.347515229693226, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:36:38,681] [INFO] [timer.py:197:stop] 0/1229, RunningAvgSamplesPerSec=29.954622759321804, CurrSamplesPerSec=29.294396866131812, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:36:45,735] [INFO] [logging.py:68:log_dist] [Rank 0] step=1230, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 18:36:45,736] [INFO] [timer.py:197:stop] 0/1230, RunningAvgSamplesPerSec=29.95417818389259, CurrSamplesPerSec=29.418448234972818, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:36:51,551] [INFO] [timer.py:197:stop] 0/1231, RunningAvgSamplesPerSec=29.954349157793054, CurrSamplesPerSec=30.165788333360304, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:36:57,337] [INFO] [timer.py:197:stop] 0/1232, RunningAvgSamplesPerSec=29.954554806261935, CurrSamplesPerSec=30.20944919940341, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:37:00,035] [INFO] [timer.py:197:stop] 0/1233, RunningAvgSamplesPerSec=29.954424593399043, CurrSamplesPerSec=29.795115267668866, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:37:02,173] [INFO] [timer.py:197:stop] 0/1234, RunningAvgSamplesPerSec=29.954700257516002, CurrSamplesPerSec=30.297934278871224, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:37:04,345] [INFO] [timer.py:197:stop] 0/1235, RunningAvgSamplesPerSec=29.95457971446524, CurrSamplesPerSec=29.806803914568818, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:37:06,440] [INFO] [timer.py:197:stop] 0/1236, RunningAvgSamplesPerSec=29.95533442862573, CurrSamplesPerSec=30.915756707165105, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:37:08,575] [INFO] [timer.py:197:stop] 0/1237, RunningAvgSamplesPerSec=29.955632495491724, CurrSamplesPerSec=30.328023165560776, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:37:10,692] [INFO] [timer.py:197:stop] 0/1238, RunningAvgSamplesPerSec=29.956137002126212, CurrSamplesPerSec=30.592448219091406, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:37:12,863] [INFO] [timer.py:197:stop] 0/1239, RunningAvgSamplesPerSec=29.956033647156378, CurrSamplesPerSec=29.828829800887746, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:37:14,742] [INFO] [logging.py:68:log_dist] [Rank 0] step=1240, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 18:37:14,743] [INFO] [timer.py:197:stop] 0/1240, RunningAvgSamplesPerSec=29.95925002682208, CurrSamplesPerSec=34.54777110723208, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:37:55,941] [INFO] [timer.py:197:stop] 0/1241, RunningAvgSamplesPerSec=29.9595334881878, CurrSamplesPerSec=30.314621270306223, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:38:03,196] [INFO] [timer.py:197:stop] 0/1242, RunningAvgSamplesPerSec=29.959810071709388, CurrSamplesPerSec=30.30646537582392, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:38:10,515] [INFO] [timer.py:197:stop] 0/1243, RunningAvgSamplesPerSec=29.959051180529645, CurrSamplesPerSec=29.046706323240866, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:38:17,012] [INFO] [timer.py:197:stop] 0/1244, RunningAvgSamplesPerSec=29.958811625877694, CurrSamplesPerSec=29.66444768922484, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:38:23,541] [INFO] [timer.py:197:stop] 0/1245, RunningAvgSamplesPerSec=29.958812880275786, CurrSamplesPerSec=29.960370923795974, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:38:30,312] [INFO] [timer.py:197:stop] 0/1246, RunningAvgSamplesPerSec=29.959007342859962, CurrSamplesPerSec=30.20269202757623, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:38:36,968] [INFO] [timer.py:197:stop] 0/1247, RunningAvgSamplesPerSec=29.95852504111262, CurrSamplesPerSec=29.370330977634776, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:38:43,705] [INFO] [timer.py:197:stop] 0/1248, RunningAvgSamplesPerSec=29.958499927463688, CurrSamplesPerSec=29.927266058277993, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:38:50,040] [INFO] [timer.py:197:stop] 0/1249, RunningAvgSamplesPerSec=29.958301401408406, CurrSamplesPerSec=29.712965284366103, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:38:58,310] [INFO] [logging.py:68:log_dist] [Rank 0] step=1250, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 18:38:58,311] [INFO] [timer.py:197:stop] 0/1250, RunningAvgSamplesPerSec=29.957793438924163, CurrSamplesPerSec=29.33749045892463, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0015, 'learning_rate': 1e-05, 'epoch': 31.0} -[2022-12-14 18:39:05,320] [INFO] [timer.py:197:stop] 0/1251, RunningAvgSamplesPerSec=29.957456614167047, CurrSamplesPerSec=29.542920667154803, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:39:11,890] [INFO] [timer.py:197:stop] 0/1252, RunningAvgSamplesPerSec=29.957564303896344, CurrSamplesPerSec=30.092675889982075, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:39:18,286] [INFO] [timer.py:197:stop] 0/1253, RunningAvgSamplesPerSec=29.95759258793997, CurrSamplesPerSec=29.992989450208604, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:39:25,033] [INFO] [timer.py:197:stop] 0/1254, RunningAvgSamplesPerSec=29.957537388899837, CurrSamplesPerSec=29.88864232407613, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:39:31,674] [INFO] [timer.py:197:stop] 0/1255, RunningAvgSamplesPerSec=29.95756861895929, CurrSamplesPerSec=29.996719793555066, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:39:38,412] [INFO] [timer.py:197:stop] 0/1256, RunningAvgSamplesPerSec=29.957230719449914, CurrSamplesPerSec=29.539747666596643, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:39:44,741] [INFO] [timer.py:197:stop] 0/1257, RunningAvgSamplesPerSec=29.956263431772825, CurrSamplesPerSec=28.790525006963406, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:39:51,532] [INFO] [timer.py:197:stop] 0/1258, RunningAvgSamplesPerSec=29.95601573436169, CurrSamplesPerSec=29.64835073563261, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:39:58,482] [INFO] [timer.py:197:stop] 0/1259, RunningAvgSamplesPerSec=29.956047803758906, CurrSamplesPerSec=29.996381242535417, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:40:04,885] [INFO] [logging.py:68:log_dist] [Rank 0] step=1260, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 18:40:04,886] [INFO] [timer.py:197:stop] 0/1260, RunningAvgSamplesPerSec=29.95580105775351, CurrSamplesPerSec=29.64882228872813, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:40:11,387] [INFO] [timer.py:197:stop] 0/1261, RunningAvgSamplesPerSec=29.955554665292272, CurrSamplesPerSec=29.648769893199002, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:40:18,204] [INFO] [timer.py:197:stop] 0/1262, RunningAvgSamplesPerSec=29.955963317875348, CurrSamplesPerSec=30.479455030290918, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:40:25,629] [INFO] [timer.py:197:stop] 0/1263, RunningAvgSamplesPerSec=29.956263726338847, CurrSamplesPerSec=30.33962624121451, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:40:32,431] [INFO] [timer.py:197:stop] 0/1264, RunningAvgSamplesPerSec=29.955630410660604, CurrSamplesPerSec=29.177773299515934, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:40:40,089] [INFO] [timer.py:197:stop] 0/1265, RunningAvgSamplesPerSec=29.95611870484998, CurrSamplesPerSec=30.5852990964514, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:40:46,576] [INFO] [timer.py:197:stop] 0/1266, RunningAvgSamplesPerSec=29.95599128098316, CurrSamplesPerSec=29.795915612773523, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:40:53,446] [INFO] [timer.py:197:stop] 0/1267, RunningAvgSamplesPerSec=29.955539843822038, CurrSamplesPerSec=29.395597922649635, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:41:00,446] [INFO] [timer.py:197:stop] 0/1268, RunningAvgSamplesPerSec=29.955476353860586, CurrSamplesPerSec=29.87537648130131, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:41:06,736] [INFO] [timer.py:197:stop] 0/1269, RunningAvgSamplesPerSec=29.955639136907795, CurrSamplesPerSec=30.163151205973403, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:41:13,619] [INFO] [logging.py:68:log_dist] [Rank 0] step=1270, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 18:41:13,620] [INFO] [timer.py:197:stop] 0/1270, RunningAvgSamplesPerSec=29.955774452603578, CurrSamplesPerSec=30.12820709665747, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:41:19,776] [INFO] [timer.py:197:stop] 0/1271, RunningAvgSamplesPerSec=29.95522654004485, CurrSamplesPerSec=29.27623375010102, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:41:26,329] [INFO] [timer.py:197:stop] 0/1272, RunningAvgSamplesPerSec=29.954673690987153, CurrSamplesPerSec=29.269175865322005, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:41:28,869] [INFO] [timer.py:197:stop] 0/1273, RunningAvgSamplesPerSec=29.95512771035424, CurrSamplesPerSec=30.543058260242777, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:41:30,986] [INFO] [timer.py:197:stop] 0/1274, RunningAvgSamplesPerSec=29.95561302837908, CurrSamplesPerSec=30.585431521780865, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:41:33,156] [INFO] [timer.py:197:stop] 0/1275, RunningAvgSamplesPerSec=29.95554076233902, CurrSamplesPerSec=29.86389979248221, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0014, 'learning_rate': 1e-05, 'epoch': 31.01} -[2022-12-14 18:41:35,338] [INFO] [timer.py:197:stop] 0/1276, RunningAvgSamplesPerSec=29.95535544809613, CurrSamplesPerSec=29.721295141420466, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:41:37,517] [INFO] [timer.py:197:stop] 0/1277, RunningAvgSamplesPerSec=29.955186707990354, CurrSamplesPerSec=29.741744792142747, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:41:39,673] [INFO] [timer.py:197:stop] 0/1278, RunningAvgSamplesPerSec=29.955320501495752, CurrSamplesPerSec=30.126884996379978, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:41:41,802] [INFO] [timer.py:197:stop] 0/1279, RunningAvgSamplesPerSec=29.95567760908553, CurrSamplesPerSec=30.41839095047749, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:41:43,699] [INFO] [logging.py:68:log_dist] [Rank 0] step=1280, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 18:41:43,700] [INFO] [timer.py:197:stop] 0/1280, RunningAvgSamplesPerSec=29.95860181764509, CurrSamplesPerSec=34.225024291585214, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:42:26,238] [INFO] [timer.py:197:stop] 0/1281, RunningAvgSamplesPerSec=29.95867654128671, CurrSamplesPerSec=30.054478975783702, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:42:32,661] [INFO] [timer.py:197:stop] 0/1282, RunningAvgSamplesPerSec=29.959034368003124, CurrSamplesPerSec=30.42380014702139, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:42:39,158] [INFO] [timer.py:197:stop] 0/1283, RunningAvgSamplesPerSec=29.95865940986645, CurrSamplesPerSec=29.48628645878358, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:42:45,632] [INFO] [timer.py:197:stop] 0/1284, RunningAvgSamplesPerSec=29.958691582508543, CurrSamplesPerSec=29.999961555053385, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:42:51,815] [INFO] [timer.py:197:stop] 0/1285, RunningAvgSamplesPerSec=29.958662338304023, CurrSamplesPerSec=29.921218163302793, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:42:58,445] [INFO] [timer.py:197:stop] 0/1286, RunningAvgSamplesPerSec=29.95813430971999, CurrSamplesPerSec=29.295666090980276, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:43:04,871] [INFO] [timer.py:197:stop] 0/1287, RunningAvgSamplesPerSec=29.95749488620291, CurrSamplesPerSec=29.15839247247997, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:43:12,418] [INFO] [timer.py:197:stop] 0/1288, RunningAvgSamplesPerSec=29.95627119417349, CurrSamplesPerSec=28.462308062516772, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:43:19,808] [INFO] [timer.py:197:stop] 0/1289, RunningAvgSamplesPerSec=29.95577212279114, CurrSamplesPerSec=29.327438903412283, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:43:26,237] [INFO] [logging.py:68:log_dist] [Rank 0] step=1290, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 18:43:26,237] [INFO] [timer.py:197:stop] 0/1290, RunningAvgSamplesPerSec=29.9553261958329, CurrSamplesPerSec=29.392215113837086, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:43:33,047] [INFO] [timer.py:197:stop] 0/1291, RunningAvgSamplesPerSec=29.95505877007135, CurrSamplesPerSec=29.614533034211632, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:43:39,450] [INFO] [timer.py:197:stop] 0/1292, RunningAvgSamplesPerSec=29.954870627445523, CurrSamplesPerSec=29.7143039314226, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:43:45,607] [INFO] [timer.py:197:stop] 0/1293, RunningAvgSamplesPerSec=29.95466545440898, CurrSamplesPerSec=29.69231213382429, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:43:52,454] [INFO] [timer.py:197:stop] 0/1294, RunningAvgSamplesPerSec=29.953927455207317, CurrSamplesPerSec=29.030563068787075, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:43:58,588] [INFO] [timer.py:197:stop] 0/1295, RunningAvgSamplesPerSec=29.954181354212338, CurrSamplesPerSec=30.285853929093076, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:44:05,215] [INFO] [timer.py:197:stop] 0/1296, RunningAvgSamplesPerSec=29.954110123875225, CurrSamplesPerSec=29.862291832863118, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:44:11,497] [INFO] [timer.py:197:stop] 0/1297, RunningAvgSamplesPerSec=29.954357331621246, CurrSamplesPerSec=30.27769984392802, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:44:17,614] [INFO] [timer.py:197:stop] 0/1298, RunningAvgSamplesPerSec=29.953871472441286, CurrSamplesPerSec=29.337637950107684, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:44:24,123] [INFO] [timer.py:197:stop] 0/1299, RunningAvgSamplesPerSec=29.95357376487534, CurrSamplesPerSec=29.572655120532865, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:44:30,480] [INFO] [logging.py:68:log_dist] [Rank 0] step=1300, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 18:44:30,481] [INFO] [timer.py:197:stop] 0/1300, RunningAvgSamplesPerSec=29.953668544808604, CurrSamplesPerSec=30.077105091255824, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0014, 'learning_rate': 1e-05, 'epoch': 32.0} -[2022-12-14 18:44:37,031] [INFO] [timer.py:197:stop] 0/1301, RunningAvgSamplesPerSec=29.954042366639953, CurrSamplesPerSec=30.447258792923378, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:44:43,571] [INFO] [timer.py:197:stop] 0/1302, RunningAvgSamplesPerSec=29.953966805736556, CurrSamplesPerSec=29.856134018984616, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:44:50,292] [INFO] [timer.py:197:stop] 0/1303, RunningAvgSamplesPerSec=29.954271872616264, CurrSamplesPerSec=30.35618412551868, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:44:56,686] [INFO] [timer.py:197:stop] 0/1304, RunningAvgSamplesPerSec=29.954570320714623, CurrSamplesPerSec=30.3479543895672, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:45:03,247] [INFO] [timer.py:197:stop] 0/1305, RunningAvgSamplesPerSec=29.954609034073222, CurrSamplesPerSec=30.005098851730004, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:45:09,369] [INFO] [timer.py:197:stop] 0/1306, RunningAvgSamplesPerSec=29.95409262929288, CurrSamplesPerSec=29.296011390321393, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:45:15,859] [INFO] [timer.py:197:stop] 0/1307, RunningAvgSamplesPerSec=29.954430876802633, CurrSamplesPerSec=30.402102587201114, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:45:22,649] [INFO] [timer.py:197:stop] 0/1308, RunningAvgSamplesPerSec=29.954082243079053, CurrSamplesPerSec=29.505927390931248, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:45:28,665] [INFO] [timer.py:197:stop] 0/1309, RunningAvgSamplesPerSec=29.95398858168757, CurrSamplesPerSec=29.832164672246826, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:45:35,069] [INFO] [logging.py:68:log_dist] [Rank 0] step=1310, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 18:45:35,070] [INFO] [timer.py:197:stop] 0/1310, RunningAvgSamplesPerSec=29.95368266837141, CurrSamplesPerSec=29.559124645728268, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:45:41,443] [INFO] [timer.py:197:stop] 0/1311, RunningAvgSamplesPerSec=29.95349219161551, CurrSamplesPerSec=29.706405355654017, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:45:47,694] [INFO] [timer.py:197:stop] 0/1312, RunningAvgSamplesPerSec=29.953899817701206, CurrSamplesPerSec=30.497167228282727, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:45:50,313] [INFO] [timer.py:197:stop] 0/1313, RunningAvgSamplesPerSec=29.95422472516508, CurrSamplesPerSec=30.38599331868116, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:45:52,476] [INFO] [timer.py:197:stop] 0/1314, RunningAvgSamplesPerSec=29.954218160863693, CurrSamplesPerSec=29.945614835356015, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:45:54,693] [INFO] [timer.py:197:stop] 0/1315, RunningAvgSamplesPerSec=29.9538745286013, CurrSamplesPerSec=29.509719221987332, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:45:56,881] [INFO] [timer.py:197:stop] 0/1316, RunningAvgSamplesPerSec=29.953900233448138, CurrSamplesPerSec=29.987688797502816, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:45:59,064] [INFO] [timer.py:197:stop] 0/1317, RunningAvgSamplesPerSec=29.953675371078575, CurrSamplesPerSec=29.66109449125677, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:46:01,244] [INFO] [timer.py:197:stop] 0/1318, RunningAvgSamplesPerSec=29.953495251963627, CurrSamplesPerSec=29.718498262682804, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:46:03,409] [INFO] [timer.py:197:stop] 0/1319, RunningAvgSamplesPerSec=29.953543734620308, CurrSamplesPerSec=30.017483209930134, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:46:05,337] [INFO] [logging.py:68:log_dist] [Rank 0] step=1320, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 18:46:05,338] [INFO] [timer.py:197:stop] 0/1320, RunningAvgSamplesPerSec=29.956024277011718, CurrSamplesPerSec=33.6231198309246, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:46:42,964] [INFO] [timer.py:197:stop] 0/1321, RunningAvgSamplesPerSec=29.955777122200274, CurrSamplesPerSec=29.633533929392087, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:46:49,087] [INFO] [timer.py:197:stop] 0/1322, RunningAvgSamplesPerSec=29.95588089629547, CurrSamplesPerSec=30.093387716994947, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:46:55,421] [INFO] [timer.py:197:stop] 0/1323, RunningAvgSamplesPerSec=29.95609260161839, CurrSamplesPerSec=30.238177099543794, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:47:01,382] [INFO] [timer.py:197:stop] 0/1324, RunningAvgSamplesPerSec=29.954608594614186, CurrSamplesPerSec=28.1147365439948, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:47:07,653] [INFO] [timer.py:197:stop] 0/1325, RunningAvgSamplesPerSec=29.95456665034278, CurrSamplesPerSec=29.89921885798956, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0014, 'learning_rate': 1e-05, 'epoch': 33.0} -[2022-12-14 18:47:13,829] [INFO] [timer.py:197:stop] 0/1326, RunningAvgSamplesPerSec=29.954478091816124, CurrSamplesPerSec=29.837771986388482, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:47:20,266] [INFO] [timer.py:197:stop] 0/1327, RunningAvgSamplesPerSec=29.95425065487927, CurrSamplesPerSec=29.656123451282607, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:47:26,070] [INFO] [timer.py:197:stop] 0/1328, RunningAvgSamplesPerSec=29.954503237446193, CurrSamplesPerSec=30.29295945353483, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:47:32,214] [INFO] [timer.py:197:stop] 0/1329, RunningAvgSamplesPerSec=29.954702616107944, CurrSamplesPerSec=30.221434637273035, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:47:38,231] [INFO] [logging.py:68:log_dist] [Rank 0] step=1330, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 18:47:38,232] [INFO] [timer.py:197:stop] 0/1330, RunningAvgSamplesPerSec=29.954541272564526, CurrSamplesPerSec=29.74195898722284, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:47:44,581] [INFO] [timer.py:197:stop] 0/1331, RunningAvgSamplesPerSec=29.95473319509469, CurrSamplesPerSec=30.21179520233883, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:47:50,555] [INFO] [timer.py:197:stop] 0/1332, RunningAvgSamplesPerSec=29.953610858606165, CurrSamplesPerSec=28.532828880592596, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:47:56,720] [INFO] [timer.py:197:stop] 0/1333, RunningAvgSamplesPerSec=29.95373157490086, CurrSamplesPerSec=30.115150104306494, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:48:02,900] [INFO] [timer.py:197:stop] 0/1334, RunningAvgSamplesPerSec=29.95345834384932, CurrSamplesPerSec=29.59415347921853, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:48:08,942] [INFO] [timer.py:197:stop] 0/1335, RunningAvgSamplesPerSec=29.953223945478605, CurrSamplesPerSec=29.64422855950177, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:48:15,222] [INFO] [timer.py:197:stop] 0/1336, RunningAvgSamplesPerSec=29.95302475545816, CurrSamplesPerSec=29.689839231510785, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:48:20,995] [INFO] [timer.py:197:stop] 0/1337, RunningAvgSamplesPerSec=29.952848533557376, CurrSamplesPerSec=29.71960049909934, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:48:26,994] [INFO] [timer.py:197:stop] 0/1338, RunningAvgSamplesPerSec=29.952387563388353, CurrSamplesPerSec=29.34939070884286, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:48:33,336] [INFO] [timer.py:197:stop] 0/1339, RunningAvgSamplesPerSec=29.951873487182787, CurrSamplesPerSec=29.28047458645429, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:48:39,701] [INFO] [logging.py:68:log_dist] [Rank 0] step=1340, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 18:48:39,702] [INFO] [timer.py:197:stop] 0/1340, RunningAvgSamplesPerSec=29.951888123830415, CurrSamplesPerSec=29.97147012527179, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:48:45,701] [INFO] [timer.py:197:stop] 0/1341, RunningAvgSamplesPerSec=29.951586275957148, CurrSamplesPerSec=29.553091214951625, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:48:51,514] [INFO] [timer.py:197:stop] 0/1342, RunningAvgSamplesPerSec=29.951687382291603, CurrSamplesPerSec=30.087683926466624, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:48:57,757] [INFO] [timer.py:197:stop] 0/1343, RunningAvgSamplesPerSec=29.951986501864173, CurrSamplesPerSec=30.358247406940624, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:49:04,034] [INFO] [timer.py:197:stop] 0/1344, RunningAvgSamplesPerSec=29.951844109989153, CurrSamplesPerSec=29.762107109476606, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:49:10,165] [INFO] [timer.py:197:stop] 0/1345, RunningAvgSamplesPerSec=29.951529003374695, CurrSamplesPerSec=29.53454750060596, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:49:16,280] [INFO] [timer.py:197:stop] 0/1346, RunningAvgSamplesPerSec=29.950800372068883, CurrSamplesPerSec=29.003230559057428, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:49:22,636] [INFO] [timer.py:197:stop] 0/1347, RunningAvgSamplesPerSec=29.951023477472123, CurrSamplesPerSec=30.25391175139348, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:49:28,797] [INFO] [timer.py:197:stop] 0/1348, RunningAvgSamplesPerSec=29.950786361947813, CurrSamplesPerSec=29.635228585607063, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:49:34,767] [INFO] [timer.py:197:stop] 0/1349, RunningAvgSamplesPerSec=29.95027092246977, CurrSamplesPerSec=29.27220800849, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:49:40,786] [INFO] [logging.py:68:log_dist] [Rank 0] step=1350, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 18:49:40,787] [INFO] [timer.py:197:stop] 0/1350, RunningAvgSamplesPerSec=29.950181623783635, CurrSamplesPerSec=29.83037780450748, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0025, 'learning_rate': 1e-05, 'epoch': 33.01} -[2022-12-14 18:49:46,783] [INFO] [timer.py:197:stop] 0/1351, RunningAvgSamplesPerSec=29.94999305706095, CurrSamplesPerSec=29.69794584624426, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:49:52,696] [INFO] [timer.py:197:stop] 0/1352, RunningAvgSamplesPerSec=29.950269098934477, CurrSamplesPerSec=30.327341311172443, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:49:55,199] [INFO] [timer.py:197:stop] 0/1353, RunningAvgSamplesPerSec=29.950751237432623, CurrSamplesPerSec=30.616108401380735, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:49:57,370] [INFO] [timer.py:197:stop] 0/1354, RunningAvgSamplesPerSec=29.95065994160999, CurrSamplesPerSec=29.827825507600537, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:49:59,498] [INFO] [timer.py:197:stop] 0/1355, RunningAvgSamplesPerSec=29.951015467219136, CurrSamplesPerSec=30.439531873713946, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:50:01,671] [INFO] [timer.py:197:stop] 0/1356, RunningAvgSamplesPerSec=29.95089709374716, CurrSamplesPerSec=29.791590292331875, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:50:03,861] [INFO] [timer.py:197:stop] 0/1357, RunningAvgSamplesPerSec=29.950640786576145, CurrSamplesPerSec=29.607578892011023, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:50:06,006] [INFO] [timer.py:197:stop] 0/1358, RunningAvgSamplesPerSec=29.95083979319456, CurrSamplesPerSec=30.222945395454524, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:50:08,154] [INFO] [timer.py:197:stop] 0/1359, RunningAvgSamplesPerSec=29.951028377777753, CurrSamplesPerSec=30.208952845484173, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:50:10,034] [INFO] [logging.py:68:log_dist] [Rank 0] step=1360, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 18:50:10,035] [INFO] [timer.py:197:stop] 0/1360, RunningAvgSamplesPerSec=29.953939723725327, CurrSamplesPerSec=34.50537867029157, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:50:53,217] [INFO] [timer.py:197:stop] 0/1361, RunningAvgSamplesPerSec=29.9540468693865, CurrSamplesPerSec=30.100261447465385, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:51:00,046] [INFO] [timer.py:197:stop] 0/1362, RunningAvgSamplesPerSec=29.95407872788566, CurrSamplesPerSec=29.997437144728462, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:51:06,525] [INFO] [timer.py:197:stop] 0/1363, RunningAvgSamplesPerSec=29.954049981594434, CurrSamplesPerSec=29.915006021772886, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:51:12,495] [INFO] [timer.py:197:stop] 0/1364, RunningAvgSamplesPerSec=29.954408528201125, CurrSamplesPerSec=30.45047777340668, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:51:18,619] [INFO] [timer.py:197:stop] 0/1365, RunningAvgSamplesPerSec=29.954795373307338, CurrSamplesPerSec=30.491118873985855, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:51:25,239] [INFO] [timer.py:197:stop] 0/1366, RunningAvgSamplesPerSec=29.95362016071608, CurrSamplesPerSec=28.433173232495758, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:51:31,952] [INFO] [timer.py:197:stop] 0/1367, RunningAvgSamplesPerSec=29.95214308329373, CurrSamplesPerSec=28.06447662325646, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:51:38,188] [INFO] [timer.py:197:stop] 0/1368, RunningAvgSamplesPerSec=29.95245545102909, CurrSamplesPerSec=30.38499930895708, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:51:44,514] [INFO] [timer.py:197:stop] 0/1369, RunningAvgSamplesPerSec=29.952560714851625, CurrSamplesPerSec=30.097045217079902, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:51:51,105] [INFO] [logging.py:68:log_dist] [Rank 0] step=1370, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 18:51:51,106] [INFO] [timer.py:197:stop] 0/1370, RunningAvgSamplesPerSec=29.95230708680513, CurrSamplesPerSec=29.60956778939833, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:51:57,788] [INFO] [timer.py:197:stop] 0/1371, RunningAvgSamplesPerSec=29.951922769032922, CurrSamplesPerSec=29.435251821833226, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:52:03,974] [INFO] [timer.py:197:stop] 0/1372, RunningAvgSamplesPerSec=29.952053889502327, CurrSamplesPerSec=30.132640868046682, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:52:09,909] [INFO] [timer.py:197:stop] 0/1373, RunningAvgSamplesPerSec=29.952233559228468, CurrSamplesPerSec=30.200422187538077, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:52:16,082] [INFO] [timer.py:197:stop] 0/1374, RunningAvgSamplesPerSec=29.951865220150484, CurrSamplesPerSec=29.455251433312363, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:52:22,085] [INFO] [timer.py:197:stop] 0/1375, RunningAvgSamplesPerSec=29.951939646074322, CurrSamplesPerSec=30.05440158214852, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0029, 'learning_rate': 1e-05, 'epoch': 34.0} -[2022-12-14 18:52:28,431] [INFO] [timer.py:197:stop] 0/1376, RunningAvgSamplesPerSec=29.951879920447524, CurrSamplesPerSec=29.87010069590145, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:52:34,864] [INFO] [timer.py:197:stop] 0/1377, RunningAvgSamplesPerSec=29.951403563306638, CurrSamplesPerSec=29.3108957707256, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:52:40,951] [INFO] [timer.py:197:stop] 0/1378, RunningAvgSamplesPerSec=29.951493696734957, CurrSamplesPerSec=30.0759424800748, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:52:47,174] [INFO] [timer.py:197:stop] 0/1379, RunningAvgSamplesPerSec=29.95137379925367, CurrSamplesPerSec=29.78729928131542, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:52:53,120] [INFO] [logging.py:68:log_dist] [Rank 0] step=1380, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 18:52:53,121] [INFO] [timer.py:197:stop] 0/1380, RunningAvgSamplesPerSec=29.950943991345767, CurrSamplesPerSec=29.370575205383602, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:52:59,462] [INFO] [timer.py:197:stop] 0/1381, RunningAvgSamplesPerSec=29.950990580456125, CurrSamplesPerSec=30.015328382352827, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:53:05,659] [INFO] [timer.py:197:stop] 0/1382, RunningAvgSamplesPerSec=29.950722315163283, CurrSamplesPerSec=29.585301265662626, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:53:12,470] [INFO] [timer.py:197:stop] 0/1383, RunningAvgSamplesPerSec=29.94984264458933, CurrSamplesPerSec=28.783217868972166, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:53:18,900] [INFO] [timer.py:197:stop] 0/1384, RunningAvgSamplesPerSec=29.94951764542068, CurrSamplesPerSec=29.50732529222024, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:53:25,127] [INFO] [timer.py:197:stop] 0/1385, RunningAvgSamplesPerSec=29.949399077127524, CurrSamplesPerSec=29.78642998977475, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:53:31,394] [INFO] [timer.py:197:stop] 0/1386, RunningAvgSamplesPerSec=29.948730267202464, CurrSamplesPerSec=29.05149715037478, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:53:37,769] [INFO] [timer.py:197:stop] 0/1387, RunningAvgSamplesPerSec=29.948612912857524, CurrSamplesPerSec=29.787071211324783, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:53:44,296] [INFO] [timer.py:197:stop] 0/1388, RunningAvgSamplesPerSec=29.94823579746771, CurrSamplesPerSec=29.434890321816606, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:53:50,313] [INFO] [timer.py:197:stop] 0/1389, RunningAvgSamplesPerSec=29.948347084915294, CurrSamplesPerSec=30.103390591376115, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:53:56,527] [INFO] [logging.py:68:log_dist] [Rank 0] step=1390, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 18:53:56,528] [INFO] [timer.py:197:stop] 0/1390, RunningAvgSamplesPerSec=29.947960536036195, CurrSamplesPerSec=29.421253412206365, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:54:02,503] [INFO] [timer.py:197:stop] 0/1391, RunningAvgSamplesPerSec=29.947215160237658, CurrSamplesPerSec=28.94720561027132, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:54:08,437] [INFO] [timer.py:197:stop] 0/1392, RunningAvgSamplesPerSec=29.947200849280957, CurrSamplesPerSec=29.92733612543918, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:54:10,910] [INFO] [timer.py:197:stop] 0/1393, RunningAvgSamplesPerSec=29.947003114292887, CurrSamplesPerSec=29.674652891663907, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:54:13,029] [INFO] [timer.py:197:stop] 0/1394, RunningAvgSamplesPerSec=29.947443219223715, CurrSamplesPerSec=30.572414022456595, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:54:15,196] [INFO] [timer.py:197:stop] 0/1395, RunningAvgSamplesPerSec=29.94739381559252, CurrSamplesPerSec=29.878781632096526, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:54:17,304] [INFO] [timer.py:197:stop] 0/1396, RunningAvgSamplesPerSec=29.94794781687854, CurrSamplesPerSec=30.740099079063995, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:54:19,452] [INFO] [timer.py:197:stop] 0/1397, RunningAvgSamplesPerSec=29.94808748827072, CurrSamplesPerSec=30.144064430603414, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:54:21,584] [INFO] [timer.py:197:stop] 0/1398, RunningAvgSamplesPerSec=29.94840714017372, CurrSamplesPerSec=30.401066207132118, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:54:23,722] [INFO] [timer.py:197:stop] 0/1399, RunningAvgSamplesPerSec=29.94868040727972, CurrSamplesPerSec=30.33508678768436, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:54:25,624] [INFO] [logging.py:68:log_dist] [Rank 0] step=1400, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 18:54:25,625] [INFO] [timer.py:197:stop] 0/1400, RunningAvgSamplesPerSec=29.95125088469151, CurrSamplesPerSec=34.031787373541825, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0029, 'learning_rate': 1e-05, 'epoch': 34.01} -[2022-12-14 18:55:01,727] [INFO] [timer.py:197:stop] 0/1401, RunningAvgSamplesPerSec=29.951556938462183, CurrSamplesPerSec=30.38562528673778, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:55:07,715] [INFO] [timer.py:197:stop] 0/1402, RunningAvgSamplesPerSec=29.951981152961253, CurrSamplesPerSec=30.557462981859643, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:55:13,701] [INFO] [timer.py:197:stop] 0/1403, RunningAvgSamplesPerSec=29.952086103479278, CurrSamplesPerSec=30.09974167427097, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:55:19,589] [INFO] [timer.py:197:stop] 0/1404, RunningAvgSamplesPerSec=29.95197031481881, CurrSamplesPerSec=29.790624872374465, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:55:25,365] [INFO] [timer.py:197:stop] 0/1405, RunningAvgSamplesPerSec=29.952206586166717, CurrSamplesPerSec=30.287166098199144, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:55:31,643] [INFO] [timer.py:197:stop] 0/1406, RunningAvgSamplesPerSec=29.952425542605866, CurrSamplesPerSec=30.26280700741724, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:55:38,560] [INFO] [timer.py:197:stop] 0/1407, RunningAvgSamplesPerSec=29.95247612361396, CurrSamplesPerSec=30.023660754181087, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:55:44,744] [INFO] [timer.py:197:stop] 0/1408, RunningAvgSamplesPerSec=29.952400254143, CurrSamplesPerSec=29.846181933370197, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:55:51,011] [INFO] [timer.py:197:stop] 0/1409, RunningAvgSamplesPerSec=29.951585438501432, CurrSamplesPerSec=28.848188940566608, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:55:56,923] [INFO] [logging.py:68:log_dist] [Rank 0] step=1410, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 18:55:56,924] [INFO] [timer.py:197:stop] 0/1410, RunningAvgSamplesPerSec=29.95191565200422, CurrSamplesPerSec=30.423851869465366, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:56:03,527] [INFO] [timer.py:197:stop] 0/1411, RunningAvgSamplesPerSec=29.951685771201205, CurrSamplesPerSec=29.631476393102563, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:56:09,389] [INFO] [timer.py:197:stop] 0/1412, RunningAvgSamplesPerSec=29.951620121901627, CurrSamplesPerSec=29.859405248954946, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:56:15,222] [INFO] [timer.py:197:stop] 0/1413, RunningAvgSamplesPerSec=29.951728402390927, CurrSamplesPerSec=30.105186683018218, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:56:21,443] [INFO] [timer.py:197:stop] 0/1414, RunningAvgSamplesPerSec=29.951243023819995, CurrSamplesPerSec=29.28169469313205, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:56:27,316] [INFO] [timer.py:197:stop] 0/1415, RunningAvgSamplesPerSec=29.95131921903117, CurrSamplesPerSec=30.059294989875923, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:56:33,618] [INFO] [timer.py:197:stop] 0/1416, RunningAvgSamplesPerSec=29.951280368417795, CurrSamplesPerSec=29.896484954053033, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:56:39,518] [INFO] [timer.py:197:stop] 0/1417, RunningAvgSamplesPerSec=29.951319923973646, CurrSamplesPerSec=30.007356196947008, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:56:45,239] [INFO] [timer.py:197:stop] 0/1418, RunningAvgSamplesPerSec=29.95134366068139, CurrSamplesPerSec=29.98496883605996, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:56:51,063] [INFO] [timer.py:197:stop] 0/1419, RunningAvgSamplesPerSec=29.95074351473553, CurrSamplesPerSec=29.124399659840698, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:56:57,242] [INFO] [logging.py:68:log_dist] [Rank 0] step=1420, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 18:56:57,242] [INFO] [timer.py:197:stop] 0/1420, RunningAvgSamplesPerSec=29.95070198319744, CurrSamplesPerSec=29.891967283086643, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:57:03,500] [INFO] [timer.py:197:stop] 0/1421, RunningAvgSamplesPerSec=29.950125283004628, CurrSamplesPerSec=29.154114096997827, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:57:09,598] [INFO] [timer.py:197:stop] 0/1422, RunningAvgSamplesPerSec=29.950198104714808, CurrSamplesPerSec=30.053890121189404, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:57:16,315] [INFO] [timer.py:197:stop] 0/1423, RunningAvgSamplesPerSec=29.95013991710399, CurrSamplesPerSec=29.867740991876357, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:57:23,302] [INFO] [timer.py:197:stop] 0/1424, RunningAvgSamplesPerSec=29.95056298335614, CurrSamplesPerSec=30.564063162700826, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:57:29,359] [INFO] [timer.py:197:stop] 0/1425, RunningAvgSamplesPerSec=29.950590684755838, CurrSamplesPerSec=29.990033987939878, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0033, 'learning_rate': 1e-05, 'epoch': 35.01} -[2022-12-14 18:57:37,696] [INFO] [timer.py:197:stop] 0/1426, RunningAvgSamplesPerSec=29.950515328888056, CurrSamplesPerSec=29.84366674630597, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:57:44,054] [INFO] [timer.py:197:stop] 0/1427, RunningAvgSamplesPerSec=29.949981617636777, CurrSamplesPerSec=29.208798129665148, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:57:49,910] [INFO] [timer.py:197:stop] 0/1428, RunningAvgSamplesPerSec=29.949854016782407, CurrSamplesPerSec=29.769120834578633, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:57:55,688] [INFO] [timer.py:197:stop] 0/1429, RunningAvgSamplesPerSec=29.9497481624575, CurrSamplesPerSec=29.799557395147776, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:58:01,536] [INFO] [logging.py:68:log_dist] [Rank 0] step=1430, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 18:58:01,537] [INFO] [timer.py:197:stop] 0/1430, RunningAvgSamplesPerSec=29.949486845839377, CurrSamplesPerSec=29.581177043101707, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:58:07,629] [INFO] [timer.py:197:stop] 0/1431, RunningAvgSamplesPerSec=29.949720604148116, CurrSamplesPerSec=30.287292537165577, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:58:13,402] [INFO] [timer.py:197:stop] 0/1432, RunningAvgSamplesPerSec=29.94979689672131, CurrSamplesPerSec=30.05921757143524, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:58:15,903] [INFO] [timer.py:197:stop] 0/1433, RunningAvgSamplesPerSec=29.94991023496684, CurrSamplesPerSec=30.112866378568125, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:58:18,030] [INFO] [timer.py:197:stop] 0/1434, RunningAvgSamplesPerSec=29.95025961181506, CurrSamplesPerSec=30.45871137408853, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:58:20,165] [INFO] [timer.py:197:stop] 0/1435, RunningAvgSamplesPerSec=29.950528314860275, CurrSamplesPerSec=30.34032236516559, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:58:22,299] [INFO] [timer.py:197:stop] 0/1436, RunningAvgSamplesPerSec=29.95082648623946, CurrSamplesPerSec=30.384294254980205, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:58:24,448] [INFO] [timer.py:197:stop] 0/1437, RunningAvgSamplesPerSec=29.950969666871245, CurrSamplesPerSec=30.15770892801933, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:58:26,544] [INFO] [timer.py:197:stop] 0/1438, RunningAvgSamplesPerSec=29.95164540595214, CurrSamplesPerSec=30.953798333120584, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:58:28,676] [INFO] [timer.py:197:stop] 0/1439, RunningAvgSamplesPerSec=29.95197057315591, CurrSamplesPerSec=30.42631061250592, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:58:30,572] [INFO] [logging.py:68:log_dist] [Rank 0] step=1440, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 18:58:30,572] [INFO] [timer.py:197:stop] 0/1440, RunningAvgSamplesPerSec=29.954576154660874, CurrSamplesPerSec=34.23409431216477, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:59:13,902] [INFO] [timer.py:197:stop] 0/1441, RunningAvgSamplesPerSec=29.954443206169582, CurrSamplesPerSec=29.76447655370226, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:59:21,631] [INFO] [timer.py:197:stop] 0/1442, RunningAvgSamplesPerSec=29.95457229564717, CurrSamplesPerSec=30.141492020128354, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:59:28,434] [INFO] [timer.py:197:stop] 0/1443, RunningAvgSamplesPerSec=29.95463231768025, CurrSamplesPerSec=30.04131433310768, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:59:35,139] [INFO] [timer.py:197:stop] 0/1444, RunningAvgSamplesPerSec=29.954737688422732, CurrSamplesPerSec=30.107351055592094, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:59:41,753] [INFO] [timer.py:197:stop] 0/1445, RunningAvgSamplesPerSec=29.95445721623795, CurrSamplesPerSec=29.555407969305193, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:59:48,716] [INFO] [timer.py:197:stop] 0/1446, RunningAvgSamplesPerSec=29.95427502504453, CurrSamplesPerSec=29.6936620590786, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 18:59:55,701] [INFO] [timer.py:197:stop] 0/1447, RunningAvgSamplesPerSec=29.954308351927043, CurrSamplesPerSec=30.0025098635312, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:00:01,977] [INFO] [timer.py:197:stop] 0/1448, RunningAvgSamplesPerSec=29.954365920716484, CurrSamplesPerSec=30.037784645669777, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:00:09,771] [INFO] [timer.py:197:stop] 0/1449, RunningAvgSamplesPerSec=29.95458553202884, CurrSamplesPerSec=30.275548471642022, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:00:16,865] [INFO] [logging.py:68:log_dist] [Rank 0] step=1450, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 19:00:16,865] [INFO] [timer.py:197:stop] 0/1450, RunningAvgSamplesPerSec=29.954592301437465, CurrSamplesPerSec=29.96439084210919, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0031, 'learning_rate': 1e-05, 'epoch': 36.0} -[2022-12-14 19:00:23,726] [INFO] [timer.py:197:stop] 0/1451, RunningAvgSamplesPerSec=29.954880764754602, CurrSamplesPerSec=30.3784865431687, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:00:29,899] [INFO] [timer.py:197:stop] 0/1452, RunningAvgSamplesPerSec=29.95482096573272, CurrSamplesPerSec=29.86842227658556, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:00:36,673] [INFO] [timer.py:197:stop] 0/1453, RunningAvgSamplesPerSec=29.95447077734924, CurrSamplesPerSec=29.455167398797172, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:00:43,251] [INFO] [timer.py:197:stop] 0/1454, RunningAvgSamplesPerSec=29.95455182575043, CurrSamplesPerSec=30.07261689745808, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:00:50,447] [INFO] [timer.py:197:stop] 0/1455, RunningAvgSamplesPerSec=29.95410852703815, CurrSamplesPerSec=29.32398847514099, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:00:57,312] [INFO] [timer.py:197:stop] 0/1456, RunningAvgSamplesPerSec=29.95405374849691, CurrSamplesPerSec=29.87467160564944, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:01:04,019] [INFO] [timer.py:197:stop] 0/1457, RunningAvgSamplesPerSec=29.954542189934255, CurrSamplesPerSec=30.681995122390017, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:01:10,290] [INFO] [timer.py:197:stop] 0/1458, RunningAvgSamplesPerSec=29.95396744089959, CurrSamplesPerSec=29.14043552466373, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:01:16,879] [INFO] [timer.py:197:stop] 0/1459, RunningAvgSamplesPerSec=29.953807914324592, CurrSamplesPerSec=29.72332567612188, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:01:23,463] [INFO] [logging.py:68:log_dist] [Rank 0] step=1460, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 19:01:23,463] [INFO] [timer.py:197:stop] 0/1460, RunningAvgSamplesPerSec=29.953134961272227, CurrSamplesPerSec=29.00374135580632, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:01:30,297] [INFO] [timer.py:197:stop] 0/1461, RunningAvgSamplesPerSec=29.952568537736056, CurrSamplesPerSec=29.148896875181887, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:01:36,940] [INFO] [timer.py:197:stop] 0/1462, RunningAvgSamplesPerSec=29.952858743550802, CurrSamplesPerSec=30.38234434925121, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:01:44,197] [INFO] [timer.py:197:stop] 0/1463, RunningAvgSamplesPerSec=29.95238576010673, CurrSamplesPerSec=29.2774024121873, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:01:50,877] [INFO] [timer.py:197:stop] 0/1464, RunningAvgSamplesPerSec=29.952139966298535, CurrSamplesPerSec=29.59729249211238, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:01:57,671] [INFO] [timer.py:197:stop] 0/1465, RunningAvgSamplesPerSec=29.952326746935437, CurrSamplesPerSec=30.227914269012945, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:02:04,104] [INFO] [timer.py:197:stop] 0/1466, RunningAvgSamplesPerSec=29.952703954595805, CurrSamplesPerSec=30.51492430592832, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:02:10,630] [INFO] [timer.py:197:stop] 0/1467, RunningAvgSamplesPerSec=29.952675625565103, CurrSamplesPerSec=29.911259310516197, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:02:17,236] [INFO] [timer.py:197:stop] 0/1468, RunningAvgSamplesPerSec=29.952540777805226, CurrSamplesPerSec=29.75628410394036, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:02:23,540] [INFO] [timer.py:197:stop] 0/1469, RunningAvgSamplesPerSec=29.952798338861825, CurrSamplesPerSec=30.33520677086406, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:02:30,405] [INFO] [logging.py:68:log_dist] [Rank 0] step=1470, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 19:02:30,405] [INFO] [timer.py:197:stop] 0/1470, RunningAvgSamplesPerSec=29.953175900922933, CurrSamplesPerSec=30.517501874407053, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:02:36,601] [INFO] [timer.py:197:stop] 0/1471, RunningAvgSamplesPerSec=29.953616336526114, CurrSamplesPerSec=30.614449845237253, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:02:43,206] [INFO] [timer.py:197:stop] 0/1472, RunningAvgSamplesPerSec=29.95428416424145, CurrSamplesPerSec=30.96856459819211, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:02:45,720] [INFO] [timer.py:197:stop] 0/1473, RunningAvgSamplesPerSec=29.954172741651888, CurrSamplesPerSec=29.79127288708557, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:02:47,892] [INFO] [timer.py:197:stop] 0/1474, RunningAvgSamplesPerSec=29.95408495736296, CurrSamplesPerSec=29.825508930526745, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:02:50,058] [INFO] [timer.py:197:stop] 0/1475, RunningAvgSamplesPerSec=29.954052558505005, CurrSamplesPerSec=29.90643730136961, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0034, 'learning_rate': 1e-05, 'epoch': 36.01} -[2022-12-14 19:02:52,244] [INFO] [timer.py:197:stop] 0/1476, RunningAvgSamplesPerSec=29.953839633106057, CurrSamplesPerSec=29.643452710140345, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:02:54,405] [INFO] [timer.py:197:stop] 0/1477, RunningAvgSamplesPerSec=29.953928888421906, CurrSamplesPerSec=30.086072010983777, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:02:56,595] [INFO] [timer.py:197:stop] 0/1478, RunningAvgSamplesPerSec=29.95393627064913, CurrSamplesPerSec=29.96482901819766, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:02:58,713] [INFO] [timer.py:197:stop] 0/1479, RunningAvgSamplesPerSec=29.954355859201215, CurrSamplesPerSec=30.58675235295458, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:03:00,673] [INFO] [logging.py:68:log_dist] [Rank 0] step=1480, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 19:03:00,674] [INFO] [timer.py:197:stop] 0/1480, RunningAvgSamplesPerSec=29.956247514513056, CurrSamplesPerSec=33.037831813125095, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:03:40,496] [INFO] [timer.py:197:stop] 0/1481, RunningAvgSamplesPerSec=29.956661748624153, CurrSamplesPerSec=30.581682237454082, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:03:46,804] [INFO] [timer.py:197:stop] 0/1482, RunningAvgSamplesPerSec=29.956478561014162, CurrSamplesPerSec=29.68797415408191, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:03:53,367] [INFO] [timer.py:197:stop] 0/1483, RunningAvgSamplesPerSec=29.95491108245671, CurrSamplesPerSec=27.801896431314926, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:03:59,406] [INFO] [timer.py:197:stop] 0/1484, RunningAvgSamplesPerSec=29.95487286501663, CurrSamplesPerSec=29.898379652808043, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:04:05,738] [INFO] [timer.py:197:stop] 0/1485, RunningAvgSamplesPerSec=29.955286390717426, CurrSamplesPerSec=30.580940155399922, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:04:12,020] [INFO] [timer.py:197:stop] 0/1486, RunningAvgSamplesPerSec=29.955051942862585, CurrSamplesPerSec=29.611357698879253, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:04:18,304] [INFO] [timer.py:197:stop] 0/1487, RunningAvgSamplesPerSec=29.95528591031983, CurrSamplesPerSec=30.306568024516626, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:04:24,344] [INFO] [timer.py:197:stop] 0/1488, RunningAvgSamplesPerSec=29.95477564864219, CurrSamplesPerSec=29.21574424842031, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:04:30,569] [INFO] [timer.py:197:stop] 0/1489, RunningAvgSamplesPerSec=29.953351591449408, CurrSamplesPerSec=27.97692729471436, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:04:36,453] [INFO] [logging.py:68:log_dist] [Rank 0] step=1490, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 19:04:36,454] [INFO] [timer.py:197:stop] 0/1490, RunningAvgSamplesPerSec=29.953324384696007, CurrSamplesPerSec=29.912922547798637, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:04:42,293] [INFO] [timer.py:197:stop] 0/1491, RunningAvgSamplesPerSec=29.953617854983854, CurrSamplesPerSec=30.396766501118268, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:04:48,126] [INFO] [timer.py:197:stop] 0/1492, RunningAvgSamplesPerSec=29.953774630093022, CurrSamplesPerSec=30.18904754664289, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:04:53,883] [INFO] [timer.py:197:stop] 0/1493, RunningAvgSamplesPerSec=29.95335388362389, CurrSamplesPerSec=29.33930213817445, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:04:59,716] [INFO] [timer.py:197:stop] 0/1494, RunningAvgSamplesPerSec=29.953554199173286, CurrSamplesPerSec=30.255234794244238, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:05:05,858] [INFO] [timer.py:197:stop] 0/1495, RunningAvgSamplesPerSec=29.953432575284797, CurrSamplesPerSec=29.773063173666607, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:05:11,860] [INFO] [timer.py:197:stop] 0/1496, RunningAvgSamplesPerSec=29.95377168421837, CurrSamplesPerSec=30.468771874545624, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:05:18,774] [INFO] [timer.py:197:stop] 0/1497, RunningAvgSamplesPerSec=29.95380640746679, CurrSamplesPerSec=30.005773000986906, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:05:24,629] [INFO] [timer.py:197:stop] 0/1498, RunningAvgSamplesPerSec=29.954191505230625, CurrSamplesPerSec=30.541202593292812, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:05:30,828] [INFO] [timer.py:197:stop] 0/1499, RunningAvgSamplesPerSec=29.954184381073947, CurrSamplesPerSec=29.943530435909732, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:05:36,941] [INFO] [logging.py:68:log_dist] [Rank 0] step=1500, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 19:05:36,942] [INFO] [timer.py:197:stop] 0/1500, RunningAvgSamplesPerSec=29.953804960865885, CurrSamplesPerSec=29.396389824804668, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0033, 'learning_rate': 1e-05, 'epoch': 37.0} -[2022-12-14 19:05:42,961] [INFO] [timer.py:197:stop] 0/1501, RunningAvgSamplesPerSec=29.95364735842126, CurrSamplesPerSec=29.719406367947848, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:05:48,765] [INFO] [timer.py:197:stop] 0/1502, RunningAvgSamplesPerSec=29.953990599070373, CurrSamplesPerSec=30.477506733345006, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:05:54,981] [INFO] [timer.py:197:stop] 0/1503, RunningAvgSamplesPerSec=29.953774970129423, CurrSamplesPerSec=29.633789096890464, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:06:00,996] [INFO] [timer.py:197:stop] 0/1504, RunningAvgSamplesPerSec=29.953868485140788, CurrSamplesPerSec=30.094895821958612, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:06:07,368] [INFO] [timer.py:197:stop] 0/1505, RunningAvgSamplesPerSec=29.95369859446613, CurrSamplesPerSec=29.70067970609505, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:06:13,029] [INFO] [timer.py:197:stop] 0/1506, RunningAvgSamplesPerSec=29.953767151597816, CurrSamplesPerSec=30.057164445279504, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:06:19,341] [INFO] [timer.py:197:stop] 0/1507, RunningAvgSamplesPerSec=29.953980885322775, CurrSamplesPerSec=30.27892592140566, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:06:25,294] [INFO] [timer.py:197:stop] 0/1508, RunningAvgSamplesPerSec=29.953731455944965, CurrSamplesPerSec=29.582989603239152, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:06:31,511] [INFO] [timer.py:197:stop] 0/1509, RunningAvgSamplesPerSec=29.953788833979853, CurrSamplesPerSec=30.04045032341816, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:06:37,809] [INFO] [logging.py:68:log_dist] [Rank 0] step=1510, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 19:06:37,810] [INFO] [timer.py:197:stop] 0/1510, RunningAvgSamplesPerSec=29.953678839317757, CurrSamplesPerSec=29.788829754355625, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:06:43,437] [INFO] [timer.py:197:stop] 0/1511, RunningAvgSamplesPerSec=29.953648960771336, CurrSamplesPerSec=29.908659831145354, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:06:49,668] [INFO] [timer.py:197:stop] 0/1512, RunningAvgSamplesPerSec=29.953670313826322, CurrSamplesPerSec=29.98592679574551, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:06:52,132] [INFO] [timer.py:197:stop] 0/1513, RunningAvgSamplesPerSec=29.954090955701307, CurrSamplesPerSec=30.60302989261169, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:06:54,256] [INFO] [timer.py:197:stop] 0/1514, RunningAvgSamplesPerSec=29.954441997010676, CurrSamplesPerSec=30.49443374011064, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:06:56,426] [INFO] [timer.py:197:stop] 0/1515, RunningAvgSamplesPerSec=29.954374620533606, CurrSamplesPerSec=29.85284690592869, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:06:58,604] [INFO] [timer.py:197:stop] 0/1516, RunningAvgSamplesPerSec=29.95423356770708, CurrSamplesPerSec=29.742331364475262, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:07:00,775] [INFO] [timer.py:197:stop] 0/1517, RunningAvgSamplesPerSec=29.954161909038053, CurrSamplesPerSec=29.846062468986265, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:07:02,882] [INFO] [timer.py:197:stop] 0/1518, RunningAvgSamplesPerSec=29.954667501143284, CurrSamplesPerSec=30.740753855199024, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:07:05,072] [INFO] [timer.py:197:stop] 0/1519, RunningAvgSamplesPerSec=29.95440990649467, CurrSamplesPerSec=29.56892526967616, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:07:06,960] [INFO] [logging.py:68:log_dist] [Rank 0] step=1520, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 19:07:06,961] [INFO] [timer.py:197:stop] 0/1520, RunningAvgSamplesPerSec=29.95689047715226, CurrSamplesPerSec=34.26092219394609, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:07:53,583] [INFO] [timer.py:197:stop] 0/1521, RunningAvgSamplesPerSec=29.957070582514362, CurrSamplesPerSec=30.232990332577533, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:08:00,825] [INFO] [timer.py:197:stop] 0/1522, RunningAvgSamplesPerSec=29.955728935212612, CurrSamplesPerSec=28.047662608198845, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:08:09,124] [INFO] [timer.py:197:stop] 0/1523, RunningAvgSamplesPerSec=29.955699052477286, CurrSamplesPerSec=29.91034610858629, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:08:16,226] [INFO] [timer.py:197:stop] 0/1524, RunningAvgSamplesPerSec=29.95571705245498, CurrSamplesPerSec=29.98312007993847, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:08:23,057] [INFO] [timer.py:197:stop] 0/1525, RunningAvgSamplesPerSec=29.95540740215755, CurrSamplesPerSec=29.491424281705658, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0025, 'learning_rate': 1e-05, 'epoch': 38.0} -[2022-12-14 19:08:30,042] [INFO] [timer.py:197:stop] 0/1526, RunningAvgSamplesPerSec=29.95521114237796, CurrSamplesPerSec=29.659262516649367, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:08:37,096] [INFO] [timer.py:197:stop] 0/1527, RunningAvgSamplesPerSec=29.955033811349676, CurrSamplesPerSec=29.6871992964488, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:08:43,594] [INFO] [timer.py:197:stop] 0/1528, RunningAvgSamplesPerSec=29.955130628593867, CurrSamplesPerSec=30.103508748713985, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:08:51,019] [INFO] [timer.py:197:stop] 0/1529, RunningAvgSamplesPerSec=29.954964651412727, CurrSamplesPerSec=29.703808490183548, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:08:57,968] [INFO] [logging.py:68:log_dist] [Rank 0] step=1530, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 19:08:57,969] [INFO] [timer.py:197:stop] 0/1530, RunningAvgSamplesPerSec=29.954403816447453, CurrSamplesPerSec=29.121827723963275, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:09:06,293] [INFO] [timer.py:197:stop] 0/1531, RunningAvgSamplesPerSec=29.954624985273647, CurrSamplesPerSec=30.296429691996885, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:09:12,700] [INFO] [timer.py:197:stop] 0/1532, RunningAvgSamplesPerSec=29.95447912436507, CurrSamplesPerSec=29.73310706370407, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:09:20,138] [INFO] [timer.py:197:stop] 0/1533, RunningAvgSamplesPerSec=29.954233456171522, CurrSamplesPerSec=29.58302220527509, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:09:27,127] [INFO] [timer.py:197:stop] 0/1534, RunningAvgSamplesPerSec=29.953923804931456, CurrSamplesPerSec=29.487238732150303, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:09:33,598] [INFO] [timer.py:197:stop] 0/1535, RunningAvgSamplesPerSec=29.953820231080464, CurrSamplesPerSec=29.795981758920632, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:09:41,020] [INFO] [timer.py:197:stop] 0/1536, RunningAvgSamplesPerSec=29.95403717015975, CurrSamplesPerSec=30.290341060976537, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:09:47,951] [INFO] [timer.py:197:stop] 0/1537, RunningAvgSamplesPerSec=29.954078043421497, CurrSamplesPerSec=30.016909230299923, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:09:54,592] [INFO] [timer.py:197:stop] 0/1538, RunningAvgSamplesPerSec=29.95400177376872, CurrSamplesPerSec=29.837383949407638, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:10:01,323] [INFO] [timer.py:197:stop] 0/1539, RunningAvgSamplesPerSec=29.953829166057776, CurrSamplesPerSec=29.691031296341173, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:10:09,096] [INFO] [logging.py:68:log_dist] [Rank 0] step=1540, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 19:10:09,097] [INFO] [timer.py:197:stop] 0/1540, RunningAvgSamplesPerSec=29.953730778731362, CurrSamplesPerSec=29.803269555284924, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:10:16,489] [INFO] [timer.py:197:stop] 0/1541, RunningAvgSamplesPerSec=29.954027150461737, CurrSamplesPerSec=30.41689505596978, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:10:23,620] [INFO] [timer.py:197:stop] 0/1542, RunningAvgSamplesPerSec=29.954328267682424, CurrSamplesPerSec=30.425034637341593, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:10:30,999] [INFO] [timer.py:197:stop] 0/1543, RunningAvgSamplesPerSec=29.954102403450328, CurrSamplesPerSec=29.610266742896048, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:10:37,557] [INFO] [timer.py:197:stop] 0/1544, RunningAvgSamplesPerSec=29.954169520426664, CurrSamplesPerSec=30.057955370503826, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:10:44,409] [INFO] [timer.py:197:stop] 0/1545, RunningAvgSamplesPerSec=29.954163870673913, CurrSamplesPerSec=29.945454486634368, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:10:51,382] [INFO] [timer.py:197:stop] 0/1546, RunningAvgSamplesPerSec=29.954041688525027, CurrSamplesPerSec=29.76669453691403, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:10:59,008] [INFO] [timer.py:197:stop] 0/1547, RunningAvgSamplesPerSec=29.953977136649172, CurrSamplesPerSec=29.854639786952333, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:11:05,995] [INFO] [timer.py:197:stop] 0/1548, RunningAvgSamplesPerSec=29.953523883624715, CurrSamplesPerSec=29.269255650515422, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:11:12,344] [INFO] [timer.py:197:stop] 0/1549, RunningAvgSamplesPerSec=29.95379806094993, CurrSamplesPerSec=30.38376462619776, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:11:19,467] [INFO] [logging.py:68:log_dist] [Rank 0] step=1550, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 19:11:19,468] [INFO] [timer.py:197:stop] 0/1550, RunningAvgSamplesPerSec=29.95399021818252, CurrSamplesPerSec=30.25423909142877, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.002, 'learning_rate': 1e-05, 'epoch': 38.01} -[2022-12-14 19:11:26,035] [INFO] [timer.py:197:stop] 0/1551, RunningAvgSamplesPerSec=29.953238852328855, CurrSamplesPerSec=28.833628201664805, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:11:34,110] [INFO] [timer.py:197:stop] 0/1552, RunningAvgSamplesPerSec=29.95208220579155, CurrSamplesPerSec=28.261620450877547, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:11:36,590] [INFO] [timer.py:197:stop] 0/1553, RunningAvgSamplesPerSec=29.95198925855397, CurrSamplesPerSec=29.808611129958038, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:11:38,735] [INFO] [timer.py:197:stop] 0/1554, RunningAvgSamplesPerSec=29.952146953752557, CurrSamplesPerSec=30.19874721002232, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:11:40,878] [INFO] [timer.py:197:stop] 0/1555, RunningAvgSamplesPerSec=29.952314598672135, CurrSamplesPerSec=30.214780932939362, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:11:43,018] [INFO] [timer.py:197:stop] 0/1556, RunningAvgSamplesPerSec=29.952512767180817, CurrSamplesPerSec=30.263465491523007, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:11:45,106] [INFO] [timer.py:197:stop] 0/1557, RunningAvgSamplesPerSec=29.95317886520759, CurrSamplesPerSec=31.025371680281623, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:11:47,236] [INFO] [timer.py:197:stop] 0/1558, RunningAvgSamplesPerSec=29.953462242671016, CurrSamplesPerSec=30.400697809977927, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:11:49,424] [INFO] [timer.py:197:stop] 0/1559, RunningAvgSamplesPerSec=29.953238980906868, CurrSamplesPerSec=29.609829076563397, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:11:51,334] [INFO] [logging.py:68:log_dist] [Rank 0] step=1560, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 19:11:51,335] [INFO] [timer.py:197:stop] 0/1560, RunningAvgSamplesPerSec=29.955453958468087, CurrSamplesPerSec=33.85320344702169, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:12:31,534] [INFO] [timer.py:197:stop] 0/1561, RunningAvgSamplesPerSec=29.955857071119247, CurrSamplesPerSec=30.59736496193616, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:12:37,835] [INFO] [timer.py:197:stop] 0/1562, RunningAvgSamplesPerSec=29.955767651069102, CurrSamplesPerSec=29.817007956335182, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:12:44,190] [INFO] [timer.py:197:stop] 0/1563, RunningAvgSamplesPerSec=29.954699938881046, CurrSamplesPerSec=28.376860901977448, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:12:51,972] [INFO] [timer.py:197:stop] 0/1564, RunningAvgSamplesPerSec=29.954577804994955, CurrSamplesPerSec=29.76513333273456, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:12:57,809] [INFO] [timer.py:197:stop] 0/1565, RunningAvgSamplesPerSec=29.95477437644785, CurrSamplesPerSec=30.265000928462605, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:13:04,187] [INFO] [timer.py:197:stop] 0/1566, RunningAvgSamplesPerSec=29.954599255348565, CurrSamplesPerSec=29.683365003202372, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:13:10,172] [INFO] [timer.py:197:stop] 0/1567, RunningAvgSamplesPerSec=29.954466154170966, CurrSamplesPerSec=29.747733530975225, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:13:16,031] [INFO] [timer.py:197:stop] 0/1568, RunningAvgSamplesPerSec=29.954471563313746, CurrSamplesPerSec=29.962939266313022, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:13:22,438] [INFO] [timer.py:197:stop] 0/1569, RunningAvgSamplesPerSec=29.95462478060264, CurrSamplesPerSec=30.196501740625386, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:13:28,700] [INFO] [logging.py:68:log_dist] [Rank 0] step=1570, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 19:13:28,701] [INFO] [timer.py:197:stop] 0/1570, RunningAvgSamplesPerSec=29.954962350744417, CurrSamplesPerSec=30.49344994287777, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:13:35,022] [INFO] [timer.py:197:stop] 0/1571, RunningAvgSamplesPerSec=29.954830896699274, CurrSamplesPerSec=29.750120469856824, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:13:40,955] [INFO] [timer.py:197:stop] 0/1572, RunningAvgSamplesPerSec=29.954659738050378, CurrSamplesPerSec=29.688499504298512, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:13:46,833] [INFO] [timer.py:197:stop] 0/1573, RunningAvgSamplesPerSec=29.954268891637643, CurrSamplesPerSec=29.352965876326177, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:13:52,622] [INFO] [timer.py:197:stop] 0/1574, RunningAvgSamplesPerSec=29.953889385897114, CurrSamplesPerSec=29.369328401017373, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:13:58,575] [INFO] [timer.py:197:stop] 0/1575, RunningAvgSamplesPerSec=29.953736603899916, CurrSamplesPerSec=29.715474935362987, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0024, 'learning_rate': 1e-05, 'epoch': 39.0} -[2022-12-14 19:14:04,585] [INFO] [timer.py:197:stop] 0/1576, RunningAvgSamplesPerSec=29.953799231969764, CurrSamplesPerSec=30.052638461221544, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:14:10,429] [INFO] [timer.py:197:stop] 0/1577, RunningAvgSamplesPerSec=29.954042361523097, CurrSamplesPerSec=30.34168384275725, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:14:16,900] [INFO] [timer.py:197:stop] 0/1578, RunningAvgSamplesPerSec=29.95401915910706, CurrSamplesPerSec=29.917519911079577, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:14:23,162] [INFO] [timer.py:197:stop] 0/1579, RunningAvgSamplesPerSec=29.95401095422592, CurrSamplesPerSec=29.941085644844573, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:14:29,275] [INFO] [logging.py:68:log_dist] [Rank 0] step=1580, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 19:14:29,276] [INFO] [timer.py:197:stop] 0/1580, RunningAvgSamplesPerSec=29.95352706856711, CurrSamplesPerSec=29.209408365627375, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:14:35,278] [INFO] [timer.py:197:stop] 0/1581, RunningAvgSamplesPerSec=29.953325500657545, CurrSamplesPerSec=29.6385955722872, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:14:41,436] [INFO] [timer.py:197:stop] 0/1582, RunningAvgSamplesPerSec=29.95323671203421, CurrSamplesPerSec=29.81369302882661, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:14:47,653] [INFO] [timer.py:197:stop] 0/1583, RunningAvgSamplesPerSec=29.953252119262128, CurrSamplesPerSec=29.97761535222954, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:14:53,748] [INFO] [timer.py:197:stop] 0/1584, RunningAvgSamplesPerSec=29.952703951542286, CurrSamplesPerSec=29.11043641559918, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:14:59,824] [INFO] [timer.py:197:stop] 0/1585, RunningAvgSamplesPerSec=29.9529135010263, CurrSamplesPerSec=30.288133212272026, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:15:06,198] [INFO] [timer.py:197:stop] 0/1586, RunningAvgSamplesPerSec=29.951968432417413, CurrSamplesPerSec=28.52713737169266, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:15:12,748] [INFO] [timer.py:197:stop] 0/1587, RunningAvgSamplesPerSec=29.951918800616916, CurrSamplesPerSec=29.873507968629394, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:15:18,971] [INFO] [timer.py:197:stop] 0/1588, RunningAvgSamplesPerSec=29.951975712742225, CurrSamplesPerSec=30.042454095060393, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:15:24,796] [INFO] [timer.py:197:stop] 0/1589, RunningAvgSamplesPerSec=29.95160187756207, CurrSamplesPerSec=29.37021529222404, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:15:31,010] [INFO] [logging.py:68:log_dist] [Rank 0] step=1590, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 19:15:31,011] [INFO] [timer.py:197:stop] 0/1590, RunningAvgSamplesPerSec=29.951283389879062, CurrSamplesPerSec=29.454236587007724, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:15:36,797] [INFO] [timer.py:197:stop] 0/1591, RunningAvgSamplesPerSec=29.95158937905025, CurrSamplesPerSec=30.445518340208, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:15:42,882] [INFO] [timer.py:197:stop] 0/1592, RunningAvgSamplesPerSec=29.950594269758863, CurrSamplesPerSec=28.44870699476566, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:15:45,399] [INFO] [timer.py:197:stop] 0/1593, RunningAvgSamplesPerSec=29.950929380231532, CurrSamplesPerSec=30.493411839332307, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:15:47,529] [INFO] [timer.py:197:stop] 0/1594, RunningAvgSamplesPerSec=29.951214627157388, CurrSamplesPerSec=30.412029246525936, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:15:49,681] [INFO] [timer.py:197:stop] 0/1595, RunningAvgSamplesPerSec=29.951310528392977, CurrSamplesPerSec=30.104768025961146, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:15:51,834] [INFO] [timer.py:197:stop] 0/1596, RunningAvgSamplesPerSec=29.95139746455122, CurrSamplesPerSec=30.090530491895013, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:15:53,964] [INFO] [timer.py:197:stop] 0/1597, RunningAvgSamplesPerSec=29.95169375385113, CurrSamplesPerSec=30.4315501296401, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:15:56,100] [INFO] [timer.py:197:stop] 0/1598, RunningAvgSamplesPerSec=29.951932913417924, CurrSamplesPerSec=30.338316380289616, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:15:58,244] [INFO] [timer.py:197:stop] 0/1599, RunningAvgSamplesPerSec=29.952096742874513, CurrSamplesPerSec=30.215872674253557, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:16:00,118] [INFO] [logging.py:68:log_dist] [Rank 0] step=1600, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 19:16:00,119] [INFO] [timer.py:197:stop] 0/1600, RunningAvgSamplesPerSec=29.954572305501642, CurrSamplesPerSec=34.50960613336227, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0024, 'learning_rate': 1e-05, 'epoch': 39.01} -[2022-12-14 19:16:44,246] [INFO] [timer.py:197:stop] 0/1601, RunningAvgSamplesPerSec=29.954136207027133, CurrSamplesPerSec=29.273105004919827, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:16:51,067] [INFO] [timer.py:197:stop] 0/1602, RunningAvgSamplesPerSec=29.953064750917267, CurrSamplesPerSec=28.332554502091043, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:16:57,590] [INFO] [timer.py:197:stop] 0/1603, RunningAvgSamplesPerSec=29.953335086398297, CurrSamplesPerSec=30.392213374974073, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:17:05,312] [INFO] [timer.py:197:stop] 0/1604, RunningAvgSamplesPerSec=29.953364861972986, CurrSamplesPerSec=30.00111159330572, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:17:13,261] [INFO] [timer.py:197:stop] 0/1605, RunningAvgSamplesPerSec=29.95375954890098, CurrSamplesPerSec=30.59969137607354, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:17:19,656] [INFO] [timer.py:197:stop] 0/1606, RunningAvgSamplesPerSec=29.953558849138467, CurrSamplesPerSec=29.63525803115398, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:17:26,180] [INFO] [timer.py:197:stop] 0/1607, RunningAvgSamplesPerSec=29.954227153617882, CurrSamplesPerSec=31.065998889919143, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:17:32,564] [INFO] [timer.py:197:stop] 0/1608, RunningAvgSamplesPerSec=29.9540270557221, CurrSamplesPerSec=29.636278846289727, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:17:39,781] [INFO] [timer.py:197:stop] 0/1609, RunningAvgSamplesPerSec=29.954138667843058, CurrSamplesPerSec=30.134467514922953, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:17:46,234] [INFO] [logging.py:68:log_dist] [Rank 0] step=1610, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 19:17:46,235] [INFO] [timer.py:197:stop] 0/1610, RunningAvgSamplesPerSec=29.95431234915901, CurrSamplesPerSec=30.236044962854788, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:17:52,903] [INFO] [timer.py:197:stop] 0/1611, RunningAvgSamplesPerSec=29.954446285105792, CurrSamplesPerSec=30.171375956724408, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:17:59,449] [INFO] [timer.py:197:stop] 0/1612, RunningAvgSamplesPerSec=29.953983238338356, CurrSamplesPerSec=29.22703356919468, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:18:06,146] [INFO] [timer.py:197:stop] 0/1613, RunningAvgSamplesPerSec=29.95401391961435, CurrSamplesPerSec=30.00349241913903, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:18:12,989] [INFO] [timer.py:197:stop] 0/1614, RunningAvgSamplesPerSec=29.95350785286676, CurrSamplesPerSec=29.159849496825483, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:18:19,322] [INFO] [timer.py:197:stop] 0/1615, RunningAvgSamplesPerSec=29.953583875839666, CurrSamplesPerSec=30.07663666608441, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:18:26,507] [INFO] [timer.py:197:stop] 0/1616, RunningAvgSamplesPerSec=29.95350437745579, CurrSamplesPerSec=29.825820437832768, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:18:32,902] [INFO] [timer.py:197:stop] 0/1617, RunningAvgSamplesPerSec=29.953360686822297, CurrSamplesPerSec=29.723226940302737, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:18:39,534] [INFO] [timer.py:197:stop] 0/1618, RunningAvgSamplesPerSec=29.953610622059113, CurrSamplesPerSec=30.362773189276254, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:18:46,354] [INFO] [timer.py:197:stop] 0/1619, RunningAvgSamplesPerSec=29.95325634241052, CurrSamplesPerSec=29.39148458087027, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:18:53,091] [INFO] [logging.py:68:log_dist] [Rank 0] step=1620, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 19:18:53,092] [INFO] [timer.py:197:stop] 0/1620, RunningAvgSamplesPerSec=29.952931320942337, CurrSamplesPerSec=29.436439670122944, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:18:59,639] [INFO] [timer.py:197:stop] 0/1621, RunningAvgSamplesPerSec=29.952951127494828, CurrSamplesPerSec=29.985032474840306, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:19:05,958] [INFO] [timer.py:197:stop] 0/1622, RunningAvgSamplesPerSec=29.9530721432348, CurrSamplesPerSec=30.150287417612923, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:19:13,930] [INFO] [timer.py:197:stop] 0/1623, RunningAvgSamplesPerSec=29.9531193769683, CurrSamplesPerSec=30.029834122742926, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:19:21,007] [INFO] [timer.py:197:stop] 0/1624, RunningAvgSamplesPerSec=29.95283953610155, CurrSamplesPerSec=29.505989012559272, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:19:28,025] [INFO] [timer.py:197:stop] 0/1625, RunningAvgSamplesPerSec=29.95251340537202, CurrSamplesPerSec=29.432715053843022, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0026, 'learning_rate': 1e-05, 'epoch': 40.01} -[2022-12-14 19:19:34,341] [INFO] [timer.py:197:stop] 0/1626, RunningAvgSamplesPerSec=29.952096027438593, CurrSamplesPerSec=29.289682195458365, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:19:41,285] [INFO] [timer.py:197:stop] 0/1627, RunningAvgSamplesPerSec=29.9519679463761, CurrSamplesPerSec=29.74539971251389, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:19:47,760] [INFO] [timer.py:197:stop] 0/1628, RunningAvgSamplesPerSec=29.95187671822501, CurrSamplesPerSec=29.804361543106857, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:19:54,215] [INFO] [timer.py:197:stop] 0/1629, RunningAvgSamplesPerSec=29.951720474100807, CurrSamplesPerSec=29.699805604821265, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:20:00,753] [INFO] [logging.py:68:log_dist] [Rank 0] step=1630, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 19:20:00,754] [INFO] [timer.py:197:stop] 0/1630, RunningAvgSamplesPerSec=29.951884526888854, CurrSamplesPerSec=30.221199870845076, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:20:07,089] [INFO] [timer.py:197:stop] 0/1631, RunningAvgSamplesPerSec=29.95174387894227, CurrSamplesPerSec=29.724507265652278, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:20:14,554] [INFO] [timer.py:197:stop] 0/1632, RunningAvgSamplesPerSec=29.95206957146324, CurrSamplesPerSec=30.49219603983227, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:20:16,982] [INFO] [timer.py:197:stop] 0/1633, RunningAvgSamplesPerSec=29.952008087210086, CurrSamplesPerSec=29.852123174231636, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:20:19,110] [INFO] [timer.py:197:stop] 0/1634, RunningAvgSamplesPerSec=29.952317740537566, CurrSamplesPerSec=30.46602964302706, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:20:21,332] [INFO] [timer.py:197:stop] 0/1635, RunningAvgSamplesPerSec=29.951822342469704, CurrSamplesPerSec=29.16459533764914, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:20:23,484] [INFO] [timer.py:197:stop] 0/1636, RunningAvgSamplesPerSec=29.951920660204, CurrSamplesPerSec=30.113339311081198, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:20:25,623] [INFO] [timer.py:197:stop] 0/1637, RunningAvgSamplesPerSec=29.952133555294665, CurrSamplesPerSec=30.304094384559868, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:20:27,735] [INFO] [timer.py:197:stop] 0/1638, RunningAvgSamplesPerSec=29.95257951812492, CurrSamplesPerSec=30.699933038611732, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:20:29,872] [INFO] [timer.py:197:stop] 0/1639, RunningAvgSamplesPerSec=29.952804526554136, CurrSamplesPerSec=30.325501485233634, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:20:31,782] [INFO] [logging.py:68:log_dist] [Rank 0] step=1640, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 19:20:31,782] [INFO] [timer.py:197:stop] 0/1640, RunningAvgSamplesPerSec=29.954992294793865, CurrSamplesPerSec=34.02303552111171, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:21:10,134] [INFO] [timer.py:197:stop] 0/1641, RunningAvgSamplesPerSec=29.955391960375472, CurrSamplesPerSec=30.624679889441712, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:21:16,147] [INFO] [timer.py:197:stop] 0/1642, RunningAvgSamplesPerSec=29.95503354784833, CurrSamplesPerSec=29.378900658475445, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:21:22,539] [INFO] [timer.py:197:stop] 0/1643, RunningAvgSamplesPerSec=29.955284234309303, CurrSamplesPerSec=30.372134644834958, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:21:28,160] [INFO] [timer.py:197:stop] 0/1644, RunningAvgSamplesPerSec=29.95504199191035, CurrSamplesPerSec=29.562731561580485, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:21:34,049] [INFO] [timer.py:197:stop] 0/1645, RunningAvgSamplesPerSec=29.954586733617333, CurrSamplesPerSec=29.22526436443228, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:21:39,764] [INFO] [timer.py:197:stop] 0/1646, RunningAvgSamplesPerSec=29.95479824538895, CurrSamplesPerSec=30.306393522152753, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:21:45,608] [INFO] [timer.py:197:stop] 0/1647, RunningAvgSamplesPerSec=29.954894040648608, CurrSamplesPerSec=30.113214320329973, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:21:51,435] [INFO] [timer.py:197:stop] 0/1648, RunningAvgSamplesPerSec=29.954649253114958, CurrSamplesPerSec=29.557318263294274, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:21:58,039] [INFO] [timer.py:197:stop] 0/1649, RunningAvgSamplesPerSec=29.953911780248717, CurrSamplesPerSec=28.787335588543897, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:22:04,148] [INFO] [logging.py:68:log_dist] [Rank 0] step=1650, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 19:22:04,149] [INFO] [timer.py:197:stop] 0/1650, RunningAvgSamplesPerSec=29.953786795258285, CurrSamplesPerSec=29.749342364826823, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0018, 'learning_rate': 1e-05, 'epoch': 41.0} -[2022-12-14 19:22:10,441] [INFO] [timer.py:197:stop] 0/1651, RunningAvgSamplesPerSec=29.95357826500252, CurrSamplesPerSec=29.613820811287244, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:22:16,542] [INFO] [timer.py:197:stop] 0/1652, RunningAvgSamplesPerSec=29.95367956784135, CurrSamplesPerSec=30.121665355470057, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:22:22,461] [INFO] [timer.py:197:stop] 0/1653, RunningAvgSamplesPerSec=29.953922668323596, CurrSamplesPerSec=30.360486092684972, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:22:28,600] [INFO] [timer.py:197:stop] 0/1654, RunningAvgSamplesPerSec=29.95380825951665, CurrSamplesPerSec=29.76610370254062, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:22:34,801] [INFO] [timer.py:197:stop] 0/1655, RunningAvgSamplesPerSec=29.9537508213497, CurrSamplesPerSec=29.85916278802806, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:22:41,172] [INFO] [timer.py:197:stop] 0/1656, RunningAvgSamplesPerSec=29.952325364658048, CurrSamplesPerSec=27.767986292453855, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:22:47,215] [INFO] [timer.py:197:stop] 0/1657, RunningAvgSamplesPerSec=29.952226859828286, CurrSamplesPerSec=29.790181858133565, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:22:53,352] [INFO] [timer.py:197:stop] 0/1658, RunningAvgSamplesPerSec=29.95172144794285, CurrSamplesPerSec=29.13800308383505, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:22:59,782] [INFO] [timer.py:197:stop] 0/1659, RunningAvgSamplesPerSec=29.951010499423067, CurrSamplesPerSec=28.818234422763485, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:23:05,976] [INFO] [logging.py:68:log_dist] [Rank 0] step=1660, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 19:23:05,976] [INFO] [timer.py:197:stop] 0/1660, RunningAvgSamplesPerSec=29.950689246962156, CurrSamplesPerSec=29.42767508990206, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:23:11,870] [INFO] [timer.py:197:stop] 0/1661, RunningAvgSamplesPerSec=29.9503317148629, CurrSamplesPerSec=29.369055275020628, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:23:17,888] [INFO] [timer.py:197:stop] 0/1662, RunningAvgSamplesPerSec=29.94950437525186, CurrSamplesPerSec=28.637129086679455, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:23:24,014] [INFO] [timer.py:197:stop] 0/1663, RunningAvgSamplesPerSec=29.94948659726559, CurrSamplesPerSec=29.920004208756087, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:23:29,943] [INFO] [timer.py:197:stop] 0/1664, RunningAvgSamplesPerSec=29.94954802303207, CurrSamplesPerSec=30.051925196263305, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:23:36,480] [INFO] [timer.py:197:stop] 0/1665, RunningAvgSamplesPerSec=29.9495051418028, CurrSamplesPerSec=29.878405830283462, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:23:42,095] [INFO] [timer.py:197:stop] 0/1666, RunningAvgSamplesPerSec=29.949582336722212, CurrSamplesPerSec=30.078510454319368, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:23:48,130] [INFO] [timer.py:197:stop] 0/1667, RunningAvgSamplesPerSec=29.949436239566836, CurrSamplesPerSec=29.708289189038734, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:23:54,088] [INFO] [timer.py:197:stop] 0/1668, RunningAvgSamplesPerSec=29.949131882625437, CurrSamplesPerSec=29.450814420303722, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:24:00,119] [INFO] [timer.py:197:stop] 0/1669, RunningAvgSamplesPerSec=29.949208182438234, CurrSamplesPerSec=30.07686582189923, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:24:06,029] [INFO] [logging.py:68:log_dist] [Rank 0] step=1670, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 19:24:06,030] [INFO] [timer.py:197:stop] 0/1670, RunningAvgSamplesPerSec=29.94912302354725, CurrSamplesPerSec=29.80783327229679, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:24:12,079] [INFO] [timer.py:197:stop] 0/1671, RunningAvgSamplesPerSec=29.948960969496643, CurrSamplesPerSec=29.681074091822705, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:24:18,187] [INFO] [timer.py:197:stop] 0/1672, RunningAvgSamplesPerSec=29.949084449290257, CurrSamplesPerSec=30.156601058509672, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:24:20,670] [INFO] [timer.py:197:stop] 0/1673, RunningAvgSamplesPerSec=29.949411964133105, CurrSamplesPerSec=30.5065424140057, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:24:22,843] [INFO] [timer.py:197:stop] 0/1674, RunningAvgSamplesPerSec=29.94932081458021, CurrSamplesPerSec=29.797781046763017, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:24:24,946] [INFO] [timer.py:197:stop] 0/1675, RunningAvgSamplesPerSec=29.949820700757336, CurrSamplesPerSec=30.809639710262363, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0023, 'learning_rate': 1e-05, 'epoch': 41.01} -[2022-12-14 19:24:27,094] [INFO] [timer.py:197:stop] 0/1676, RunningAvgSamplesPerSec=29.949955920887977, CurrSamplesPerSec=30.1779019909838, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:24:29,295] [INFO] [timer.py:197:stop] 0/1677, RunningAvgSamplesPerSec=29.9498742382169, CurrSamplesPerSec=29.813759253928144, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:24:31,436] [INFO] [timer.py:197:stop] 0/1678, RunningAvgSamplesPerSec=29.950050493230034, CurrSamplesPerSec=30.248218531866076, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:24:33,554] [INFO] [timer.py:197:stop] 0/1679, RunningAvgSamplesPerSec=29.95042082108015, CurrSamplesPerSec=30.584232765856555, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:24:35,447] [INFO] [logging.py:68:log_dist] [Rank 0] step=1680, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 19:24:35,448] [INFO] [timer.py:197:stop] 0/1680, RunningAvgSamplesPerSec=29.952652268613313, CurrSamplesPerSec=34.22942775803144, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:25:15,637] [INFO] [timer.py:197:stop] 0/1681, RunningAvgSamplesPerSec=29.95200272976842, CurrSamplesPerSec=28.90036740822277, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:25:22,009] [INFO] [timer.py:197:stop] 0/1682, RunningAvgSamplesPerSec=29.95257573817802, CurrSamplesPerSec=30.946604214430717, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:25:28,634] [INFO] [timer.py:197:stop] 0/1683, RunningAvgSamplesPerSec=29.95267313413743, CurrSamplesPerSec=30.117197644033247, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:25:35,124] [INFO] [timer.py:197:stop] 0/1684, RunningAvgSamplesPerSec=29.952043779418926, CurrSamplesPerSec=28.930212386961827, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:25:41,279] [INFO] [timer.py:197:stop] 0/1685, RunningAvgSamplesPerSec=29.952458769252086, CurrSamplesPerSec=30.6671364379365, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:25:47,725] [INFO] [timer.py:197:stop] 0/1686, RunningAvgSamplesPerSec=29.952664710932495, CurrSamplesPerSec=30.30332466197742, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:25:53,949] [INFO] [timer.py:197:stop] 0/1687, RunningAvgSamplesPerSec=29.952763971597403, CurrSamplesPerSec=30.120857554567767, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:26:00,432] [INFO] [timer.py:197:stop] 0/1688, RunningAvgSamplesPerSec=29.952569732971508, CurrSamplesPerSec=29.62881739348307, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:26:10,052] [INFO] [timer.py:197:stop] 0/1689, RunningAvgSamplesPerSec=29.95230003523335, CurrSamplesPerSec=29.504393419674678, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:26:17,520] [INFO] [logging.py:68:log_dist] [Rank 0] step=1690, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 19:26:17,521] [INFO] [timer.py:197:stop] 0/1690, RunningAvgSamplesPerSec=29.952167492448826, CurrSamplesPerSec=29.730225642986266, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:26:23,998] [INFO] [timer.py:197:stop] 0/1691, RunningAvgSamplesPerSec=29.95193124343898, CurrSamplesPerSec=29.55838579235466, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:26:30,560] [INFO] [timer.py:197:stop] 0/1692, RunningAvgSamplesPerSec=29.952552688420937, CurrSamplesPerSec=31.04031400693019, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:26:37,066] [INFO] [timer.py:197:stop] 0/1693, RunningAvgSamplesPerSec=29.952423243185276, CurrSamplesPerSec=29.735247909253452, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:26:43,758] [INFO] [timer.py:197:stop] 0/1694, RunningAvgSamplesPerSec=29.95239177617116, CurrSamplesPerSec=29.89927547273377, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:26:50,184] [INFO] [timer.py:197:stop] 0/1695, RunningAvgSamplesPerSec=29.952261147112118, CurrSamplesPerSec=29.73285676958407, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:26:57,720] [INFO] [timer.py:197:stop] 0/1696, RunningAvgSamplesPerSec=29.952773337312077, CurrSamplesPerSec=30.845779294276518, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:27:03,976] [INFO] [timer.py:197:stop] 0/1697, RunningAvgSamplesPerSec=29.952620530199916, CurrSamplesPerSec=29.69598448047686, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:27:10,382] [INFO] [timer.py:197:stop] 0/1698, RunningAvgSamplesPerSec=29.952738751130344, CurrSamplesPerSec=30.154473635226918, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:27:16,752] [INFO] [timer.py:197:stop] 0/1699, RunningAvgSamplesPerSec=29.952859384158675, CurrSamplesPerSec=30.15886092759279, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:27:25,801] [INFO] [logging.py:68:log_dist] [Rank 0] step=1700, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 19:27:25,802] [INFO] [timer.py:197:stop] 0/1700, RunningAvgSamplesPerSec=29.952833120487384, CurrSamplesPerSec=29.908329929537526, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0018, 'learning_rate': 1e-05, 'epoch': 42.0} -[2022-12-14 19:27:35,547] [INFO] [timer.py:197:stop] 0/1701, RunningAvgSamplesPerSec=29.953214284299825, CurrSamplesPerSec=30.614732659997017, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:27:41,578] [INFO] [timer.py:197:stop] 0/1702, RunningAvgSamplesPerSec=29.953263637968288, CurrSamplesPerSec=30.037351055566013, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:27:48,111] [INFO] [timer.py:197:stop] 0/1703, RunningAvgSamplesPerSec=29.95351154164371, CurrSamplesPerSec=30.38096546647523, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:27:54,296] [INFO] [timer.py:197:stop] 0/1704, RunningAvgSamplesPerSec=29.953396881569844, CurrSamplesPerSec=29.7596225683397, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:28:00,731] [INFO] [timer.py:197:stop] 0/1705, RunningAvgSamplesPerSec=29.953313099405147, CurrSamplesPerSec=29.811391889285076, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:28:06,924] [INFO] [timer.py:197:stop] 0/1706, RunningAvgSamplesPerSec=29.952768403300396, CurrSamplesPerSec=29.053031555065918, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:28:13,828] [INFO] [timer.py:197:stop] 0/1707, RunningAvgSamplesPerSec=29.952697652690127, CurrSamplesPerSec=29.832622197851514, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:28:20,393] [INFO] [timer.py:197:stop] 0/1708, RunningAvgSamplesPerSec=29.95306349112709, CurrSamplesPerSec=30.590091532518418, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:28:26,412] [INFO] [timer.py:197:stop] 0/1709, RunningAvgSamplesPerSec=29.953381492276282, CurrSamplesPerSec=30.505904510720487, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:28:32,611] [INFO] [logging.py:68:log_dist] [Rank 0] step=1710, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 19:28:32,612] [INFO] [timer.py:197:stop] 0/1710, RunningAvgSamplesPerSec=29.9534124495183, CurrSamplesPerSec=30.00634990898121, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:28:38,969] [INFO] [timer.py:197:stop] 0/1711, RunningAvgSamplesPerSec=29.953334954428808, CurrSamplesPerSec=29.82155600412825, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:28:45,114] [INFO] [timer.py:197:stop] 0/1712, RunningAvgSamplesPerSec=29.95342311544328, CurrSamplesPerSec=30.10485243168873, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:28:47,701] [INFO] [timer.py:197:stop] 0/1713, RunningAvgSamplesPerSec=29.95365681324592, CurrSamplesPerSec=30.35868687748314, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:28:49,860] [INFO] [timer.py:197:stop] 0/1714, RunningAvgSamplesPerSec=29.95369041566487, CurrSamplesPerSec=30.011294786136837, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:28:52,011] [INFO] [timer.py:197:stop] 0/1715, RunningAvgSamplesPerSec=29.95378121197961, CurrSamplesPerSec=30.110035850214057, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:28:54,183] [INFO] [timer.py:197:stop] 0/1716, RunningAvgSamplesPerSec=29.953694299264225, CurrSamplesPerSec=29.805549584376315, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:28:56,316] [INFO] [timer.py:197:stop] 0/1717, RunningAvgSamplesPerSec=29.95392969921034, CurrSamplesPerSec=30.362917432143206, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:28:58,505] [INFO] [timer.py:197:stop] 0/1718, RunningAvgSamplesPerSec=29.953899357292258, CurrSamplesPerSec=29.90195326197914, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:29:00,721] [INFO] [timer.py:197:stop] 0/1719, RunningAvgSamplesPerSec=29.953686425586405, CurrSamplesPerSec=29.59270166625473, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:29:02,652] [INFO] [logging.py:68:log_dist] [Rank 0] step=1720, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 19:29:02,653] [INFO] [timer.py:197:stop] 0/1720, RunningAvgSamplesPerSec=29.95556047837423, CurrSamplesPerSec=33.560800161830706, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:29:39,941] [INFO] [timer.py:197:stop] 0/1721, RunningAvgSamplesPerSec=29.95571108719752, CurrSamplesPerSec=30.216712790596233, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:29:46,142] [INFO] [timer.py:197:stop] 0/1722, RunningAvgSamplesPerSec=29.956007979192517, CurrSamplesPerSec=30.475216159267603, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:29:52,319] [INFO] [timer.py:197:stop] 0/1723, RunningAvgSamplesPerSec=29.955374629523202, CurrSamplesPerSec=28.904260392557166, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:29:58,200] [INFO] [timer.py:197:stop] 0/1724, RunningAvgSamplesPerSec=29.95521109439806, CurrSamplesPerSec=29.6763883431862, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:30:04,030] [INFO] [timer.py:197:stop] 0/1725, RunningAvgSamplesPerSec=29.95537356982786, CurrSamplesPerSec=30.237795608912034, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.002, 'learning_rate': 1e-05, 'epoch': 43.0} -[2022-12-14 19:30:10,440] [INFO] [timer.py:197:stop] 0/1726, RunningAvgSamplesPerSec=29.955203770265904, CurrSamplesPerSec=29.665470516317665, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:30:16,918] [INFO] [timer.py:197:stop] 0/1727, RunningAvgSamplesPerSec=29.955517887031785, CurrSamplesPerSec=30.507031260121735, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:30:23,401] [INFO] [timer.py:197:stop] 0/1728, RunningAvgSamplesPerSec=29.95507577970601, CurrSamplesPerSec=29.211385450995124, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:30:29,451] [INFO] [timer.py:197:stop] 0/1729, RunningAvgSamplesPerSec=29.95545710285211, CurrSamplesPerSec=30.628415261889707, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:30:35,301] [INFO] [logging.py:68:log_dist] [Rank 0] step=1730, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 19:30:35,302] [INFO] [timer.py:197:stop] 0/1730, RunningAvgSamplesPerSec=29.955430682904137, CurrSamplesPerSec=29.90987286528931, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:30:41,375] [INFO] [timer.py:197:stop] 0/1731, RunningAvgSamplesPerSec=29.955513142821395, CurrSamplesPerSec=30.0986853074465, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:30:47,307] [INFO] [timer.py:197:stop] 0/1732, RunningAvgSamplesPerSec=29.95472676862613, CurrSamplesPerSec=28.6541528035997, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:30:53,112] [INFO] [timer.py:197:stop] 0/1733, RunningAvgSamplesPerSec=29.954780624212678, CurrSamplesPerSec=30.04824165445885, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:30:59,049] [INFO] [timer.py:197:stop] 0/1734, RunningAvgSamplesPerSec=29.954794915637084, CurrSamplesPerSec=29.979553830482207, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:31:05,007] [INFO] [timer.py:197:stop] 0/1735, RunningAvgSamplesPerSec=29.954736246145323, CurrSamplesPerSec=29.85346442952438, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:31:11,063] [INFO] [timer.py:197:stop] 0/1736, RunningAvgSamplesPerSec=29.9545320592775, CurrSamplesPerSec=29.60480990355224, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:31:17,234] [INFO] [timer.py:197:stop] 0/1737, RunningAvgSamplesPerSec=29.954748458384312, CurrSamplesPerSec=30.334747411829532, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:31:23,876] [INFO] [timer.py:197:stop] 0/1738, RunningAvgSamplesPerSec=29.954675568517725, CurrSamplesPerSec=29.82874362125344, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:31:29,714] [INFO] [timer.py:197:stop] 0/1739, RunningAvgSamplesPerSec=29.95480006359351, CurrSamplesPerSec=30.17249508637703, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:31:36,025] [INFO] [logging.py:68:log_dist] [Rank 0] step=1740, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 19:31:36,026] [INFO] [timer.py:197:stop] 0/1740, RunningAvgSamplesPerSec=29.954894224522636, CurrSamplesPerSec=30.119350226067255, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:31:41,831] [INFO] [timer.py:197:stop] 0/1741, RunningAvgSamplesPerSec=29.95494931862598, CurrSamplesPerSec=30.05101011344885, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:31:47,767] [INFO] [timer.py:197:stop] 0/1742, RunningAvgSamplesPerSec=29.95477729092377, CurrSamplesPerSec=29.658580909445774, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:31:53,789] [INFO] [timer.py:197:stop] 0/1743, RunningAvgSamplesPerSec=29.9546215289133, CurrSamplesPerSec=29.686027236981868, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:31:59,603] [INFO] [timer.py:197:stop] 0/1744, RunningAvgSamplesPerSec=29.95457500602683, CurrSamplesPerSec=29.873797207079868, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:32:05,621] [INFO] [timer.py:197:stop] 0/1745, RunningAvgSamplesPerSec=29.9544862277434, CurrSamplesPerSec=29.800629260863275, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:32:11,366] [INFO] [timer.py:197:stop] 0/1746, RunningAvgSamplesPerSec=29.95431158995507, CurrSamplesPerSec=29.652981774662244, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:32:17,564] [INFO] [timer.py:197:stop] 0/1747, RunningAvgSamplesPerSec=29.95375605852516, CurrSamplesPerSec=29.015281380789318, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:32:23,502] [INFO] [timer.py:197:stop] 0/1748, RunningAvgSamplesPerSec=29.953784437875036, CurrSamplesPerSec=30.003388459699394, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:32:29,565] [INFO] [timer.py:197:stop] 0/1749, RunningAvgSamplesPerSec=29.953720543524614, CurrSamplesPerSec=29.84257519413455, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:32:35,751] [INFO] [logging.py:68:log_dist] [Rank 0] step=1750, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 19:32:35,752] [INFO] [timer.py:197:stop] 0/1750, RunningAvgSamplesPerSec=29.953941491770586, CurrSamplesPerSec=30.34498001610649, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0018, 'learning_rate': 1e-05, 'epoch': 43.01} -[2022-12-14 19:32:41,728] [INFO] [timer.py:197:stop] 0/1751, RunningAvgSamplesPerSec=29.954207978860946, CurrSamplesPerSec=30.427390102089454, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:32:47,626] [INFO] [timer.py:197:stop] 0/1752, RunningAvgSamplesPerSec=29.953749811233408, CurrSamplesPerSec=29.173305343411066, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:32:50,239] [INFO] [timer.py:197:stop] 0/1753, RunningAvgSamplesPerSec=29.95370565243173, CurrSamplesPerSec=29.876626719932936, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:32:52,372] [INFO] [timer.py:197:stop] 0/1754, RunningAvgSamplesPerSec=29.953940899525946, CurrSamplesPerSec=30.371605439872447, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:32:54,504] [INFO] [timer.py:197:stop] 0/1755, RunningAvgSamplesPerSec=29.954179199654526, CurrSamplesPerSec=30.37758584184082, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:32:56,646] [INFO] [timer.py:197:stop] 0/1756, RunningAvgSamplesPerSec=29.95433532932042, CurrSamplesPerSec=30.230555924378184, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:32:58,808] [INFO] [timer.py:197:stop] 0/1757, RunningAvgSamplesPerSec=29.954338929603683, CurrSamplesPerSec=29.960655158785567, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:33:00,951] [INFO] [timer.py:197:stop] 0/1758, RunningAvgSamplesPerSec=29.9544945313603, CurrSamplesPerSec=30.230089516094157, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:33:03,074] [INFO] [timer.py:197:stop] 0/1759, RunningAvgSamplesPerSec=29.95481997813705, CurrSamplesPerSec=30.53742593035838, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:33:04,976] [INFO] [logging.py:68:log_dist] [Rank 0] step=1760, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 19:33:04,977] [INFO] [timer.py:197:stop] 0/1760, RunningAvgSamplesPerSec=29.957012813131197, CurrSamplesPerSec=34.3788453051283, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:33:48,454] [INFO] [timer.py:197:stop] 0/1761, RunningAvgSamplesPerSec=29.956941393144625, CurrSamplesPerSec=29.8319093923207, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:33:55,650] [INFO] [timer.py:197:stop] 0/1762, RunningAvgSamplesPerSec=29.95724213041907, CurrSamplesPerSec=30.49575365230435, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:34:03,027] [INFO] [timer.py:197:stop] 0/1763, RunningAvgSamplesPerSec=29.95691457862099, CurrSamplesPerSec=29.39131402113355, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:34:09,388] [INFO] [timer.py:197:stop] 0/1764, RunningAvgSamplesPerSec=29.957152041465488, CurrSamplesPerSec=30.381247422349045, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:34:16,099] [INFO] [timer.py:197:stop] 0/1765, RunningAvgSamplesPerSec=29.956434961667217, CurrSamplesPerSec=28.744102802918704, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:34:22,753] [INFO] [timer.py:197:stop] 0/1766, RunningAvgSamplesPerSec=29.95636833280246, CurrSamplesPerSec=29.83936072167033, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:34:29,764] [INFO] [timer.py:197:stop] 0/1767, RunningAvgSamplesPerSec=29.956343276567637, CurrSamplesPerSec=29.91220923300472, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:34:36,067] [INFO] [timer.py:197:stop] 0/1768, RunningAvgSamplesPerSec=29.956572783054618, CurrSamplesPerSec=30.36720756672578, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:34:43,326] [INFO] [timer.py:197:stop] 0/1769, RunningAvgSamplesPerSec=29.95682798357912, CurrSamplesPerSec=30.41439992803027, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:34:49,847] [INFO] [logging.py:68:log_dist] [Rank 0] step=1770, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 19:34:49,848] [INFO] [timer.py:197:stop] 0/1770, RunningAvgSamplesPerSec=29.956835467109666, CurrSamplesPerSec=29.970064708479136, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:34:56,939] [INFO] [timer.py:197:stop] 0/1771, RunningAvgSamplesPerSec=29.95648875120942, CurrSamplesPerSec=29.355793892242378, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:35:03,653] [INFO] [timer.py:197:stop] 0/1772, RunningAvgSamplesPerSec=29.956722845585347, CurrSamplesPerSec=30.376643944151024, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:35:10,830] [INFO] [timer.py:197:stop] 0/1773, RunningAvgSamplesPerSec=29.956502571210176, CurrSamplesPerSec=29.571628910535164, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:35:17,536] [INFO] [timer.py:197:stop] 0/1774, RunningAvgSamplesPerSec=29.956312769633307, CurrSamplesPerSec=29.623906203942923, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:35:23,939] [INFO] [timer.py:197:stop] 0/1775, RunningAvgSamplesPerSec=29.95615623145516, CurrSamplesPerSec=29.681316951056623, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0016, 'learning_rate': 1e-05, 'epoch': 44.0} -[2022-12-14 19:35:31,398] [INFO] [timer.py:197:stop] 0/1776, RunningAvgSamplesPerSec=29.955912301482496, CurrSamplesPerSec=29.529583050938072, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:35:37,760] [INFO] [timer.py:197:stop] 0/1777, RunningAvgSamplesPerSec=29.955762084960575, CurrSamplesPerSec=29.6916290067482, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:35:44,775] [INFO] [timer.py:197:stop] 0/1778, RunningAvgSamplesPerSec=29.955710260030823, CurrSamplesPerSec=29.86400278753211, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:35:51,488] [INFO] [timer.py:197:stop] 0/1779, RunningAvgSamplesPerSec=29.955483845842558, CurrSamplesPerSec=29.558701509320773, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:35:58,084] [INFO] [logging.py:68:log_dist] [Rank 0] step=1780, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 19:35:58,084] [INFO] [timer.py:197:stop] 0/1780, RunningAvgSamplesPerSec=29.955531254011962, CurrSamplesPerSec=30.04001329464309, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:36:04,820] [INFO] [timer.py:197:stop] 0/1781, RunningAvgSamplesPerSec=29.955798373698254, CurrSamplesPerSec=30.438392847423557, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:36:11,636] [INFO] [timer.py:197:stop] 0/1782, RunningAvgSamplesPerSec=29.95573534842531, CurrSamplesPerSec=29.844031720771472, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:36:18,727] [INFO] [timer.py:197:stop] 0/1783, RunningAvgSamplesPerSec=29.955666558733792, CurrSamplesPerSec=29.833719653423312, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:36:25,429] [INFO] [timer.py:197:stop] 0/1784, RunningAvgSamplesPerSec=29.955691007523967, CurrSamplesPerSec=29.999297724713006, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:36:32,288] [INFO] [timer.py:197:stop] 0/1785, RunningAvgSamplesPerSec=29.955885912530462, CurrSamplesPerSec=30.307283163039212, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:36:38,657] [INFO] [timer.py:197:stop] 0/1786, RunningAvgSamplesPerSec=29.955689220864375, CurrSamplesPerSec=29.60904849484789, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:36:45,438] [INFO] [timer.py:197:stop] 0/1787, RunningAvgSamplesPerSec=29.955461065625784, CurrSamplesPerSec=29.553891625528987, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:36:52,408] [INFO] [timer.py:197:stop] 0/1788, RunningAvgSamplesPerSec=29.955038212320673, CurrSamplesPerSec=29.21880667607263, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:36:58,901] [INFO] [timer.py:197:stop] 0/1789, RunningAvgSamplesPerSec=29.95529064873008, CurrSamplesPerSec=30.41303536271988, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:37:05,434] [INFO] [logging.py:68:log_dist] [Rank 0] step=1790, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 19:37:05,435] [INFO] [timer.py:197:stop] 0/1790, RunningAvgSamplesPerSec=29.955485620028615, CurrSamplesPerSec=30.308001757270617, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:37:12,016] [INFO] [timer.py:197:stop] 0/1791, RunningAvgSamplesPerSec=29.955456179696444, CurrSamplesPerSec=29.902909255655153, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:37:18,700] [INFO] [timer.py:197:stop] 0/1792, RunningAvgSamplesPerSec=29.955408179604728, CurrSamplesPerSec=29.869781615530325, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:37:21,099] [INFO] [timer.py:197:stop] 0/1793, RunningAvgSamplesPerSec=29.95560793407083, CurrSamplesPerSec=30.317490395741203, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:37:23,215] [INFO] [timer.py:197:stop] 0/1794, RunningAvgSamplesPerSec=29.95596138193026, CurrSamplesPerSec=30.60266007329532, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:37:25,334] [INFO] [timer.py:197:stop] 0/1795, RunningAvgSamplesPerSec=29.9562975237613, CurrSamplesPerSec=30.571031760177107, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:37:27,493] [INFO] [timer.py:197:stop] 0/1796, RunningAvgSamplesPerSec=29.95632335157271, CurrSamplesPerSec=30.002704357484852, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:37:29,688] [INFO] [timer.py:197:stop] 0/1797, RunningAvgSamplesPerSec=29.95626159938758, CurrSamplesPerSec=29.84588659260587, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:37:31,816] [INFO] [timer.py:197:stop] 0/1798, RunningAvgSamplesPerSec=29.956527935603642, CurrSamplesPerSec=30.442359098222273, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:37:34,003] [INFO] [timer.py:197:stop] 0/1799, RunningAvgSamplesPerSec=29.95632525614538, CurrSamplesPerSec=29.59668552085216, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:37:35,884] [INFO] [logging.py:68:log_dist] [Rank 0] step=1800, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 19:37:35,885] [INFO] [timer.py:197:stop] 0/1800, RunningAvgSamplesPerSec=29.958505190656634, CurrSamplesPerSec=34.46550611316007, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.001, 'learning_rate': 1e-05, 'epoch': 44.01} -[2022-12-14 19:38:13,282] [INFO] [timer.py:197:stop] 0/1801, RunningAvgSamplesPerSec=29.958330625633295, CurrSamplesPerSec=29.6477187470214, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:38:19,442] [INFO] [timer.py:197:stop] 0/1802, RunningAvgSamplesPerSec=29.95787709148196, CurrSamplesPerSec=29.16361309429717, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:38:25,866] [INFO] [timer.py:197:stop] 0/1803, RunningAvgSamplesPerSec=29.9581228008581, CurrSamplesPerSec=30.40703066309382, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:38:31,937] [INFO] [timer.py:197:stop] 0/1804, RunningAvgSamplesPerSec=29.957834579182375, CurrSamplesPerSec=29.447593333743615, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:38:38,714] [INFO] [timer.py:197:stop] 0/1805, RunningAvgSamplesPerSec=29.958256631788394, CurrSamplesPerSec=30.738617135019407, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:38:44,803] [INFO] [timer.py:197:stop] 0/1806, RunningAvgSamplesPerSec=29.958005683942606, CurrSamplesPerSec=29.512282260227888, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:38:51,211] [INFO] [timer.py:197:stop] 0/1807, RunningAvgSamplesPerSec=29.95736187992987, CurrSamplesPerSec=28.839309447784053, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:38:57,085] [INFO] [timer.py:197:stop] 0/1808, RunningAvgSamplesPerSec=29.957539537132117, CurrSamplesPerSec=30.28168239681024, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:39:03,937] [INFO] [timer.py:197:stop] 0/1809, RunningAvgSamplesPerSec=29.956714983693253, CurrSamplesPerSec=28.538128239617173, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:39:12,396] [INFO] [logging.py:68:log_dist] [Rank 0] step=1810, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 19:39:12,396] [INFO] [timer.py:197:stop] 0/1810, RunningAvgSamplesPerSec=29.956556994609258, CurrSamplesPerSec=29.673767200690765, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:39:18,750] [INFO] [timer.py:197:stop] 0/1811, RunningAvgSamplesPerSec=29.95661968069175, CurrSamplesPerSec=30.070386776871356, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:39:24,446] [INFO] [timer.py:197:stop] 0/1812, RunningAvgSamplesPerSec=29.956702712772373, CurrSamplesPerSec=30.107665101177666, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:39:30,413] [INFO] [timer.py:197:stop] 0/1813, RunningAvgSamplesPerSec=29.956616330066783, CurrSamplesPerSec=29.801075894336055, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:39:36,732] [INFO] [timer.py:197:stop] 0/1814, RunningAvgSamplesPerSec=29.956520152422282, CurrSamplesPerSec=29.78334986697455, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:39:42,592] [INFO] [timer.py:197:stop] 0/1815, RunningAvgSamplesPerSec=29.956496844626564, CurrSamplesPerSec=29.914322610324025, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:39:48,552] [INFO] [timer.py:197:stop] 0/1816, RunningAvgSamplesPerSec=29.956703622204543, CurrSamplesPerSec=30.33634494422986, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:39:54,350] [INFO] [timer.py:197:stop] 0/1817, RunningAvgSamplesPerSec=29.95661501830371, CurrSamplesPerSec=29.7967457682055, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:40:00,104] [INFO] [timer.py:197:stop] 0/1818, RunningAvgSamplesPerSec=29.956555353715693, CurrSamplesPerSec=29.848654397124587, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:40:06,215] [INFO] [timer.py:197:stop] 0/1819, RunningAvgSamplesPerSec=29.956718664345523, CurrSamplesPerSec=30.25625784484873, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:40:12,527] [INFO] [logging.py:68:log_dist] [Rank 0] step=1820, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 19:40:12,527] [INFO] [timer.py:197:stop] 0/1820, RunningAvgSamplesPerSec=29.956639707117084, CurrSamplesPerSec=29.813858592132092, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:40:18,518] [INFO] [timer.py:197:stop] 0/1821, RunningAvgSamplesPerSec=29.956383985017148, CurrSamplesPerSec=29.498589771231508, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:40:24,532] [INFO] [timer.py:197:stop] 0/1822, RunningAvgSamplesPerSec=29.956625238082175, CurrSamplesPerSec=30.40199240390349, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:40:30,806] [INFO] [timer.py:197:stop] 0/1823, RunningAvgSamplesPerSec=29.956633353053725, CurrSamplesPerSec=29.971409890420063, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:40:36,933] [INFO] [timer.py:197:stop] 0/1824, RunningAvgSamplesPerSec=29.956635248192754, CurrSamplesPerSec=29.960086694199372, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:40:43,473] [INFO] [timer.py:197:stop] 0/1825, RunningAvgSamplesPerSec=29.9571651823416, CurrSamplesPerSec=30.95487988117848, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0012, 'learning_rate': 1e-05, 'epoch': 45.01} -[2022-12-14 19:40:49,368] [INFO] [timer.py:197:stop] 0/1826, RunningAvgSamplesPerSec=29.95741737891649, CurrSamplesPerSec=30.42434151731448, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:40:55,909] [INFO] [timer.py:197:stop] 0/1827, RunningAvgSamplesPerSec=29.95652824431651, CurrSamplesPerSec=28.418080616442683, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:41:02,252] [INFO] [timer.py:197:stop] 0/1828, RunningAvgSamplesPerSec=29.95659125402444, CurrSamplesPerSec=30.07202733190708, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:41:09,113] [INFO] [timer.py:197:stop] 0/1829, RunningAvgSamplesPerSec=29.956857132496847, CurrSamplesPerSec=30.450353422467124, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:41:14,993] [INFO] [logging.py:68:log_dist] [Rank 0] step=1830, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 19:41:14,993] [INFO] [timer.py:197:stop] 0/1830, RunningAvgSamplesPerSec=29.957111894189236, CurrSamplesPerSec=30.429911508718423, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:41:20,832] [INFO] [timer.py:197:stop] 0/1831, RunningAvgSamplesPerSec=29.9573247803898, CurrSamplesPerSec=30.351605398027836, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:41:26,801] [INFO] [timer.py:197:stop] 0/1832, RunningAvgSamplesPerSec=29.95696979400491, CurrSamplesPerSec=29.321480456166245, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:41:29,213] [INFO] [timer.py:197:stop] 0/1833, RunningAvgSamplesPerSec=29.957281090399118, CurrSamplesPerSec=30.53800261882253, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:41:31,342] [INFO] [timer.py:197:stop] 0/1834, RunningAvgSamplesPerSec=29.957537991500818, CurrSamplesPerSec=30.435431777967704, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:41:33,534] [INFO] [timer.py:197:stop] 0/1835, RunningAvgSamplesPerSec=29.957305921489986, CurrSamplesPerSec=29.538106166442628, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:41:35,693] [INFO] [timer.py:197:stop] 0/1836, RunningAvgSamplesPerSec=29.957539673245194, CurrSamplesPerSec=30.392227138983728, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:41:37,815] [INFO] [timer.py:197:stop] 0/1837, RunningAvgSamplesPerSec=29.957953490645973, CurrSamplesPerSec=30.736632046721756, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:41:39,943] [INFO] [timer.py:197:stop] 0/1838, RunningAvgSamplesPerSec=29.958212729682725, CurrSamplesPerSec=30.44159614463428, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:41:42,131] [INFO] [timer.py:197:stop] 0/1839, RunningAvgSamplesPerSec=29.9580168509835, CurrSamplesPerSec=29.602651888469207, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:41:43,979] [INFO] [logging.py:68:log_dist] [Rank 0] step=1840, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 19:41:43,980] [INFO] [timer.py:197:stop] 0/1840, RunningAvgSamplesPerSec=29.960393272333977, CurrSamplesPerSec=35.07093426871344, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:42:21,672] [INFO] [timer.py:197:stop] 0/1841, RunningAvgSamplesPerSec=29.959842459068664, CurrSamplesPerSec=28.980557413384247, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:42:27,593] [INFO] [timer.py:197:stop] 0/1842, RunningAvgSamplesPerSec=29.959098989217114, CurrSamplesPerSec=28.651562328556285, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:42:34,041] [INFO] [timer.py:197:stop] 0/1843, RunningAvgSamplesPerSec=29.958836787330586, CurrSamplesPerSec=29.484035580904866, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:42:39,992] [INFO] [timer.py:197:stop] 0/1844, RunningAvgSamplesPerSec=29.959058139281474, CurrSamplesPerSec=30.372189628226295, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:42:45,895] [INFO] [timer.py:197:stop] 0/1845, RunningAvgSamplesPerSec=29.95933959641856, CurrSamplesPerSec=30.486918308008722, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:42:52,040] [INFO] [timer.py:197:stop] 0/1846, RunningAvgSamplesPerSec=29.959247829444863, CurrSamplesPerSec=29.791071206016113, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:42:57,838] [INFO] [timer.py:197:stop] 0/1847, RunningAvgSamplesPerSec=29.959207486083507, CurrSamplesPerSec=29.884998699105495, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:43:04,400] [INFO] [timer.py:197:stop] 0/1848, RunningAvgSamplesPerSec=29.9590700329021, CurrSamplesPerSec=29.70759875116189, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:43:10,469] [INFO] [timer.py:197:stop] 0/1849, RunningAvgSamplesPerSec=29.95933873781738, CurrSamplesPerSec=30.46372350763643, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:43:16,792] [INFO] [logging.py:68:log_dist] [Rank 0] step=1850, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 19:43:16,793] [INFO] [timer.py:197:stop] 0/1850, RunningAvgSamplesPerSec=29.959074574470346, CurrSamplesPerSec=29.47898773917353, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.001, 'learning_rate': 1e-05, 'epoch': 46.0} -[2022-12-14 19:43:23,384] [INFO] [timer.py:197:stop] 0/1851, RunningAvgSamplesPerSec=29.959125605343655, CurrSamplesPerSec=30.053728610818897, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:43:29,310] [INFO] [timer.py:197:stop] 0/1852, RunningAvgSamplesPerSec=29.959002754378155, CurrSamplesPerSec=29.733561555905503, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:43:35,246] [INFO] [timer.py:197:stop] 0/1853, RunningAvgSamplesPerSec=29.959623989671062, CurrSamplesPerSec=31.15478167375178, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:43:41,304] [INFO] [timer.py:197:stop] 0/1854, RunningAvgSamplesPerSec=29.95927533160793, CurrSamplesPerSec=29.327525414832984, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:43:47,197] [INFO] [timer.py:197:stop] 0/1855, RunningAvgSamplesPerSec=29.95899715797304, CurrSamplesPerSec=29.452533483019632, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:43:53,851] [INFO] [timer.py:197:stop] 0/1856, RunningAvgSamplesPerSec=29.95904995388629, CurrSamplesPerSec=30.057201466382708, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:43:59,883] [INFO] [timer.py:197:stop] 0/1857, RunningAvgSamplesPerSec=29.959052104353294, CurrSamplesPerSec=29.96303960112898, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:44:05,647] [INFO] [timer.py:197:stop] 0/1858, RunningAvgSamplesPerSec=29.95908143201391, CurrSamplesPerSec=30.013583265903, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:44:11,609] [INFO] [timer.py:197:stop] 0/1859, RunningAvgSamplesPerSec=29.95903190011666, CurrSamplesPerSec=29.867382083651226, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:44:17,476] [INFO] [logging.py:68:log_dist] [Rank 0] step=1860, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 19:44:17,477] [INFO] [timer.py:197:stop] 0/1860, RunningAvgSamplesPerSec=29.958691351424214, CurrSamplesPerSec=29.339372685963173, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:44:25,011] [INFO] [timer.py:197:stop] 0/1861, RunningAvgSamplesPerSec=29.95861059657376, CurrSamplesPerSec=29.809316201977417, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:44:31,097] [INFO] [timer.py:197:stop] 0/1862, RunningAvgSamplesPerSec=29.95887169954343, CurrSamplesPerSec=30.452260248486663, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:44:37,310] [INFO] [timer.py:197:stop] 0/1863, RunningAvgSamplesPerSec=29.95878119578509, CurrSamplesPerSec=29.79138530067013, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:44:43,296] [INFO] [timer.py:197:stop] 0/1864, RunningAvgSamplesPerSec=29.958578250289996, CurrSamplesPerSec=29.585601253981654, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:44:49,418] [INFO] [timer.py:197:stop] 0/1865, RunningAvgSamplesPerSec=29.958690693721067, CurrSamplesPerSec=30.169534660430205, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:44:55,213] [INFO] [timer.py:197:stop] 0/1866, RunningAvgSamplesPerSec=29.958745963490284, CurrSamplesPerSec=30.062068852754965, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:45:01,852] [INFO] [timer.py:197:stop] 0/1867, RunningAvgSamplesPerSec=29.95864671332283, CurrSamplesPerSec=29.77478043167951, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:45:07,652] [INFO] [timer.py:197:stop] 0/1868, RunningAvgSamplesPerSec=29.958397224430776, CurrSamplesPerSec=29.500220398202572, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:45:13,637] [INFO] [timer.py:197:stop] 0/1869, RunningAvgSamplesPerSec=29.958480358796486, CurrSamplesPerSec=30.11441697708241, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:45:19,593] [INFO] [logging.py:68:log_dist] [Rank 0] step=1870, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 19:45:19,593] [INFO] [timer.py:197:stop] 0/1870, RunningAvgSamplesPerSec=29.9582485080217, CurrSamplesPerSec=29.5315517363722, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:45:25,285] [INFO] [timer.py:197:stop] 0/1871, RunningAvgSamplesPerSec=29.958535326656758, CurrSamplesPerSec=30.504074153745908, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:45:31,144] [INFO] [timer.py:197:stop] 0/1872, RunningAvgSamplesPerSec=29.958751318236274, CurrSamplesPerSec=30.367956489507073, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:45:33,650] [INFO] [timer.py:197:stop] 0/1873, RunningAvgSamplesPerSec=29.958702146846846, CurrSamplesPerSec=29.867033153368542, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:45:35,790] [INFO] [timer.py:197:stop] 0/1874, RunningAvgSamplesPerSec=29.95887364354369, CurrSamplesPerSec=30.283219682846607, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:45:37,922] [INFO] [timer.py:197:stop] 0/1875, RunningAvgSamplesPerSec=29.959097273416536, CurrSamplesPerSec=30.383668332039218, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0007, 'learning_rate': 1e-05, 'epoch': 46.01} -[2022-12-14 19:45:40,072] [INFO] [timer.py:197:stop] 0/1876, RunningAvgSamplesPerSec=29.959199100709885, CurrSamplesPerSec=30.151144210588033, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:45:42,252] [INFO] [timer.py:197:stop] 0/1877, RunningAvgSamplesPerSec=29.959270020892834, CurrSamplesPerSec=30.092766974972157, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:45:44,361] [INFO] [timer.py:197:stop] 0/1878, RunningAvgSamplesPerSec=29.959665644682847, CurrSamplesPerSec=30.720303518623783, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:45:46,488] [INFO] [timer.py:197:stop] 0/1879, RunningAvgSamplesPerSec=29.959922242730574, CurrSamplesPerSec=30.449165231374014, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:45:48,408] [INFO] [logging.py:68:log_dist] [Rank 0] step=1880, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 19:45:48,408] [INFO] [timer.py:197:stop] 0/1880, RunningAvgSamplesPerSec=29.9616891444415, CurrSamplesPerSec=33.691205660780646, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:46:28,236] [INFO] [timer.py:197:stop] 0/1881, RunningAvgSamplesPerSec=29.96164792447818, CurrSamplesPerSec=29.884436428813967, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:46:34,594] [INFO] [timer.py:197:stop] 0/1882, RunningAvgSamplesPerSec=29.961667514937783, CurrSamplesPerSec=29.998523293112235, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:46:40,599] [INFO] [timer.py:197:stop] 0/1883, RunningAvgSamplesPerSec=29.961509343295596, CurrSamplesPerSec=29.667070464242695, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:46:47,255] [INFO] [timer.py:197:stop] 0/1884, RunningAvgSamplesPerSec=29.961079756951076, CurrSamplesPerSec=29.174259703126992, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:46:53,664] [INFO] [timer.py:197:stop] 0/1885, RunningAvgSamplesPerSec=29.961051022554464, CurrSamplesPerSec=29.90707037215792, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:46:59,558] [INFO] [timer.py:197:stop] 0/1886, RunningAvgSamplesPerSec=29.960207900228692, CurrSamplesPerSec=28.452542573012362, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:47:05,938] [INFO] [timer.py:197:stop] 0/1887, RunningAvgSamplesPerSec=29.960243461965195, CurrSamplesPerSec=30.027392013544993, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:47:11,876] [INFO] [timer.py:197:stop] 0/1888, RunningAvgSamplesPerSec=29.9603593956464, CurrSamplesPerSec=30.180500979612415, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:47:18,242] [INFO] [timer.py:197:stop] 0/1889, RunningAvgSamplesPerSec=29.960290788915543, CurrSamplesPerSec=29.83145520377338, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:47:24,553] [INFO] [logging.py:68:log_dist] [Rank 0] step=1890, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 19:47:24,554] [INFO] [timer.py:197:stop] 0/1890, RunningAvgSamplesPerSec=29.960499356677662, CurrSamplesPerSec=30.359308338014618, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:47:30,994] [INFO] [timer.py:197:stop] 0/1891, RunningAvgSamplesPerSec=29.960822569017793, CurrSamplesPerSec=30.58374144246869, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:47:36,880] [INFO] [timer.py:197:stop] 0/1892, RunningAvgSamplesPerSec=29.960529215392093, CurrSamplesPerSec=29.41645269316216, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:47:44,110] [INFO] [timer.py:197:stop] 0/1893, RunningAvgSamplesPerSec=29.95990617181673, CurrSamplesPerSec=28.82690900695073, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:47:50,422] [INFO] [timer.py:197:stop] 0/1894, RunningAvgSamplesPerSec=29.960053490836263, CurrSamplesPerSec=30.241249812566192, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:47:56,643] [INFO] [timer.py:197:stop] 0/1895, RunningAvgSamplesPerSec=29.960160038185027, CurrSamplesPerSec=30.16311392341626, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:48:02,993] [INFO] [timer.py:197:stop] 0/1896, RunningAvgSamplesPerSec=29.960052805577856, CurrSamplesPerSec=29.75842828850112, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:48:09,371] [INFO] [timer.py:197:stop] 0/1897, RunningAvgSamplesPerSec=29.960002643030947, CurrSamplesPerSec=29.865295269679482, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:48:15,647] [INFO] [timer.py:197:stop] 0/1898, RunningAvgSamplesPerSec=29.960373969646184, CurrSamplesPerSec=30.68097113249377, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:48:21,967] [INFO] [timer.py:197:stop] 0/1899, RunningAvgSamplesPerSec=29.960498784364447, CurrSamplesPerSec=30.19903258859725, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:48:28,288] [INFO] [logging.py:68:log_dist] [Rank 0] step=1900, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 19:48:28,288] [INFO] [timer.py:197:stop] 0/1900, RunningAvgSamplesPerSec=29.96032549567619, CurrSamplesPerSec=29.635166422977953, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0006, 'learning_rate': 1e-05, 'epoch': 47.0} -[2022-12-14 19:48:34,877] [INFO] [timer.py:197:stop] 0/1901, RunningAvgSamplesPerSec=29.96033819613356, CurrSamplesPerSec=29.984463084830637, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:48:40,994] [INFO] [timer.py:197:stop] 0/1902, RunningAvgSamplesPerSec=29.960691550053525, CurrSamplesPerSec=30.647091805029408, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:48:47,369] [INFO] [timer.py:197:stop] 0/1903, RunningAvgSamplesPerSec=29.961005500760397, CurrSamplesPerSec=30.56963569770305, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:48:53,826] [INFO] [timer.py:197:stop] 0/1904, RunningAvgSamplesPerSec=29.960964569127743, CurrSamplesPerSec=29.883355199256354, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:49:00,003] [INFO] [timer.py:197:stop] 0/1905, RunningAvgSamplesPerSec=29.96091080179908, CurrSamplesPerSec=29.858993399853883, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:49:06,325] [INFO] [timer.py:197:stop] 0/1906, RunningAvgSamplesPerSec=29.960796161292915, CurrSamplesPerSec=29.744213165593994, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:49:12,559] [INFO] [timer.py:197:stop] 0/1907, RunningAvgSamplesPerSec=29.960819089829396, CurrSamplesPerSec=30.0045387607102, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:49:18,564] [INFO] [timer.py:197:stop] 0/1908, RunningAvgSamplesPerSec=29.96059198660616, CurrSamplesPerSec=29.5341218181022, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:49:24,372] [INFO] [timer.py:197:stop] 0/1909, RunningAvgSamplesPerSec=29.96022873906128, CurrSamplesPerSec=29.28352504381585, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:49:30,783] [INFO] [logging.py:68:log_dist] [Rank 0] step=1910, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 19:49:30,784] [INFO] [timer.py:197:stop] 0/1910, RunningAvgSamplesPerSec=29.959811309641097, CurrSamplesPerSec=29.18438737135261, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:49:36,832] [INFO] [timer.py:197:stop] 0/1911, RunningAvgSamplesPerSec=29.959953557526486, CurrSamplesPerSec=30.233845022002804, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:49:43,275] [INFO] [timer.py:197:stop] 0/1912, RunningAvgSamplesPerSec=29.959885119422037, CurrSamplesPerSec=29.829804328328535, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:49:45,641] [INFO] [timer.py:197:stop] 0/1913, RunningAvgSamplesPerSec=29.960174880301366, CurrSamplesPerSec=30.52403968010392, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:49:47,774] [INFO] [timer.py:197:stop] 0/1914, RunningAvgSamplesPerSec=29.960392594512754, CurrSamplesPerSec=30.382306522867022, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:49:49,928] [INFO] [timer.py:197:stop] 0/1915, RunningAvgSamplesPerSec=29.96045855615185, CurrSamplesPerSec=30.087110631295293, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:49:52,056] [INFO] [timer.py:197:stop] 0/1916, RunningAvgSamplesPerSec=29.960708381159492, CurrSamplesPerSec=30.44637472858215, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:49:54,183] [INFO] [timer.py:197:stop] 0/1917, RunningAvgSamplesPerSec=29.960970187034267, CurrSamplesPerSec=30.47059453853817, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:49:56,316] [INFO] [timer.py:197:stop] 0/1918, RunningAvgSamplesPerSec=29.96118095537361, CurrSamplesPerSec=30.370316868550553, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:49:58,488] [INFO] [timer.py:197:stop] 0/1919, RunningAvgSamplesPerSec=29.96111372093596, CurrSamplesPerSec=29.8328443349088, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:50:00,377] [INFO] [logging.py:68:log_dist] [Rank 0] step=1920, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 19:50:00,378] [INFO] [timer.py:197:stop] 0/1920, RunningAvgSamplesPerSec=29.96306787251155, CurrSamplesPerSec=34.24477236612879, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:50:38,299] [INFO] [timer.py:197:stop] 0/1921, RunningAvgSamplesPerSec=29.963075822563468, CurrSamplesPerSec=29.97833178994353, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:50:44,644] [INFO] [timer.py:197:stop] 0/1922, RunningAvgSamplesPerSec=29.96246292026508, CurrSamplesPerSec=28.830751259782527, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:50:50,857] [INFO] [timer.py:197:stop] 0/1923, RunningAvgSamplesPerSec=29.962429017675877, CurrSamplesPerSec=29.897477226789412, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:50:57,088] [INFO] [timer.py:197:stop] 0/1924, RunningAvgSamplesPerSec=29.962162461619315, CurrSamplesPerSec=29.458716658514646, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:51:03,362] [INFO] [timer.py:197:stop] 0/1925, RunningAvgSamplesPerSec=29.962017556635878, CurrSamplesPerSec=29.686076481315393, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0005, 'learning_rate': 1e-05, 'epoch': 48.0} -[2022-12-14 19:51:09,700] [INFO] [timer.py:197:stop] 0/1926, RunningAvgSamplesPerSec=29.962149210479577, CurrSamplesPerSec=30.217478119424324, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:51:15,768] [INFO] [timer.py:197:stop] 0/1927, RunningAvgSamplesPerSec=29.962043982873258, CurrSamplesPerSec=29.760945625596886, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:51:23,337] [INFO] [timer.py:197:stop] 0/1928, RunningAvgSamplesPerSec=29.961631026414427, CurrSamplesPerSec=29.187246466569796, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:51:29,555] [INFO] [timer.py:197:stop] 0/1929, RunningAvgSamplesPerSec=29.961979434007358, CurrSamplesPerSec=30.64839347248043, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:51:35,664] [INFO] [logging.py:68:log_dist] [Rank 0] step=1930, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 19:51:35,665] [INFO] [timer.py:197:stop] 0/1930, RunningAvgSamplesPerSec=29.96154367245777, CurrSamplesPerSec=29.144735200658992, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:51:42,272] [INFO] [timer.py:197:stop] 0/1931, RunningAvgSamplesPerSec=29.96090350932948, CurrSamplesPerSec=28.775525767363025, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:51:48,382] [INFO] [timer.py:197:stop] 0/1932, RunningAvgSamplesPerSec=29.960749135111172, CurrSamplesPerSec=29.66589343659178, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:51:54,442] [INFO] [timer.py:197:stop] 0/1933, RunningAvgSamplesPerSec=29.96068453416923, CurrSamplesPerSec=29.836521681139285, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:52:00,422] [INFO] [timer.py:197:stop] 0/1934, RunningAvgSamplesPerSec=29.96076197590347, CurrSamplesPerSec=30.111052482330603, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:52:06,297] [INFO] [timer.py:197:stop] 0/1935, RunningAvgSamplesPerSec=29.96106648173385, CurrSamplesPerSec=30.561161097866023, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:52:12,462] [INFO] [timer.py:197:stop] 0/1936, RunningAvgSamplesPerSec=29.960790446783246, CurrSamplesPerSec=29.436555877821093, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:52:18,868] [INFO] [timer.py:197:stop] 0/1937, RunningAvgSamplesPerSec=29.960423593484723, CurrSamplesPerSec=29.26735049891843, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:52:25,096] [INFO] [timer.py:197:stop] 0/1938, RunningAvgSamplesPerSec=29.960322590911762, CurrSamplesPerSec=29.76614991220478, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:52:30,996] [INFO] [timer.py:197:stop] 0/1939, RunningAvgSamplesPerSec=29.960297124917375, CurrSamplesPerSec=29.911075999086297, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:52:36,967] [INFO] [logging.py:68:log_dist] [Rank 0] step=1940, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 19:52:36,968] [INFO] [timer.py:197:stop] 0/1940, RunningAvgSamplesPerSec=29.96068300627103, CurrSamplesPerSec=30.727269688667644, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:52:42,958] [INFO] [timer.py:197:stop] 0/1941, RunningAvgSamplesPerSec=29.960578090672563, CurrSamplesPerSec=29.758622929781808, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:52:49,263] [INFO] [timer.py:197:stop] 0/1942, RunningAvgSamplesPerSec=29.960420139672237, CurrSamplesPerSec=29.657253831456355, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:52:55,619] [INFO] [timer.py:197:stop] 0/1943, RunningAvgSamplesPerSec=29.96034380214892, CurrSamplesPerSec=29.812977816476714, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:53:01,605] [INFO] [timer.py:197:stop] 0/1944, RunningAvgSamplesPerSec=29.960543299697733, CurrSamplesPerSec=30.352840900546177, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:53:07,659] [INFO] [timer.py:197:stop] 0/1945, RunningAvgSamplesPerSec=29.960558631355333, CurrSamplesPerSec=29.990362343872963, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:53:13,502] [INFO] [timer.py:197:stop] 0/1946, RunningAvgSamplesPerSec=29.96042185141523, CurrSamplesPerSec=29.696996342021286, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:53:19,713] [INFO] [timer.py:197:stop] 0/1947, RunningAvgSamplesPerSec=29.960489137044732, CurrSamplesPerSec=30.091866269859242, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:53:26,012] [INFO] [timer.py:197:stop] 0/1948, RunningAvgSamplesPerSec=29.960464247681596, CurrSamplesPerSec=29.912132570355265, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:53:32,003] [INFO] [timer.py:197:stop] 0/1949, RunningAvgSamplesPerSec=29.96009452570487, CurrSamplesPerSec=29.25749681550669, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:53:38,004] [INFO] [logging.py:68:log_dist] [Rank 0] step=1950, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 19:53:38,005] [INFO] [timer.py:197:stop] 0/1950, RunningAvgSamplesPerSec=29.960460983029265, CurrSamplesPerSec=30.69136850674315, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0005, 'learning_rate': 1e-05, 'epoch': 48.01} -[2022-12-14 19:53:43,849] [INFO] [timer.py:197:stop] 0/1951, RunningAvgSamplesPerSec=29.96003728634151, CurrSamplesPerSec=29.156815249389812, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:53:49,943] [INFO] [timer.py:197:stop] 0/1952, RunningAvgSamplesPerSec=29.959751749188758, CurrSamplesPerSec=29.413393817867043, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:53:52,405] [INFO] [timer.py:197:stop] 0/1953, RunningAvgSamplesPerSec=29.960029607266264, CurrSamplesPerSec=30.511837347466646, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:53:54,543] [INFO] [timer.py:197:stop] 0/1954, RunningAvgSamplesPerSec=29.960208318521964, CurrSamplesPerSec=30.31298152484873, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:53:56,679] [INFO] [timer.py:197:stop] 0/1955, RunningAvgSamplesPerSec=29.96040202406844, CurrSamplesPerSec=30.34335070460411, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:53:58,815] [INFO] [timer.py:197:stop] 0/1956, RunningAvgSamplesPerSec=29.96059009521087, CurrSamplesPerSec=30.332454252802076, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:54:00,957] [INFO] [timer.py:197:stop] 0/1957, RunningAvgSamplesPerSec=29.960740338823324, CurrSamplesPerSec=30.257222986004727, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:54:03,108] [INFO] [timer.py:197:stop] 0/1958, RunningAvgSamplesPerSec=29.960818939124703, CurrSamplesPerSec=30.115275111127964, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:54:05,299] [INFO] [timer.py:197:stop] 0/1959, RunningAvgSamplesPerSec=29.960618658777648, CurrSamplesPerSec=29.5739290231316, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:54:07,245] [INFO] [logging.py:68:log_dist] [Rank 0] step=1960, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 19:54:07,246] [INFO] [timer.py:197:stop] 0/1960, RunningAvgSamplesPerSec=29.962277315921387, CurrSamplesPerSec=33.60287895072721, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:54:48,418] [INFO] [timer.py:197:stop] 0/1961, RunningAvgSamplesPerSec=29.96174846440635, CurrSamplesPerSec=28.960865811196857, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:54:55,325] [INFO] [timer.py:197:stop] 0/1962, RunningAvgSamplesPerSec=29.961473058846757, CurrSamplesPerSec=29.431501690936592, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:55:01,886] [INFO] [timer.py:197:stop] 0/1963, RunningAvgSamplesPerSec=29.961641577968802, CurrSamplesPerSec=30.29562274787891, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:55:07,851] [INFO] [timer.py:197:stop] 0/1964, RunningAvgSamplesPerSec=29.961841123006625, CurrSamplesPerSec=30.358329806698173, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:55:14,107] [INFO] [timer.py:197:stop] 0/1965, RunningAvgSamplesPerSec=29.961809582530258, CurrSamplesPerSec=29.90005477982365, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:55:20,543] [INFO] [timer.py:197:stop] 0/1966, RunningAvgSamplesPerSec=29.96200440188919, CurrSamplesPerSec=30.34938174694152, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:55:26,818] [INFO] [timer.py:197:stop] 0/1967, RunningAvgSamplesPerSec=29.962143884348148, CurrSamplesPerSec=30.238616506932335, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:55:33,394] [INFO] [timer.py:197:stop] 0/1968, RunningAvgSamplesPerSec=29.962043073834202, CurrSamplesPerSec=29.7652521501279, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:55:40,099] [INFO] [timer.py:197:stop] 0/1969, RunningAvgSamplesPerSec=29.962102293374343, CurrSamplesPerSec=30.078982308297306, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:55:46,177] [INFO] [logging.py:68:log_dist] [Rank 0] step=1970, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 19:55:46,178] [INFO] [timer.py:197:stop] 0/1970, RunningAvgSamplesPerSec=29.962028867333483, CurrSamplesPerSec=29.818293061553202, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:55:53,029] [INFO] [timer.py:197:stop] 0/1971, RunningAvgSamplesPerSec=29.961538385945033, CurrSamplesPerSec=29.026413142254324, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:55:59,193] [INFO] [timer.py:197:stop] 0/1972, RunningAvgSamplesPerSec=29.96157032345161, CurrSamplesPerSec=30.0245876051295, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:56:05,451] [INFO] [timer.py:197:stop] 0/1973, RunningAvgSamplesPerSec=29.96143115022671, CurrSamplesPerSec=29.689747285582953, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:56:11,917] [INFO] [timer.py:197:stop] 0/1974, RunningAvgSamplesPerSec=29.961586780924172, CurrSamplesPerSec=30.27150949851786, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:56:18,144] [INFO] [timer.py:197:stop] 0/1975, RunningAvgSamplesPerSec=29.96115563258589, CurrSamplesPerSec=29.134404196015648, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0005, 'learning_rate': 1e-05, 'epoch': 49.0} -[2022-12-14 19:56:24,769] [INFO] [timer.py:197:stop] 0/1976, RunningAvgSamplesPerSec=29.960964013599057, CurrSamplesPerSec=29.58761328938838, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:56:31,131] [INFO] [timer.py:197:stop] 0/1977, RunningAvgSamplesPerSec=29.96118118255806, CurrSamplesPerSec=30.39609876159052, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:56:37,090] [INFO] [timer.py:197:stop] 0/1978, RunningAvgSamplesPerSec=29.961054714707558, CurrSamplesPerSec=29.713346802932996, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:56:45,708] [INFO] [timer.py:197:stop] 0/1979, RunningAvgSamplesPerSec=29.96102594855626, CurrSamplesPerSec=29.904291723955232, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:56:51,736] [INFO] [logging.py:68:log_dist] [Rank 0] step=1980, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 19:56:51,737] [INFO] [timer.py:197:stop] 0/1980, RunningAvgSamplesPerSec=29.96069876912295, CurrSamplesPerSec=29.327541435522456, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:56:57,823] [INFO] [timer.py:197:stop] 0/1981, RunningAvgSamplesPerSec=29.9604602718328, CurrSamplesPerSec=29.496029110625386, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:57:04,282] [INFO] [timer.py:197:stop] 0/1982, RunningAvgSamplesPerSec=29.960630024966807, CurrSamplesPerSec=30.300382971537655, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:57:10,973] [INFO] [timer.py:197:stop] 0/1983, RunningAvgSamplesPerSec=29.960628795271738, CurrSamplesPerSec=29.958194196980358, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:57:17,262] [INFO] [timer.py:197:stop] 0/1984, RunningAvgSamplesPerSec=29.960561163413754, CurrSamplesPerSec=29.827179216002147, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:57:24,031] [INFO] [timer.py:197:stop] 0/1985, RunningAvgSamplesPerSec=29.96040018429591, CurrSamplesPerSec=29.644703255296193, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:57:30,065] [INFO] [timer.py:197:stop] 0/1986, RunningAvgSamplesPerSec=29.960360651053847, CurrSamplesPerSec=29.882170926818276, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:57:36,961] [INFO] [timer.py:197:stop] 0/1987, RunningAvgSamplesPerSec=29.960477937699725, CurrSamplesPerSec=30.19499702139377, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:57:43,179] [INFO] [timer.py:197:stop] 0/1988, RunningAvgSamplesPerSec=29.960386461364603, CurrSamplesPerSec=29.779900357012068, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:57:49,269] [INFO] [timer.py:197:stop] 0/1989, RunningAvgSamplesPerSec=29.960247988932224, CurrSamplesPerSec=29.687744319208598, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:57:55,736] [INFO] [logging.py:68:log_dist] [Rank 0] step=1990, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 19:57:55,737] [INFO] [timer.py:197:stop] 0/1990, RunningAvgSamplesPerSec=29.960452634495404, CurrSamplesPerSec=30.37268105112708, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:58:01,752] [INFO] [timer.py:197:stop] 0/1991, RunningAvgSamplesPerSec=29.96051752358303, CurrSamplesPerSec=30.090075140148656, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:58:08,241] [INFO] [timer.py:197:stop] 0/1992, RunningAvgSamplesPerSec=29.960517743979693, CurrSamplesPerSec=29.960956119358773, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:58:10,623] [INFO] [timer.py:197:stop] 0/1993, RunningAvgSamplesPerSec=29.961004172164753, CurrSamplesPerSec=30.96133147512988, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:58:12,756] [INFO] [timer.py:197:stop] 0/1994, RunningAvgSamplesPerSec=29.9612049586239, CurrSamplesPerSec=30.366379672145243, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:58:14,876] [INFO] [timer.py:197:stop] 0/1995, RunningAvgSamplesPerSec=29.96150720647458, CurrSamplesPerSec=30.575938138148537, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:58:17,055] [INFO] [timer.py:197:stop] 0/1996, RunningAvgSamplesPerSec=29.961398892557135, CurrSamplesPerSec=29.747074223109937, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:58:19,145] [INFO] [timer.py:197:stop] 0/1997, RunningAvgSamplesPerSec=29.961906896128827, CurrSamplesPerSec=31.010329095814804, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:58:21,321] [INFO] [timer.py:197:stop] 0/1998, RunningAvgSamplesPerSec=29.96180861077101, CurrSamplesPerSec=29.767004816871715, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:58:23,449] [INFO] [timer.py:197:stop] 0/1999, RunningAvgSamplesPerSec=29.96204195860545, CurrSamplesPerSec=30.435162618402543, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 19:58:25,373] [INFO] [logging.py:68:log_dist] [Rank 0] step=2000, skipped=4, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 19:58:25,374] [INFO] [timer.py:197:stop] 0/2000, RunningAvgSamplesPerSec=29.96369645757515, CurrSamplesPerSec=33.67744307049537, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0005, 'learning_rate': 1e-05, 'epoch': 49.01} -{'eval_loss': 0.5595703125, 'eval_wer': 24.413777628183553, 'eval_runtime': 196.1396, 'eval_samples_per_second': 3.396, 'eval_steps_per_second': 0.107, 'epoch': 49.01} -[2022-12-14 20:01:42,095] [INFO] [logging.py:68:log_dist] [Rank 0] [Torch] Checkpoint global_step2000 is begin to save! -[2022-12-14 20:01:42,100] [INFO] [logging.py:68:log_dist] [Rank 0] Saving model checkpoint: ./checkpoint-2000/global_step2000/mp_rank_00_model_states.pt -[2022-12-14 20:01:42,100] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving ./checkpoint-2000/global_step2000/mp_rank_00_model_states.pt... -[2022-12-14 20:01:42,667] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved ./checkpoint-2000/global_step2000/mp_rank_00_model_states.pt. -[2022-12-14 20:01:42,668] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving ./checkpoint-2000/global_step2000/zero_pp_rank_0_mp_rank_00_optim_states.pt... -[2022-12-14 20:01:44,904] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved ./checkpoint-2000/global_step2000/zero_pp_rank_0_mp_rank_00_optim_states.pt. -[2022-12-14 20:01:44,905] [INFO] [engine.py:3269:_save_zero_checkpoint] zero checkpoint saved ./checkpoint-2000/global_step2000/zero_pp_rank_0_mp_rank_00_optim_states.pt -[2022-12-14 20:01:44,905] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! -[2022-12-14 20:02:46,042] [INFO] [timer.py:197:stop] 0/2001, RunningAvgSamplesPerSec=29.963263124150934, CurrSamplesPerSec=29.121789811844277, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:02:52,098] [INFO] [timer.py:197:stop] 0/2002, RunningAvgSamplesPerSec=29.962855808325042, CurrSamplesPerSec=29.170182709017393, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:02:58,352] [INFO] [timer.py:197:stop] 0/2003, RunningAvgSamplesPerSec=29.962877619986003, CurrSamplesPerSec=30.00656457808395, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:03:04,298] [INFO] [timer.py:197:stop] 0/2004, RunningAvgSamplesPerSec=29.962911277596618, CurrSamplesPerSec=30.030411956342622, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:03:10,315] [INFO] [stage_1_and_2.py:1765:step] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536.0, reducing to 65536.0 -[2022-12-14 20:03:10,316] [INFO] [timer.py:197:stop] 0/2005, RunningAvgSamplesPerSec=29.963727038757163, CurrSamplesPerSec=31.691075961172935, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:03:16,386] [INFO] [stage_1_and_2.py:1765:step] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536.0, reducing to 32768.0 -[2022-12-14 20:03:16,388] [INFO] [timer.py:197:stop] 0/2006, RunningAvgSamplesPerSec=29.964472985379775, CurrSamplesPerSec=31.53705766865486, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:03:22,580] [INFO] [timer.py:197:stop] 0/2007, RunningAvgSamplesPerSec=29.964337166815618, CurrSamplesPerSec=29.694608065246943, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:03:28,463] [INFO] [timer.py:197:stop] 0/2008, RunningAvgSamplesPerSec=29.964195363942135, CurrSamplesPerSec=29.682554282099392, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:03:35,201] [INFO] [timer.py:197:stop] 0/2009, RunningAvgSamplesPerSec=29.96394657770101, CurrSamplesPerSec=29.473061403031064, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:03:41,331] [INFO] [logging.py:68:log_dist] [Rank 0] step=2010, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 20:03:41,331] [INFO] [timer.py:197:stop] 0/2010, RunningAvgSamplesPerSec=29.96414830727269, CurrSamplesPerSec=30.37456785041302, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:03:47,934] [INFO] [timer.py:197:stop] 0/2011, RunningAvgSamplesPerSec=29.96445283853812, CurrSamplesPerSec=30.588697214403233, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:03:54,005] [INFO] [timer.py:197:stop] 0/2012, RunningAvgSamplesPerSec=29.964177013968538, CurrSamplesPerSec=29.420111930712732, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:03:59,787] [INFO] [timer.py:197:stop] 0/2013, RunningAvgSamplesPerSec=29.964190501002285, CurrSamplesPerSec=29.991323999040493, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:04:06,151] [INFO] [timer.py:197:stop] 0/2014, RunningAvgSamplesPerSec=29.96409406659856, CurrSamplesPerSec=29.771412150832397, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:04:12,009] [INFO] [timer.py:197:stop] 0/2015, RunningAvgSamplesPerSec=29.963837648207885, CurrSamplesPerSec=29.456660698932666, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:04:18,362] [INFO] [timer.py:197:stop] 0/2016, RunningAvgSamplesPerSec=29.963725276891743, CurrSamplesPerSec=29.73921752317004, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:04:27,975] [INFO] [timer.py:197:stop] 0/2017, RunningAvgSamplesPerSec=29.963293327556524, CurrSamplesPerSec=29.117904343179685, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:04:34,024] [INFO] [timer.py:197:stop] 0/2018, RunningAvgSamplesPerSec=29.96319691645754, CurrSamplesPerSec=29.770180607045152, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:04:40,483] [INFO] [timer.py:197:stop] 0/2019, RunningAvgSamplesPerSec=29.96328114034665, CurrSamplesPerSec=30.13404466029924, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:04:46,573] [INFO] [logging.py:68:log_dist] [Rank 0] step=2020, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 20:04:46,574] [INFO] [timer.py:197:stop] 0/2020, RunningAvgSamplesPerSec=29.963174729355547, CurrSamplesPerSec=29.75007101281733, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:04:53,127] [INFO] [timer.py:197:stop] 0/2021, RunningAvgSamplesPerSec=29.96285736066995, CurrSamplesPerSec=29.335816858522776, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:04:59,256] [INFO] [timer.py:197:stop] 0/2022, RunningAvgSamplesPerSec=29.962576541690815, CurrSamplesPerSec=29.40613757800799, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:05:05,846] [INFO] [timer.py:197:stop] 0/2023, RunningAvgSamplesPerSec=29.962740958503414, CurrSamplesPerSec=30.29858745287211, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:05:11,830] [INFO] [timer.py:197:stop] 0/2024, RunningAvgSamplesPerSec=29.962882664937837, CurrSamplesPerSec=30.25203650230771, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:05:17,999] [INFO] [timer.py:197:stop] 0/2025, RunningAvgSamplesPerSec=29.962668479737136, CurrSamplesPerSec=29.53575962563638, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0005, 'learning_rate': 1e-05, 'epoch': 50.01} -[2022-12-14 20:05:24,018] [INFO] [timer.py:197:stop] 0/2026, RunningAvgSamplesPerSec=29.963029952631555, CurrSamplesPerSec=30.712592021203992, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:05:30,193] [INFO] [timer.py:197:stop] 0/2027, RunningAvgSamplesPerSec=29.963159384437375, CurrSamplesPerSec=30.227441134654022, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:05:36,453] [INFO] [timer.py:197:stop] 0/2028, RunningAvgSamplesPerSec=29.96312062718328, CurrSamplesPerSec=29.884842325953805, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:05:42,397] [INFO] [timer.py:197:stop] 0/2029, RunningAvgSamplesPerSec=29.963273871763114, CurrSamplesPerSec=30.276999760658317, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:05:48,333] [INFO] [logging.py:68:log_dist] [Rank 0] step=2030, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 20:05:48,334] [INFO] [timer.py:197:stop] 0/2030, RunningAvgSamplesPerSec=29.963288641840563, CurrSamplesPerSec=29.993257548162763, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:05:54,311] [INFO] [timer.py:197:stop] 0/2031, RunningAvgSamplesPerSec=29.963302995168753, CurrSamplesPerSec=29.992439864385933, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:06:00,456] [INFO] [timer.py:197:stop] 0/2032, RunningAvgSamplesPerSec=29.962844438173104, CurrSamplesPerSec=29.060466908841324, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:06:03,139] [INFO] [timer.py:197:stop] 0/2033, RunningAvgSamplesPerSec=29.96304397978002, CurrSamplesPerSec=30.37366738143982, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:06:05,292] [INFO] [timer.py:197:stop] 0/2034, RunningAvgSamplesPerSec=29.963101387366436, CurrSamplesPerSec=30.08015189590863, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:06:07,446] [INFO] [timer.py:197:stop] 0/2035, RunningAvgSamplesPerSec=29.9631674315204, CurrSamplesPerSec=30.097973229796825, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:06:09,735] [INFO] [timer.py:197:stop] 0/2036, RunningAvgSamplesPerSec=29.962292157423146, CurrSamplesPerSec=28.282660709689335, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:06:11,865] [INFO] [timer.py:197:stop] 0/2037, RunningAvgSamplesPerSec=29.96251980338938, CurrSamplesPerSec=30.432823201169082, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:06:14,035] [INFO] [timer.py:197:stop] 0/2038, RunningAvgSamplesPerSec=29.96247266239665, CurrSamplesPerSec=29.866847060551372, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:06:16,175] [INFO] [timer.py:197:stop] 0/2039, RunningAvgSamplesPerSec=29.96262316595897, CurrSamplesPerSec=30.272216155672467, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:06:18,063] [INFO] [logging.py:68:log_dist] [Rank 0] step=2040, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 20:06:18,063] [INFO] [timer.py:197:stop] 0/2040, RunningAvgSamplesPerSec=29.964523945656975, CurrSamplesPerSec=34.411285923733175, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:07:01,600] [INFO] [timer.py:197:stop] 0/2041, RunningAvgSamplesPerSec=29.964195929235284, CurrSamplesPerSec=29.31029408842285, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:07:08,596] [INFO] [timer.py:197:stop] 0/2042, RunningAvgSamplesPerSec=29.96421963161301, CurrSamplesPerSec=30.012626893933465, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:07:15,912] [INFO] [timer.py:197:stop] 0/2043, RunningAvgSamplesPerSec=29.964446361742795, CurrSamplesPerSec=30.434230948975884, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:07:22,644] [INFO] [timer.py:197:stop] 0/2044, RunningAvgSamplesPerSec=29.96368760726679, CurrSamplesPerSec=28.491209330296655, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:07:29,265] [INFO] [timer.py:197:stop] 0/2045, RunningAvgSamplesPerSec=29.96345413231927, CurrSamplesPerSec=29.494168858842897, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:07:36,386] [INFO] [timer.py:197:stop] 0/2046, RunningAvgSamplesPerSec=29.96328284057931, CurrSamplesPerSec=29.617375732958052, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:07:42,971] [INFO] [timer.py:197:stop] 0/2047, RunningAvgSamplesPerSec=29.96325925740244, CurrSamplesPerSec=29.915132706350096, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:07:49,448] [INFO] [timer.py:197:stop] 0/2048, RunningAvgSamplesPerSec=29.963186292558525, CurrSamplesPerSec=29.81471292801342, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:07:56,574] [INFO] [timer.py:197:stop] 0/2049, RunningAvgSamplesPerSec=29.963112020377043, CurrSamplesPerSec=29.811918305698388, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:08:03,224] [INFO] [logging.py:68:log_dist] [Rank 0] step=2050, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 20:08:03,225] [INFO] [timer.py:197:stop] 0/2050, RunningAvgSamplesPerSec=29.96323378454745, CurrSamplesPerSec=30.214576877893517, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0004, 'learning_rate': 1e-05, 'epoch': 51.0} -[2022-12-14 20:08:10,150] [INFO] [timer.py:197:stop] 0/2051, RunningAvgSamplesPerSec=29.963225037045945, CurrSamplesPerSec=29.94532086401143, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:08:16,641] [INFO] [timer.py:197:stop] 0/2052, RunningAvgSamplesPerSec=29.96340496094874, CurrSamplesPerSec=30.336663785214956, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:08:23,119] [INFO] [timer.py:197:stop] 0/2053, RunningAvgSamplesPerSec=29.963116728777806, CurrSamplesPerSec=29.38367304538729, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:08:29,819] [INFO] [timer.py:197:stop] 0/2054, RunningAvgSamplesPerSec=29.96293197266961, CurrSamplesPerSec=29.588731929861922, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:08:36,336] [INFO] [timer.py:197:stop] 0/2055, RunningAvgSamplesPerSec=29.96312657480394, CurrSamplesPerSec=30.367846553574577, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:08:43,291] [INFO] [timer.py:197:stop] 0/2056, RunningAvgSamplesPerSec=29.962938178623467, CurrSamplesPerSec=29.58109228853139, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:08:50,174] [INFO] [timer.py:197:stop] 0/2057, RunningAvgSamplesPerSec=29.96320717560363, CurrSamplesPerSec=30.52611195780948, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:08:57,087] [INFO] [timer.py:197:stop] 0/2058, RunningAvgSamplesPerSec=29.962949329044893, CurrSamplesPerSec=29.44228670103925, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:09:03,910] [INFO] [timer.py:197:stop] 0/2059, RunningAvgSamplesPerSec=29.96296739283706, CurrSamplesPerSec=30.000152663173296, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:09:13,433] [INFO] [logging.py:68:log_dist] [Rank 0] step=2060, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 20:09:13,434] [INFO] [timer.py:197:stop] 0/2060, RunningAvgSamplesPerSec=29.962526051265147, CurrSamplesPerSec=29.08139689704346, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:09:20,200] [INFO] [timer.py:197:stop] 0/2061, RunningAvgSamplesPerSec=29.962438159436974, CurrSamplesPerSec=29.782642718401146, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:09:26,703] [INFO] [timer.py:197:stop] 0/2062, RunningAvgSamplesPerSec=29.96215734504799, CurrSamplesPerSec=29.394912285735124, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:09:33,614] [INFO] [timer.py:197:stop] 0/2063, RunningAvgSamplesPerSec=29.962470346674323, CurrSamplesPerSec=30.62144144057818, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:09:39,806] [INFO] [timer.py:197:stop] 0/2064, RunningAvgSamplesPerSec=29.96225362250751, CurrSamplesPerSec=29.52214924532701, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:09:46,824] [INFO] [timer.py:197:stop] 0/2065, RunningAvgSamplesPerSec=29.962111352211597, CurrSamplesPerSec=29.67159584135221, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:09:53,276] [INFO] [timer.py:197:stop] 0/2066, RunningAvgSamplesPerSec=29.96204096098847, CurrSamplesPerSec=29.81752463480856, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:10:00,418] [INFO] [timer.py:197:stop] 0/2067, RunningAvgSamplesPerSec=29.962049618230083, CurrSamplesPerSec=29.979928832761008, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:10:07,242] [INFO] [timer.py:197:stop] 0/2068, RunningAvgSamplesPerSec=29.961979417586665, CurrSamplesPerSec=29.817713425640513, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:10:14,630] [INFO] [timer.py:197:stop] 0/2069, RunningAvgSamplesPerSec=29.962016036324915, CurrSamplesPerSec=30.037861953747356, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:10:21,844] [INFO] [logging.py:68:log_dist] [Rank 0] step=2070, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 20:10:21,844] [INFO] [timer.py:197:stop] 0/2070, RunningAvgSamplesPerSec=29.961847151397908, CurrSamplesPerSec=29.61678427596196, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:10:28,918] [INFO] [timer.py:197:stop] 0/2071, RunningAvgSamplesPerSec=29.96199455652936, CurrSamplesPerSec=30.26996316099813, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:10:35,652] [INFO] [timer.py:197:stop] 0/2072, RunningAvgSamplesPerSec=29.96180418058848, CurrSamplesPerSec=29.5730297863752, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:10:38,275] [INFO] [timer.py:197:stop] 0/2073, RunningAvgSamplesPerSec=29.96175873467013, CurrSamplesPerSec=29.8679802688186, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:10:40,431] [INFO] [timer.py:197:stop] 0/2074, RunningAvgSamplesPerSec=29.96185484414453, CurrSamplesPerSec=30.1622293370512, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:10:42,569] [INFO] [timer.py:197:stop] 0/2075, RunningAvgSamplesPerSec=29.962015165691994, CurrSamplesPerSec=30.29792743950188, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0004, 'learning_rate': 1e-05, 'epoch': 51.01} -[2022-12-14 20:10:44,681] [INFO] [timer.py:197:stop] 0/2076, RunningAvgSamplesPerSec=29.962354760835225, CurrSamplesPerSec=30.683282220334934, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:10:46,818] [INFO] [timer.py:197:stop] 0/2077, RunningAvgSamplesPerSec=29.96251981442217, CurrSamplesPerSec=30.308799094559543, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:10:48,910] [INFO] [timer.py:197:stop] 0/2078, RunningAvgSamplesPerSec=29.962986530565058, CurrSamplesPerSec=30.963785000373733, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:10:51,053] [INFO] [timer.py:197:stop] 0/2079, RunningAvgSamplesPerSec=29.96311254114569, CurrSamplesPerSec=30.22701566676392, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:10:52,987] [INFO] [logging.py:68:log_dist] [Rank 0] step=2080, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 20:10:52,988] [INFO] [timer.py:197:stop] 0/2080, RunningAvgSamplesPerSec=29.96466719201001, CurrSamplesPerSec=33.58387246219717, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:11:31,544] [INFO] [timer.py:197:stop] 0/2081, RunningAvgSamplesPerSec=29.964887081927436, CurrSamplesPerSec=30.428897377021826, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:11:37,791] [INFO] [timer.py:197:stop] 0/2082, RunningAvgSamplesPerSec=29.965129107384865, CurrSamplesPerSec=30.476897725365852, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:11:44,001] [INFO] [timer.py:197:stop] 0/2083, RunningAvgSamplesPerSec=29.964841533064405, CurrSamplesPerSec=29.378399069033513, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:11:50,379] [INFO] [timer.py:197:stop] 0/2084, RunningAvgSamplesPerSec=29.964439496113105, CurrSamplesPerSec=29.15053655313721, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:11:56,311] [INFO] [timer.py:197:stop] 0/2085, RunningAvgSamplesPerSec=29.96390608741792, CurrSamplesPerSec=28.89305728249975, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:12:02,415] [INFO] [timer.py:197:stop] 0/2086, RunningAvgSamplesPerSec=29.964024254814014, CurrSamplesPerSec=30.212206640405178, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:12:08,595] [INFO] [timer.py:197:stop] 0/2087, RunningAvgSamplesPerSec=29.9640937893197, CurrSamplesPerSec=30.109708245852104, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:12:14,442] [INFO] [timer.py:197:stop] 0/2088, RunningAvgSamplesPerSec=29.96403116906517, CurrSamplesPerSec=29.83403464803212, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:12:20,864] [INFO] [timer.py:197:stop] 0/2089, RunningAvgSamplesPerSec=29.96366104613535, CurrSamplesPerSec=29.210988105276687, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:12:26,887] [INFO] [logging.py:68:log_dist] [Rank 0] step=2090, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 20:12:26,888] [INFO] [timer.py:197:stop] 0/2090, RunningAvgSamplesPerSec=29.963716766583982, CurrSamplesPerSec=30.080458632786172, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:12:33,235] [INFO] [timer.py:197:stop] 0/2091, RunningAvgSamplesPerSec=29.963559747137726, CurrSamplesPerSec=29.63925335288412, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:12:39,452] [INFO] [timer.py:197:stop] 0/2092, RunningAvgSamplesPerSec=29.963254182270067, CurrSamplesPerSec=29.33825037501332, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:12:45,268] [INFO] [timer.py:197:stop] 0/2093, RunningAvgSamplesPerSec=29.96349703275905, CurrSamplesPerSec=30.479804573501468, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:12:51,509] [INFO] [timer.py:197:stop] 0/2094, RunningAvgSamplesPerSec=29.96335678890582, CurrSamplesPerSec=29.67295044365371, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:12:57,570] [INFO] [timer.py:197:stop] 0/2095, RunningAvgSamplesPerSec=29.96318935037387, CurrSamplesPerSec=29.616957462581258, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:13:03,604] [INFO] [timer.py:197:stop] 0/2096, RunningAvgSamplesPerSec=29.96314648512006, CurrSamplesPerSec=29.873697469050473, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:13:09,496] [INFO] [timer.py:197:stop] 0/2097, RunningAvgSamplesPerSec=29.963315807111485, CurrSamplesPerSec=30.322123912998293, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:13:15,404] [INFO] [timer.py:197:stop] 0/2098, RunningAvgSamplesPerSec=29.96297766727819, CurrSamplesPerSec=29.270944007195077, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:13:22,997] [INFO] [timer.py:197:stop] 0/2099, RunningAvgSamplesPerSec=29.963037194425244, CurrSamplesPerSec=30.08832806651646, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:13:29,618] [INFO] [logging.py:68:log_dist] [Rank 0] step=2100, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 20:13:29,619] [INFO] [timer.py:197:stop] 0/2100, RunningAvgSamplesPerSec=29.962865115585565, CurrSamplesPerSec=29.606311884900922, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0004, 'learning_rate': 1e-05, 'epoch': 52.0} -[2022-12-14 20:13:35,825] [INFO] [timer.py:197:stop] 0/2101, RunningAvgSamplesPerSec=29.96292556493461, CurrSamplesPerSec=30.090287635915256, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:13:42,396] [INFO] [timer.py:197:stop] 0/2102, RunningAvgSamplesPerSec=29.962660929902892, CurrSamplesPerSec=29.4173069711343, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:13:49,013] [INFO] [timer.py:197:stop] 0/2103, RunningAvgSamplesPerSec=29.962658617339475, CurrSamplesPerSec=29.957803021544002, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:13:55,168] [INFO] [timer.py:197:stop] 0/2104, RunningAvgSamplesPerSec=29.96286283664626, CurrSamplesPerSec=30.398164030336623, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:14:01,537] [INFO] [timer.py:197:stop] 0/2105, RunningAvgSamplesPerSec=29.96273728350441, CurrSamplesPerSec=29.701129921186226, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:14:07,644] [INFO] [timer.py:197:stop] 0/2106, RunningAvgSamplesPerSec=29.962703219863464, CurrSamplesPerSec=29.8912383248597, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:14:13,835] [INFO] [timer.py:197:stop] 0/2107, RunningAvgSamplesPerSec=29.962941252347992, CurrSamplesPerSec=30.47227905379043, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:14:20,203] [INFO] [timer.py:197:stop] 0/2108, RunningAvgSamplesPerSec=29.962578384146425, CurrSamplesPerSec=29.217738092568812, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:14:26,176] [INFO] [timer.py:197:stop] 0/2109, RunningAvgSamplesPerSec=29.962731179665802, CurrSamplesPerSec=30.2880136011776, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:14:32,341] [INFO] [logging.py:68:log_dist] [Rank 0] step=2110, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 20:14:32,341] [INFO] [timer.py:197:stop] 0/2110, RunningAvgSamplesPerSec=29.962611164733058, CurrSamplesPerSec=29.711856962724735, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:14:38,375] [INFO] [timer.py:197:stop] 0/2111, RunningAvgSamplesPerSec=29.962508665204215, CurrSamplesPerSec=29.747987372295317, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:14:44,137] [INFO] [timer.py:197:stop] 0/2112, RunningAvgSamplesPerSec=29.962719781094414, CurrSamplesPerSec=30.41468250483721, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:14:46,643] [INFO] [timer.py:197:stop] 0/2113, RunningAvgSamplesPerSec=29.962651144516066, CurrSamplesPerSec=29.818524922228253, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:14:48,818] [INFO] [timer.py:197:stop] 0/2114, RunningAvgSamplesPerSec=29.962563298604742, CurrSamplesPerSec=29.778261789341745, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:14:50,931] [INFO] [timer.py:197:stop] 0/2115, RunningAvgSamplesPerSec=29.962880621644437, CurrSamplesPerSec=30.648407469505493, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:14:53,040] [INFO] [timer.py:197:stop] 0/2116, RunningAvgSamplesPerSec=29.963228144437707, CurrSamplesPerSec=30.716000911288823, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:14:55,209] [INFO] [timer.py:197:stop] 0/2117, RunningAvgSamplesPerSec=29.963178032651694, CurrSamplesPerSec=29.857615116546665, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:14:57,362] [INFO] [timer.py:197:stop] 0/2118, RunningAvgSamplesPerSec=29.963234846577567, CurrSamplesPerSec=30.083880352800097, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:14:59,536] [INFO] [timer.py:197:stop] 0/2119, RunningAvgSamplesPerSec=29.96315496725, CurrSamplesPerSec=29.795078889367524, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:15:01,451] [INFO] [logging.py:68:log_dist] [Rank 0] step=2120, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 20:15:01,452] [INFO] [timer.py:197:stop] 0/2120, RunningAvgSamplesPerSec=29.964769869726073, CurrSamplesPerSec=33.82403918836556, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:15:43,989] [INFO] [timer.py:197:stop] 0/2121, RunningAvgSamplesPerSec=29.964526964838356, CurrSamplesPerSec=29.458742521469173, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:15:51,109] [INFO] [timer.py:197:stop] 0/2122, RunningAvgSamplesPerSec=29.964396679744702, CurrSamplesPerSec=29.690844106272102, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:15:57,757] [INFO] [timer.py:197:stop] 0/2123, RunningAvgSamplesPerSec=29.96431499356518, CurrSamplesPerSec=29.792135848401585, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:16:04,498] [INFO] [timer.py:197:stop] 0/2124, RunningAvgSamplesPerSec=29.96438174871421, CurrSamplesPerSec=30.106641944497593, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:16:11,068] [INFO] [timer.py:197:stop] 0/2125, RunningAvgSamplesPerSec=29.964207661887368, CurrSamplesPerSec=29.599296326676612, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0004, 'learning_rate': 1e-05, 'epoch': 53.0} -[2022-12-14 20:16:18,068] [INFO] [timer.py:197:stop] 0/2126, RunningAvgSamplesPerSec=29.96437397005689, CurrSamplesPerSec=30.321658100009397, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:16:25,032] [INFO] [timer.py:197:stop] 0/2127, RunningAvgSamplesPerSec=29.96424731517256, CurrSamplesPerSec=29.697627147745983, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:16:31,384] [INFO] [timer.py:197:stop] 0/2128, RunningAvgSamplesPerSec=29.96425416820303, CurrSamplesPerSec=29.978823942232943, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:16:38,541] [INFO] [timer.py:197:stop] 0/2129, RunningAvgSamplesPerSec=29.963783332478894, CurrSamplesPerSec=28.99516047846497, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:16:44,880] [INFO] [logging.py:68:log_dist] [Rank 0] step=2130, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 20:16:44,881] [INFO] [timer.py:197:stop] 0/2130, RunningAvgSamplesPerSec=29.963547900653275, CurrSamplesPerSec=29.47101961858515, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:16:52,184] [INFO] [timer.py:197:stop] 0/2131, RunningAvgSamplesPerSec=29.963706735563495, CurrSamplesPerSec=30.305565518718385, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:16:58,778] [INFO] [timer.py:197:stop] 0/2132, RunningAvgSamplesPerSec=29.96395066729938, CurrSamplesPerSec=30.49244542669756, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:17:05,288] [INFO] [timer.py:197:stop] 0/2133, RunningAvgSamplesPerSec=29.96392163161897, CurrSamplesPerSec=29.902203080210754, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:17:11,818] [INFO] [timer.py:197:stop] 0/2134, RunningAvgSamplesPerSec=29.963946966789095, CurrSamplesPerSec=30.018033713940206, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:17:17,977] [INFO] [timer.py:197:stop] 0/2135, RunningAvgSamplesPerSec=29.963831319551534, CurrSamplesPerSec=29.71928462597729, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:17:24,688] [INFO] [timer.py:197:stop] 0/2136, RunningAvgSamplesPerSec=29.96402935029227, CurrSamplesPerSec=30.39247145222972, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:17:30,770] [INFO] [timer.py:197:stop] 0/2137, RunningAvgSamplesPerSec=29.964295638402355, CurrSamplesPerSec=30.543544801996674, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:17:37,258] [INFO] [timer.py:197:stop] 0/2138, RunningAvgSamplesPerSec=29.964259999040294, CurrSamplesPerSec=29.8883627816556, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:17:43,857] [INFO] [timer.py:197:stop] 0/2139, RunningAvgSamplesPerSec=29.963938598822725, CurrSamplesPerSec=29.2928112888575, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:17:50,362] [INFO] [logging.py:68:log_dist] [Rank 0] step=2140, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 20:17:50,362] [INFO] [timer.py:197:stop] 0/2140, RunningAvgSamplesPerSec=29.963905515072412, CurrSamplesPerSec=29.89337204258686, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:17:56,920] [INFO] [timer.py:197:stop] 0/2141, RunningAvgSamplesPerSec=29.963827999426904, CurrSamplesPerSec=29.79901156723874, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:18:03,756] [INFO] [timer.py:197:stop] 0/2142, RunningAvgSamplesPerSec=29.963786744681908, CurrSamplesPerSec=29.875802082704976, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:18:10,672] [INFO] [timer.py:197:stop] 0/2143, RunningAvgSamplesPerSec=29.96394481150726, CurrSamplesPerSec=30.30607189561005, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:18:17,273] [INFO] [timer.py:197:stop] 0/2144, RunningAvgSamplesPerSec=29.96421371049948, CurrSamplesPerSec=30.551209879803015, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:18:24,171] [INFO] [timer.py:197:stop] 0/2145, RunningAvgSamplesPerSec=29.964164614388363, CurrSamplesPerSec=29.85936871349482, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:18:30,851] [INFO] [timer.py:197:stop] 0/2146, RunningAvgSamplesPerSec=29.96397071905795, CurrSamplesPerSec=29.554138915612057, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:18:37,912] [INFO] [timer.py:197:stop] 0/2147, RunningAvgSamplesPerSec=29.96381237324194, CurrSamplesPerSec=29.628124104251174, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:18:44,468] [INFO] [timer.py:197:stop] 0/2148, RunningAvgSamplesPerSec=29.964007478546613, CurrSamplesPerSec=30.388439065812506, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:18:50,749] [INFO] [timer.py:197:stop] 0/2149, RunningAvgSamplesPerSec=29.96418245290787, CurrSamplesPerSec=30.344444896611908, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:18:57,930] [INFO] [logging.py:68:log_dist] [Rank 0] step=2150, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 20:18:57,931] [INFO] [timer.py:197:stop] 0/2150, RunningAvgSamplesPerSec=29.964090874498563, CurrSamplesPerSec=29.768754388758744, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0004, 'learning_rate': 1e-05, 'epoch': 53.01} -[2022-12-14 20:19:04,318] [INFO] [timer.py:197:stop] 0/2151, RunningAvgSamplesPerSec=29.96402938321466, CurrSamplesPerSec=29.83252605000178, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:19:11,236] [INFO] [timer.py:197:stop] 0/2152, RunningAvgSamplesPerSec=29.963967848264762, CurrSamplesPerSec=29.832310548451932, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:19:13,656] [INFO] [timer.py:197:stop] 0/2153, RunningAvgSamplesPerSec=29.96424697257075, CurrSamplesPerSec=30.576634699850917, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:19:15,772] [INFO] [timer.py:197:stop] 0/2154, RunningAvgSamplesPerSec=29.96454530396617, CurrSamplesPerSec=30.620306224215295, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:19:17,920] [INFO] [timer.py:197:stop] 0/2155, RunningAvgSamplesPerSec=29.96464254364827, CurrSamplesPerSec=30.17537468334936, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:19:20,070] [INFO] [timer.py:197:stop] 0/2156, RunningAvgSamplesPerSec=29.964852513297714, CurrSamplesPerSec=30.42384497312934, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:19:22,277] [INFO] [timer.py:197:stop] 0/2157, RunningAvgSamplesPerSec=29.96475107513967, CurrSamplesPerSec=29.747835726310402, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:19:24,455] [INFO] [timer.py:197:stop] 0/2158, RunningAvgSamplesPerSec=29.964658756386896, CurrSamplesPerSec=29.767024622195017, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:19:26,606] [INFO] [timer.py:197:stop] 0/2159, RunningAvgSamplesPerSec=29.964733481701373, CurrSamplesPerSec=30.126712557072143, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:19:28,511] [INFO] [logging.py:68:log_dist] [Rank 0] step=2160, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 20:19:28,512] [INFO] [timer.py:197:stop] 0/2160, RunningAvgSamplesPerSec=29.966417532165064, CurrSamplesPerSec=34.100244780487074, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:20:07,705] [INFO] [timer.py:197:stop] 0/2161, RunningAvgSamplesPerSec=29.966621362130883, CurrSamplesPerSec=30.41304225415623, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:20:14,145] [INFO] [timer.py:197:stop] 0/2162, RunningAvgSamplesPerSec=29.966086137168052, CurrSamplesPerSec=28.85346033484941, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:20:20,468] [INFO] [timer.py:197:stop] 0/2163, RunningAvgSamplesPerSec=29.96559355348362, CurrSamplesPerSec=28.938112205753416, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:20:26,746] [INFO] [timer.py:197:stop] 0/2164, RunningAvgSamplesPerSec=29.965579483810863, CurrSamplesPerSec=29.935205753937453, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:20:32,573] [INFO] [timer.py:197:stop] 0/2165, RunningAvgSamplesPerSec=29.965766336782636, CurrSamplesPerSec=30.37526557873306, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:20:38,509] [INFO] [timer.py:197:stop] 0/2166, RunningAvgSamplesPerSec=29.965949055906098, CurrSamplesPerSec=30.366455245711816, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:20:44,534] [INFO] [timer.py:197:stop] 0/2167, RunningAvgSamplesPerSec=29.965566050894708, CurrSamplesPerSec=29.15906078513545, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:20:50,504] [INFO] [timer.py:197:stop] 0/2168, RunningAvgSamplesPerSec=29.965732493964378, CurrSamplesPerSec=30.330469870364535, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:20:56,742] [INFO] [timer.py:197:stop] 0/2169, RunningAvgSamplesPerSec=29.96579780302747, CurrSamplesPerSec=30.10792849930118, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:21:02,744] [INFO] [logging.py:68:log_dist] [Rank 0] step=2170, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 20:21:02,745] [INFO] [timer.py:197:stop] 0/2170, RunningAvgSamplesPerSec=29.96577293586929, CurrSamplesPerSec=29.91198257935156, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:21:10,840] [INFO] [timer.py:197:stop] 0/2171, RunningAvgSamplesPerSec=29.965803988585385, CurrSamplesPerSec=30.033277936489572, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:21:16,896] [INFO] [timer.py:197:stop] 0/2172, RunningAvgSamplesPerSec=29.96575113964789, CurrSamplesPerSec=29.851558820654418, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:21:23,200] [INFO] [timer.py:197:stop] 0/2173, RunningAvgSamplesPerSec=29.965612424260367, CurrSamplesPerSec=29.667595074901335, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:21:29,131] [INFO] [timer.py:197:stop] 0/2174, RunningAvgSamplesPerSec=29.96561951761288, CurrSamplesPerSec=29.981027107689933, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:21:35,104] [INFO] [timer.py:197:stop] 0/2175, RunningAvgSamplesPerSec=29.965847104426178, CurrSamplesPerSec=30.468460625453943, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0004, 'learning_rate': 1e-05, 'epoch': 54.0} -[2022-12-14 20:21:42,201] [INFO] [timer.py:197:stop] 0/2176, RunningAvgSamplesPerSec=29.96511941842439, CurrSamplesPerSec=28.463153090860857, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:21:48,824] [INFO] [timer.py:197:stop] 0/2177, RunningAvgSamplesPerSec=29.964948881601543, CurrSamplesPerSec=29.598734964555124, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:21:55,104] [INFO] [timer.py:197:stop] 0/2178, RunningAvgSamplesPerSec=29.965247636698166, CurrSamplesPerSec=30.629449726299715, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:22:01,004] [INFO] [timer.py:197:stop] 0/2179, RunningAvgSamplesPerSec=29.96506903737993, CurrSamplesPerSec=29.58141501045296, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:22:07,381] [INFO] [logging.py:68:log_dist] [Rank 0] step=2180, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 20:22:07,382] [INFO] [timer.py:197:stop] 0/2180, RunningAvgSamplesPerSec=29.965127998516913, CurrSamplesPerSec=30.09403884891828, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:22:13,848] [INFO] [timer.py:197:stop] 0/2181, RunningAvgSamplesPerSec=29.96502090703704, CurrSamplesPerSec=29.733578023275083, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:22:19,762] [INFO] [timer.py:197:stop] 0/2182, RunningAvgSamplesPerSec=29.964764116449448, CurrSamplesPerSec=29.41547919913238, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:22:26,063] [INFO] [timer.py:197:stop] 0/2183, RunningAvgSamplesPerSec=29.965034546431582, CurrSamplesPerSec=30.566408879211565, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:22:32,107] [INFO] [timer.py:197:stop] 0/2184, RunningAvgSamplesPerSec=29.96486326288516, CurrSamplesPerSec=29.59589584282201, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:22:38,068] [INFO] [timer.py:197:stop] 0/2185, RunningAvgSamplesPerSec=29.964918676617472, CurrSamplesPerSec=30.086321543085454, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:22:43,989] [INFO] [timer.py:197:stop] 0/2186, RunningAvgSamplesPerSec=29.964655627864534, CurrSamplesPerSec=29.401222640157073, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:22:50,262] [INFO] [timer.py:197:stop] 0/2187, RunningAvgSamplesPerSec=29.96445390830484, CurrSamplesPerSec=29.53028473049199, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:22:57,380] [INFO] [timer.py:197:stop] 0/2188, RunningAvgSamplesPerSec=29.96429900250537, CurrSamplesPerSec=29.629612099824033, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:23:03,388] [INFO] [timer.py:197:stop] 0/2189, RunningAvgSamplesPerSec=29.963942802235994, CurrSamplesPerSec=29.20501968412916, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:23:09,527] [INFO] [logging.py:68:log_dist] [Rank 0] step=2190, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 20:23:09,527] [INFO] [timer.py:197:stop] 0/2190, RunningAvgSamplesPerSec=29.963800872928466, CurrSamplesPerSec=29.656585422332878, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:23:15,774] [INFO] [timer.py:197:stop] 0/2191, RunningAvgSamplesPerSec=29.96396557331264, CurrSamplesPerSec=30.328718757156786, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:23:21,484] [INFO] [timer.py:197:stop] 0/2192, RunningAvgSamplesPerSec=29.964170489695334, CurrSamplesPerSec=30.419552610602512, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:23:23,846] [INFO] [timer.py:197:stop] 0/2193, RunningAvgSamplesPerSec=29.964128161824835, CurrSamplesPerSec=29.87171614489875, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:23:25,981] [INFO] [timer.py:197:stop] 0/2194, RunningAvgSamplesPerSec=29.96430652636085, CurrSamplesPerSec=30.360269763548036, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:23:28,168] [INFO] [timer.py:197:stop] 0/2195, RunningAvgSamplesPerSec=29.96420736235743, CurrSamplesPerSec=29.74840605510663, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:23:30,308] [INFO] [timer.py:197:stop] 0/2196, RunningAvgSamplesPerSec=29.96434703343757, CurrSamplesPerSec=30.27381052270687, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:23:32,483] [INFO] [timer.py:197:stop] 0/2197, RunningAvgSamplesPerSec=29.964427307404105, CurrSamplesPerSec=30.141590169839574, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:23:34,603] [INFO] [timer.py:197:stop] 0/2198, RunningAvgSamplesPerSec=29.964695198242406, CurrSamplesPerSec=30.564491211706066, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:23:36,742] [INFO] [timer.py:197:stop] 0/2199, RunningAvgSamplesPerSec=29.96483754412884, CurrSamplesPerSec=30.280725942044434, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:23:38,626] [INFO] [logging.py:68:log_dist] [Rank 0] step=2200, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 20:23:38,627] [INFO] [timer.py:197:stop] 0/2200, RunningAvgSamplesPerSec=29.966615935326324, CurrSamplesPerSec=34.45984725674741, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0004, 'learning_rate': 1e-05, 'epoch': 54.01} -[2022-12-14 20:24:16,655] [INFO] [timer.py:197:stop] 0/2201, RunningAvgSamplesPerSec=29.966397960968987, CurrSamplesPerSec=29.49483320754269, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:24:22,939] [INFO] [timer.py:197:stop] 0/2202, RunningAvgSamplesPerSec=29.96592055200626, CurrSamplesPerSec=28.95164828954382, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:24:29,180] [INFO] [timer.py:197:stop] 0/2203, RunningAvgSamplesPerSec=29.966051950620333, CurrSamplesPerSec=30.257946029614377, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:24:35,035] [INFO] [timer.py:197:stop] 0/2204, RunningAvgSamplesPerSec=29.965926029070875, CurrSamplesPerSec=29.691313727930094, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:24:41,345] [INFO] [timer.py:197:stop] 0/2205, RunningAvgSamplesPerSec=29.96623412467606, CurrSamplesPerSec=30.660383108223254, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:24:47,326] [INFO] [timer.py:197:stop] 0/2206, RunningAvgSamplesPerSec=29.966400519770307, CurrSamplesPerSec=30.337510631983402, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:24:53,474] [INFO] [timer.py:197:stop] 0/2207, RunningAvgSamplesPerSec=29.96644558020851, CurrSamplesPerSec=30.066089169349183, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:24:59,330] [INFO] [timer.py:197:stop] 0/2208, RunningAvgSamplesPerSec=29.96653161069774, CurrSamplesPerSec=30.15743788208606, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:25:05,551] [INFO] [timer.py:197:stop] 0/2209, RunningAvgSamplesPerSec=29.966010804274383, CurrSamplesPerSec=28.859552744617446, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:25:11,537] [INFO] [logging.py:68:log_dist] [Rank 0] step=2210, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 20:25:11,537] [INFO] [timer.py:197:stop] 0/2210, RunningAvgSamplesPerSec=29.965659095616566, CurrSamplesPerSec=29.2090460349496, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:25:17,867] [INFO] [timer.py:197:stop] 0/2211, RunningAvgSamplesPerSec=29.965536431483972, CurrSamplesPerSec=29.697121186860606, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:25:23,947] [INFO] [timer.py:197:stop] 0/2212, RunningAvgSamplesPerSec=29.96533587970355, CurrSamplesPerSec=29.528774211716666, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:25:29,787] [INFO] [timer.py:197:stop] 0/2213, RunningAvgSamplesPerSec=29.965559163145407, CurrSamplesPerSec=30.46728139491754, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:25:35,657] [INFO] [timer.py:197:stop] 0/2214, RunningAvgSamplesPerSec=29.96544326534961, CurrSamplesPerSec=29.711366959634734, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:25:41,421] [INFO] [timer.py:197:stop] 0/2215, RunningAvgSamplesPerSec=29.965518484940738, CurrSamplesPerSec=30.1328336703185, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:25:47,522] [INFO] [timer.py:197:stop] 0/2216, RunningAvgSamplesPerSec=29.965360878036712, CurrSamplesPerSec=29.6205915810286, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:25:53,388] [INFO] [timer.py:197:stop] 0/2217, RunningAvgSamplesPerSec=29.965007984495923, CurrSamplesPerSec=29.203564496593895, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:25:59,082] [INFO] [timer.py:197:stop] 0/2218, RunningAvgSamplesPerSec=29.96526139712323, CurrSamplesPerSec=30.53729044646925, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:26:05,029] [INFO] [timer.py:197:stop] 0/2219, RunningAvgSamplesPerSec=29.965091252258485, CurrSamplesPerSec=29.592737552049165, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:26:10,755] [INFO] [logging.py:68:log_dist] [Rank 0] step=2220, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 20:26:10,755] [INFO] [timer.py:197:stop] 0/2220, RunningAvgSamplesPerSec=29.965299465747893, CurrSamplesPerSec=30.434134334670038, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:26:16,752] [INFO] [timer.py:197:stop] 0/2221, RunningAvgSamplesPerSec=29.96531320811506, CurrSamplesPerSec=29.995824828748027, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:26:22,509] [INFO] [timer.py:197:stop] 0/2222, RunningAvgSamplesPerSec=29.96567055502687, CurrSamplesPerSec=30.780186828485157, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:26:28,565] [INFO] [timer.py:197:stop] 0/2223, RunningAvgSamplesPerSec=29.965944675010928, CurrSamplesPerSec=30.58711133206481, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:26:34,582] [INFO] [timer.py:197:stop] 0/2224, RunningAvgSamplesPerSec=29.965680704478427, CurrSamplesPerSec=29.390657545525414, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:26:40,543] [INFO] [timer.py:197:stop] 0/2225, RunningAvgSamplesPerSec=29.96562060783633, CurrSamplesPerSec=29.832678560672253, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0004, 'learning_rate': 1e-05, 'epoch': 55.01} -[2022-12-14 20:26:46,242] [INFO] [timer.py:197:stop] 0/2226, RunningAvgSamplesPerSec=29.965901542174205, CurrSamplesPerSec=30.603717222064827, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:26:52,700] [INFO] [timer.py:197:stop] 0/2227, RunningAvgSamplesPerSec=29.9656754142019, CurrSamplesPerSec=29.471071387753707, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:26:58,625] [INFO] [timer.py:197:stop] 0/2228, RunningAvgSamplesPerSec=29.965965938190088, CurrSamplesPerSec=30.626640060149896, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:27:04,486] [INFO] [timer.py:197:stop] 0/2229, RunningAvgSamplesPerSec=29.96619238191353, CurrSamplesPerSec=30.478884011599522, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:27:10,956] [INFO] [logging.py:68:log_dist] [Rank 0] step=2230, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 20:27:10,956] [INFO] [timer.py:197:stop] 0/2230, RunningAvgSamplesPerSec=29.9660938748794, CurrSamplesPerSec=29.7483137460725, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:27:17,174] [INFO] [timer.py:197:stop] 0/2231, RunningAvgSamplesPerSec=29.966110321489722, CurrSamplesPerSec=30.00279825202017, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:27:23,424] [INFO] [timer.py:197:stop] 0/2232, RunningAvgSamplesPerSec=29.96606847359631, CurrSamplesPerSec=29.873079108132607, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:27:25,903] [INFO] [timer.py:197:stop] 0/2233, RunningAvgSamplesPerSec=29.966288491674884, CurrSamplesPerSec=30.46509954041682, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:27:28,072] [INFO] [timer.py:197:stop] 0/2234, RunningAvgSamplesPerSec=29.96624442626348, CurrSamplesPerSec=29.86825610671751, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:27:30,244] [INFO] [timer.py:197:stop] 0/2235, RunningAvgSamplesPerSec=29.966177469369057, CurrSamplesPerSec=29.817471641320534, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:27:32,358] [INFO] [timer.py:197:stop] 0/2236, RunningAvgSamplesPerSec=29.966471284147197, CurrSamplesPerSec=30.63725239775597, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:27:34,529] [INFO] [timer.py:197:stop] 0/2237, RunningAvgSamplesPerSec=29.966409108708582, CurrSamplesPerSec=29.82815032113252, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:27:36,667] [INFO] [timer.py:197:stop] 0/2238, RunningAvgSamplesPerSec=29.96655462457435, CurrSamplesPerSec=30.295352636357634, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:27:38,807] [INFO] [timer.py:197:stop] 0/2239, RunningAvgSamplesPerSec=29.966688469871553, CurrSamplesPerSec=30.268986967449237, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:27:40,694] [INFO] [logging.py:68:log_dist] [Rank 0] step=2240, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 20:27:40,695] [INFO] [timer.py:197:stop] 0/2240, RunningAvgSamplesPerSec=29.968389048769474, CurrSamplesPerSec=34.32598576350269, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:28:19,604] [INFO] [timer.py:197:stop] 0/2241, RunningAvgSamplesPerSec=29.968132470358483, CurrSamplesPerSec=29.404710595196953, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:28:25,799] [INFO] [timer.py:197:stop] 0/2242, RunningAvgSamplesPerSec=29.967943129871063, CurrSamplesPerSec=29.54992578818474, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:28:32,121] [INFO] [timer.py:197:stop] 0/2243, RunningAvgSamplesPerSec=29.967558350664884, CurrSamplesPerSec=29.129759845784587, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:28:38,083] [INFO] [timer.py:197:stop] 0/2244, RunningAvgSamplesPerSec=29.967469330873392, CurrSamplesPerSec=29.7692958073866, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:28:44,307] [INFO] [timer.py:197:stop] 0/2245, RunningAvgSamplesPerSec=29.967567731253453, CurrSamplesPerSec=30.189818265329176, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:28:50,437] [INFO] [timer.py:197:stop] 0/2246, RunningAvgSamplesPerSec=29.96765057072747, CurrSamplesPerSec=30.154619293172026, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:28:56,398] [INFO] [timer.py:197:stop] 0/2247, RunningAvgSamplesPerSec=29.967267421084575, CurrSamplesPerSec=29.13147008433662, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:29:02,508] [INFO] [timer.py:197:stop] 0/2248, RunningAvgSamplesPerSec=29.96726840128781, CurrSamplesPerSec=29.96946911922388, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:29:08,305] [INFO] [timer.py:197:stop] 0/2249, RunningAvgSamplesPerSec=29.967373518701052, CurrSamplesPerSec=30.205342871439793, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:29:14,435] [INFO] [logging.py:68:log_dist] [Rank 0] step=2250, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 20:29:14,436] [INFO] [timer.py:197:stop] 0/2250, RunningAvgSamplesPerSec=29.967077449313603, CurrSamplesPerSec=29.31626399805953, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0004, 'learning_rate': 1e-05, 'epoch': 56.0} -[2022-12-14 20:29:20,673] [INFO] [timer.py:197:stop] 0/2251, RunningAvgSamplesPerSec=29.96723411133297, CurrSamplesPerSec=30.32360022371433, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:29:26,716] [INFO] [timer.py:197:stop] 0/2252, RunningAvgSamplesPerSec=29.967016914987063, CurrSamplesPerSec=29.486380387775974, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:29:32,792] [INFO] [timer.py:197:stop] 0/2253, RunningAvgSamplesPerSec=29.96685469618096, CurrSamplesPerSec=29.606256374182475, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:29:38,840] [INFO] [timer.py:197:stop] 0/2254, RunningAvgSamplesPerSec=29.966745927082158, CurrSamplesPerSec=29.723891774146864, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:29:44,633] [INFO] [timer.py:197:stop] 0/2255, RunningAvgSamplesPerSec=29.966497314296806, CurrSamplesPerSec=29.41689433256031, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:29:51,167] [INFO] [timer.py:197:stop] 0/2256, RunningAvgSamplesPerSec=29.96645517970199, CurrSamplesPerSec=29.871825842215245, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:29:57,484] [INFO] [timer.py:197:stop] 0/2257, RunningAvgSamplesPerSec=29.966605319722166, CurrSamplesPerSec=30.308888070421144, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:30:03,799] [INFO] [timer.py:197:stop] 0/2258, RunningAvgSamplesPerSec=29.966759537985208, CurrSamplesPerSec=30.31860669390237, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:30:10,356] [INFO] [timer.py:197:stop] 0/2259, RunningAvgSamplesPerSec=29.966587340731376, CurrSamplesPerSec=29.58308414934134, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:30:17,481] [INFO] [logging.py:68:log_dist] [Rank 0] step=2260, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 20:30:17,482] [INFO] [timer.py:197:stop] 0/2260, RunningAvgSamplesPerSec=29.966508412904147, CurrSamplesPerSec=29.789421491271877, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:30:23,644] [INFO] [timer.py:197:stop] 0/2261, RunningAvgSamplesPerSec=29.966366900331046, CurrSamplesPerSec=29.650204287666195, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:30:29,513] [INFO] [timer.py:197:stop] 0/2262, RunningAvgSamplesPerSec=29.966166305593173, CurrSamplesPerSec=29.519776022440865, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:30:35,628] [INFO] [timer.py:197:stop] 0/2263, RunningAvgSamplesPerSec=29.96626978416363, CurrSamplesPerSec=30.201971622222338, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:30:41,677] [INFO] [timer.py:197:stop] 0/2264, RunningAvgSamplesPerSec=29.96630622866724, CurrSamplesPerSec=30.04893456231123, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:30:47,606] [INFO] [timer.py:197:stop] 0/2265, RunningAvgSamplesPerSec=29.965940445712697, CurrSamplesPerSec=29.160780801704746, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:30:53,616] [INFO] [timer.py:197:stop] 0/2266, RunningAvgSamplesPerSec=29.965815717748097, CurrSamplesPerSec=29.686191385395638, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:30:59,538] [INFO] [timer.py:197:stop] 0/2267, RunningAvgSamplesPerSec=29.965686688086446, CurrSamplesPerSec=29.676385062368006, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:31:05,646] [INFO] [timer.py:197:stop] 0/2268, RunningAvgSamplesPerSec=29.96587804710128, CurrSamplesPerSec=30.40567020299221, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:31:11,619] [INFO] [timer.py:197:stop] 0/2269, RunningAvgSamplesPerSec=29.965428322877127, CurrSamplesPerSec=28.97988474352303, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:31:19,691] [INFO] [logging.py:68:log_dist] [Rank 0] step=2270, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 20:31:19,692] [INFO] [timer.py:197:stop] 0/2270, RunningAvgSamplesPerSec=29.96531460355929, CurrSamplesPerSec=29.709712907497238, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:31:26,049] [INFO] [timer.py:197:stop] 0/2271, RunningAvgSamplesPerSec=29.96495734278918, CurrSamplesPerSec=29.17603225112594, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:31:31,903] [INFO] [timer.py:197:stop] 0/2272, RunningAvgSamplesPerSec=29.964989527322984, CurrSamplesPerSec=30.038194719145846, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:31:34,387] [INFO] [timer.py:197:stop] 0/2273, RunningAvgSamplesPerSec=29.96495226172005, CurrSamplesPerSec=29.88059758644749, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:31:36,520] [INFO] [timer.py:197:stop] 0/2274, RunningAvgSamplesPerSec=29.965125452990304, CurrSamplesPerSec=30.363676446950837, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:31:38,688] [INFO] [timer.py:197:stop] 0/2275, RunningAvgSamplesPerSec=29.965088685402762, CurrSamplesPerSec=29.881785060568856, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0003, 'learning_rate': 1e-05, 'epoch': 56.01} -[2022-12-14 20:31:40,858] [INFO] [timer.py:197:stop] 0/2276, RunningAvgSamplesPerSec=29.96504144026457, CurrSamplesPerSec=29.85803689093868, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:31:42,982] [INFO] [timer.py:197:stop] 0/2277, RunningAvgSamplesPerSec=29.965273930925285, CurrSamplesPerSec=30.50345715108751, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:31:45,120] [INFO] [timer.py:197:stop] 0/2278, RunningAvgSamplesPerSec=29.96543484321554, CurrSamplesPerSec=30.33603982311136, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:31:47,269] [INFO] [timer.py:197:stop] 0/2279, RunningAvgSamplesPerSec=29.965510581872984, CurrSamplesPerSec=30.138889593733378, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:31:49,146] [INFO] [logging.py:68:log_dist] [Rank 0] step=2280, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 20:31:49,146] [INFO] [timer.py:197:stop] 0/2280, RunningAvgSamplesPerSec=29.967229042869338, CurrSamplesPerSec=34.4681215839785, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:32:32,770] [INFO] [timer.py:197:stop] 0/2281, RunningAvgSamplesPerSec=29.967000325326783, CurrSamplesPerSec=29.454889441893695, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:32:39,260] [INFO] [timer.py:197:stop] 0/2282, RunningAvgSamplesPerSec=29.96693282184375, CurrSamplesPerSec=29.81387845985232, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:32:46,060] [INFO] [timer.py:197:stop] 0/2283, RunningAvgSamplesPerSec=29.96680765915084, CurrSamplesPerSec=29.684129813095407, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:32:52,684] [INFO] [timer.py:197:stop] 0/2284, RunningAvgSamplesPerSec=29.966813302408404, CurrSamplesPerSec=29.979691107013583, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:32:59,032] [INFO] [timer.py:197:stop] 0/2285, RunningAvgSamplesPerSec=29.96712151687989, CurrSamplesPerSec=30.687379207268155, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:33:08,078] [INFO] [timer.py:197:stop] 0/2286, RunningAvgSamplesPerSec=29.967115755678506, CurrSamplesPerSec=29.95396870580797, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:33:14,565] [INFO] [timer.py:197:stop] 0/2287, RunningAvgSamplesPerSec=29.96716152630363, CurrSamplesPerSec=30.072067758520927, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:33:20,717] [INFO] [timer.py:197:stop] 0/2288, RunningAvgSamplesPerSec=29.967201935938604, CurrSamplesPerSec=30.059823465536162, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:33:27,755] [INFO] [timer.py:197:stop] 0/2289, RunningAvgSamplesPerSec=29.96702290089449, CurrSamplesPerSec=29.563265512001387, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:33:34,026] [INFO] [logging.py:68:log_dist] [Rank 0] step=2290, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 20:33:34,027] [INFO] [timer.py:197:stop] 0/2290, RunningAvgSamplesPerSec=29.966972011157726, CurrSamplesPerSec=29.851037642479845, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:33:40,885] [INFO] [timer.py:197:stop] 0/2291, RunningAvgSamplesPerSec=29.96704987646029, CurrSamplesPerSec=30.146271637958307, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:33:47,093] [INFO] [timer.py:197:stop] 0/2292, RunningAvgSamplesPerSec=29.967127805641642, CurrSamplesPerSec=30.146576339208856, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:33:53,539] [INFO] [timer.py:197:stop] 0/2293, RunningAvgSamplesPerSec=29.96693636135591, CurrSamplesPerSec=29.53485295973525, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:34:00,916] [INFO] [timer.py:197:stop] 0/2294, RunningAvgSamplesPerSec=29.9667632115072, CurrSamplesPerSec=29.575261688315052, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:34:07,377] [INFO] [timer.py:197:stop] 0/2295, RunningAvgSamplesPerSec=29.966657129539602, CurrSamplesPerSec=29.725474988331193, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:34:15,048] [INFO] [timer.py:197:stop] 0/2296, RunningAvgSamplesPerSec=29.966473737086655, CurrSamplesPerSec=29.551776813594312, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:34:21,940] [INFO] [timer.py:197:stop] 0/2297, RunningAvgSamplesPerSec=29.966461796611394, CurrSamplesPerSec=29.939095372087444, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:34:29,850] [INFO] [timer.py:197:stop] 0/2298, RunningAvgSamplesPerSec=29.96662627065207, CurrSamplesPerSec=30.348911664901824, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:34:36,913] [INFO] [timer.py:197:stop] 0/2299, RunningAvgSamplesPerSec=29.96650386488581, CurrSamplesPerSec=29.688072655831288, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:34:43,480] [INFO] [logging.py:68:log_dist] [Rank 0] step=2300, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 20:34:43,481] [INFO] [timer.py:197:stop] 0/2300, RunningAvgSamplesPerSec=29.966603298177198, CurrSamplesPerSec=30.196756504775383, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0003, 'learning_rate': 1e-05, 'epoch': 57.0} -[2022-12-14 20:34:50,111] [INFO] [timer.py:197:stop] 0/2301, RunningAvgSamplesPerSec=29.966700769975585, CurrSamplesPerSec=30.19237855611987, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:34:56,644] [INFO] [timer.py:197:stop] 0/2302, RunningAvgSamplesPerSec=29.966337103589215, CurrSamplesPerSec=29.152971087091686, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:35:03,732] [INFO] [timer.py:197:stop] 0/2303, RunningAvgSamplesPerSec=29.966140928008638, CurrSamplesPerSec=29.521633012527772, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:35:10,533] [INFO] [timer.py:197:stop] 0/2304, RunningAvgSamplesPerSec=29.965783577129038, CurrSamplesPerSec=29.165488919718904, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:35:17,427] [INFO] [timer.py:197:stop] 0/2305, RunningAvgSamplesPerSec=29.96588630822642, CurrSamplesPerSec=30.20425528946291, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:35:24,488] [INFO] [timer.py:197:stop] 0/2306, RunningAvgSamplesPerSec=29.965903110774583, CurrSamplesPerSec=30.00464943572107, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:35:31,121] [INFO] [timer.py:197:stop] 0/2307, RunningAvgSamplesPerSec=29.965848417254062, CurrSamplesPerSec=29.840362474719914, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:35:38,061] [INFO] [timer.py:197:stop] 0/2308, RunningAvgSamplesPerSec=29.965786178847814, CurrSamplesPerSec=29.823010479670426, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:35:44,418] [INFO] [timer.py:197:stop] 0/2309, RunningAvgSamplesPerSec=29.9660214857005, CurrSamplesPerSec=30.518650298824983, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:35:51,077] [INFO] [logging.py:68:log_dist] [Rank 0] step=2310, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 20:35:51,078] [INFO] [timer.py:197:stop] 0/2310, RunningAvgSamplesPerSec=29.965950064027282, CurrSamplesPerSec=29.802081695767598, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:35:57,668] [INFO] [timer.py:197:stop] 0/2311, RunningAvgSamplesPerSec=29.965260782206492, CurrSamplesPerSec=28.45463267472669, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:36:04,633] [INFO] [timer.py:197:stop] 0/2312, RunningAvgSamplesPerSec=29.965004386343086, CurrSamplesPerSec=29.384461088574568, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:36:07,147] [INFO] [timer.py:197:stop] 0/2313, RunningAvgSamplesPerSec=29.965318530034118, CurrSamplesPerSec=30.70900823119227, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:36:09,294] [INFO] [timer.py:197:stop] 0/2314, RunningAvgSamplesPerSec=29.96541166936468, CurrSamplesPerSec=30.18221465692673, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:36:11,449] [INFO] [timer.py:197:stop] 0/2315, RunningAvgSamplesPerSec=29.965618549272218, CurrSamplesPerSec=30.45168679381327, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:36:13,600] [INFO] [timer.py:197:stop] 0/2316, RunningAvgSamplesPerSec=29.965680472377354, CurrSamplesPerSec=30.109596794446926, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:36:15,737] [INFO] [timer.py:197:stop] 0/2317, RunningAvgSamplesPerSec=29.96583410253144, CurrSamplesPerSec=30.32560426291854, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:36:17,878] [INFO] [timer.py:197:stop] 0/2318, RunningAvgSamplesPerSec=29.96596550850786, CurrSamplesPerSec=30.27329156682161, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:36:19,998] [INFO] [timer.py:197:stop] 0/2319, RunningAvgSamplesPerSec=29.966226150500862, CurrSamplesPerSec=30.582288472057073, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:36:21,879] [INFO] [logging.py:68:log_dist] [Rank 0] step=2320, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 20:36:21,879] [INFO] [timer.py:197:stop] 0/2320, RunningAvgSamplesPerSec=29.96792629134809, CurrSamplesPerSec=34.50361790259594, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:36:58,316] [INFO] [timer.py:197:stop] 0/2321, RunningAvgSamplesPerSec=29.967853744643488, CurrSamplesPerSec=29.800629260863275, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:37:04,696] [INFO] [timer.py:197:stop] 0/2322, RunningAvgSamplesPerSec=29.967674534844335, CurrSamplesPerSec=29.55777390650957, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:37:10,754] [INFO] [timer.py:197:stop] 0/2323, RunningAvgSamplesPerSec=29.967732415792195, CurrSamplesPerSec=30.102620903419123, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:37:16,619] [INFO] [timer.py:197:stop] 0/2324, RunningAvgSamplesPerSec=29.96774372624014, CurrSamplesPerSec=29.994018302203006, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:37:22,709] [INFO] [timer.py:197:stop] 0/2325, RunningAvgSamplesPerSec=29.967493803529607, CurrSamplesPerSec=29.39820234681442, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0003, 'learning_rate': 1e-05, 'epoch': 58.0} -[2022-12-14 20:37:29,096] [INFO] [timer.py:197:stop] 0/2326, RunningAvgSamplesPerSec=29.96765285005464, CurrSamplesPerSec=30.341731856791927, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:37:35,155] [INFO] [timer.py:197:stop] 0/2327, RunningAvgSamplesPerSec=29.967667036368148, CurrSamplesPerSec=30.000672355461457, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:37:41,254] [INFO] [timer.py:197:stop] 0/2328, RunningAvgSamplesPerSec=29.967676177802627, CurrSamplesPerSec=29.988945103906797, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:37:47,417] [INFO] [timer.py:197:stop] 0/2329, RunningAvgSamplesPerSec=29.96768940884898, CurrSamplesPerSec=29.998496473686558, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:37:53,453] [INFO] [logging.py:68:log_dist] [Rank 0] step=2330, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 20:37:53,454] [INFO] [timer.py:197:stop] 0/2330, RunningAvgSamplesPerSec=29.967792685509103, CurrSamplesPerSec=30.210061165042752, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:37:59,765] [INFO] [timer.py:197:stop] 0/2331, RunningAvgSamplesPerSec=29.967831348558786, CurrSamplesPerSec=30.058110194780838, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:38:06,243] [INFO] [timer.py:197:stop] 0/2332, RunningAvgSamplesPerSec=29.967987430500013, CurrSamplesPerSec=30.33596782913568, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:38:12,681] [INFO] [timer.py:197:stop] 0/2333, RunningAvgSamplesPerSec=29.967424356526173, CurrSamplesPerSec=28.710512724061356, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:38:18,785] [INFO] [timer.py:197:stop] 0/2334, RunningAvgSamplesPerSec=29.967610613286475, CurrSamplesPerSec=30.40816045413188, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:38:24,835] [INFO] [timer.py:197:stop] 0/2335, RunningAvgSamplesPerSec=29.967229399561536, CurrSamplesPerSec=29.103862127497543, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:38:31,336] [INFO] [timer.py:197:stop] 0/2336, RunningAvgSamplesPerSec=29.967220129274615, CurrSamplesPerSec=29.945608154124994, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:38:37,298] [INFO] [timer.py:197:stop] 0/2337, RunningAvgSamplesPerSec=29.967198324776383, CurrSamplesPerSec=29.916392942937335, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:38:43,247] [INFO] [timer.py:197:stop] 0/2338, RunningAvgSamplesPerSec=29.96723061300356, CurrSamplesPerSec=30.042813860890373, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:38:49,513] [INFO] [timer.py:197:stop] 0/2339, RunningAvgSamplesPerSec=29.96724859575536, CurrSamplesPerSec=30.009315297776844, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:38:55,340] [INFO] [logging.py:68:log_dist] [Rank 0] step=2340, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 20:38:55,341] [INFO] [timer.py:197:stop] 0/2340, RunningAvgSamplesPerSec=29.967407819590562, CurrSamplesPerSec=30.344194494612637, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:39:01,447] [INFO] [timer.py:197:stop] 0/2341, RunningAvgSamplesPerSec=29.967370455960346, CurrSamplesPerSec=29.880268303616003, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:39:07,623] [INFO] [timer.py:197:stop] 0/2342, RunningAvgSamplesPerSec=29.967419469021372, CurrSamplesPerSec=30.082501457699156, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:39:13,661] [INFO] [timer.py:197:stop] 0/2343, RunningAvgSamplesPerSec=29.96738164641998, CurrSamplesPerSec=29.879137488544966, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:39:20,338] [INFO] [timer.py:197:stop] 0/2344, RunningAvgSamplesPerSec=29.967276508505325, CurrSamplesPerSec=29.72315453445227, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:39:26,358] [INFO] [timer.py:197:stop] 0/2345, RunningAvgSamplesPerSec=29.967057377146293, CurrSamplesPerSec=29.462496364320845, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:39:32,250] [INFO] [timer.py:197:stop] 0/2346, RunningAvgSamplesPerSec=29.966977883888354, CurrSamplesPerSec=29.78187612797548, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:39:38,531] [INFO] [timer.py:197:stop] 0/2347, RunningAvgSamplesPerSec=29.96674706644091, CurrSamplesPerSec=29.43530992087838, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:39:44,986] [INFO] [timer.py:197:stop] 0/2348, RunningAvgSamplesPerSec=29.966374674802207, CurrSamplesPerSec=29.117853807329052, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:39:51,072] [INFO] [timer.py:197:stop] 0/2349, RunningAvgSamplesPerSec=29.96643746490019, CurrSamplesPerSec=30.114471031327028, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:39:57,369] [INFO] [logging.py:68:log_dist] [Rank 0] step=2350, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 20:39:57,369] [INFO] [timer.py:197:stop] 0/2350, RunningAvgSamplesPerSec=29.966314824960648, CurrSamplesPerSec=29.681218494131436, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0003, 'learning_rate': 1e-05, 'epoch': 58.01} -[2022-12-14 20:40:03,422] [INFO] [timer.py:197:stop] 0/2351, RunningAvgSamplesPerSec=29.965959652834734, CurrSamplesPerSec=29.154604890527732, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:40:10,179] [INFO] [timer.py:197:stop] 0/2352, RunningAvgSamplesPerSec=29.965898879278782, CurrSamplesPerSec=29.823818951111285, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:40:12,540] [INFO] [timer.py:197:stop] 0/2353, RunningAvgSamplesPerSec=29.966171446438, CurrSamplesPerSec=30.62070092066704, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:40:14,655] [INFO] [timer.py:197:stop] 0/2354, RunningAvgSamplesPerSec=29.966446688280328, CurrSamplesPerSec=30.627828164524104, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:40:16,770] [INFO] [timer.py:197:stop] 0/2355, RunningAvgSamplesPerSec=29.96671569868634, CurrSamplesPerSec=30.613081235291354, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:40:18,910] [INFO] [timer.py:197:stop] 0/2356, RunningAvgSamplesPerSec=29.966843954235383, CurrSamplesPerSec=30.271700668212528, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:40:21,090] [INFO] [timer.py:197:stop] 0/2357, RunningAvgSamplesPerSec=29.966896536268486, CurrSamplesPerSec=30.09118824652638, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:40:23,264] [INFO] [timer.py:197:stop] 0/2358, RunningAvgSamplesPerSec=29.96681672895602, CurrSamplesPerSec=29.780042418751936, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:40:25,380] [INFO] [timer.py:197:stop] 0/2359, RunningAvgSamplesPerSec=29.967087884579243, CurrSamplesPerSec=30.619852161237628, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:40:27,302] [INFO] [logging.py:68:log_dist] [Rank 0] step=2360, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 20:40:27,302] [INFO] [timer.py:197:stop] 0/2360, RunningAvgSamplesPerSec=29.968480765519605, CurrSamplesPerSec=33.65559229654534, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:41:05,239] [INFO] [timer.py:197:stop] 0/2361, RunningAvgSamplesPerSec=29.96838954975663, CurrSamplesPerSec=29.754836129482506, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:41:11,431] [INFO] [timer.py:197:stop] 0/2362, RunningAvgSamplesPerSec=29.96836384553797, CurrSamplesPerSec=29.907850085645038, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:41:18,019] [INFO] [timer.py:197:stop] 0/2363, RunningAvgSamplesPerSec=29.967989498604307, CurrSamplesPerSec=29.10983977822186, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:41:23,820] [INFO] [timer.py:197:stop] 0/2364, RunningAvgSamplesPerSec=29.968010402642143, CurrSamplesPerSec=30.017446286603253, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:41:29,674] [INFO] [timer.py:197:stop] 0/2365, RunningAvgSamplesPerSec=29.968191982970552, CurrSamplesPerSec=30.403314656188346, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:41:35,998] [INFO] [timer.py:197:stop] 0/2366, RunningAvgSamplesPerSec=29.9678856346186, CurrSamplesPerSec=29.26106557689771, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:41:41,894] [INFO] [timer.py:197:stop] 0/2367, RunningAvgSamplesPerSec=29.96805250646758, CurrSamplesPerSec=30.367801892329364, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:41:47,757] [INFO] [timer.py:197:stop] 0/2368, RunningAvgSamplesPerSec=29.96782853995353, CurrSamplesPerSec=29.447351053821418, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:41:53,904] [INFO] [timer.py:197:stop] 0/2369, RunningAvgSamplesPerSec=29.967524976824784, CurrSamplesPerSec=29.266112442909183, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:41:59,721] [INFO] [logging.py:68:log_dist] [Rank 0] step=2370, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 20:41:59,722] [INFO] [timer.py:197:stop] 0/2370, RunningAvgSamplesPerSec=29.967401744700705, CurrSamplesPerSec=29.67852430979181, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:42:06,060] [INFO] [timer.py:197:stop] 0/2371, RunningAvgSamplesPerSec=29.967278884527882, CurrSamplesPerSec=29.679144486721444, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:42:12,259] [INFO] [timer.py:197:stop] 0/2372, RunningAvgSamplesPerSec=29.966900093358102, CurrSamplesPerSec=29.095644499920713, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:42:18,132] [INFO] [timer.py:197:stop] 0/2373, RunningAvgSamplesPerSec=29.96661723667373, CurrSamplesPerSec=29.310921374775802, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:42:24,379] [INFO] [timer.py:197:stop] 0/2374, RunningAvgSamplesPerSec=29.966155615022448, CurrSamplesPerSec=28.91023416350606, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:42:30,359] [INFO] [timer.py:197:stop] 0/2375, RunningAvgSamplesPerSec=29.966198506978664, CurrSamplesPerSec=30.068284972054975, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0003, 'learning_rate': 1e-05, 'epoch': 59.0} -[2022-12-14 20:42:36,374] [INFO] [timer.py:197:stop] 0/2376, RunningAvgSamplesPerSec=29.966270462418397, CurrSamplesPerSec=30.137999660262516, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:42:42,426] [INFO] [timer.py:197:stop] 0/2377, RunningAvgSamplesPerSec=29.966130758082972, CurrSamplesPerSec=29.63810471026838, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:42:48,448] [INFO] [timer.py:197:stop] 0/2378, RunningAvgSamplesPerSec=29.966124540534846, CurrSamplesPerSec=29.95136513992157, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:42:54,570] [INFO] [timer.py:197:stop] 0/2379, RunningAvgSamplesPerSec=29.966190060744935, CurrSamplesPerSec=30.122679392827642, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:43:00,517] [INFO] [logging.py:68:log_dist] [Rank 0] step=2380, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 20:43:00,518] [INFO] [timer.py:197:stop] 0/2380, RunningAvgSamplesPerSec=29.966109283770816, CurrSamplesPerSec=29.775325372258873, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:43:07,412] [INFO] [timer.py:197:stop] 0/2381, RunningAvgSamplesPerSec=29.965828269695145, CurrSamplesPerSec=29.312160024128016, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:43:13,291] [INFO] [timer.py:197:stop] 0/2382, RunningAvgSamplesPerSec=29.965858842888025, CurrSamplesPerSec=30.038769513109454, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:43:19,857] [INFO] [timer.py:197:stop] 0/2383, RunningAvgSamplesPerSec=29.966002697190426, CurrSamplesPerSec=30.31233457596737, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:43:25,955] [INFO] [timer.py:197:stop] 0/2384, RunningAvgSamplesPerSec=29.9659000466088, CurrSamplesPerSec=29.723467198606592, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:43:32,324] [INFO] [timer.py:197:stop] 0/2385, RunningAvgSamplesPerSec=29.965832030453154, CurrSamplesPerSec=29.804689155057623, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:43:38,237] [INFO] [timer.py:197:stop] 0/2386, RunningAvgSamplesPerSec=29.96597473951572, CurrSamplesPerSec=30.30995582150923, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:43:44,105] [INFO] [timer.py:197:stop] 0/2387, RunningAvgSamplesPerSec=29.965722360143452, CurrSamplesPerSec=29.375897808140536, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:43:50,239] [INFO] [timer.py:197:stop] 0/2388, RunningAvgSamplesPerSec=29.965396999643982, CurrSamplesPerSec=29.209007895401168, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:43:55,889] [INFO] [timer.py:197:stop] 0/2389, RunningAvgSamplesPerSec=29.9652851372778, CurrSamplesPerSec=29.700738857663957, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:44:02,035] [INFO] [logging.py:68:log_dist] [Rank 0] step=2390, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 20:44:02,035] [INFO] [timer.py:197:stop] 0/2390, RunningAvgSamplesPerSec=29.965523121579515, CurrSamplesPerSec=30.544573541310914, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:44:07,952] [INFO] [timer.py:197:stop] 0/2391, RunningAvgSamplesPerSec=29.965629443454038, CurrSamplesPerSec=30.22169662730334, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:44:14,192] [INFO] [timer.py:197:stop] 0/2392, RunningAvgSamplesPerSec=29.96567795942969, CurrSamplesPerSec=30.082032864426395, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:44:16,634] [INFO] [timer.py:197:stop] 0/2393, RunningAvgSamplesPerSec=29.965644075114145, CurrSamplesPerSec=29.884878923778796, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:44:18,769] [INFO] [timer.py:197:stop] 0/2394, RunningAvgSamplesPerSec=29.96580047457715, CurrSamplesPerSec=30.344479198577478, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:44:20,888] [INFO] [timer.py:197:stop] 0/2395, RunningAvgSamplesPerSec=29.966053719856536, CurrSamplesPerSec=30.58431988158003, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:44:23,037] [INFO] [timer.py:197:stop] 0/2396, RunningAvgSamplesPerSec=29.966126798166087, CurrSamplesPerSec=30.142030159158992, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:44:25,164] [INFO] [timer.py:197:stop] 0/2397, RunningAvgSamplesPerSec=29.966331635840916, CurrSamplesPerSec=30.464874802952114, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:44:27,305] [INFO] [timer.py:197:stop] 0/2398, RunningAvgSamplesPerSec=29.966451994872514, CurrSamplesPerSec=30.257512881415234, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:44:29,478] [INFO] [timer.py:197:stop] 0/2399, RunningAvgSamplesPerSec=29.96638994332518, CurrSamplesPerSec=29.81844873903728, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:44:31,350] [INFO] [logging.py:68:log_dist] [Rank 0] step=2400, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 20:44:31,351] [INFO] [timer.py:197:stop] 0/2400, RunningAvgSamplesPerSec=29.96805769272142, CurrSamplesPerSec=34.581293244256614, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0003, 'learning_rate': 1e-05, 'epoch': 59.01} -[2022-12-14 20:45:08,169] [INFO] [timer.py:197:stop] 0/2401, RunningAvgSamplesPerSec=29.967532282344138, CurrSamplesPerSec=28.758453081006596, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:45:14,221] [INFO] [timer.py:197:stop] 0/2402, RunningAvgSamplesPerSec=29.96745891539061, CurrSamplesPerSec=29.79247972431168, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:45:20,265] [INFO] [timer.py:197:stop] 0/2403, RunningAvgSamplesPerSec=29.96702948473792, CurrSamplesPerSec=28.970676999759544, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:45:26,198] [INFO] [timer.py:197:stop] 0/2404, RunningAvgSamplesPerSec=29.967090450163436, CurrSamplesPerSec=30.11418724870764, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:45:32,090] [INFO] [timer.py:197:stop] 0/2405, RunningAvgSamplesPerSec=29.967151636571874, CurrSamplesPerSec=30.114846037991974, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:45:37,893] [INFO] [timer.py:197:stop] 0/2406, RunningAvgSamplesPerSec=29.967068778684514, CurrSamplesPerSec=29.769275999040943, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:45:43,767] [INFO] [timer.py:197:stop] 0/2407, RunningAvgSamplesPerSec=29.9669406404242, CurrSamplesPerSec=29.662031868107583, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:45:49,542] [INFO] [timer.py:197:stop] 0/2408, RunningAvgSamplesPerSec=29.96705849899756, CurrSamplesPerSec=30.25321617733707, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:45:55,665] [INFO] [timer.py:197:stop] 0/2409, RunningAvgSamplesPerSec=29.967325093267647, CurrSamplesPerSec=30.622786344269418, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:46:01,770] [INFO] [logging.py:68:log_dist] [Rank 0] step=2410, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 20:46:01,771] [INFO] [timer.py:197:stop] 0/2410, RunningAvgSamplesPerSec=29.96735488823334, CurrSamplesPerSec=30.03924348257456, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:46:10,999] [INFO] [timer.py:197:stop] 0/2411, RunningAvgSamplesPerSec=29.967065068371156, CurrSamplesPerSec=29.28506808260833, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:46:17,009] [INFO] [timer.py:197:stop] 0/2412, RunningAvgSamplesPerSec=29.966927394610014, CurrSamplesPerSec=29.638903187439535, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:46:23,072] [INFO] [timer.py:197:stop] 0/2413, RunningAvgSamplesPerSec=29.966573411853144, CurrSamplesPerSec=29.13709853453457, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:46:29,228] [INFO] [timer.py:197:stop] 0/2414, RunningAvgSamplesPerSec=29.966411548100865, CurrSamplesPerSec=29.581177043101707, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:46:35,300] [INFO] [timer.py:197:stop] 0/2415, RunningAvgSamplesPerSec=29.966597296197246, CurrSamplesPerSec=30.421424552309382, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:46:41,612] [INFO] [timer.py:197:stop] 0/2416, RunningAvgSamplesPerSec=29.96671863291689, CurrSamplesPerSec=30.262394190027017, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:46:47,730] [INFO] [timer.py:197:stop] 0/2417, RunningAvgSamplesPerSec=29.966697598394887, CurrSamplesPerSec=29.916006192374855, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:46:53,577] [INFO] [timer.py:197:stop] 0/2418, RunningAvgSamplesPerSec=29.966396938405257, CurrSamplesPerSec=29.25748724895182, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:46:59,910] [INFO] [timer.py:197:stop] 0/2419, RunningAvgSamplesPerSec=29.96626223234216, CurrSamplesPerSec=29.644310402519654, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:47:06,019] [INFO] [logging.py:68:log_dist] [Rank 0] step=2420, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 20:47:06,020] [INFO] [timer.py:197:stop] 0/2420, RunningAvgSamplesPerSec=29.96647393556097, CurrSamplesPerSec=30.48705334529637, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:47:12,105] [INFO] [timer.py:197:stop] 0/2421, RunningAvgSamplesPerSec=29.966372673680763, CurrSamplesPerSec=29.723506693494013, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:47:17,944] [INFO] [timer.py:197:stop] 0/2422, RunningAvgSamplesPerSec=29.966520880742916, CurrSamplesPerSec=30.32937668595898, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:47:24,617] [INFO] [timer.py:197:stop] 0/2423, RunningAvgSamplesPerSec=29.96643196258251, CurrSamplesPerSec=29.752784798734144, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:47:30,864] [INFO] [timer.py:197:stop] 0/2424, RunningAvgSamplesPerSec=29.966333811918073, CurrSamplesPerSec=29.730581262417026, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:47:36,851] [INFO] [timer.py:197:stop] 0/2425, RunningAvgSamplesPerSec=29.96650428229378, CurrSamplesPerSec=30.38515408128291, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0003, 'learning_rate': 1e-05, 'epoch': 60.01} -[2022-12-14 20:47:42,851] [INFO] [timer.py:197:stop] 0/2426, RunningAvgSamplesPerSec=29.966422999679875, CurrSamplesPerSec=29.770761698119987, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:47:49,219] [INFO] [timer.py:197:stop] 0/2427, RunningAvgSamplesPerSec=29.96647800342891, CurrSamplesPerSec=30.10040320690666, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:47:55,182] [INFO] [timer.py:197:stop] 0/2428, RunningAvgSamplesPerSec=29.966673137852574, CurrSamplesPerSec=30.447469456454016, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:48:02,505] [INFO] [timer.py:197:stop] 0/2429, RunningAvgSamplesPerSec=29.96673589776012, CurrSamplesPerSec=30.11976928880505, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:48:08,715] [INFO] [logging.py:68:log_dist] [Rank 0] step=2430, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 20:48:08,715] [INFO] [timer.py:197:stop] 0/2430, RunningAvgSamplesPerSec=29.966641917354142, CurrSamplesPerSec=29.740275167568857, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:48:14,745] [INFO] [timer.py:197:stop] 0/2431, RunningAvgSamplesPerSec=29.96661778686082, CurrSamplesPerSec=29.90814332174973, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:48:20,967] [INFO] [timer.py:197:stop] 0/2432, RunningAvgSamplesPerSec=29.96708656937068, CurrSamplesPerSec=31.15075414555507, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:48:23,585] [INFO] [timer.py:197:stop] 0/2433, RunningAvgSamplesPerSec=29.967250602804942, CurrSamplesPerSec=30.371227447618534, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:48:25,792] [INFO] [timer.py:197:stop] 0/2434, RunningAvgSamplesPerSec=29.967149318275435, CurrSamplesPerSec=29.722934027898884, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:48:27,999] [INFO] [timer.py:197:stop] 0/2435, RunningAvgSamplesPerSec=29.96706560782957, CurrSamplesPerSec=29.764856095839047, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:48:30,150] [INFO] [timer.py:197:stop] 0/2436, RunningAvgSamplesPerSec=29.96713223346136, CurrSamplesPerSec=30.130114371558054, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:48:32,276] [INFO] [timer.py:197:stop] 0/2437, RunningAvgSamplesPerSec=29.967335217919356, CurrSamplesPerSec=30.469684908567263, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:48:34,454] [INFO] [timer.py:197:stop] 0/2438, RunningAvgSamplesPerSec=29.967244622591128, CurrSamplesPerSec=29.74825770158127, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:48:36,589] [INFO] [timer.py:197:stop] 0/2439, RunningAvgSamplesPerSec=29.96739289071156, CurrSamplesPerSec=30.332982094265486, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:48:38,539] [INFO] [logging.py:68:log_dist] [Rank 0] step=2440, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 20:48:38,540] [INFO] [timer.py:197:stop] 0/2440, RunningAvgSamplesPerSec=29.968731297109812, CurrSamplesPerSec=33.62895797449122, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:49:25,667] [INFO] [timer.py:197:stop] 0/2441, RunningAvgSamplesPerSec=29.968581921129424, CurrSamplesPerSec=29.608777423796422, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:49:32,700] [INFO] [timer.py:197:stop] 0/2442, RunningAvgSamplesPerSec=29.968419882713178, CurrSamplesPerSec=29.57835432524091, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:49:39,553] [INFO] [timer.py:197:stop] 0/2443, RunningAvgSamplesPerSec=29.96835035303425, CurrSamplesPerSec=29.79965333060391, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:49:46,612] [INFO] [timer.py:197:stop] 0/2444, RunningAvgSamplesPerSec=29.96827619366369, CurrSamplesPerSec=29.78834051477086, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:49:53,130] [INFO] [timer.py:197:stop] 0/2445, RunningAvgSamplesPerSec=29.968378563075355, CurrSamplesPerSec=30.22046837565385, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:50:00,197] [INFO] [timer.py:197:stop] 0/2446, RunningAvgSamplesPerSec=29.96830048279454, CurrSamplesPerSec=29.77875730489919, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:50:07,017] [INFO] [timer.py:197:stop] 0/2447, RunningAvgSamplesPerSec=29.96827251360072, CurrSamplesPerSec=29.900071432127838, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:50:13,286] [INFO] [timer.py:197:stop] 0/2448, RunningAvgSamplesPerSec=29.968301534053484, CurrSamplesPerSec=30.0394250067563, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:50:20,389] [INFO] [timer.py:197:stop] 0/2449, RunningAvgSamplesPerSec=29.967998369535994, CurrSamplesPerSec=29.24437101150043, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:50:26,848] [INFO] [logging.py:68:log_dist] [Rank 0] step=2450, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 20:50:26,849] [INFO] [timer.py:197:stop] 0/2450, RunningAvgSamplesPerSec=29.967811174389595, CurrSamplesPerSec=29.516643692761328, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0003, 'learning_rate': 1e-05, 'epoch': 61.0} -[2022-12-14 20:50:33,825] [INFO] [timer.py:197:stop] 0/2451, RunningAvgSamplesPerSec=29.96782343311191, CurrSamplesPerSec=29.997862878703728, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:50:40,504] [INFO] [timer.py:197:stop] 0/2452, RunningAvgSamplesPerSec=29.96760400045323, CurrSamplesPerSec=29.439684146455146, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:50:46,936] [INFO] [timer.py:197:stop] 0/2453, RunningAvgSamplesPerSec=29.968003873670867, CurrSamplesPerSec=30.980816820684698, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:50:53,831] [INFO] [timer.py:197:stop] 0/2454, RunningAvgSamplesPerSec=29.968269481077602, CurrSamplesPerSec=30.633735110250903, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:51:00,534] [INFO] [timer.py:197:stop] 0/2455, RunningAvgSamplesPerSec=29.968236377025963, CurrSamplesPerSec=29.88728459558515, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:51:07,655] [INFO] [timer.py:197:stop] 0/2456, RunningAvgSamplesPerSec=29.96831968274806, CurrSamplesPerSec=30.174072182719218, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:51:14,109] [INFO] [timer.py:197:stop] 0/2457, RunningAvgSamplesPerSec=29.968656607868695, CurrSamplesPerSec=30.818939112047293, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:51:20,554] [INFO] [timer.py:197:stop] 0/2458, RunningAvgSamplesPerSec=29.968054744100346, CurrSamplesPerSec=28.55993485279639, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:51:27,531] [INFO] [timer.py:197:stop] 0/2459, RunningAvgSamplesPerSec=29.96794671190239, CurrSamplesPerSec=29.704949082770582, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:51:34,118] [INFO] [logging.py:68:log_dist] [Rank 0] step=2460, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 20:51:34,119] [INFO] [timer.py:197:stop] 0/2460, RunningAvgSamplesPerSec=29.967940638678115, CurrSamplesPerSec=29.953026156004526, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:51:41,168] [INFO] [timer.py:197:stop] 0/2461, RunningAvgSamplesPerSec=29.96782925450005, CurrSamplesPerSec=29.69652654058594, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:51:47,707] [INFO] [timer.py:197:stop] 0/2462, RunningAvgSamplesPerSec=29.967991193596397, CurrSamplesPerSec=30.371564203896618, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:51:55,016] [INFO] [timer.py:197:stop] 0/2463, RunningAvgSamplesPerSec=29.968244409554277, CurrSamplesPerSec=30.604383647104644, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:52:01,915] [INFO] [timer.py:197:stop] 0/2464, RunningAvgSamplesPerSec=29.96822533850769, CurrSamplesPerSec=29.921364911734717, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:52:08,705] [INFO] [timer.py:197:stop] 0/2465, RunningAvgSamplesPerSec=29.967880725266244, CurrSamplesPerSec=29.142811421300646, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:52:15,054] [INFO] [timer.py:197:stop] 0/2466, RunningAvgSamplesPerSec=29.96819335444964, CurrSamplesPerSec=30.758513849586443, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:52:22,040] [INFO] [timer.py:197:stop] 0/2467, RunningAvgSamplesPerSec=29.968257945836182, CurrSamplesPerSec=30.128261200418958, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:52:29,032] [INFO] [timer.py:197:stop] 0/2468, RunningAvgSamplesPerSec=29.968120086135393, CurrSamplesPerSec=29.632107689432505, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:52:36,178] [INFO] [timer.py:197:stop] 0/2469, RunningAvgSamplesPerSec=29.968193076315014, CurrSamplesPerSec=30.149274906874183, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:52:43,202] [INFO] [logging.py:68:log_dist] [Rank 0] step=2470, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 20:52:43,203] [INFO] [timer.py:197:stop] 0/2470, RunningAvgSamplesPerSec=29.967974768976834, CurrSamplesPerSec=29.43892219522183, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:52:49,704] [INFO] [timer.py:197:stop] 0/2471, RunningAvgSamplesPerSec=29.967884285608562, CurrSamplesPerSec=29.746223759129077, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:52:56,478] [INFO] [timer.py:197:stop] 0/2472, RunningAvgSamplesPerSec=29.967842338083287, CurrSamplesPerSec=29.8646307404638, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:52:58,935] [INFO] [timer.py:197:stop] 0/2473, RunningAvgSamplesPerSec=29.967809303721133, CurrSamplesPerSec=29.886436077916695, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:53:01,064] [INFO] [timer.py:197:stop] 0/2474, RunningAvgSamplesPerSec=29.96798712221944, CurrSamplesPerSec=30.413917491958202, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:53:03,243] [INFO] [timer.py:197:stop] 0/2475, RunningAvgSamplesPerSec=29.96788682454071, CurrSamplesPerSec=29.721986216090382, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0003, 'learning_rate': 1e-05, 'epoch': 61.01} -[2022-12-14 20:53:05,390] [INFO] [timer.py:197:stop] 0/2476, RunningAvgSamplesPerSec=29.96798286500829, CurrSamplesPerSec=30.2073890973156, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:53:07,538] [INFO] [timer.py:197:stop] 0/2477, RunningAvgSamplesPerSec=29.968060766145747, CurrSamplesPerSec=30.162036158425956, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:53:09,656] [INFO] [timer.py:197:stop] 0/2478, RunningAvgSamplesPerSec=29.968308536309195, CurrSamplesPerSec=30.594355448740195, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:53:11,774] [INFO] [timer.py:197:stop] 0/2479, RunningAvgSamplesPerSec=29.968549759417826, CurrSamplesPerSec=30.5779687040971, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:53:13,703] [INFO] [logging.py:68:log_dist] [Rank 0] step=2480, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 20:53:13,704] [INFO] [timer.py:197:stop] 0/2480, RunningAvgSamplesPerSec=29.969879498106376, CurrSamplesPerSec=33.670515310346836, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:53:57,781] [INFO] [timer.py:197:stop] 0/2481, RunningAvgSamplesPerSec=29.969702807797265, CurrSamplesPerSec=29.538171172880233, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:54:04,603] [INFO] [timer.py:197:stop] 0/2482, RunningAvgSamplesPerSec=29.969567287822397, CurrSamplesPerSec=29.637338997983957, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:54:12,949] [INFO] [timer.py:197:stop] 0/2483, RunningAvgSamplesPerSec=29.96930026548459, CurrSamplesPerSec=29.32140679159679, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:54:20,908] [INFO] [timer.py:197:stop] 0/2484, RunningAvgSamplesPerSec=29.969306887819844, CurrSamplesPerSec=29.985745917550982, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:54:27,275] [INFO] [timer.py:197:stop] 0/2485, RunningAvgSamplesPerSec=29.969388199695906, CurrSamplesPerSec=30.17257308941738, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:54:33,779] [INFO] [timer.py:197:stop] 0/2486, RunningAvgSamplesPerSec=29.969407131896975, CurrSamplesPerSec=30.016489668434282, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:54:40,401] [INFO] [timer.py:197:stop] 0/2487, RunningAvgSamplesPerSec=29.969663030188542, CurrSamplesPerSec=30.61909425534366, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:54:46,993] [INFO] [timer.py:197:stop] 0/2488, RunningAvgSamplesPerSec=29.9690782616668, CurrSamplesPerSec=28.583156522370608, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:54:54,092] [INFO] [timer.py:197:stop] 0/2489, RunningAvgSamplesPerSec=29.968492055713998, CurrSamplesPerSec=28.57878969481054, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:55:00,606] [INFO] [logging.py:68:log_dist] [Rank 0] step=2490, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 20:55:00,607] [INFO] [timer.py:197:stop] 0/2490, RunningAvgSamplesPerSec=29.968676730540473, CurrSamplesPerSec=30.435114308215283, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:55:07,632] [INFO] [timer.py:197:stop] 0/2491, RunningAvgSamplesPerSec=29.968829687196347, CurrSamplesPerSec=30.354282447285907, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:55:14,373] [INFO] [timer.py:197:stop] 0/2492, RunningAvgSamplesPerSec=29.96875400397826, CurrSamplesPerSec=29.78155562557414, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:55:20,770] [INFO] [timer.py:197:stop] 0/2493, RunningAvgSamplesPerSec=29.96860900212443, CurrSamplesPerSec=29.611854208459114, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:55:27,573] [INFO] [timer.py:197:stop] 0/2494, RunningAvgSamplesPerSec=29.968875609190672, CurrSamplesPerSec=30.648050549359965, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:55:33,840] [INFO] [timer.py:197:stop] 0/2495, RunningAvgSamplesPerSec=29.968707413003727, CurrSamplesPerSec=29.555346141086996, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:55:40,389] [INFO] [timer.py:197:stop] 0/2496, RunningAvgSamplesPerSec=29.96831811292346, CurrSamplesPerSec=29.028249385475053, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:55:46,790] [INFO] [timer.py:197:stop] 0/2497, RunningAvgSamplesPerSec=29.968367851740016, CurrSamplesPerSec=30.092932278769933, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:55:53,467] [INFO] [timer.py:197:stop] 0/2498, RunningAvgSamplesPerSec=29.968423923257905, CurrSamplesPerSec=30.1089787607193, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:55:59,903] [INFO] [timer.py:197:stop] 0/2499, RunningAvgSamplesPerSec=29.968658616858523, CurrSamplesPerSec=30.5661373980471, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:56:06,350] [INFO] [logging.py:68:log_dist] [Rank 0] step=2500, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 20:56:06,350] [INFO] [timer.py:197:stop] 0/2500, RunningAvgSamplesPerSec=29.968762717812083, CurrSamplesPerSec=30.230978087350355, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0003, 'learning_rate': 1e-05, 'epoch': 62.0} -[2022-12-14 20:56:12,934] [INFO] [timer.py:197:stop] 0/2501, RunningAvgSamplesPerSec=29.968756731990776, CurrSamplesPerSec=29.95381161005467, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:56:19,516] [INFO] [timer.py:197:stop] 0/2502, RunningAvgSamplesPerSec=29.96888680304407, CurrSamplesPerSec=30.297499985045125, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:56:26,465] [INFO] [timer.py:197:stop] 0/2503, RunningAvgSamplesPerSec=29.96857438577754, CurrSamplesPerSec=29.207377522842997, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:56:33,357] [INFO] [timer.py:197:stop] 0/2504, RunningAvgSamplesPerSec=29.968647365650213, CurrSamplesPerSec=30.15228893489763, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:56:40,511] [INFO] [timer.py:197:stop] 0/2505, RunningAvgSamplesPerSec=29.96866388940032, CurrSamplesPerSec=30.01006344657841, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:56:47,121] [INFO] [timer.py:197:stop] 0/2506, RunningAvgSamplesPerSec=29.968774745747716, CurrSamplesPerSec=30.248842295097077, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:56:54,323] [INFO] [timer.py:197:stop] 0/2507, RunningAvgSamplesPerSec=29.968747985540574, CurrSamplesPerSec=29.90188997535648, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:57:00,637] [INFO] [timer.py:197:stop] 0/2508, RunningAvgSamplesPerSec=29.968878975558415, CurrSamplesPerSec=30.300642911872895, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:57:07,180] [INFO] [timer.py:197:stop] 0/2509, RunningAvgSamplesPerSec=29.96867178024744, CurrSamplesPerSec=29.45828669354874, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:57:13,772] [INFO] [logging.py:68:log_dist] [Rank 0] step=2510, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 20:57:13,772] [INFO] [timer.py:197:stop] 0/2510, RunningAvgSamplesPerSec=29.96868926587792, CurrSamplesPerSec=30.012589982552736, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:57:20,179] [INFO] [timer.py:197:stop] 0/2511, RunningAvgSamplesPerSec=29.96842841182263, CurrSamplesPerSec=29.328188686015032, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:57:27,183] [INFO] [timer.py:197:stop] 0/2512, RunningAvgSamplesPerSec=29.968132310305666, CurrSamplesPerSec=29.24319224194739, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:57:29,566] [INFO] [timer.py:197:stop] 0/2513, RunningAvgSamplesPerSec=29.968254446790656, CurrSamplesPerSec=30.277986716667623, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:57:31,713] [INFO] [timer.py:197:stop] 0/2514, RunningAvgSamplesPerSec=29.968339774978315, CurrSamplesPerSec=30.18414235228225, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:57:33,838] [INFO] [timer.py:197:stop] 0/2515, RunningAvgSamplesPerSec=29.968548396063902, CurrSamplesPerSec=30.50193554896914, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:57:36,021] [INFO] [timer.py:197:stop] 0/2516, RunningAvgSamplesPerSec=29.968430554770375, CurrSamplesPerSec=29.675194173286876, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:57:38,144] [INFO] [timer.py:197:stop] 0/2517, RunningAvgSamplesPerSec=29.968646624463165, CurrSamplesPerSec=30.52187745242843, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:57:40,257] [INFO] [timer.py:197:stop] 0/2518, RunningAvgSamplesPerSec=29.968923549344282, CurrSamplesPerSec=30.681967066961636, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:57:42,389] [INFO] [timer.py:197:stop] 0/2519, RunningAvgSamplesPerSec=29.969089169027825, CurrSamplesPerSec=30.391666265687032, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:57:44,280] [INFO] [logging.py:68:log_dist] [Rank 0] step=2520, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 20:57:44,280] [INFO] [timer.py:197:stop] 0/2520, RunningAvgSamplesPerSec=29.970614818117323, CurrSamplesPerSec=34.375252882132784, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:58:24,681] [INFO] [timer.py:197:stop] 0/2521, RunningAvgSamplesPerSec=29.970456281243624, CurrSamplesPerSec=29.57650974529659, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:58:30,994] [INFO] [timer.py:197:stop] 0/2522, RunningAvgSamplesPerSec=29.97046881604357, CurrSamplesPerSec=30.002077291192613, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:58:37,687] [INFO] [timer.py:197:stop] 0/2523, RunningAvgSamplesPerSec=29.970379845291518, CurrSamplesPerSec=29.747839022945808, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:58:43,653] [INFO] [timer.py:197:stop] 0/2524, RunningAvgSamplesPerSec=29.97005917591604, CurrSamplesPerSec=29.182892995117403, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:58:50,036] [INFO] [timer.py:197:stop] 0/2525, RunningAvgSamplesPerSec=29.969737014355918, CurrSamplesPerSec=29.178699406263636, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0003, 'learning_rate': 1e-05, 'epoch': 63.0} -[2022-12-14 20:58:56,143] [INFO] [timer.py:197:stop] 0/2526, RunningAvgSamplesPerSec=29.969976123962628, CurrSamplesPerSec=30.585647586622347, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:59:02,700] [INFO] [timer.py:197:stop] 0/2527, RunningAvgSamplesPerSec=29.970077489822952, CurrSamplesPerSec=30.228128716082303, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:59:09,170] [INFO] [timer.py:197:stop] 0/2528, RunningAvgSamplesPerSec=29.969826553653093, CurrSamplesPerSec=29.349336157378257, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:59:15,806] [INFO] [timer.py:197:stop] 0/2529, RunningAvgSamplesPerSec=29.96975449081604, CurrSamplesPerSec=29.788823142902768, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:59:22,037] [INFO] [logging.py:68:log_dist] [Rank 0] step=2530, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 20:59:22,038] [INFO] [timer.py:197:stop] 0/2530, RunningAvgSamplesPerSec=29.969705000624856, CurrSamplesPerSec=29.845163198347393, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:59:28,954] [INFO] [timer.py:197:stop] 0/2531, RunningAvgSamplesPerSec=29.969563841790645, CurrSamplesPerSec=29.616914982656976, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:59:35,583] [INFO] [timer.py:197:stop] 0/2532, RunningAvgSamplesPerSec=29.969566118751338, CurrSamplesPerSec=29.975325659442642, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:59:41,931] [INFO] [timer.py:197:stop] 0/2533, RunningAvgSamplesPerSec=29.969722531085914, CurrSamplesPerSec=30.370742944034248, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:59:48,292] [INFO] [timer.py:197:stop] 0/2534, RunningAvgSamplesPerSec=29.969862748135508, CurrSamplesPerSec=30.329006597495386, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 20:59:54,538] [INFO] [timer.py:197:stop] 0/2535, RunningAvgSamplesPerSec=29.96962799874457, CurrSamplesPerSec=29.386806169144272, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:00:01,396] [INFO] [timer.py:197:stop] 0/2536, RunningAvgSamplesPerSec=29.969622209330367, CurrSamplesPerSec=29.954964798080102, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:00:07,758] [INFO] [timer.py:197:stop] 0/2537, RunningAvgSamplesPerSec=29.969629188027223, CurrSamplesPerSec=29.987323650852005, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:00:14,486] [INFO] [timer.py:197:stop] 0/2538, RunningAvgSamplesPerSec=29.969837055819614, CurrSamplesPerSec=30.506216525298576, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:00:21,250] [INFO] [timer.py:197:stop] 0/2539, RunningAvgSamplesPerSec=29.9697398909537, CurrSamplesPerSec=29.725340030027045, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:00:27,433] [INFO] [logging.py:68:log_dist] [Rank 0] step=2540, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 21:00:27,434] [INFO] [timer.py:197:stop] 0/2540, RunningAvgSamplesPerSec=29.96972602717177, CurrSamplesPerSec=29.934594858553744, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:00:33,912] [INFO] [timer.py:197:stop] 0/2541, RunningAvgSamplesPerSec=29.969574440947017, CurrSamplesPerSec=29.589726710372254, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:00:40,246] [INFO] [timer.py:197:stop] 0/2542, RunningAvgSamplesPerSec=29.969430148288158, CurrSamplesPerSec=29.607497251686407, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:00:46,772] [INFO] [timer.py:197:stop] 0/2543, RunningAvgSamplesPerSec=29.969395976167387, CurrSamplesPerSec=29.882849543361534, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:00:53,373] [INFO] [timer.py:197:stop] 0/2544, RunningAvgSamplesPerSec=29.96928588863634, CurrSamplesPerSec=29.692141349108354, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:01:00,161] [INFO] [timer.py:197:stop] 0/2545, RunningAvgSamplesPerSec=29.969505049642393, CurrSamplesPerSec=30.537168859387176, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:01:06,481] [INFO] [timer.py:197:stop] 0/2546, RunningAvgSamplesPerSec=29.969282665741968, CurrSamplesPerSec=29.414238248843585, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:01:13,226] [INFO] [timer.py:197:stop] 0/2547, RunningAvgSamplesPerSec=29.96922531717784, CurrSamplesPerSec=29.824037643541164, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:01:19,555] [INFO] [timer.py:197:stop] 0/2548, RunningAvgSamplesPerSec=29.969112805760552, CurrSamplesPerSec=29.685482277265816, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:01:25,658] [INFO] [timer.py:197:stop] 0/2549, RunningAvgSamplesPerSec=29.968951086724463, CurrSamplesPerSec=29.56279667645282, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:01:32,128] [INFO] [logging.py:68:log_dist] [Rank 0] step=2550, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 21:01:32,129] [INFO] [timer.py:197:stop] 0/2550, RunningAvgSamplesPerSec=29.968838626233634, CurrSamplesPerSec=29.685114604905603, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0003, 'learning_rate': 1e-05, 'epoch': 63.01} -[2022-12-14 21:01:38,284] [INFO] [timer.py:197:stop] 0/2551, RunningAvgSamplesPerSec=29.96897337428437, CurrSamplesPerSec=30.316292007618728, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:01:44,673] [INFO] [timer.py:197:stop] 0/2552, RunningAvgSamplesPerSec=29.969058904653725, CurrSamplesPerSec=30.188674084782082, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:01:47,292] [INFO] [timer.py:197:stop] 0/2553, RunningAvgSamplesPerSec=29.969024953837298, CurrSamplesPerSec=29.882699845886194, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:01:49,423] [INFO] [timer.py:197:stop] 0/2554, RunningAvgSamplesPerSec=29.969191676191535, CurrSamplesPerSec=30.400625508780816, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:01:51,614] [INFO] [timer.py:197:stop] 0/2555, RunningAvgSamplesPerSec=29.96907264296363, CurrSamplesPerSec=29.668349235229098, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:01:53,754] [INFO] [timer.py:197:stop] 0/2556, RunningAvgSamplesPerSec=29.969340661334627, CurrSamplesPerSec=30.669585605113603, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:01:55,889] [INFO] [timer.py:197:stop] 0/2557, RunningAvgSamplesPerSec=29.96948380863039, CurrSamplesPerSec=30.339598808412543, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:01:57,983] [INFO] [timer.py:197:stop] 0/2558, RunningAvgSamplesPerSec=29.969851070537, CurrSamplesPerSec=30.93854691941642, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:02:00,137] [INFO] [timer.py:197:stop] 0/2559, RunningAvgSamplesPerSec=29.969889162999603, CurrSamplesPerSec=30.067570964208187, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:02:02,043] [INFO] [logging.py:68:log_dist] [Rank 0] step=2560, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 21:02:02,044] [INFO] [timer.py:197:stop] 0/2560, RunningAvgSamplesPerSec=29.97127412349058, CurrSamplesPerSec=33.9873329076615, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:02:39,415] [INFO] [timer.py:197:stop] 0/2561, RunningAvgSamplesPerSec=29.971251734221376, CurrSamplesPerSec=29.914089257468564, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:02:45,863] [INFO] [timer.py:197:stop] 0/2562, RunningAvgSamplesPerSec=29.97119214109778, CurrSamplesPerSec=29.819465651102124, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:02:52,225] [INFO] [timer.py:197:stop] 0/2563, RunningAvgSamplesPerSec=29.970590437986925, CurrSamplesPerSec=28.505556362610747, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:02:57,976] [INFO] [timer.py:197:stop] 0/2564, RunningAvgSamplesPerSec=29.970903895093628, CurrSamplesPerSec=30.795770018564504, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:03:03,875] [INFO] [timer.py:197:stop] 0/2565, RunningAvgSamplesPerSec=29.970556256442396, CurrSamplesPerSec=29.105619819566822, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:03:10,051] [INFO] [timer.py:197:stop] 0/2566, RunningAvgSamplesPerSec=29.97040201841346, CurrSamplesPerSec=29.58023825035064, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:03:16,446] [INFO] [timer.py:197:stop] 0/2567, RunningAvgSamplesPerSec=29.9703664647348, CurrSamplesPerSec=29.87948337583325, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:03:22,377] [INFO] [timer.py:197:stop] 0/2568, RunningAvgSamplesPerSec=29.97028175625435, CurrSamplesPerSec=29.75456897875168, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:03:28,672] [INFO] [timer.py:197:stop] 0/2569, RunningAvgSamplesPerSec=29.970020971047123, CurrSamplesPerSec=29.315466802434827, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:03:34,739] [INFO] [logging.py:68:log_dist] [Rank 0] step=2570, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 21:03:34,740] [INFO] [timer.py:197:stop] 0/2570, RunningAvgSamplesPerSec=29.96997012486843, CurrSamplesPerSec=29.840014175469456, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:03:40,895] [INFO] [timer.py:197:stop] 0/2571, RunningAvgSamplesPerSec=29.969707806900818, CurrSamplesPerSec=29.31088936972004, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:03:47,535] [INFO] [timer.py:197:stop] 0/2572, RunningAvgSamplesPerSec=29.96950061145781, CurrSamplesPerSec=29.44650795076852, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:03:53,585] [INFO] [timer.py:197:stop] 0/2573, RunningAvgSamplesPerSec=29.969471707740546, CurrSamplesPerSec=29.89537288772095, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:03:59,777] [INFO] [timer.py:197:stop] 0/2574, RunningAvgSamplesPerSec=29.969308564371843, CurrSamplesPerSec=29.55565853894319, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:04:06,064] [INFO] [timer.py:197:stop] 0/2575, RunningAvgSamplesPerSec=29.969203701231173, CurrSamplesPerSec=29.701902219735835, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0003, 'learning_rate': 1e-05, 'epoch': 64.0} -[2022-12-14 21:04:12,392] [INFO] [timer.py:197:stop] 0/2576, RunningAvgSamplesPerSec=29.96922677447503, CurrSamplesPerSec=30.028712114034075, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:04:18,652] [INFO] [timer.py:197:stop] 0/2577, RunningAvgSamplesPerSec=29.969149111688395, CurrSamplesPerSec=29.770570199691555, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:04:24,458] [INFO] [timer.py:197:stop] 0/2578, RunningAvgSamplesPerSec=29.969421902050154, CurrSamplesPerSec=30.688722892398218, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:04:30,532] [INFO] [timer.py:197:stop] 0/2579, RunningAvgSamplesPerSec=29.96962123499709, CurrSamplesPerSec=30.49205749333642, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:04:36,337] [INFO] [logging.py:68:log_dist] [Rank 0] step=2580, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 21:04:36,337] [INFO] [timer.py:197:stop] 0/2580, RunningAvgSamplesPerSec=29.969740146753683, CurrSamplesPerSec=30.279342605439183, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:04:42,407] [INFO] [timer.py:197:stop] 0/2581, RunningAvgSamplesPerSec=29.969740114317112, CurrSamplesPerSec=29.96965649306658, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:04:48,290] [INFO] [timer.py:197:stop] 0/2582, RunningAvgSamplesPerSec=29.96959590886051, CurrSamplesPerSec=29.602250356084355, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:04:54,857] [INFO] [timer.py:197:stop] 0/2583, RunningAvgSamplesPerSec=29.969613263710503, CurrSamplesPerSec=30.014455798761542, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:05:01,065] [INFO] [timer.py:197:stop] 0/2584, RunningAvgSamplesPerSec=29.96975139771289, CurrSamplesPerSec=30.33056925467274, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:05:07,392] [INFO] [timer.py:197:stop] 0/2585, RunningAvgSamplesPerSec=29.96949565230987, CurrSamplesPerSec=29.323402271367755, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:05:13,382] [INFO] [timer.py:197:stop] 0/2586, RunningAvgSamplesPerSec=29.96947726524469, CurrSamplesPerSec=29.92205865108131, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:05:19,574] [INFO] [timer.py:197:stop] 0/2587, RunningAvgSamplesPerSec=29.969290146773602, CurrSamplesPerSec=29.493455932554607, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:05:25,814] [INFO] [timer.py:197:stop] 0/2588, RunningAvgSamplesPerSec=29.969305231138414, CurrSamplesPerSec=30.008349133891663, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:05:31,784] [INFO] [timer.py:197:stop] 0/2589, RunningAvgSamplesPerSec=29.96924233937472, CurrSamplesPerSec=29.80748242212658, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:05:37,690] [INFO] [logging.py:68:log_dist] [Rank 0] step=2590, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 21:05:37,690] [INFO] [timer.py:197:stop] 0/2590, RunningAvgSamplesPerSec=29.968978490582245, CurrSamplesPerSec=29.30160765330254, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:05:43,567] [INFO] [timer.py:197:stop] 0/2591, RunningAvgSamplesPerSec=29.969174689686366, CurrSamplesPerSec=30.485692639642995, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:05:49,528] [INFO] [timer.py:197:stop] 0/2592, RunningAvgSamplesPerSec=29.96882696689465, CurrSamplesPerSec=29.09483718310299, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:05:51,919] [INFO] [timer.py:197:stop] 0/2593, RunningAvgSamplesPerSec=29.969150983186193, CurrSamplesPerSec=30.832539285907497, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:05:54,079] [INFO] [timer.py:197:stop] 0/2594, RunningAvgSamplesPerSec=29.96931483369047, CurrSamplesPerSec=30.39995415694474, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:05:56,206] [INFO] [timer.py:197:stop] 0/2595, RunningAvgSamplesPerSec=29.96949586225509, CurrSamplesPerSec=30.446188252816977, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:05:58,373] [INFO] [timer.py:197:stop] 0/2596, RunningAvgSamplesPerSec=29.969462819466653, CurrSamplesPerSec=29.88402721580999, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:06:00,498] [INFO] [timer.py:197:stop] 0/2597, RunningAvgSamplesPerSec=29.969655857394407, CurrSamplesPerSec=30.478908236200322, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:06:02,667] [INFO] [timer.py:197:stop] 0/2598, RunningAvgSamplesPerSec=29.969611341705747, CurrSamplesPerSec=29.854536856474475, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:06:04,840] [INFO] [timer.py:197:stop] 0/2599, RunningAvgSamplesPerSec=29.969580476198416, CurrSamplesPerSec=29.889667357690417, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:06:06,735] [INFO] [logging.py:68:log_dist] [Rank 0] step=2600, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 21:06:06,735] [INFO] [timer.py:197:stop] 0/2600, RunningAvgSamplesPerSec=29.970994172984867, CurrSamplesPerSec=34.15510502389078, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0003, 'learning_rate': 1e-05, 'epoch': 64.01} -[2022-12-14 21:06:44,578] [INFO] [timer.py:197:stop] 0/2601, RunningAvgSamplesPerSec=29.970835540570018, CurrSamplesPerSec=29.564300909852246, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:06:50,794] [INFO] [timer.py:197:stop] 0/2602, RunningAvgSamplesPerSec=29.97075932569012, CurrSamplesPerSec=29.773977919144823, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:06:56,609] [INFO] [timer.py:197:stop] 0/2603, RunningAvgSamplesPerSec=29.970777545875144, CurrSamplesPerSec=30.01822505237958, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:07:02,573] [INFO] [timer.py:197:stop] 0/2604, RunningAvgSamplesPerSec=29.971115538441047, CurrSamplesPerSec=30.876810437283954, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:07:08,495] [INFO] [timer.py:197:stop] 0/2605, RunningAvgSamplesPerSec=29.97133076681948, CurrSamplesPerSec=30.542022672977396, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:07:14,527] [INFO] [timer.py:197:stop] 0/2606, RunningAvgSamplesPerSec=29.971361045520187, CurrSamplesPerSec=30.05038439037234, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:07:20,399] [INFO] [timer.py:197:stop] 0/2607, RunningAvgSamplesPerSec=29.971395134267137, CurrSamplesPerSec=30.060426017781687, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:07:26,300] [INFO] [timer.py:197:stop] 0/2608, RunningAvgSamplesPerSec=29.970933721559806, CurrSamplesPerSec=28.815317247049368, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:07:32,418] [INFO] [timer.py:197:stop] 0/2609, RunningAvgSamplesPerSec=29.971044313475797, CurrSamplesPerSec=30.26204620395075, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:07:38,494] [INFO] [logging.py:68:log_dist] [Rank 0] step=2610, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 21:07:38,495] [INFO] [timer.py:197:stop] 0/2610, RunningAvgSamplesPerSec=29.971113693062563, CurrSamplesPerSec=30.153084874328005, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:07:44,770] [INFO] [timer.py:197:stop] 0/2611, RunningAvgSamplesPerSec=29.97111400411118, CurrSamplesPerSec=29.971925240865854, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:07:50,940] [INFO] [timer.py:197:stop] 0/2612, RunningAvgSamplesPerSec=29.971154988644294, CurrSamplesPerSec=30.078466640058284, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:07:56,712] [INFO] [timer.py:197:stop] 0/2613, RunningAvgSamplesPerSec=29.971147369620926, CurrSamplesPerSec=29.95127490891117, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:08:02,873] [INFO] [timer.py:197:stop] 0/2614, RunningAvgSamplesPerSec=29.971157753771323, CurrSamplesPerSec=29.998295329522502, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:08:08,904] [INFO] [timer.py:197:stop] 0/2615, RunningAvgSamplesPerSec=29.971052647170197, CurrSamplesPerSec=29.699007128701084, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:08:14,963] [INFO] [timer.py:197:stop] 0/2616, RunningAvgSamplesPerSec=29.970900817531483, CurrSamplesPerSec=29.579354926863026, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:08:20,867] [INFO] [timer.py:197:stop] 0/2617, RunningAvgSamplesPerSec=29.97089166443031, CurrSamplesPerSec=29.946984550668816, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:08:27,033] [INFO] [timer.py:197:stop] 0/2618, RunningAvgSamplesPerSec=29.971044584944075, CurrSamplesPerSec=30.376341449255747, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:08:33,179] [INFO] [timer.py:197:stop] 0/2619, RunningAvgSamplesPerSec=29.97078923722228, CurrSamplesPerSec=29.317368617870834, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:08:39,311] [INFO] [logging.py:68:log_dist] [Rank 0] step=2620, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 21:08:39,312] [INFO] [timer.py:197:stop] 0/2620, RunningAvgSamplesPerSec=29.970360238193873, CurrSamplesPerSec=28.888222189530566, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:08:45,316] [INFO] [timer.py:197:stop] 0/2621, RunningAvgSamplesPerSec=29.9704422791156, CurrSamplesPerSec=30.186776362287933, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:08:51,479] [INFO] [timer.py:197:stop] 0/2622, RunningAvgSamplesPerSec=29.970421147455884, CurrSamplesPerSec=29.91517937988586, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:08:57,589] [INFO] [timer.py:197:stop] 0/2623, RunningAvgSamplesPerSec=29.970262960499195, CurrSamplesPerSec=29.56146838981537, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:09:03,697] [INFO] [timer.py:197:stop] 0/2624, RunningAvgSamplesPerSec=29.97038676450003, CurrSamplesPerSec=30.298430141375878, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:09:09,659] [INFO] [timer.py:197:stop] 0/2625, RunningAvgSamplesPerSec=29.970180816277825, CurrSamplesPerSec=29.43974549170219, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0003, 'learning_rate': 1e-05, 'epoch': 65.0} -[2022-12-14 21:09:15,628] [INFO] [timer.py:197:stop] 0/2626, RunningAvgSamplesPerSec=29.970217914544456, CurrSamplesPerSec=30.067843765090043, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:09:21,756] [INFO] [timer.py:197:stop] 0/2627, RunningAvgSamplesPerSec=29.97003059513881, CurrSamplesPerSec=29.486438688830862, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:09:27,715] [INFO] [timer.py:197:stop] 0/2628, RunningAvgSamplesPerSec=29.97021691992075, CurrSamplesPerSec=30.467437006622387, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:09:33,494] [INFO] [timer.py:197:stop] 0/2629, RunningAvgSamplesPerSec=29.969550432041046, CurrSamplesPerSec=28.315958449815327, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:09:39,443] [INFO] [logging.py:68:log_dist] [Rank 0] step=2630, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 21:09:39,444] [INFO] [timer.py:197:stop] 0/2630, RunningAvgSamplesPerSec=29.969743053422825, CurrSamplesPerSec=30.4844532221411, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:09:45,497] [INFO] [timer.py:197:stop] 0/2631, RunningAvgSamplesPerSec=29.96939145771392, CurrSamplesPerSec=29.073043956123698, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:09:51,384] [INFO] [timer.py:197:stop] 0/2632, RunningAvgSamplesPerSec=29.9693104792257, CurrSamplesPerSec=29.757920253764727, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:09:53,909] [INFO] [timer.py:197:stop] 0/2633, RunningAvgSamplesPerSec=29.969271108507577, CurrSamplesPerSec=29.866082775087143, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:09:56,036] [INFO] [timer.py:197:stop] 0/2634, RunningAvgSamplesPerSec=29.969459040725468, CurrSamplesPerSec=30.472206411713223, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:09:58,204] [INFO] [timer.py:197:stop] 0/2635, RunningAvgSamplesPerSec=29.969423971332866, CurrSamplesPerSec=29.877404846801692, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:10:00,287] [INFO] [timer.py:197:stop] 0/2636, RunningAvgSamplesPerSec=29.96983961367916, CurrSamplesPerSec=31.10571975316167, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:10:02,393] [INFO] [timer.py:197:stop] 0/2637, RunningAvgSamplesPerSec=29.970132451700785, CurrSamplesPerSec=30.76185186182602, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:10:04,531] [INFO] [timer.py:197:stop] 0/2638, RunningAvgSamplesPerSec=29.970256601186623, CurrSamplesPerSec=30.301002047305087, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:10:06,696] [INFO] [timer.py:197:stop] 0/2639, RunningAvgSamplesPerSec=29.970239282667286, CurrSamplesPerSec=29.924657124221202, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:10:08,599] [INFO] [logging.py:68:log_dist] [Rank 0] step=2640, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 21:10:08,600] [INFO] [timer.py:197:stop] 0/2640, RunningAvgSamplesPerSec=29.97173958545306, CurrSamplesPerSec=34.52995502437102, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:10:46,902] [INFO] [timer.py:197:stop] 0/2641, RunningAvgSamplesPerSec=29.97205657877104, CurrSamplesPerSec=30.832294929147924, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:10:53,337] [INFO] [timer.py:197:stop] 0/2642, RunningAvgSamplesPerSec=29.972283338482182, CurrSamplesPerSec=30.582898215010392, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:10:59,605] [INFO] [timer.py:197:stop] 0/2643, RunningAvgSamplesPerSec=29.972278062022593, CurrSamplesPerSec=29.958354682165872, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:11:05,610] [INFO] [timer.py:197:stop] 0/2644, RunningAvgSamplesPerSec=29.97176087114447, CurrSamplesPerSec=28.665416262121926, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:11:11,567] [INFO] [timer.py:197:stop] 0/2645, RunningAvgSamplesPerSec=29.971792893697724, CurrSamplesPerSec=30.056636063109703, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:11:17,860] [INFO] [timer.py:197:stop] 0/2646, RunningAvgSamplesPerSec=29.971866610341255, CurrSamplesPerSec=30.167974990410794, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:11:23,891] [INFO] [timer.py:197:stop] 0/2647, RunningAvgSamplesPerSec=29.972245827820075, CurrSamplesPerSec=31.009612634355314, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:11:29,777] [INFO] [timer.py:197:stop] 0/2648, RunningAvgSamplesPerSec=29.97263078740606, CurrSamplesPerSec=31.026669818239398, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:11:35,968] [INFO] [timer.py:197:stop] 0/2649, RunningAvgSamplesPerSec=29.972606565863263, CurrSamplesPerSec=29.90865316639438, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:11:41,897] [INFO] [logging.py:68:log_dist] [Rank 0] step=2650, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 21:11:41,898] [INFO] [timer.py:197:stop] 0/2650, RunningAvgSamplesPerSec=29.972662749110643, CurrSamplesPerSec=30.12212166383215, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0003, 'learning_rate': 1e-05, 'epoch': 66.0} -[2022-12-14 21:11:48,608] [INFO] [timer.py:197:stop] 0/2651, RunningAvgSamplesPerSec=29.97227307384091, CurrSamplesPerSec=28.974767217848516, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:11:54,526] [INFO] [timer.py:197:stop] 0/2652, RunningAvgSamplesPerSec=29.971858911074875, CurrSamplesPerSec=28.91349758856228, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:12:00,806] [INFO] [timer.py:197:stop] 0/2653, RunningAvgSamplesPerSec=29.97179504394757, CurrSamplesPerSec=29.803497873394942, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:12:06,742] [INFO] [timer.py:197:stop] 0/2654, RunningAvgSamplesPerSec=29.97163218871084, CurrSamplesPerSec=29.54603581224191, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:12:12,930] [INFO] [timer.py:197:stop] 0/2655, RunningAvgSamplesPerSec=29.971834721954295, CurrSamplesPerSec=30.518757859617022, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:12:18,917] [INFO] [timer.py:197:stop] 0/2656, RunningAvgSamplesPerSec=29.971867597892707, CurrSamplesPerSec=30.05934211433941, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:12:24,863] [INFO] [timer.py:197:stop] 0/2657, RunningAvgSamplesPerSec=29.97190678918133, CurrSamplesPerSec=30.07628282940552, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:12:30,628] [INFO] [timer.py:197:stop] 0/2658, RunningAvgSamplesPerSec=29.971998255234794, CurrSamplesPerSec=30.21682503639836, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:12:36,562] [INFO] [timer.py:197:stop] 0/2659, RunningAvgSamplesPerSec=29.972075368868005, CurrSamplesPerSec=30.178298935478573, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:12:42,345] [INFO] [logging.py:68:log_dist] [Rank 0] step=2660, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 21:12:42,345] [INFO] [timer.py:197:stop] 0/2660, RunningAvgSamplesPerSec=29.972077500594047, CurrSamplesPerSec=29.977742567640913, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:12:48,317] [INFO] [timer.py:197:stop] 0/2661, RunningAvgSamplesPerSec=29.97206167100976, CurrSamplesPerSec=29.930045640635797, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:12:54,389] [INFO] [timer.py:197:stop] 0/2662, RunningAvgSamplesPerSec=29.971937297037677, CurrSamplesPerSec=29.644837482381377, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:13:00,579] [INFO] [timer.py:197:stop] 0/2663, RunningAvgSamplesPerSec=29.971641491215397, CurrSamplesPerSec=29.204933893941778, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:13:07,124] [INFO] [timer.py:197:stop] 0/2664, RunningAvgSamplesPerSec=29.97132831052104, CurrSamplesPerSec=29.16050837303357, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:13:13,328] [INFO] [timer.py:197:stop] 0/2665, RunningAvgSamplesPerSec=29.97138532732674, CurrSamplesPerSec=30.12393689397472, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:13:19,242] [INFO] [timer.py:197:stop] 0/2666, RunningAvgSamplesPerSec=29.97106253354665, CurrSamplesPerSec=29.135438231803665, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:13:25,281] [INFO] [timer.py:197:stop] 0/2667, RunningAvgSamplesPerSec=29.97120087221405, CurrSamplesPerSec=30.344324840343127, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:13:31,652] [INFO] [timer.py:197:stop] 0/2668, RunningAvgSamplesPerSec=29.97140962557201, CurrSamplesPerSec=30.538263177983044, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:13:37,532] [INFO] [timer.py:197:stop] 0/2669, RunningAvgSamplesPerSec=29.971206791868227, CurrSamplesPerSec=29.440039306693105, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:13:43,686] [INFO] [logging.py:68:log_dist] [Rank 0] step=2670, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 21:13:43,687] [INFO] [timer.py:197:stop] 0/2670, RunningAvgSamplesPerSec=29.971202394096437, CurrSamplesPerSec=29.95947812661029, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:13:49,850] [INFO] [timer.py:197:stop] 0/2671, RunningAvgSamplesPerSec=29.970942635326114, CurrSamplesPerSec=29.293575285511885, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:13:55,920] [INFO] [timer.py:197:stop] 0/2672, RunningAvgSamplesPerSec=29.971022381569718, CurrSamplesPerSec=30.185388018793432, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:13:58,368] [INFO] [timer.py:197:stop] 0/2673, RunningAvgSamplesPerSec=29.971048453820575, CurrSamplesPerSec=30.040823488821065, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:14:00,492] [INFO] [timer.py:197:stop] 0/2674, RunningAvgSamplesPerSec=29.971244148141214, CurrSamplesPerSec=30.503224914979516, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:14:02,665] [INFO] [timer.py:197:stop] 0/2675, RunningAvgSamplesPerSec=29.971192261266857, CurrSamplesPerSec=29.83318915122229, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0002, 'learning_rate': 1e-05, 'epoch': 66.01} -[2022-12-14 21:14:04,821] [INFO] [timer.py:197:stop] 0/2676, RunningAvgSamplesPerSec=29.971267567585244, CurrSamplesPerSec=30.17392294503552, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:14:06,992] [INFO] [timer.py:197:stop] 0/2677, RunningAvgSamplesPerSec=29.971216029664806, CurrSamplesPerSec=29.83403464803212, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:14:09,112] [INFO] [timer.py:197:stop] 0/2678, RunningAvgSamplesPerSec=29.971434713132115, CurrSamplesPerSec=30.568062234002454, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:14:11,266] [INFO] [timer.py:197:stop] 0/2679, RunningAvgSamplesPerSec=29.971476020514963, CurrSamplesPerSec=30.08242391967485, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:14:13,226] [INFO] [logging.py:68:log_dist] [Rank 0] step=2680, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 21:14:13,227] [INFO] [timer.py:197:stop] 0/2680, RunningAvgSamplesPerSec=29.972653812225794, CurrSamplesPerSec=33.49642038694236, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:14:54,302] [INFO] [timer.py:197:stop] 0/2681, RunningAvgSamplesPerSec=29.97287490934995, CurrSamplesPerSec=30.576909850467448, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:15:01,338] [INFO] [timer.py:197:stop] 0/2682, RunningAvgSamplesPerSec=29.972649106417474, CurrSamplesPerSec=29.37969487678794, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:15:08,105] [INFO] [timer.py:197:stop] 0/2683, RunningAvgSamplesPerSec=29.972783408321284, CurrSamplesPerSec=30.337088916954148, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:15:15,370] [INFO] [timer.py:197:stop] 0/2684, RunningAvgSamplesPerSec=29.97298500858977, CurrSamplesPerSec=30.523404515639132, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:15:21,860] [INFO] [timer.py:197:stop] 0/2685, RunningAvgSamplesPerSec=29.97292598799605, CurrSamplesPerSec=29.815464651626655, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:15:28,081] [INFO] [timer.py:197:stop] 0/2686, RunningAvgSamplesPerSec=29.97305473843145, CurrSamplesPerSec=30.322521235386905, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:15:34,257] [INFO] [timer.py:197:stop] 0/2687, RunningAvgSamplesPerSec=29.973083139634348, CurrSamplesPerSec=30.049506403539105, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:15:40,919] [INFO] [timer.py:197:stop] 0/2688, RunningAvgSamplesPerSec=29.972674861938497, CurrSamplesPerSec=28.91514203602928, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:15:47,679] [INFO] [timer.py:197:stop] 0/2689, RunningAvgSamplesPerSec=29.972581306560727, CurrSamplesPerSec=29.72338162671057, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:15:54,059] [INFO] [logging.py:68:log_dist] [Rank 0] step=2690, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 21:15:54,060] [INFO] [timer.py:197:stop] 0/2690, RunningAvgSamplesPerSec=29.972329326146514, CurrSamplesPerSec=29.31022048004979, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:16:00,885] [INFO] [timer.py:197:stop] 0/2691, RunningAvgSamplesPerSec=29.971928109463065, CurrSamplesPerSec=28.9309295236921, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:16:08,259] [INFO] [timer.py:197:stop] 0/2692, RunningAvgSamplesPerSec=29.97155674968283, CurrSamplesPerSec=29.0051798310531, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:16:14,694] [INFO] [timer.py:197:stop] 0/2693, RunningAvgSamplesPerSec=29.97153106670369, CurrSamplesPerSec=29.902602798063896, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:16:21,030] [INFO] [timer.py:197:stop] 0/2694, RunningAvgSamplesPerSec=29.971407509920226, CurrSamplesPerSec=29.642565607261602, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:16:27,365] [INFO] [timer.py:197:stop] 0/2695, RunningAvgSamplesPerSec=29.971212033174005, CurrSamplesPerSec=29.45407176188453, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:16:33,781] [INFO] [timer.py:197:stop] 0/2696, RunningAvgSamplesPerSec=29.970978285202044, CurrSamplesPerSec=29.354448831477637, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:16:40,332] [INFO] [timer.py:197:stop] 0/2697, RunningAvgSamplesPerSec=29.970960025313946, CurrSamplesPerSec=29.921848524709965, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:16:46,744] [INFO] [timer.py:197:stop] 0/2698, RunningAvgSamplesPerSec=29.97096517186806, CurrSamplesPerSec=29.984841559309643, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:16:52,926] [INFO] [timer.py:197:stop] 0/2699, RunningAvgSamplesPerSec=29.970694638364318, CurrSamplesPerSec=29.258670360358387, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:16:59,394] [INFO] [logging.py:68:log_dist] [Rank 0] step=2700, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 21:16:59,395] [INFO] [timer.py:197:stop] 0/2700, RunningAvgSamplesPerSec=29.970797342866597, CurrSamplesPerSec=30.250376248522475, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0002, 'learning_rate': 1e-05, 'epoch': 67.0} -[2022-12-14 21:17:05,733] [INFO] [timer.py:197:stop] 0/2701, RunningAvgSamplesPerSec=29.97080364982679, CurrSamplesPerSec=29.987829498585707, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:17:12,102] [INFO] [timer.py:197:stop] 0/2702, RunningAvgSamplesPerSec=29.970776137781513, CurrSamplesPerSec=29.896704713477266, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:17:18,931] [INFO] [timer.py:197:stop] 0/2703, RunningAvgSamplesPerSec=29.970748962073912, CurrSamplesPerSec=29.89755381433599, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:17:24,852] [INFO] [timer.py:197:stop] 0/2704, RunningAvgSamplesPerSec=29.970922090670054, CurrSamplesPerSec=30.445956887913226, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:17:31,989] [INFO] [timer.py:197:stop] 0/2705, RunningAvgSamplesPerSec=29.970700729722278, CurrSamplesPerSec=29.384290610342976, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:17:38,630] [INFO] [timer.py:197:stop] 0/2706, RunningAvgSamplesPerSec=29.970679390094325, CurrSamplesPerSec=29.913109215230048, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:17:45,209] [INFO] [timer.py:197:stop] 0/2707, RunningAvgSamplesPerSec=29.97087318618919, CurrSamplesPerSec=30.5042266751046, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:17:51,736] [INFO] [timer.py:197:stop] 0/2708, RunningAvgSamplesPerSec=29.970826122778245, CurrSamplesPerSec=29.844058264717187, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:17:58,736] [INFO] [timer.py:197:stop] 0/2709, RunningAvgSamplesPerSec=29.97061926708172, CurrSamplesPerSec=29.421134100673406, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:18:05,630] [INFO] [logging.py:68:log_dist] [Rank 0] step=2710, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 21:18:05,631] [INFO] [timer.py:197:stop] 0/2710, RunningAvgSamplesPerSec=29.970576922260435, CurrSamplesPerSec=29.856386392936226, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:18:11,758] [INFO] [timer.py:197:stop] 0/2711, RunningAvgSamplesPerSec=29.97050367183333, CurrSamplesPerSec=29.773446237134586, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:18:18,125] [INFO] [timer.py:197:stop] 0/2712, RunningAvgSamplesPerSec=29.970733491145992, CurrSamplesPerSec=30.60652617344676, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:18:20,585] [INFO] [timer.py:197:stop] 0/2713, RunningAvgSamplesPerSec=29.970929089251243, CurrSamplesPerSec=30.510547253276837, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:18:22,756] [INFO] [timer.py:197:stop] 0/2714, RunningAvgSamplesPerSec=29.970878946765268, CurrSamplesPerSec=29.835556663064793, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:18:24,869] [INFO] [timer.py:197:stop] 0/2715, RunningAvgSamplesPerSec=29.971128126657547, CurrSamplesPerSec=30.662498457939442, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:18:27,041] [INFO] [timer.py:197:stop] 0/2716, RunningAvgSamplesPerSec=29.971076654959532, CurrSamplesPerSec=29.83208178867495, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:18:29,197] [INFO] [timer.py:197:stop] 0/2717, RunningAvgSamplesPerSec=29.971219926049514, CurrSamplesPerSec=30.36517054623618, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:18:31,383] [INFO] [timer.py:197:stop] 0/2718, RunningAvgSamplesPerSec=29.971127359817068, CurrSamplesPerSec=29.721900652721438, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:18:33,512] [INFO] [timer.py:197:stop] 0/2719, RunningAvgSamplesPerSec=29.971289547069517, CurrSamplesPerSec=30.418363375054096, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:18:35,429] [INFO] [logging.py:68:log_dist] [Rank 0] step=2720, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 21:18:35,429] [INFO] [timer.py:197:stop] 0/2720, RunningAvgSamplesPerSec=29.97253677311293, CurrSamplesPerSec=33.793397788285134, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:19:19,895] [INFO] [timer.py:197:stop] 0/2721, RunningAvgSamplesPerSec=29.97190715334369, CurrSamplesPerSec=28.35306565054139, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:19:26,758] [INFO] [timer.py:197:stop] 0/2722, RunningAvgSamplesPerSec=29.971302589078217, CurrSamplesPerSec=28.41299115090333, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:19:34,214] [INFO] [timer.py:197:stop] 0/2723, RunningAvgSamplesPerSec=29.971127612110433, CurrSamplesPerSec=29.502632630283795, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:19:40,574] [INFO] [timer.py:197:stop] 0/2724, RunningAvgSamplesPerSec=29.9711265503812, CurrSamplesPerSec=29.968237863685303, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:19:47,178] [INFO] [timer.py:197:stop] 0/2725, RunningAvgSamplesPerSec=29.97125679613111, CurrSamplesPerSec=30.330031216371896, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0002, 'learning_rate': 1e-05, 'epoch': 68.0} -[2022-12-14 21:19:54,036] [INFO] [timer.py:197:stop] 0/2726, RunningAvgSamplesPerSec=29.971224956491113, CurrSamplesPerSec=29.88477578468347, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:20:01,123] [INFO] [timer.py:197:stop] 0/2727, RunningAvgSamplesPerSec=29.97146205096314, CurrSamplesPerSec=30.631536341788188, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:20:07,808] [INFO] [timer.py:197:stop] 0/2728, RunningAvgSamplesPerSec=29.971240355679324, CurrSamplesPerSec=29.37906142794681, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:20:15,273] [INFO] [timer.py:197:stop] 0/2729, RunningAvgSamplesPerSec=29.971026943496458, CurrSamplesPerSec=29.400346755600825, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:20:21,939] [INFO] [logging.py:68:log_dist] [Rank 0] step=2730, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 21:20:21,939] [INFO] [timer.py:197:stop] 0/2730, RunningAvgSamplesPerSec=29.971260706427383, CurrSamplesPerSec=30.622590714572294, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:20:29,149] [INFO] [timer.py:197:stop] 0/2731, RunningAvgSamplesPerSec=29.971053946652866, CurrSamplesPerSec=29.417435923062744, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:20:35,875] [INFO] [timer.py:197:stop] 0/2732, RunningAvgSamplesPerSec=29.97104280627684, CurrSamplesPerSec=29.94067153945105, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:20:41,988] [INFO] [timer.py:197:stop] 0/2733, RunningAvgSamplesPerSec=29.9709408356332, CurrSamplesPerSec=29.695123795385143, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:20:49,130] [INFO] [timer.py:197:stop] 0/2734, RunningAvgSamplesPerSec=29.97101176662766, CurrSamplesPerSec=30.16598495001581, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:20:55,380] [INFO] [timer.py:197:stop] 0/2735, RunningAvgSamplesPerSec=29.970985467731808, CurrSamplesPerSec=29.899308775624633, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:21:02,383] [INFO] [timer.py:197:stop] 0/2736, RunningAvgSamplesPerSec=29.970837001909793, CurrSamplesPerSec=29.570501790519565, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:21:08,734] [INFO] [timer.py:197:stop] 0/2737, RunningAvgSamplesPerSec=29.97099948744266, CurrSamplesPerSec=30.42192101749018, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:21:15,315] [INFO] [timer.py:197:stop] 0/2738, RunningAvgSamplesPerSec=29.9708417991751, CurrSamplesPerSec=29.54568459392762, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:21:21,844] [INFO] [timer.py:197:stop] 0/2739, RunningAvgSamplesPerSec=29.970931796945457, CurrSamplesPerSec=30.219206204430918, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:21:28,541] [INFO] [logging.py:68:log_dist] [Rank 0] step=2740, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 21:21:28,542] [INFO] [timer.py:197:stop] 0/2740, RunningAvgSamplesPerSec=29.97079583116385, CurrSamplesPerSec=29.603223193157135, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:21:35,621] [INFO] [timer.py:197:stop] 0/2741, RunningAvgSamplesPerSec=29.970458025712038, CurrSamplesPerSec=29.073245478889753, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:21:42,378] [INFO] [timer.py:197:stop] 0/2742, RunningAvgSamplesPerSec=29.970423308052602, CurrSamplesPerSec=29.875632505692483, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:21:49,565] [INFO] [timer.py:197:stop] 0/2743, RunningAvgSamplesPerSec=29.97029132396976, CurrSamplesPerSec=29.612968149083144, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:21:56,268] [INFO] [timer.py:197:stop] 0/2744, RunningAvgSamplesPerSec=29.9701092240146, CurrSamplesPerSec=29.479152843275305, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:22:03,189] [INFO] [timer.py:197:stop] 0/2745, RunningAvgSamplesPerSec=29.969895239290246, CurrSamplesPerSec=29.394419805718037, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:22:09,453] [INFO] [timer.py:197:stop] 0/2746, RunningAvgSamplesPerSec=29.969910778911807, CurrSamplesPerSec=30.012596693706115, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:22:16,134] [INFO] [timer.py:197:stop] 0/2747, RunningAvgSamplesPerSec=29.970003611372952, CurrSamplesPerSec=30.226920363598488, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:22:22,886] [INFO] [timer.py:197:stop] 0/2748, RunningAvgSamplesPerSec=29.96986693685372, CurrSamplesPerSec=29.5993354922704, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:22:29,607] [INFO] [timer.py:197:stop] 0/2749, RunningAvgSamplesPerSec=29.96951361895953, CurrSamplesPerSec=29.029737383708486, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:22:36,372] [INFO] [logging.py:68:log_dist] [Rank 0] step=2750, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 21:22:36,373] [INFO] [timer.py:197:stop] 0/2750, RunningAvgSamplesPerSec=29.969596589386487, CurrSamplesPerSec=30.199263613298882, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0002, 'learning_rate': 1e-05, 'epoch': 68.01} -[2022-12-14 21:22:42,715] [INFO] [timer.py:197:stop] 0/2751, RunningAvgSamplesPerSec=29.96963938097504, CurrSamplesPerSec=30.087694043636414, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:22:49,625] [INFO] [timer.py:197:stop] 0/2752, RunningAvgSamplesPerSec=29.96961770283869, CurrSamplesPerSec=29.91014281209574, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:22:52,002] [INFO] [timer.py:197:stop] 0/2753, RunningAvgSamplesPerSec=29.969762207509252, CurrSamplesPerSec=30.372492040437148, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:22:54,114] [INFO] [timer.py:197:stop] 0/2754, RunningAvgSamplesPerSec=29.970014187960377, CurrSamplesPerSec=30.679631632465416, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:22:56,252] [INFO] [timer.py:197:stop] 0/2755, RunningAvgSamplesPerSec=29.97013677381508, CurrSamplesPerSec=30.31133511134839, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:22:58,422] [INFO] [timer.py:197:stop] 0/2756, RunningAvgSamplesPerSec=29.970089687412838, CurrSamplesPerSec=29.841019289715245, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:23:00,537] [INFO] [timer.py:197:stop] 0/2757, RunningAvgSamplesPerSec=29.970321998845577, CurrSamplesPerSec=30.62406847933251, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:23:02,650] [INFO] [timer.py:197:stop] 0/2758, RunningAvgSamplesPerSec=29.97058109082788, CurrSamplesPerSec=30.70180102210684, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:23:04,826] [INFO] [timer.py:197:stop] 0/2759, RunningAvgSamplesPerSec=29.970507468394352, CurrSamplesPerSec=29.768968973054104, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:23:06,767] [INFO] [logging.py:68:log_dist] [Rank 0] step=2760, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 21:23:06,767] [INFO] [timer.py:197:stop] 0/2760, RunningAvgSamplesPerSec=29.971632484179324, CurrSamplesPerSec=33.43147937672226, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:23:49,673] [INFO] [timer.py:197:stop] 0/2761, RunningAvgSamplesPerSec=29.971749294689996, CurrSamplesPerSec=30.297414495601135, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:23:56,485] [INFO] [timer.py:197:stop] 0/2762, RunningAvgSamplesPerSec=29.97196536120027, CurrSamplesPerSec=30.580194625659683, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:24:03,776] [INFO] [timer.py:197:stop] 0/2763, RunningAvgSamplesPerSec=29.972192830261253, CurrSamplesPerSec=30.61344432396333, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:24:10,555] [INFO] [timer.py:197:stop] 0/2764, RunningAvgSamplesPerSec=29.97235497159492, CurrSamplesPerSec=30.426817583697552, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:24:17,041] [INFO] [timer.py:197:stop] 0/2765, RunningAvgSamplesPerSec=29.97246151714901, CurrSamplesPerSec=30.26965937373987, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:24:23,721] [INFO] [timer.py:197:stop] 0/2766, RunningAvgSamplesPerSec=29.972466134782625, CurrSamplesPerSec=29.98523009172213, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:24:30,230] [INFO] [timer.py:197:stop] 0/2767, RunningAvgSamplesPerSec=29.972457754792526, CurrSamplesPerSec=29.94931335429062, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:24:37,265] [INFO] [timer.py:197:stop] 0/2768, RunningAvgSamplesPerSec=29.972209576562, CurrSamplesPerSec=29.301361372764294, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:24:44,440] [INFO] [timer.py:197:stop] 0/2769, RunningAvgSamplesPerSec=29.97154268049546, CurrSamplesPerSec=28.233892582966337, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:24:50,856] [INFO] [logging.py:68:log_dist] [Rank 0] step=2770, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 21:24:50,857] [INFO] [timer.py:197:stop] 0/2770, RunningAvgSamplesPerSec=29.97157567708467, CurrSamplesPerSec=30.063156319747755, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:24:57,811] [INFO] [timer.py:197:stop] 0/2771, RunningAvgSamplesPerSec=29.97172278025283, CurrSamplesPerSec=30.384514365879483, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:25:04,419] [INFO] [timer.py:197:stop] 0/2772, RunningAvgSamplesPerSec=29.97200732792897, CurrSamplesPerSec=30.781199803686835, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:25:10,882] [INFO] [timer.py:197:stop] 0/2773, RunningAvgSamplesPerSec=29.971904868478745, CurrSamplesPerSec=29.690755438115836, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:25:17,522] [INFO] [timer.py:197:stop] 0/2774, RunningAvgSamplesPerSec=29.971806566529693, CurrSamplesPerSec=29.701866068694972, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:25:25,307] [INFO] [timer.py:197:stop] 0/2775, RunningAvgSamplesPerSec=29.971813074166707, CurrSamplesPerSec=29.98986311167637, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0002, 'learning_rate': 1e-05, 'epoch': 69.0} -[2022-12-14 21:25:32,043] [INFO] [timer.py:197:stop] 0/2776, RunningAvgSamplesPerSec=29.971969402763325, CurrSamplesPerSec=30.41183285489739, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:25:38,734] [INFO] [timer.py:197:stop] 0/2777, RunningAvgSamplesPerSec=29.972142303553486, CurrSamplesPerSec=30.45957196110586, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:25:45,365] [INFO] [timer.py:197:stop] 0/2778, RunningAvgSamplesPerSec=29.971899282693887, CurrSamplesPerSec=29.312361674628587, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:25:52,334] [INFO] [timer.py:197:stop] 0/2779, RunningAvgSamplesPerSec=29.971911642823066, CurrSamplesPerSec=30.006262700535583, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:25:58,986] [INFO] [logging.py:68:log_dist] [Rank 0] step=2780, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 21:25:58,987] [INFO] [timer.py:197:stop] 0/2780, RunningAvgSamplesPerSec=29.971921929311765, CurrSamplesPerSec=30.00051476944621, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:26:05,497] [INFO] [timer.py:197:stop] 0/2781, RunningAvgSamplesPerSec=29.97192753728724, CurrSamplesPerSec=29.98751459799137, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:26:11,912] [INFO] [timer.py:197:stop] 0/2782, RunningAvgSamplesPerSec=29.972172895136435, CurrSamplesPerSec=30.66990097695411, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:26:18,866] [INFO] [timer.py:197:stop] 0/2783, RunningAvgSamplesPerSec=29.971989084214464, CurrSamplesPerSec=29.46956368520695, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:26:25,678] [INFO] [timer.py:197:stop] 0/2784, RunningAvgSamplesPerSec=29.971892283639043, CurrSamplesPerSec=29.705087143147548, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:26:32,592] [INFO] [timer.py:197:stop] 0/2785, RunningAvgSamplesPerSec=29.9716422674092, CurrSamplesPerSec=29.291877924436065, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:26:38,974] [INFO] [timer.py:197:stop] 0/2786, RunningAvgSamplesPerSec=29.971582776585493, CurrSamplesPerSec=29.80692968436921, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:26:45,984] [INFO] [timer.py:197:stop] 0/2787, RunningAvgSamplesPerSec=29.971763067401394, CurrSamplesPerSec=30.4822446776389, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:26:52,726] [INFO] [timer.py:197:stop] 0/2788, RunningAvgSamplesPerSec=29.971682280045904, CurrSamplesPerSec=29.748366494021855, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:26:59,197] [INFO] [timer.py:197:stop] 0/2789, RunningAvgSamplesPerSec=29.97179267029622, CurrSamplesPerSec=30.28252959291961, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:27:05,685] [INFO] [logging.py:68:log_dist] [Rank 0] step=2790, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 21:27:05,685] [INFO] [timer.py:197:stop] 0/2790, RunningAvgSamplesPerSec=29.97174089222654, CurrSamplesPerSec=29.828127119931345, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:27:12,001] [INFO] [timer.py:197:stop] 0/2791, RunningAvgSamplesPerSec=29.97202735292439, CurrSamplesPerSec=30.792551802141542, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:27:18,640] [INFO] [timer.py:197:stop] 0/2792, RunningAvgSamplesPerSec=29.97194092979027, CurrSamplesPerSec=29.73283042307973, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:27:21,059] [INFO] [timer.py:197:stop] 0/2793, RunningAvgSamplesPerSec=29.97189089832615, CurrSamplesPerSec=29.832950431387054, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:27:23,186] [INFO] [timer.py:197:stop] 0/2794, RunningAvgSamplesPerSec=29.97206163248835, CurrSamplesPerSec=30.45628194707479, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:27:25,302] [INFO] [timer.py:197:stop] 0/2795, RunningAvgSamplesPerSec=29.972284653062797, CurrSamplesPerSec=30.608173400905674, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:27:27,452] [INFO] [timer.py:197:stop] 0/2796, RunningAvgSamplesPerSec=29.972340596931417, CurrSamplesPerSec=30.129410951290186, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:27:29,589] [INFO] [timer.py:197:stop] 0/2797, RunningAvgSamplesPerSec=29.972462303905232, CurrSamplesPerSec=30.316415266025853, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:27:31,735] [INFO] [timer.py:197:stop] 0/2798, RunningAvgSamplesPerSec=29.97254132109033, CurrSamplesPerSec=30.19503438280964, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:27:33,922] [INFO] [timer.py:197:stop] 0/2799, RunningAvgSamplesPerSec=29.972413097987992, CurrSamplesPerSec=29.61814041403207, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:27:35,841] [INFO] [logging.py:68:log_dist] [Rank 0] step=2800, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 21:27:35,842] [INFO] [timer.py:197:stop] 0/2800, RunningAvgSamplesPerSec=29.973744215868035, CurrSamplesPerSec=34.2251508372452, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0002, 'learning_rate': 1e-05, 'epoch': 69.01} -[2022-12-14 21:28:19,614] [INFO] [timer.py:197:stop] 0/2801, RunningAvgSamplesPerSec=29.973395540746505, CurrSamplesPerSec=29.028566435600673, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:28:26,463] [INFO] [timer.py:197:stop] 0/2802, RunningAvgSamplesPerSec=29.973399037771323, CurrSamplesPerSec=29.983190408880287, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:28:33,907] [INFO] [timer.py:197:stop] 0/2803, RunningAvgSamplesPerSec=29.973612824169326, CurrSamplesPerSec=30.584417451779508, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:28:40,045] [INFO] [timer.py:197:stop] 0/2804, RunningAvgSamplesPerSec=29.973855148705685, CurrSamplesPerSec=30.668338197842463, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:28:46,823] [INFO] [timer.py:197:stop] 0/2805, RunningAvgSamplesPerSec=29.973562571698167, CurrSamplesPerSec=29.175594643224933, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:28:53,359] [INFO] [timer.py:197:stop] 0/2806, RunningAvgSamplesPerSec=29.97364248195521, CurrSamplesPerSec=30.1993179725659, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:29:00,447] [INFO] [timer.py:197:stop] 0/2807, RunningAvgSamplesPerSec=29.97355562207476, CurrSamplesPerSec=29.731964307747653, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:29:06,897] [INFO] [timer.py:197:stop] 0/2808, RunningAvgSamplesPerSec=29.973485451438307, CurrSamplesPerSec=29.777941364727827, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:29:14,311] [INFO] [timer.py:197:stop] 0/2809, RunningAvgSamplesPerSec=29.973849298696255, CurrSamplesPerSec=31.03081956782691, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:29:20,839] [INFO] [logging.py:68:log_dist] [Rank 0] step=2810, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 21:29:20,840] [INFO] [timer.py:197:stop] 0/2810, RunningAvgSamplesPerSec=29.973865592392876, CurrSamplesPerSec=30.019671918483425, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:29:27,874] [INFO] [timer.py:197:stop] 0/2811, RunningAvgSamplesPerSec=29.97379975053342, CurrSamplesPerSec=29.790049617630185, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:29:34,864] [INFO] [timer.py:197:stop] 0/2812, RunningAvgSamplesPerSec=29.973712962126772, CurrSamplesPerSec=29.731891859322484, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:29:41,932] [INFO] [timer.py:197:stop] 0/2813, RunningAvgSamplesPerSec=29.973301825376073, CurrSamplesPerSec=28.86089937778787, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:29:48,582] [INFO] [timer.py:197:stop] 0/2814, RunningAvgSamplesPerSec=29.973376400882497, CurrSamplesPerSec=30.184485154883077, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:29:55,182] [INFO] [timer.py:197:stop] 0/2815, RunningAvgSamplesPerSec=29.973594838106234, CurrSamplesPerSec=30.600695994084038, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:30:01,896] [INFO] [timer.py:197:stop] 0/2816, RunningAvgSamplesPerSec=29.973377374680062, CurrSamplesPerSec=29.37389196322008, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:30:08,668] [INFO] [timer.py:197:stop] 0/2817, RunningAvgSamplesPerSec=29.973317636373658, CurrSamplesPerSec=29.806151914459203, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:30:15,396] [INFO] [timer.py:197:stop] 0/2818, RunningAvgSamplesPerSec=29.973261986351922, CurrSamplesPerSec=29.817421960096585, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:30:23,528] [INFO] [timer.py:197:stop] 0/2819, RunningAvgSamplesPerSec=29.973260303537668, CurrSamplesPerSec=29.96852224796302, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:30:30,126] [INFO] [logging.py:68:log_dist] [Rank 0] step=2820, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 21:30:30,127] [INFO] [timer.py:197:stop] 0/2820, RunningAvgSamplesPerSec=29.973237886818655, CurrSamplesPerSec=29.91022279727046, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:30:36,810] [INFO] [timer.py:197:stop] 0/2821, RunningAvgSamplesPerSec=29.973266651805318, CurrSamplesPerSec=30.054546274920902, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:30:43,131] [INFO] [timer.py:197:stop] 0/2822, RunningAvgSamplesPerSec=29.973210149548212, CurrSamplesPerSec=29.814772534669533, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:30:50,865] [INFO] [timer.py:197:stop] 0/2823, RunningAvgSamplesPerSec=29.97305233590503, CurrSamplesPerSec=29.534531252956914, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:30:57,848] [INFO] [timer.py:197:stop] 0/2824, RunningAvgSamplesPerSec=29.972929444153664, CurrSamplesPerSec=29.630217151753232, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:31:05,192] [INFO] [timer.py:197:stop] 0/2825, RunningAvgSamplesPerSec=29.97298874847258, CurrSamplesPerSec=30.141285569373853, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0002, 'learning_rate': 1e-05, 'epoch': 70.0} -[2022-12-14 21:31:11,609] [INFO] [timer.py:197:stop] 0/2826, RunningAvgSamplesPerSec=29.972758588384334, CurrSamplesPerSec=29.336807529951777, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:31:18,379] [INFO] [timer.py:197:stop] 0/2827, RunningAvgSamplesPerSec=29.97268297739382, CurrSamplesPerSec=29.760668466705383, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:31:25,823] [INFO] [timer.py:197:stop] 0/2828, RunningAvgSamplesPerSec=29.97219547406591, CurrSamplesPerSec=28.655520102650925, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:31:32,534] [INFO] [timer.py:197:stop] 0/2829, RunningAvgSamplesPerSec=29.971940964755778, CurrSamplesPerSec=29.269558838217417, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:31:39,288] [INFO] [logging.py:68:log_dist] [Rank 0] step=2830, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 21:31:39,289] [INFO] [timer.py:197:stop] 0/2830, RunningAvgSamplesPerSec=29.97207850445606, CurrSamplesPerSec=30.366015550233282, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:31:45,580] [INFO] [timer.py:197:stop] 0/2831, RunningAvgSamplesPerSec=29.971663009253316, CurrSamplesPerSec=28.84098575009428, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:31:52,125] [INFO] [timer.py:197:stop] 0/2832, RunningAvgSamplesPerSec=29.97161238051963, CurrSamplesPerSec=29.829065140118082, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:31:54,622] [INFO] [timer.py:197:stop] 0/2833, RunningAvgSamplesPerSec=29.971705902635353, CurrSamplesPerSec=30.238732321858926, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:31:56,781] [INFO] [timer.py:197:stop] 0/2834, RunningAvgSamplesPerSec=29.97185752424659, CurrSamplesPerSec=30.407337213947965, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:31:58,956] [INFO] [timer.py:197:stop] 0/2835, RunningAvgSamplesPerSec=29.971926651281688, CurrSamplesPerSec=30.168981976301822, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:32:01,064] [INFO] [timer.py:197:stop] 0/2836, RunningAvgSamplesPerSec=29.97218530334114, CurrSamplesPerSec=30.723316764314387, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:32:03,242] [INFO] [timer.py:197:stop] 0/2837, RunningAvgSamplesPerSec=29.972104128591955, CurrSamplesPerSec=29.74380778375961, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:32:05,417] [INFO] [timer.py:197:stop] 0/2838, RunningAvgSamplesPerSec=29.97207559172474, CurrSamplesPerSec=29.891391436423483, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:32:07,562] [INFO] [timer.py:197:stop] 0/2839, RunningAvgSamplesPerSec=29.97215971869312, CurrSamplesPerSec=30.21265889517536, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:32:09,454] [INFO] [logging.py:68:log_dist] [Rank 0] step=2840, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 21:32:09,454] [INFO] [timer.py:197:stop] 0/2840, RunningAvgSamplesPerSec=29.97349010450593, CurrSamplesPerSec=34.29174743452661, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:32:47,700] [INFO] [timer.py:197:stop] 0/2841, RunningAvgSamplesPerSec=29.973190901325744, CurrSamplesPerSec=29.147453604772775, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:32:53,944] [INFO] [timer.py:197:stop] 0/2842, RunningAvgSamplesPerSec=29.972767612352023, CurrSamplesPerSec=28.81738983501003, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:33:00,041] [INFO] [timer.py:197:stop] 0/2843, RunningAvgSamplesPerSec=29.972609836721542, CurrSamplesPerSec=29.531129388998686, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:33:05,891] [INFO] [timer.py:197:stop] 0/2844, RunningAvgSamplesPerSec=29.972824830487095, CurrSamplesPerSec=30.596332663585553, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:33:11,637] [INFO] [timer.py:197:stop] 0/2845, RunningAvgSamplesPerSec=29.972681352223862, CurrSamplesPerSec=29.57039103769771, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:33:17,586] [INFO] [timer.py:197:stop] 0/2846, RunningAvgSamplesPerSec=29.972654994449908, CurrSamplesPerSec=29.89790678810029, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:33:23,614] [INFO] [timer.py:197:stop] 0/2847, RunningAvgSamplesPerSec=29.97242185159504, CurrSamplesPerSec=29.32371939508919, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:33:29,520] [INFO] [timer.py:197:stop] 0/2848, RunningAvgSamplesPerSec=29.972217597042185, CurrSamplesPerSec=29.402169427009444, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:33:35,377] [INFO] [timer.py:197:stop] 0/2849, RunningAvgSamplesPerSec=29.972168612640573, CurrSamplesPerSec=29.833404665466006, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:33:41,215] [INFO] [logging.py:68:log_dist] [Rank 0] step=2850, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 21:33:41,216] [INFO] [timer.py:197:stop] 0/2850, RunningAvgSamplesPerSec=29.972343953826694, CurrSamplesPerSec=30.479998383092965, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0002, 'learning_rate': 1e-05, 'epoch': 71.0} -[2022-12-14 21:33:47,220] [INFO] [timer.py:197:stop] 0/2851, RunningAvgSamplesPerSec=29.97232914721115, CurrSamplesPerSec=29.930219173102934, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:33:53,221] [INFO] [timer.py:197:stop] 0/2852, RunningAvgSamplesPerSec=29.972238778462145, CurrSamplesPerSec=29.716971718028503, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:33:59,348] [INFO] [timer.py:197:stop] 0/2853, RunningAvgSamplesPerSec=29.97176623946422, CurrSamplesPerSec=28.68296093862529, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:34:05,593] [INFO] [timer.py:197:stop] 0/2854, RunningAvgSamplesPerSec=29.97169753936389, CurrSamplesPerSec=29.77710565050222, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:34:14,161] [INFO] [timer.py:197:stop] 0/2855, RunningAvgSamplesPerSec=29.971674547184254, CurrSamplesPerSec=29.906244053521096, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:34:20,391] [INFO] [timer.py:197:stop] 0/2856, RunningAvgSamplesPerSec=29.971617564025927, CurrSamplesPerSec=29.809921994717765, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:34:26,109] [INFO] [timer.py:197:stop] 0/2857, RunningAvgSamplesPerSec=29.97197749307147, CurrSamplesPerSec=31.03568447306195, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:34:31,843] [INFO] [timer.py:197:stop] 0/2858, RunningAvgSamplesPerSec=29.97204295782859, CurrSamplesPerSec=30.160118062839135, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:34:38,234] [INFO] [timer.py:197:stop] 0/2859, RunningAvgSamplesPerSec=29.97162288389038, CurrSamplesPerSec=28.818082826444336, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:34:44,552] [INFO] [logging.py:68:log_dist] [Rank 0] step=2860, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 21:34:44,552] [INFO] [timer.py:197:stop] 0/2860, RunningAvgSamplesPerSec=29.971485417028763, CurrSamplesPerSec=29.58382423798652, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:34:50,799] [INFO] [timer.py:197:stop] 0/2861, RunningAvgSamplesPerSec=29.971421751968432, CurrSamplesPerSec=29.790565362232726, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:34:56,671] [INFO] [timer.py:197:stop] 0/2862, RunningAvgSamplesPerSec=29.97144559954857, CurrSamplesPerSec=30.039781338381975, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:35:02,876] [INFO] [timer.py:197:stop] 0/2863, RunningAvgSamplesPerSec=29.97165094473771, CurrSamplesPerSec=30.57068012145571, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:35:08,914] [INFO] [timer.py:197:stop] 0/2864, RunningAvgSamplesPerSec=29.971570814220314, CurrSamplesPerSec=29.744058262287776, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:35:14,942] [INFO] [timer.py:197:stop] 0/2865, RunningAvgSamplesPerSec=29.971709988243724, CurrSamplesPerSec=30.375392754343622, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:35:20,681] [INFO] [timer.py:197:stop] 0/2866, RunningAvgSamplesPerSec=29.971762164679728, CurrSamplesPerSec=30.121891817892106, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:35:27,277] [INFO] [timer.py:197:stop] 0/2867, RunningAvgSamplesPerSec=29.971735928900767, CurrSamplesPerSec=29.896784626796187, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:35:33,463] [INFO] [timer.py:197:stop] 0/2868, RunningAvgSamplesPerSec=29.97158177662985, CurrSamplesPerSec=29.536351100776955, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:35:39,230] [INFO] [timer.py:197:stop] 0/2869, RunningAvgSamplesPerSec=29.971583816500427, CurrSamplesPerSec=29.977431226572754, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:35:45,288] [INFO] [logging.py:68:log_dist] [Rank 0] step=2870, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 21:35:45,288] [INFO] [timer.py:197:stop] 0/2870, RunningAvgSamplesPerSec=29.971554632292545, CurrSamplesPerSec=29.888116522426934, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:35:51,273] [INFO] [timer.py:197:stop] 0/2871, RunningAvgSamplesPerSec=29.971310822216402, CurrSamplesPerSec=29.288010849534714, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:35:57,507] [INFO] [timer.py:197:stop] 0/2872, RunningAvgSamplesPerSec=29.971371474796218, CurrSamplesPerSec=30.146400288846127, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:35:59,930] [INFO] [timer.py:197:stop] 0/2873, RunningAvgSamplesPerSec=29.97151168533928, CurrSamplesPerSec=30.379394174105936, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:36:02,080] [INFO] [timer.py:197:stop] 0/2874, RunningAvgSamplesPerSec=29.971724598520108, CurrSamplesPerSec=30.595729359301316, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:36:04,206] [INFO] [timer.py:197:stop] 0/2875, RunningAvgSamplesPerSec=29.97189470549518, CurrSamplesPerSec=30.46854016628359, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0002, 'learning_rate': 1e-05, 'epoch': 71.01} -[2022-12-14 21:36:06,353] [INFO] [timer.py:197:stop] 0/2876, RunningAvgSamplesPerSec=29.971966174281036, CurrSamplesPerSec=30.17871285453477, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:36:08,525] [INFO] [timer.py:197:stop] 0/2877, RunningAvgSamplesPerSec=29.971915261929958, CurrSamplesPerSec=29.826304281231565, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:36:10,703] [INFO] [timer.py:197:stop] 0/2878, RunningAvgSamplesPerSec=29.971853341559108, CurrSamplesPerSec=29.79488377090361, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:36:12,826] [INFO] [timer.py:197:stop] 0/2879, RunningAvgSamplesPerSec=29.97203747022088, CurrSamplesPerSec=30.511119459010644, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:36:14,721] [INFO] [logging.py:68:log_dist] [Rank 0] step=2880, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 21:36:14,722] [INFO] [timer.py:197:stop] 0/2880, RunningAvgSamplesPerSec=29.973308718627603, CurrSamplesPerSec=34.139193714334404, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:36:56,083] [INFO] [timer.py:197:stop] 0/2881, RunningAvgSamplesPerSec=29.973123940956015, CurrSamplesPerSec=29.450607629289085, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:37:02,794] [INFO] [timer.py:197:stop] 0/2882, RunningAvgSamplesPerSec=29.9732548901446, CurrSamplesPerSec=30.355061625979594, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:37:09,176] [INFO] [timer.py:197:stop] 0/2883, RunningAvgSamplesPerSec=29.972960834369847, CurrSamplesPerSec=29.14935900512609, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:37:15,638] [INFO] [timer.py:197:stop] 0/2884, RunningAvgSamplesPerSec=29.972929796014743, CurrSamplesPerSec=29.883774374352154, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:37:21,903] [INFO] [timer.py:197:stop] 0/2885, RunningAvgSamplesPerSec=29.973123165817892, CurrSamplesPerSec=30.540976731623278, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:37:28,680] [INFO] [timer.py:197:stop] 0/2886, RunningAvgSamplesPerSec=29.972771512229013, CurrSamplesPerSec=28.9921353559481, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:37:35,129] [INFO] [timer.py:197:stop] 0/2887, RunningAvgSamplesPerSec=29.972732486793994, CurrSamplesPerSec=29.860604325945292, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:37:41,355] [INFO] [timer.py:197:stop] 0/2888, RunningAvgSamplesPerSec=29.972784593092754, CurrSamplesPerSec=30.123869283715564, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:37:47,883] [INFO] [timer.py:197:stop] 0/2889, RunningAvgSamplesPerSec=29.972770031172058, CurrSamplesPerSec=29.93080319137964, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:37:53,996] [INFO] [logging.py:68:log_dist] [Rank 0] step=2890, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 21:37:53,996] [INFO] [timer.py:197:stop] 0/2890, RunningAvgSamplesPerSec=29.972685407938904, CurrSamplesPerSec=29.730354060132665, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:38:00,607] [INFO] [timer.py:197:stop] 0/2891, RunningAvgSamplesPerSec=29.971873434664516, CurrSamplesPerSec=27.797105858501805, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:38:06,942] [INFO] [timer.py:197:stop] 0/2892, RunningAvgSamplesPerSec=29.971651201818375, CurrSamplesPerSec=29.343089751541456, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:38:13,440] [INFO] [timer.py:197:stop] 0/2893, RunningAvgSamplesPerSec=29.971746105803746, CurrSamplesPerSec=30.2485525658067, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:38:19,889] [INFO] [timer.py:197:stop] 0/2894, RunningAvgSamplesPerSec=29.971639499483548, CurrSamplesPerSec=29.666578658600134, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:38:26,233] [INFO] [timer.py:197:stop] 0/2895, RunningAvgSamplesPerSec=29.971403461404407, CurrSamplesPerSec=29.3039875122989, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:38:33,102] [INFO] [timer.py:197:stop] 0/2896, RunningAvgSamplesPerSec=29.9713989632528, CurrSamplesPerSec=29.958391460263055, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:38:40,098] [INFO] [timer.py:197:stop] 0/2897, RunningAvgSamplesPerSec=29.97144749931771, CurrSamplesPerSec=30.11257249227598, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:38:46,667] [INFO] [timer.py:197:stop] 0/2898, RunningAvgSamplesPerSec=29.971315175395493, CurrSamplesPerSec=29.593073577801864, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:38:52,822] [INFO] [timer.py:197:stop] 0/2899, RunningAvgSamplesPerSec=29.971498831227038, CurrSamplesPerSec=30.51297840878103, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:38:59,330] [INFO] [logging.py:68:log_dist] [Rank 0] step=2900, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 21:38:59,331] [INFO] [timer.py:197:stop] 0/2900, RunningAvgSamplesPerSec=29.971296767704153, CurrSamplesPerSec=29.397136698452048, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0002, 'learning_rate': 1e-05, 'epoch': 72.0} -[2022-12-14 21:39:06,306] [INFO] [timer.py:197:stop] 0/2901, RunningAvgSamplesPerSec=29.97123068791564, CurrSamplesPerSec=29.78094768120933, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:39:12,460] [INFO] [timer.py:197:stop] 0/2902, RunningAvgSamplesPerSec=29.971189075216845, CurrSamplesPerSec=29.851037642479845, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:39:19,497] [INFO] [timer.py:197:stop] 0/2903, RunningAvgSamplesPerSec=29.971210207469987, CurrSamplesPerSec=30.032619351027563, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:39:25,502] [INFO] [timer.py:197:stop] 0/2904, RunningAvgSamplesPerSec=29.97119251376042, CurrSamplesPerSec=29.919950850376758, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:39:32,166] [INFO] [timer.py:197:stop] 0/2905, RunningAvgSamplesPerSec=29.971225310735267, CurrSamplesPerSec=30.066705443561418, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:39:38,624] [INFO] [timer.py:197:stop] 0/2906, RunningAvgSamplesPerSec=29.9711102834771, CurrSamplesPerSec=29.640866838879, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:39:45,672] [INFO] [timer.py:197:stop] 0/2907, RunningAvgSamplesPerSec=29.971173493141528, CurrSamplesPerSec=30.155865911746204, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:39:52,392] [INFO] [timer.py:197:stop] 0/2908, RunningAvgSamplesPerSec=29.97120703658628, CurrSamplesPerSec=30.068968700030524, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:39:58,727] [INFO] [timer.py:197:stop] 0/2909, RunningAvgSamplesPerSec=29.971519035329887, CurrSamplesPerSec=30.906480638219726, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:40:05,645] [INFO] [logging.py:68:log_dist] [Rank 0] step=2910, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 21:40:05,646] [INFO] [timer.py:197:stop] 0/2910, RunningAvgSamplesPerSec=29.971528023178713, CurrSamplesPerSec=29.99767850423885, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:40:11,959] [INFO] [timer.py:197:stop] 0/2911, RunningAvgSamplesPerSec=29.97113953023312, CurrSamplesPerSec=28.88245328982457, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:40:18,214] [INFO] [timer.py:197:stop] 0/2912, RunningAvgSamplesPerSec=29.971290796780135, CurrSamplesPerSec=30.417884260053516, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:40:20,663] [INFO] [timer.py:197:stop] 0/2913, RunningAvgSamplesPerSec=29.97155145845482, CurrSamplesPerSec=30.749779229938905, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:40:22,835] [INFO] [timer.py:197:stop] 0/2914, RunningAvgSamplesPerSec=29.97149999111939, CurrSamplesPerSec=29.82242403396552, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:40:25,004] [INFO] [timer.py:197:stop] 0/2915, RunningAvgSamplesPerSec=29.971461566885647, CurrSamplesPerSec=29.85998650693758, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:40:27,084] [INFO] [timer.py:197:stop] 0/2916, RunningAvgSamplesPerSec=29.971854857959144, CurrSamplesPerSec=31.163060537394273, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:40:29,253] [INFO] [timer.py:197:stop] 0/2917, RunningAvgSamplesPerSec=29.97181662444371, CurrSamplesPerSec=29.86081691474161, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:40:31,414] [INFO] [timer.py:197:stop] 0/2918, RunningAvgSamplesPerSec=29.971817372069832, CurrSamplesPerSec=29.97399686073904, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:40:33,546] [INFO] [timer.py:197:stop] 0/2919, RunningAvgSamplesPerSec=29.971958704646052, CurrSamplesPerSec=30.38983238892761, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:40:35,456] [INFO] [logging.py:68:log_dist] [Rank 0] step=2920, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 21:40:35,456] [INFO] [timer.py:197:stop] 0/2920, RunningAvgSamplesPerSec=29.97315697146431, CurrSamplesPerSec=33.930101846380396, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:41:18,440] [INFO] [timer.py:197:stop] 0/2921, RunningAvgSamplesPerSec=29.973209118643528, CurrSamplesPerSec=30.126151297699987, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:41:25,211] [INFO] [timer.py:197:stop] 0/2922, RunningAvgSamplesPerSec=29.9731869581533, CurrSamplesPerSec=29.90863983690134, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:41:32,000] [INFO] [timer.py:197:stop] 0/2923, RunningAvgSamplesPerSec=29.97328813499417, CurrSamplesPerSec=30.271666530589968, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:41:38,839] [INFO] [timer.py:197:stop] 0/2924, RunningAvgSamplesPerSec=29.972858681537076, CurrSamplesPerSec=28.768833621823187, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:41:45,522] [INFO] [timer.py:197:stop] 0/2925, RunningAvgSamplesPerSec=29.97274050119368, CurrSamplesPerSec=29.631352099585282, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0002, 'learning_rate': 1e-05, 'epoch': 73.0} -[2022-12-14 21:41:52,177] [INFO] [timer.py:197:stop] 0/2926, RunningAvgSamplesPerSec=29.972564446487546, CurrSamplesPerSec=29.46664578075035, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:41:59,008] [INFO] [timer.py:197:stop] 0/2927, RunningAvgSamplesPerSec=29.97277013931907, CurrSamplesPerSec=30.58653627250417, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:42:05,531] [INFO] [timer.py:197:stop] 0/2928, RunningAvgSamplesPerSec=29.97295741894137, CurrSamplesPerSec=30.530951839349246, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:42:12,412] [INFO] [timer.py:197:stop] 0/2929, RunningAvgSamplesPerSec=29.972987155922315, CurrSamplesPerSec=30.060250972181816, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:42:18,970] [INFO] [logging.py:68:log_dist] [Rank 0] step=2930, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 21:42:18,971] [INFO] [timer.py:197:stop] 0/2930, RunningAvgSamplesPerSec=29.97303321084147, CurrSamplesPerSec=30.108445178237744, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:42:26,142] [INFO] [timer.py:197:stop] 0/2931, RunningAvgSamplesPerSec=29.97292642636666, CurrSamplesPerSec=29.663490491863787, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:42:32,973] [INFO] [timer.py:197:stop] 0/2932, RunningAvgSamplesPerSec=29.972814650963734, CurrSamplesPerSec=29.64896310263029, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:42:39,328] [INFO] [timer.py:197:stop] 0/2933, RunningAvgSamplesPerSec=29.972850142603775, CurrSamplesPerSec=30.077202822088047, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:42:45,894] [INFO] [timer.py:197:stop] 0/2934, RunningAvgSamplesPerSec=29.972841048348098, CurrSamplesPerSec=29.94620947685728, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:42:53,438] [INFO] [timer.py:197:stop] 0/2935, RunningAvgSamplesPerSec=29.972822639087447, CurrSamplesPerSec=29.918943746663263, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:42:59,921] [INFO] [timer.py:197:stop] 0/2936, RunningAvgSamplesPerSec=29.972677237712244, CurrSamplesPerSec=29.552199751767887, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:43:06,870] [INFO] [timer.py:197:stop] 0/2937, RunningAvgSamplesPerSec=29.973053852859568, CurrSamplesPerSec=31.12035354729808, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:43:13,956] [INFO] [timer.py:197:stop] 0/2938, RunningAvgSamplesPerSec=29.9729954443175, CurrSamplesPerSec=29.802541607856796, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:43:20,568] [INFO] [timer.py:197:stop] 0/2939, RunningAvgSamplesPerSec=29.97304528319972, CurrSamplesPerSec=30.120090352824164, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:43:27,256] [INFO] [logging.py:68:log_dist] [Rank 0] step=2940, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 21:43:27,257] [INFO] [timer.py:197:stop] 0/2940, RunningAvgSamplesPerSec=29.97317020699012, CurrSamplesPerSec=30.34461983744712, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:43:33,891] [INFO] [timer.py:197:stop] 0/2941, RunningAvgSamplesPerSec=29.973135707651036, CurrSamplesPerSec=29.872118372331478, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:43:41,120] [INFO] [timer.py:197:stop] 0/2942, RunningAvgSamplesPerSec=29.97308080985915, CurrSamplesPerSec=29.812600357126435, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:43:48,361] [INFO] [timer.py:197:stop] 0/2943, RunningAvgSamplesPerSec=29.973305599726395, CurrSamplesPerSec=30.64909333939536, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:43:55,395] [INFO] [timer.py:197:stop] 0/2944, RunningAvgSamplesPerSec=29.973397334945663, CurrSamplesPerSec=30.245641946561523, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:44:01,867] [INFO] [timer.py:197:stop] 0/2945, RunningAvgSamplesPerSec=29.97350395487341, CurrSamplesPerSec=30.290498288485658, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:44:08,601] [INFO] [timer.py:197:stop] 0/2946, RunningAvgSamplesPerSec=29.97327775291872, CurrSamplesPerSec=29.32203455370363, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:44:15,088] [INFO] [timer.py:197:stop] 0/2947, RunningAvgSamplesPerSec=29.973498988076155, CurrSamplesPerSec=30.639287618797162, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:44:21,988] [INFO] [timer.py:197:stop] 0/2948, RunningAvgSamplesPerSec=29.97366814052425, CurrSamplesPerSec=30.48024410951136, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:44:28,934] [INFO] [timer.py:197:stop] 0/2949, RunningAvgSamplesPerSec=29.97358323115238, CurrSamplesPerSec=29.72551119686519, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:44:35,374] [INFO] [logging.py:68:log_dist] [Rank 0] step=2950, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 21:44:35,374] [INFO] [timer.py:197:stop] 0/2950, RunningAvgSamplesPerSec=29.97352210067205, CurrSamplesPerSec=29.79444724393368, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0002, 'learning_rate': 1e-05, 'epoch': 73.01} -[2022-12-14 21:44:41,928] [INFO] [timer.py:197:stop] 0/2951, RunningAvgSamplesPerSec=29.973227861300003, CurrSamplesPerSec=29.130215046785583, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:44:48,173] [INFO] [timer.py:197:stop] 0/2952, RunningAvgSamplesPerSec=29.973109011112996, CurrSamplesPerSec=29.626672226268433, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:44:50,877] [INFO] [timer.py:197:stop] 0/2953, RunningAvgSamplesPerSec=29.97325444110032, CurrSamplesPerSec=30.408504919319356, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:44:53,040] [INFO] [timer.py:197:stop] 0/2954, RunningAvgSamplesPerSec=29.97337811603285, CurrSamplesPerSec=30.34284308119131, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:44:55,171] [INFO] [timer.py:197:stop] 0/2955, RunningAvgSamplesPerSec=29.973519579208574, CurrSamplesPerSec=30.397021213616295, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:44:57,287] [INFO] [timer.py:197:stop] 0/2956, RunningAvgSamplesPerSec=29.973732164976532, CurrSamplesPerSec=30.614931680923235, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:44:59,481] [INFO] [timer.py:197:stop] 0/2957, RunningAvgSamplesPerSec=29.973576128207462, CurrSamplesPerSec=29.51962669422641, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:45:01,615] [INFO] [timer.py:197:stop] 0/2958, RunningAvgSamplesPerSec=29.973704554466387, CurrSamplesPerSec=30.358072308941026, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:45:03,754] [INFO] [timer.py:197:stop] 0/2959, RunningAvgSamplesPerSec=29.97381185253629, CurrSamplesPerSec=30.29437822343103, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:45:05,637] [INFO] [logging.py:68:log_dist] [Rank 0] step=2960, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 21:45:05,637] [INFO] [timer.py:197:stop] 0/2960, RunningAvgSamplesPerSec=29.975141319754105, CurrSamplesPerSec=34.500012659472404, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:45:42,793] [INFO] [timer.py:197:stop] 0/2961, RunningAvgSamplesPerSec=29.97502070434814, CurrSamplesPerSec=29.62243839479067, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:45:48,868] [INFO] [timer.py:197:stop] 0/2962, RunningAvgSamplesPerSec=29.975237471015248, CurrSamplesPerSec=30.630679991154334, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:45:54,971] [INFO] [timer.py:197:stop] 0/2963, RunningAvgSamplesPerSec=29.97496154858027, CurrSamplesPerSec=29.17990152975321, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:46:00,738] [INFO] [timer.py:197:stop] 0/2964, RunningAvgSamplesPerSec=29.97485774244099, CurrSamplesPerSec=29.670608666740943, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:46:06,642] [INFO] [timer.py:197:stop] 0/2965, RunningAvgSamplesPerSec=29.97468363977312, CurrSamplesPerSec=29.467716474580975, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:46:12,866] [INFO] [timer.py:197:stop] 0/2966, RunningAvgSamplesPerSec=29.9746762339204, CurrSamplesPerSec=29.952748750162353, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:46:19,033] [INFO] [timer.py:197:stop] 0/2967, RunningAvgSamplesPerSec=29.974657202316276, CurrSamplesPerSec=29.918353522025146, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:46:24,875] [INFO] [timer.py:197:stop] 0/2968, RunningAvgSamplesPerSec=29.974563399997084, CurrSamplesPerSec=29.698997271239502, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:46:31,555] [INFO] [timer.py:197:stop] 0/2969, RunningAvgSamplesPerSec=29.974457093396094, CurrSamplesPerSec=29.662435023924058, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:46:37,832] [INFO] [logging.py:68:log_dist] [Rank 0] step=2970, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 21:46:37,832] [INFO] [timer.py:197:stop] 0/2970, RunningAvgSamplesPerSec=29.97419030406398, CurrSamplesPerSec=29.202998982490413, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:46:44,367] [INFO] [timer.py:197:stop] 0/2971, RunningAvgSamplesPerSec=29.97435032622201, CurrSamplesPerSec=30.456945422099746, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:46:50,688] [INFO] [timer.py:197:stop] 0/2972, RunningAvgSamplesPerSec=29.97430248561614, CurrSamplesPerSec=29.832933853762583, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:46:56,800] [INFO] [timer.py:197:stop] 0/2973, RunningAvgSamplesPerSec=29.973716747951016, CurrSamplesPerSec=28.329534504013154, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:47:03,330] [INFO] [timer.py:197:stop] 0/2974, RunningAvgSamplesPerSec=29.973618000062668, CurrSamplesPerSec=29.683082722813214, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:47:09,242] [INFO] [timer.py:197:stop] 0/2975, RunningAvgSamplesPerSec=29.973513714162245, CurrSamplesPerSec=29.666749149376333, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0002, 'learning_rate': 1e-05, 'epoch': 74.0} -[2022-12-14 21:47:15,334] [INFO] [timer.py:197:stop] 0/2976, RunningAvgSamplesPerSec=29.973525523052107, CurrSamplesPerSec=30.008674536307602, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:47:21,248] [INFO] [timer.py:197:stop] 0/2977, RunningAvgSamplesPerSec=29.973355096785575, CurrSamplesPerSec=29.474938411664386, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:47:27,188] [INFO] [timer.py:197:stop] 0/2978, RunningAvgSamplesPerSec=29.97337154715584, CurrSamplesPerSec=30.02239146420912, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:47:33,033] [INFO] [timer.py:197:stop] 0/2979, RunningAvgSamplesPerSec=29.97355808821133, CurrSamplesPerSec=30.539183855961465, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:47:38,843] [INFO] [logging.py:68:log_dist] [Rank 0] step=2980, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 21:47:38,844] [INFO] [timer.py:197:stop] 0/2980, RunningAvgSamplesPerSec=29.97358790209652, CurrSamplesPerSec=30.06260752626155, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:47:44,859] [INFO] [timer.py:197:stop] 0/2981, RunningAvgSamplesPerSec=29.973713493797263, CurrSamplesPerSec=30.35245307920243, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:47:50,677] [INFO] [timer.py:197:stop] 0/2982, RunningAvgSamplesPerSec=29.97365339218705, CurrSamplesPerSec=29.795674181828836, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:47:57,126] [INFO] [timer.py:197:stop] 0/2983, RunningAvgSamplesPerSec=29.973997068449894, CurrSamplesPerSec=31.034396344529252, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:48:03,411] [INFO] [timer.py:197:stop] 0/2984, RunningAvgSamplesPerSec=29.973822706905118, CurrSamplesPerSec=29.462913517026358, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:48:09,237] [INFO] [timer.py:197:stop] 0/2985, RunningAvgSamplesPerSec=29.97404878289863, CurrSamplesPerSec=30.66372437364136, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:48:14,982] [INFO] [timer.py:197:stop] 0/2986, RunningAvgSamplesPerSec=29.974191927374576, CurrSamplesPerSec=30.407364769383694, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:48:21,817] [INFO] [timer.py:197:stop] 0/2987, RunningAvgSamplesPerSec=29.97419610828066, CurrSamplesPerSec=29.986677128594128, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:48:28,213] [INFO] [timer.py:197:stop] 0/2988, RunningAvgSamplesPerSec=29.974035364182853, CurrSamplesPerSec=29.501776631810298, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:48:34,572] [INFO] [timer.py:197:stop] 0/2989, RunningAvgSamplesPerSec=29.973836635950452, CurrSamplesPerSec=29.391957653141, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:48:40,394] [INFO] [logging.py:68:log_dist] [Rank 0] step=2990, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 21:48:40,395] [INFO] [timer.py:197:stop] 0/2990, RunningAvgSamplesPerSec=29.973684671140376, CurrSamplesPerSec=29.52653957705629, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:48:46,274] [INFO] [timer.py:197:stop] 0/2991, RunningAvgSamplesPerSec=29.973753616928413, CurrSamplesPerSec=30.181189820568193, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:48:52,328] [INFO] [timer.py:197:stop] 0/2992, RunningAvgSamplesPerSec=29.97343576767414, CurrSamplesPerSec=29.05258190739425, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:48:54,794] [INFO] [timer.py:197:stop] 0/2993, RunningAvgSamplesPerSec=29.97303952439231, CurrSamplesPerSec=28.83333707487052, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:48:56,981] [INFO] [timer.py:197:stop] 0/2994, RunningAvgSamplesPerSec=29.972918688897945, CurrSamplesPerSec=29.615807279958737, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:48:59,169] [INFO] [timer.py:197:stop] 0/2995, RunningAvgSamplesPerSec=29.972792373343946, CurrSamplesPerSec=29.59956396029992, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:49:01,411] [INFO] [timer.py:197:stop] 0/2996, RunningAvgSamplesPerSec=29.972535627630474, CurrSamplesPerSec=29.223310851193958, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:49:03,593] [INFO] [timer.py:197:stop] 0/2997, RunningAvgSamplesPerSec=29.972602749807223, CurrSamplesPerSec=30.17492354410049, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:49:05,775] [INFO] [timer.py:197:stop] 0/2998, RunningAvgSamplesPerSec=29.972512409279336, CurrSamplesPerSec=29.704363983988763, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:49:07,959] [INFO] [timer.py:197:stop] 0/2999, RunningAvgSamplesPerSec=29.972415846194714, CurrSamplesPerSec=29.685879504961555, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -[2022-12-14 21:49:09,880] [INFO] [logging.py:68:log_dist] [Rank 0] step=3000, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] -[2022-12-14 21:49:09,881] [INFO] [timer.py:197:stop] 0/3000, RunningAvgSamplesPerSec=29.97352191818894, CurrSamplesPerSec=33.70076916490631, MemAllocated=0.53GB, MaxMemAllocated=17.47GB -{'loss': 0.0002, 'learning_rate': 1e-05, 'epoch': 74.01} -{'eval_loss': 0.59375, 'eval_wer': 24.817682037473354, 'eval_runtime': 205.8889, 'eval_samples_per_second': 3.235, 'eval_steps_per_second': 0.102, 'epoch': 74.01} -[2022-12-14 21:52:36,354] [INFO] [logging.py:68:log_dist] [Rank 0] [Torch] Checkpoint global_step3000 is begin to save! -[2022-12-14 21:52:36,359] [INFO] [logging.py:68:log_dist] [Rank 0] Saving model checkpoint: ./checkpoint-3000/global_step3000/mp_rank_00_model_states.pt -[2022-12-14 21:52:36,359] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving ./checkpoint-3000/global_step3000/mp_rank_00_model_states.pt... -[2022-12-14 21:52:36,956] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved ./checkpoint-3000/global_step3000/mp_rank_00_model_states.pt. -[2022-12-14 21:52:36,957] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving ./checkpoint-3000/global_step3000/zero_pp_rank_0_mp_rank_00_optim_states.pt... -[2022-12-14 21:52:39,199] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved ./checkpoint-3000/global_step3000/zero_pp_rank_0_mp_rank_00_optim_states.pt. -[2022-12-14 21:52:39,200] [INFO] [engine.py:3269:_save_zero_checkpoint] zero checkpoint saved ./checkpoint-3000/global_step3000/zero_pp_rank_0_mp_rank_00_optim_states.pt -[2022-12-14 21:52:39,200] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +12/14/2022 23:13:59 - INFO - datasets.info - Loading Dataset Infos from /home/milan/.cache/huggingface/modules/datasets_modules/datasets/facebook--voxpopuli/b5ff837284f0778eefe0f642734e142d8c3f574eba8c9c8a4b13602297f73604 +12/14/2022 23:14:01 - INFO - datasets.info - Loading Dataset Infos from /home/milan/.cache/huggingface/modules/datasets_modules/datasets/facebook--voxpopuli/b5ff837284f0778eefe0f642734e142d8c3f574eba8c9c8a4b13602297f73604 +12/14/2022 23:14:02 - INFO - datasets.info - Loading Dataset Infos from /home/milan/.cache/huggingface/modules/datasets_modules/datasets/facebook--voxpopuli/b5ff837284f0778eefe0f642734e142d8c3f574eba8c9c8a4b13602297f73604 +12/14/2022 23:14:09 - WARNING - huggingface_hub.repository - /home/milan/whisper-small-hr-vox/./ is already a clone of https://huggingface.co/mikr/whisper-small-hr-vox. Make sure you pull the latest changes with `repo.git_pull()`.