diff --git "a/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/train.6.log" "b/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/train.6.log"
new file mode 100644--- /dev/null
+++ "b/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/train.6.log"
@@ -0,0 +1,3674 @@
+# Running on gpua006.delta.ncsa.illinois.edu
+# Started at Sat Feb 10 14:14:12 CST 2024
+# SLURMD_NODENAME=gpua006
+# SLURM_CLUSTER_NAME=delta
+# SLURM_CONF=/var/spool/slurmd/conf-cache/slurm.conf
+# SLURM_CPUS_ON_NODE=64
+# SLURM_CPUS_PER_TASK=64
+# SLURM_EXPORT_ENV=PATH
+# SLURM_GET_USER_ENV=1
+# SLURM_GPUS_ON_NODE=4
+# SLURM_GTIDS=0
+# SLURM_JOBID=2969984
+# SLURM_JOB_ACCOUNT=bbjs-delta-gpu
+# SLURM_JOB_CPUS_PER_NODE='64(x16)'
+# SLURM_JOB_END_TIME=1707754434
+# SLURM_JOB_GID=202
+# SLURM_JOB_GPUS=0,1,2,3
+# SLURM_JOB_ID=2969984
+# SLURM_JOB_NAME=exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/train.log
+# SLURM_JOB_NODELIST='gpua[006,012,016,033,038-040,042,049,054-055,057,079-080,085,089]'
+# SLURM_JOB_NUM_NODES=16
+# SLURM_JOB_PARTITION=gpuA100x4
+# SLURM_JOB_QOS=bbjs-delta-gpu
+# SLURM_JOB_RESERVATION=bbjs
+# SLURM_JOB_START_TIME=1707596034
+# SLURM_JOB_UID=68077
+# SLURM_JOB_USER=peng6
+# SLURM_LOCALID=0
+# SLURM_MEM_PER_NODE=240000
+# SLURM_MPI_TYPE=pmi2
+# SLURM_NNODES=16
+# SLURM_NODEID=0
+# SLURM_NODELIST='gpua[006,012,016,033,038-040,042,049,054-055,057,079-080,085,089]'
+# SLURM_NODE_ALIASES='(null)'
+# SLURM_OPEN_MODE=a
+# SLURM_PRIO_PROCESS=0
+# SLURM_PROCID=0
+# SLURM_SUBMIT_DIR=/scratch/bbjs/peng6/espnet-owsm-ctc-2/egs2/owsm_v3.1_ctc/s2t1
+# SLURM_SUBMIT_HOST=dt-login03.delta.ncsa.illinois.edu
+# SLURM_TASKS_PER_NODE='1(x16)'
+# SLURM_TASK_PID=662147
+# SLURM_TOPOLOGY_ADDR=ss00.ss05.gpua006
+# SLURM_TOPOLOGY_ADDR_PATTERN=switch.switch.node
+# SLURM_WORKING_CLUSTER=delta:dt-sched:6817:9984:109
+# srun --export=ALL python3 -m espnet2.bin.s2t_train --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-owsm-ctc-2/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_58be6548-87c6-4ccd-8679-40dd3c47f8b4 
+GpuFreq=control_disabled
+GpuFreq=control_disabled
+GpuFreq=control_disabled
+GpuFreq=control_disabled
+GpuFreq=control_disabled
+GpuFreq=control_disabled
+GpuFreq=control_disabled
+GpuFreq=control_disabled
+GpuFreq=control_disabled
+GpuFreq=control_disabled
+GpuFreq=control_disabled
+GpuFreq=control_disabled
+GpuFreq=control_disabled
+GpuFreq=control_disabled
+GpuFreq=control_disabled
+GpuFreq=control_disabled
+/scratch/bbjs/peng6/espnet-owsm-ctc-2/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-ctc-2/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-owsm-ctc-2/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_58be6548-87c6-4ccd-8679-40dd3c47f8b4
+/scratch/bbjs/peng6/espnet-owsm-ctc-2/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-ctc-2/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-owsm-ctc-2/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_58be6548-87c6-4ccd-8679-40dd3c47f8b4
+/scratch/bbjs/peng6/espnet-owsm-ctc-2/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-ctc-2/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_st/scratch/bbjs/peng6/espnet-owsm-ctc-2/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-ctc-2/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-owsm-ctc-2/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_58be6548-87c6-4ccd-8679-40dd3c47f8b4
+ats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-owsm-ctc-2/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_58be6548-87c6-4ccd-8679-40dd3c47f8b4
+/scratch/bbjs/peng6/espnet-owsm-ctc-2/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-ctc-2/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-owsm-ctc-2/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_58be6548-87c6-4ccd-8679-40dd3c47f8b4
+/scratch/bbjs/peng6/espnet-owsm-ctc-2/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-ctc-2/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-owsm-ctc-2/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_58be6548-87c6-4ccd-8679-40dd3c47f8b4
+/scratch/bbjs/peng6/espnet-owsm-ctc-2/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-ctc-2/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_st/scratch/bbjs/peng6/espnet-owsm-ctc-2/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-ctc-2/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_st/scratch/bbjs/peng6/espnet-owsm-ctc-2/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-ctc-2/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_st/scratch/bbjs/peng6/espnet-owsm-ctc-2/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-ctc-2/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_st/scratch/bbjs/peng6/espnet-owsm-ctc-2/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-ctc-2/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_st/scratch/bbjs/peng6/espnet-owsm-ctc-2/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-ctc-2/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_st/scratch/bbjs/peng6/espnet-owsm-ctc-2/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-ctc-2/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_st/scratch/bbjs/peng6/espnet-owsm-ctc-2/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-ctc-2/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_st/scratch/bbjs/peng6/espnet-owsm-ctc-2/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-ctc-2/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_st/scratch/bbjs/peng6/espnet-owsm-ctc-2/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-ctc-2/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_methats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-owsm-ctc-2/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_58be6548-87c6-4ccd-8679-40dd3c47f8b4
+ats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_methats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-owsm-ctc-2/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_58be6548-87c6-4ccd-8679-40dd3c47f8b4
+od file:///scratch/bbjs/peng6/espnet-owsm-ctc-2/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_58be6548-87c6-4ccd-8679-40dd3c47f8b4
+od file:///scratch/bbjs/peng6/espnet-owsm-ctc-2/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_58be6548-87c6-4ccd-8679-40dd3c47f8b4
+ats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-owsm-ctc-2/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_58be6548-87c6-4ccd-8679-40dd3c47f8b4
+ats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_methats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_methats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-owsm-ctc-2/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_58be6548-87c6-4ccd-8679-40dd3c47f8b4
+od file:///scratch/bbjs/peng6/espnet-owsm-ctc-2/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_58be6548-87c6-4ccd-8679-40dd3c47f8b4
+od file:///scratch/bbjs/peng6/espnet-owsm-ctc-2/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_58be6548-87c6-4ccd-8679-40dd3c47f8b4
+ats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_methats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-owsm-ctc-2/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_58be6548-87c6-4ccd-8679-40dd3c47f8b4
+od file:///scratch/bbjs/peng6/espnet-owsm-ctc-2/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_58be6548-87c6-4ccd-8679-40dd3c47f8b4
+[gpua006:0/64] 2024-02-10 14:16:46,442 (distributed_c10d:319) INFO: Added key: store_based_barrier_key:1 to store for rank: 0
+[gpua006:0/64] 2024-02-10 14:17:02,582 (distributed_c10d:353) INFO: Rank 0: Completed store-based barrier for key:store_based_barrier_key:1 with 64 nodes.
+[gpua006:0/64] 2024-02-10 14:17:02,646 (s2t:420) INFO: Vocabulary size: 50002
+[gpua006:0/64] 2024-02-10 14:17:15,329 (abs_task:1270) INFO: pytorch.version=1.13.1, cuda.available=True, cudnn.version=8500, cudnn.benchmark=False, cudnn.deterministic=True
+[gpua006:0/64] 2024-02-10 14:17:15,348 (abs_task:1271) INFO: Model structure:
+ESPnetS2TCTCModel(
+  (frontend): DefaultFrontend(
+    (stft): Stft(n_fft=512, win_length=400, hop_length=160, center=True, normalized=False, onesided=True)
+    (frontend): Frontend()
+    (logmel): LogMel(sr=16000, n_fft=512, n_mels=80, fmin=0, fmax=8000.0, htk=False)
+  )
+  (specaug): SpecAug(
+    (freq_mask): MaskAlongAxis(mask_width_range=[0, 27], num_mask=2, axis=freq)
+    (time_mask): MaskAlongAxisVariableMaxWidth(mask_width_ratio_range=[0.0, 0.05], num_mask=10, axis=time)
+  )
+  (normalize): GlobalMVN(stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz, norm_means=True, norm_vars=True)
+  (encoder): EBranchformerCTCEncoder(
+    (embed): Conv2dSubsampling8(
+      (conv): Sequential(
+        (0): Conv2d(1, 1024, kernel_size=(3, 3), stride=(2, 2))
+        (1): ReLU()
+        (2): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(2, 2))
+        (3): ReLU()
+        (4): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(2, 2))
+        (5): ReLU()
+      )
+      (out): Linear(in_features=9216, out_features=1024, bias=True)
+      (pos_enc): PositionalEncoding(
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+    )
+    (encoders): MultiSequential(
+      (0): EBranchformerEncoderLayer(
+        (attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+        )
+        (cgmlp): ConvolutionalGatingMLP(
+          (channel_proj1): Sequential(
+            (0): Linear(in_features=1024, out_features=4096, bias=True)
+            (1): GELU(approximate='none')
+          )
+          (csgu): ConvolutionalSpatialGatingUnit(
+            (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True)
+            (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+            (act): Identity()
+            (dropout): Dropout(p=0.1, inplace=False)
+          )
+          (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (feed_forward_macaron): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+        (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+        (merge_proj): Linear(in_features=2048, out_features=1024, bias=True)
+      )
+      (1): EBranchformerEncoderLayer(
+        (attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+        )
+        (cgmlp): ConvolutionalGatingMLP(
+          (channel_proj1): Sequential(
+            (0): Linear(in_features=1024, out_features=4096, bias=True)
+            (1): GELU(approximate='none')
+          )
+          (csgu): ConvolutionalSpatialGatingUnit(
+            (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True)
+            (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+            (act): Identity()
+            (dropout): Dropout(p=0.1, inplace=False)
+          )
+          (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (feed_forward_macaron): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+        (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+        (merge_proj): Linear(in_features=2048, out_features=1024, bias=True)
+      )
+      (2): EBranchformerEncoderLayer(
+        (attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+        )
+        (cgmlp): ConvolutionalGatingMLP(
+          (channel_proj1): Sequential(
+            (0): Linear(in_features=1024, out_features=4096, bias=True)
+            (1): GELU(approximate='none')
+          )
+          (csgu): ConvolutionalSpatialGatingUnit(
+            (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True)
+            (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+            (act): Identity()
+            (dropout): Dropout(p=0.1, inplace=False)
+          )
+          (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (feed_forward_macaron): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (cross_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+        )
+        (norm_cross_attn): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+        (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+        (merge_proj): Linear(in_features=2048, out_features=1024, bias=True)
+      )
+      (3): EBranchformerEncoderLayer(
+        (attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+        )
+        (cgmlp): ConvolutionalGatingMLP(
+          (channel_proj1): Sequential(
+            (0): Linear(in_features=1024, out_features=4096, bias=True)
+            (1): GELU(approximate='none')
+          )
+          (csgu): ConvolutionalSpatialGatingUnit(
+            (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True)
+            (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+            (act): Identity()
+            (dropout): Dropout(p=0.1, inplace=False)
+          )
+          (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (feed_forward_macaron): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+        (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+        (merge_proj): Linear(in_features=2048, out_features=1024, bias=True)
+      )
+      (4): EBranchformerEncoderLayer(
+        (attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+        )
+        (cgmlp): ConvolutionalGatingMLP(
+          (channel_proj1): Sequential(
+            (0): Linear(in_features=1024, out_features=4096, bias=True)
+            (1): GELU(approximate='none')
+          )
+          (csgu): ConvolutionalSpatialGatingUnit(
+            (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True)
+            (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+            (act): Identity()
+            (dropout): Dropout(p=0.1, inplace=False)
+          )
+          (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (feed_forward_macaron): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+        (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+        (merge_proj): Linear(in_features=2048, out_features=1024, bias=True)
+      )
+      (5): EBranchformerEncoderLayer(
+        (attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+        )
+        (cgmlp): ConvolutionalGatingMLP(
+          (channel_proj1): Sequential(
+            (0): Linear(in_features=1024, out_features=4096, bias=True)
+            (1): GELU(approximate='none')
+          )
+          (csgu): ConvolutionalSpatialGatingUnit(
+            (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True)
+            (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+            (act): Identity()
+            (dropout): Dropout(p=0.1, inplace=False)
+          )
+          (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (feed_forward_macaron): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (cross_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+        )
+        (norm_cross_attn): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+        (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+        (merge_proj): Linear(in_features=2048, out_features=1024, bias=True)
+      )
+      (6): EBranchformerEncoderLayer(
+        (attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+        )
+        (cgmlp): ConvolutionalGatingMLP(
+          (channel_proj1): Sequential(
+            (0): Linear(in_features=1024, out_features=4096, bias=True)
+            (1): GELU(approximate='none')
+          )
+          (csgu): ConvolutionalSpatialGatingUnit(
+            (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True)
+            (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+            (act): Identity()
+            (dropout): Dropout(p=0.1, inplace=False)
+          )
+          (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (feed_forward_macaron): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+        (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+        (merge_proj): Linear(in_features=2048, out_features=1024, bias=True)
+      )
+      (7): EBranchformerEncoderLayer(
+        (attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+        )
+        (cgmlp): ConvolutionalGatingMLP(
+          (channel_proj1): Sequential(
+            (0): Linear(in_features=1024, out_features=4096, bias=True)
+            (1): GELU(approximate='none')
+          )
+          (csgu): ConvolutionalSpatialGatingUnit(
+            (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True)
+            (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+            (act): Identity()
+            (dropout): Dropout(p=0.1, inplace=False)
+          )
+          (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (feed_forward_macaron): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+        (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+        (merge_proj): Linear(in_features=2048, out_features=1024, bias=True)
+      )
+      (8): EBranchformerEncoderLayer(
+        (attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+        )
+        (cgmlp): ConvolutionalGatingMLP(
+          (channel_proj1): Sequential(
+            (0): Linear(in_features=1024, out_features=4096, bias=True)
+            (1): GELU(approximate='none')
+          )
+          (csgu): ConvolutionalSpatialGatingUnit(
+            (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True)
+            (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+            (act): Identity()
+            (dropout): Dropout(p=0.1, inplace=False)
+          )
+          (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (feed_forward_macaron): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (cross_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+        )
+        (norm_cross_attn): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+        (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+        (merge_proj): Linear(in_features=2048, out_features=1024, bias=True)
+      )
+      (9): EBranchformerEncoderLayer(
+        (attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+        )
+        (cgmlp): ConvolutionalGatingMLP(
+          (channel_proj1): Sequential(
+            (0): Linear(in_features=1024, out_features=4096, bias=True)
+            (1): GELU(approximate='none')
+          )
+          (csgu): ConvolutionalSpatialGatingUnit(
+            (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True)
+            (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+            (act): Identity()
+            (dropout): Dropout(p=0.1, inplace=False)
+          )
+          (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (feed_forward_macaron): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+        (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+        (merge_proj): Linear(in_features=2048, out_features=1024, bias=True)
+      )
+      (10): EBranchformerEncoderLayer(
+        (attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+        )
+        (cgmlp): ConvolutionalGatingMLP(
+          (channel_proj1): Sequential(
+            (0): Linear(in_features=1024, out_features=4096, bias=True)
+            (1): GELU(approximate='none')
+          )
+          (csgu): ConvolutionalSpatialGatingUnit(
+            (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True)
+            (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+            (act): Identity()
+            (dropout): Dropout(p=0.1, inplace=False)
+          )
+          (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (feed_forward_macaron): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+        (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+        (merge_proj): Linear(in_features=2048, out_features=1024, bias=True)
+      )
+      (11): EBranchformerEncoderLayer(
+        (attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+        )
+        (cgmlp): ConvolutionalGatingMLP(
+          (channel_proj1): Sequential(
+            (0): Linear(in_features=1024, out_features=4096, bias=True)
+            (1): GELU(approximate='none')
+          )
+          (csgu): ConvolutionalSpatialGatingUnit(
+            (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True)
+            (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+            (act): Identity()
+            (dropout): Dropout(p=0.1, inplace=False)
+          )
+          (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (feed_forward_macaron): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (cross_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+        )
+        (norm_cross_attn): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+        (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+        (merge_proj): Linear(in_features=2048, out_features=1024, bias=True)
+      )
+      (12): EBranchformerEncoderLayer(
+        (attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+        )
+        (cgmlp): ConvolutionalGatingMLP(
+          (channel_proj1): Sequential(
+            (0): Linear(in_features=1024, out_features=4096, bias=True)
+            (1): GELU(approximate='none')
+          )
+          (csgu): ConvolutionalSpatialGatingUnit(
+            (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True)
+            (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+            (act): Identity()
+            (dropout): Dropout(p=0.1, inplace=False)
+          )
+          (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (feed_forward_macaron): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+        (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+        (merge_proj): Linear(in_features=2048, out_features=1024, bias=True)
+      )
+      (13): EBranchformerEncoderLayer(
+        (attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+        )
+        (cgmlp): ConvolutionalGatingMLP(
+          (channel_proj1): Sequential(
+            (0): Linear(in_features=1024, out_features=4096, bias=True)
+            (1): GELU(approximate='none')
+          )
+          (csgu): ConvolutionalSpatialGatingUnit(
+            (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True)
+            (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+            (act): Identity()
+            (dropout): Dropout(p=0.1, inplace=False)
+          )
+          (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (feed_forward_macaron): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+        (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+        (merge_proj): Linear(in_features=2048, out_features=1024, bias=True)
+      )
+      (14): EBranchformerEncoderLayer(
+        (attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+        )
+        (cgmlp): ConvolutionalGatingMLP(
+          (channel_proj1): Sequential(
+            (0): Linear(in_features=1024, out_features=4096, bias=True)
+            (1): GELU(approximate='none')
+          )
+          (csgu): ConvolutionalSpatialGatingUnit(
+            (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True)
+            (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+            (act): Identity()
+            (dropout): Dropout(p=0.1, inplace=False)
+          )
+          (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (feed_forward_macaron): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (cross_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+        )
+        (norm_cross_attn): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+        (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+        (merge_proj): Linear(in_features=2048, out_features=1024, bias=True)
+      )
+      (15): EBranchformerEncoderLayer(
+        (attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+        )
+        (cgmlp): ConvolutionalGatingMLP(
+          (channel_proj1): Sequential(
+            (0): Linear(in_features=1024, out_features=4096, bias=True)
+            (1): GELU(approximate='none')
+          )
+          (csgu): ConvolutionalSpatialGatingUnit(
+            (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True)
+            (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+            (act): Identity()
+            (dropout): Dropout(p=0.1, inplace=False)
+          )
+          (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (feed_forward_macaron): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+        (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+        (merge_proj): Linear(in_features=2048, out_features=1024, bias=True)
+      )
+      (16): EBranchformerEncoderLayer(
+        (attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+        )
+        (cgmlp): ConvolutionalGatingMLP(
+          (channel_proj1): Sequential(
+            (0): Linear(in_features=1024, out_features=4096, bias=True)
+            (1): GELU(approximate='none')
+          )
+          (csgu): ConvolutionalSpatialGatingUnit(
+            (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True)
+            (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+            (act): Identity()
+            (dropout): Dropout(p=0.1, inplace=False)
+          )
+          (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (feed_forward_macaron): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+        (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+        (merge_proj): Linear(in_features=2048, out_features=1024, bias=True)
+      )
+      (17): EBranchformerEncoderLayer(
+        (attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+        )
+        (cgmlp): ConvolutionalGatingMLP(
+          (channel_proj1): Sequential(
+            (0): Linear(in_features=1024, out_features=4096, bias=True)
+            (1): GELU(approximate='none')
+          )
+          (csgu): ConvolutionalSpatialGatingUnit(
+            (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True)
+            (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+            (act): Identity()
+            (dropout): Dropout(p=0.1, inplace=False)
+          )
+          (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (feed_forward_macaron): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (cross_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+        )
+        (norm_cross_attn): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+        (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+        (merge_proj): Linear(in_features=2048, out_features=1024, bias=True)
+      )
+      (18): EBranchformerEncoderLayer(
+        (attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+        )
+        (cgmlp): ConvolutionalGatingMLP(
+          (channel_proj1): Sequential(
+            (0): Linear(in_features=1024, out_features=4096, bias=True)
+            (1): GELU(approximate='none')
+          )
+          (csgu): ConvolutionalSpatialGatingUnit(
+            (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True)
+            (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+            (act): Identity()
+            (dropout): Dropout(p=0.1, inplace=False)
+          )
+          (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (feed_forward_macaron): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+        (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+        (merge_proj): Linear(in_features=2048, out_features=1024, bias=True)
+      )
+      (19): EBranchformerEncoderLayer(
+        (attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+        )
+        (cgmlp): ConvolutionalGatingMLP(
+          (channel_proj1): Sequential(
+            (0): Linear(in_features=1024, out_features=4096, bias=True)
+            (1): GELU(approximate='none')
+          )
+          (csgu): ConvolutionalSpatialGatingUnit(
+            (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True)
+            (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+            (act): Identity()
+            (dropout): Dropout(p=0.1, inplace=False)
+          )
+          (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (feed_forward_macaron): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+        (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+        (merge_proj): Linear(in_features=2048, out_features=1024, bias=True)
+      )
+      (20): EBranchformerEncoderLayer(
+        (attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+        )
+        (cgmlp): ConvolutionalGatingMLP(
+          (channel_proj1): Sequential(
+            (0): Linear(in_features=1024, out_features=4096, bias=True)
+            (1): GELU(approximate='none')
+          )
+          (csgu): ConvolutionalSpatialGatingUnit(
+            (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True)
+            (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+            (act): Identity()
+            (dropout): Dropout(p=0.1, inplace=False)
+          )
+          (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (feed_forward_macaron): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (cross_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+        )
+        (norm_cross_attn): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+        (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+        (merge_proj): Linear(in_features=2048, out_features=1024, bias=True)
+      )
+      (21): EBranchformerEncoderLayer(
+        (attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+        )
+        (cgmlp): ConvolutionalGatingMLP(
+          (channel_proj1): Sequential(
+            (0): Linear(in_features=1024, out_features=4096, bias=True)
+            (1): GELU(approximate='none')
+          )
+          (csgu): ConvolutionalSpatialGatingUnit(
+            (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True)
+            (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+            (act): Identity()
+            (dropout): Dropout(p=0.1, inplace=False)
+          )
+          (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (feed_forward_macaron): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+        (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+        (merge_proj): Linear(in_features=2048, out_features=1024, bias=True)
+      )
+      (22): EBranchformerEncoderLayer(
+        (attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+        )
+        (cgmlp): ConvolutionalGatingMLP(
+          (channel_proj1): Sequential(
+            (0): Linear(in_features=1024, out_features=4096, bias=True)
+            (1): GELU(approximate='none')
+          )
+          (csgu): ConvolutionalSpatialGatingUnit(
+            (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True)
+            (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+            (act): Identity()
+            (dropout): Dropout(p=0.1, inplace=False)
+          )
+          (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (feed_forward_macaron): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+        (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+        (merge_proj): Linear(in_features=2048, out_features=1024, bias=True)
+      )
+      (23): EBranchformerEncoderLayer(
+        (attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+        )
+        (cgmlp): ConvolutionalGatingMLP(
+          (channel_proj1): Sequential(
+            (0): Linear(in_features=1024, out_features=4096, bias=True)
+            (1): GELU(approximate='none')
+          )
+          (csgu): ConvolutionalSpatialGatingUnit(
+            (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True)
+            (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+            (act): Identity()
+            (dropout): Dropout(p=0.1, inplace=False)
+          )
+          (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (feed_forward_macaron): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (cross_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+        )
+        (norm_cross_attn): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+        (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+        (merge_proj): Linear(in_features=2048, out_features=1024, bias=True)
+      )
+      (24): EBranchformerEncoderLayer(
+        (attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+        )
+        (cgmlp): ConvolutionalGatingMLP(
+          (channel_proj1): Sequential(
+            (0): Linear(in_features=1024, out_features=4096, bias=True)
+            (1): GELU(approximate='none')
+          )
+          (csgu): ConvolutionalSpatialGatingUnit(
+            (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True)
+            (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+            (act): Identity()
+            (dropout): Dropout(p=0.1, inplace=False)
+          )
+          (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (feed_forward_macaron): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+        (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+        (merge_proj): Linear(in_features=2048, out_features=1024, bias=True)
+      )
+      (25): EBranchformerEncoderLayer(
+        (attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+        )
+        (cgmlp): ConvolutionalGatingMLP(
+          (channel_proj1): Sequential(
+            (0): Linear(in_features=1024, out_features=4096, bias=True)
+            (1): GELU(approximate='none')
+          )
+          (csgu): ConvolutionalSpatialGatingUnit(
+            (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True)
+            (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+            (act): Identity()
+            (dropout): Dropout(p=0.1, inplace=False)
+          )
+          (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (feed_forward_macaron): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+        (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+        (merge_proj): Linear(in_features=2048, out_features=1024, bias=True)
+      )
+      (26): EBranchformerEncoderLayer(
+        (attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+        )
+        (cgmlp): ConvolutionalGatingMLP(
+          (channel_proj1): Sequential(
+            (0): Linear(in_features=1024, out_features=4096, bias=True)
+            (1): GELU(approximate='none')
+          )
+          (csgu): ConvolutionalSpatialGatingUnit(
+            (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True)
+            (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+            (act): Identity()
+            (dropout): Dropout(p=0.1, inplace=False)
+          )
+          (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (feed_forward_macaron): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (cross_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+        )
+        (norm_cross_attn): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+        (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+        (merge_proj): Linear(in_features=2048, out_features=1024, bias=True)
+      )
+    )
+    (after_norm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+    (conditioning_layer): Linear(in_features=50002, out_features=1024, bias=True)
+  )
+  (prompt_encoder): TransformerEncoder(
+    (encoders): MultiSequential(
+      (0): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=512, out_features=512, bias=True)
+          (linear_k): Linear(in_features=512, out_features=512, bias=True)
+          (linear_v): Linear(in_features=512, out_features=512, bias=True)
+          (linear_out): Linear(in_features=512, out_features=512, bias=True)
+          (dropout): Identity()
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=512, out_features=2048, bias=True)
+          (w_2): Linear(in_features=2048, out_features=512, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (1): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=512, out_features=512, bias=True)
+          (linear_k): Linear(in_features=512, out_features=512, bias=True)
+          (linear_v): Linear(in_features=512, out_features=512, bias=True)
+          (linear_out): Linear(in_features=512, out_features=512, bias=True)
+          (dropout): Identity()
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=512, out_features=2048, bias=True)
+          (w_2): Linear(in_features=2048, out_features=512, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (2): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=512, out_features=512, bias=True)
+          (linear_k): Linear(in_features=512, out_features=512, bias=True)
+          (linear_v): Linear(in_features=512, out_features=512, bias=True)
+          (linear_out): Linear(in_features=512, out_features=512, bias=True)
+          (dropout): Identity()
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=512, out_features=2048, bias=True)
+          (w_2): Linear(in_features=2048, out_features=512, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (3): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=512, out_features=512, bias=True)
+          (linear_k): Linear(in_features=512, out_features=512, bias=True)
+          (linear_v): Linear(in_features=512, out_features=512, bias=True)
+          (linear_out): Linear(in_features=512, out_features=512, bias=True)
+          (dropout): Identity()
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=512, out_features=2048, bias=True)
+          (w_2): Linear(in_features=2048, out_features=512, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+    )
+    (after_norm): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
+  )
+  (embed): Embedding(50002, 512)
+  (pos_enc): PositionalEncoding(
+    (dropout): Dropout(p=0.0, inplace=False)
+  )
+  (embed_proj): Linear(in_features=512, out_features=1024, bias=True)
+  (prompt_proj): Linear(in_features=512, out_features=1024, bias=True)
+  (ctc): CTC(
+    (ctc_lo): Linear(in_features=1024, out_features=50002, bias=True)
+    (ctc_loss): CTCLoss()
+  )
+)
+
+Model summary:
+    Class Name: ESPnetS2TCTCModel
+    Total Number of model parameters: 1.01 B
+    Number of trainable parameters: 1.01 B (100.0%)
+    Size: 4.02 GB
+    Type: torch.float32
+[gpua006:0/64] 2024-02-10 14:17:15,348 (abs_task:1274) INFO: Optimizer:
+AdamW (
+Parameter Group 0
+    amsgrad: False
+    betas: [0.9, 0.98]
+    capturable: False
+    eps: 1e-06
+    foreach: None
+    initial_lr: 0.0002
+    lr: 1.6666666666666667e-09
+    maximize: False
+    weight_decay: 0.0
+)
+[gpua006:0/64] 2024-02-10 14:17:15,348 (abs_task:1275) INFO: Scheduler: PiecewiseLinearWarmupLR(warmup_steps_list=[0, 30000, 60000], warmup_lr_list=[0.0, 5e-05, 0.0002])
+[gpua006:0/64] 2024-02-10 14:17:15,349 (abs_task:1284) INFO: Saving the configuration in exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/config.yaml
+[gpua006:0/64] 2024-02-10 14:17:20,935 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua006:0/64] 2024-02-10 14:17:21,903 (abs_task:1660) INFO: [valid] dataset:
+ESPnetDataset(
+  speech: {"path": "dump/raw/dev_v3/wav.scp", "type": "kaldi_ark"}
+  text_prev: {"path": "dump/raw/dev_v3/text.prev", "type": "text"}
+  text_ctc: {"path": "dump/raw/dev_v3/text.ctc", "type": "text"}
+  text: {"path": "dump/raw/dev_v3/text", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TCTCPreprocessor object at 0x7fc83785ece0>)
+[gpua006:0/64] 2024-02-10 14:17:21,903 (abs_task:1661) INFO: [valid] Batch sampler: UnsortedBatchSampler(N-batch=4671, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/valid/speech_shape, 
+[gpua006:0/64] 2024-02-10 14:17:21,904 (abs_task:1662) INFO: [valid] mini-batch sizes summary: N-batch=4671, mean=256.0, min=256, max=257
+[gpua006:0/64] 2024-02-10 14:17:48,341 (trainer:167) INFO: The training was resumed using exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/checkpoint.pth
+gpua006:662233:662233 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.6<0>
+gpua006:662233:662233 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol.
+gpua006:662233:662233 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5).
+gpua006:662233:662233 [0] NCCL INFO cudaDriverVersion 12020
+NCCL version 2.14.3+cuda11.7
+[gpua006:0/64] 2024-02-10 14:17:56,942 (trainer:301) INFO: 28/45epoch started
+[gpua006:0/64] 2024-02-10 14:17:56,984 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpua006:0/64] 2024-02-10 14:18:15,411 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua006:0/64] 2024-02-10 14:18:18,798 (abs_task:1660) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TCTCPreprocessor object at 0x7fc6738b1510>)
+[gpua006:0/64] 2024-02-10 14:18:18,798 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, 
+[gpua006:0/64] 2024-02-10 14:18:18,801 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+gpua033:1630002:1630002 [0] NCCL INFO cudaDriverVersion 12020
+gpua033:1630002:1630002 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.33<0>
+gpua033:1630002:1630002 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol.
+gpua033:1630002:1630002 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5).
+gpua033:1630002:1630067 [0] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0
+gpua033:1630002:1630067 [0] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics)
+gpua033:1630002:1630067 [0] NCCL INFO Using network AWS Libfabric
+gpua033:1630002:1630067 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpua033:1630002:1630067 [0] NCCL INFO NCCL_CROSS_NIC set by environment to 1.
+gpua033:1630002:1630067 [0] NCCL INFO Trees [0] 13/-1/-1->12->8 [1] 13/4/-1->12->28
+gpua033:1630002:1630067 [0] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [receive] via NET/AWS Libfabric/1
+gpua033:1630002:1630067 [0] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [receive] via NET/AWS Libfabric/1
+gpua033:1630002:1630067 [0] NCCL INFO Channel 00/0 : 12[7000] -> 13[46000] via P2P/IPC/read
+gpua033:1630002:1630067 [0] NCCL INFO Channel 01/0 : 12[7000] -> 13[46000] via P2P/IPC/read
+gpua033:1630002:1630067 [0] NCCL INFO Connected all rings
+gpua033:1630002:1630067 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [receive] via NET/AWS Libfabric/1
+gpua033:1630002:1630067 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [receive] via NET/AWS Libfabric/1
+gpua033:1630002:1630067 [0] NCCL INFO Channel 01/0 : 12[7000] -> 28[7000] [send] via NET/AWS Libfabric/1
+gpua033:1630002:1630067 [0] NCCL INFO Channel 01/0 : 28[7000] -> 12[7000] [receive] via NET/AWS Libfabric/1
+gpua033:1630002:1630067 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [send] via NET/AWS Libfabric/1
+gpua033:1630002:1630067 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [send] via NET/AWS Libfabric/1
+gpua033:1630002:1630067 [0] NCCL INFO Connected all trees
+gpua033:1630002:1630067 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua033:1630002:1630067 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua033:1630002:1630067 [0] NCCL INFO comm 0x55bd4c2be490 rank 12 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpua033:1630003:1630003 [1] NCCL INFO cudaDriverVersion 12020
+gpua033:1630003:1630003 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.33<0>
+gpua033:1630003:1630003 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol.
+gpua033:1630003:1630003 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5).
+gpua033:1630003:1630066 [1] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0
+gpua033:1630003:1630066 [1] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics)
+gpua033:1630003:1630066 [1] NCCL INFO Using network AWS Libfabric
+gpua033:1630003:1630066 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpua033:1630003:1630066 [1] NCCL INFO NCCL_CROSS_NIC set by environment to 1.
+gpua033:1630003:1630066 [1] NCCL INFO Trees [0] 14/-1/-1->13->12 [1] 14/20/-1->13->12
+gpua033:1630003:1630066 [1] NCCL INFO Channel 00/0 : 13[46000] -> 14[85000] via P2P/IPC/read
+gpua033:1630003:1630066 [1] NCCL INFO Channel 01/0 : 13[46000] -> 14[85000] via P2P/IPC/read
+gpua033:1630003:1630066 [1] NCCL INFO Connected all rings
+gpua033:1630003:1630066 [1] NCCL INFO Channel 01/0 : 13[46000] -> 20[7000] [send] via NET/AWS Libfabric/1
+gpua033:1630003:1630066 [1] NCCL INFO Channel 01/0 : 20[7000] -> 13[46000] [receive] via NET/AWS Libfabric/1
+gpua033:1630003:1630066 [1] NCCL INFO Channel 00/0 : 13[46000] -> 12[7000] via P2P/IPC/read
+gpua033:1630003:1630066 [1] NCCL INFO Channel 01/0 : 13[46000] -> 12[7000] via P2P/IPC/read
+gpua033:1630003:1630066 [1] NCCL INFO Connected all trees
+gpua033:1630003:1630066 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua033:1630003:1630066 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua033:1630003:1630066 [1] NCCL INFO comm 0x5643b0352670 rank 13 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpua033:1630005:1630005 [3] NCCL INFO cudaDriverVersion 12020
+gpua033:1630005:1630005 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.33<0>
+gpua033:1630005:1630005 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol.
+gpua033:1630005:1630005 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5).
+gpua033:1630005:1630065 [3] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0
+gpua033:1630005:1630065 [3] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics)
+gpua033:1630005:1630065 [3] NCCL INFO Using network AWS Libfabric
+gpua033:1630005:1630065 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpua033:1630005:1630065 [3] NCCL INFO NCCL_CROSS_NIC set by environment to 1.
+gpua033:1630005:1630065 [3] NCCL INFO Trees [0] -1/-1/-1->15->14 [1] -1/-1/-1->15->14
+gpua033:1630005:1630065 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 16[7000] [send] via NET/AWS Libfabric/1
+gpua033:1630005:1630065 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 16[7000] [send] via NET/AWS Libfabric/1
+gpua033:1630005:1630065 [3] NCCL INFO Connected all rings
+gpua033:1630005:1630065 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 14[85000] via P2P/IPC/read
+gpua033:1630005:1630065 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 14[85000] via P2P/IPC/read
+gpua033:1630005:1630065 [3] NCCL INFO Connected all trees
+gpua033:1630005:1630065 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua033:1630005:1630065 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua033:1630005:1630065 [3] NCCL INFO comm 0x56400515a730 rank 15 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpua033:1630004:1630004 [2] NCCL INFO cudaDriverVersion 12020
+gpua033:1630004:1630004 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.33<0>
+gpua033:1630004:1630004 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol.
+gpua033:1630004:1630004 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5).
+gpua033:1630004:1630064 [2] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0
+gpua033:1630004:1630064 [2] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics)
+gpua033:1630004:1630064 [2] NCCL INFO Using network AWS Libfabric
+gpua033:1630004:1630064 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpua033:1630004:1630064 [2] NCCL INFO NCCL_CROSS_NIC set by environment to 1.
+gpua033:1630004:1630064 [2] NCCL INFO Trees [0] 15/-1/-1->14->13 [1] 15/-1/-1->14->13
+gpua033:1630004:1630064 [2] NCCL INFO Channel 00/0 : 14[85000] -> 15[c7000] via P2P/IPC/read
+gpua033:1630004:1630064 [2] NCCL INFO Channel 01/0 : 14[85000] -> 15[c7000] via P2P/IPC/read
+gpua033:1630004:1630064 [2] NCCL INFO Connected all rings
+gpua033:1630004:1630064 [2] NCCL INFO Channel 00/0 : 14[85000] -> 13[46000] via P2P/IPC/read
+gpua033:1630004:1630064 [2] NCCL INFO Channel 01/0 : 14[85000] -> 13[46000] via P2P/IPC/read
+gpua033:1630004:1630064 [2] NCCL INFO Connected all trees
+gpua033:1630004:1630064 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua033:1630004:1630064 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua033:1630004:1630064 [2] NCCL INFO comm 0x55acb3964ca0 rank 14 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpua006:662234:662234 [1] NCCL INFO cudaDriverVersion 12020
+gpua006:662234:662234 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.6<0>
+gpua006:662234:662234 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol.
+gpua006:662234:662234 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5).
+gpua006:662234:662295 [1] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0
+gpua006:662234:662295 [1] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics)
+gpua006:662234:662295 [1] NCCL INFO Using network AWS Libfabric
+gpua006:662234:662295 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpua006:662234:662295 [1] NCCL INFO NCCL_CROSS_NIC set by environment to 1.
+gpua006:662234:662295 [1] NCCL INFO Trees [0] 2/-1/-1->1->0 [1] 2/-1/-1->1->0
+gpua006:662234:662295 [1] NCCL INFO Channel 00/0 : 1[46000] -> 2[85000] via P2P/IPC/read
+gpua006:662234:662295 [1] NCCL INFO Channel 01/0 : 1[46000] -> 2[85000] via P2P/IPC/read
+gpua006:662234:662295 [1] NCCL INFO Connected all rings
+gpua006:662234:662295 [1] NCCL INFO Channel 00/0 : 1[46000] -> 0[7000] via P2P/IPC/read
+gpua006:662234:662295 [1] NCCL INFO Channel 01/0 : 1[46000] -> 0[7000] via P2P/IPC/read
+gpua006:662234:662295 [1] NCCL INFO Connected all trees
+gpua006:662234:662295 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua006:662234:662295 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua006:662234:662295 [1] NCCL INFO comm 0x559b5b74b7b0 rank 1 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpua012:2708176:2708176 [0] NCCL INFO cudaDriverVersion 12020
+gpua012:2708176:2708176 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.12<0>
+gpua012:2708176:2708176 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol.
+gpua012:2708176:2708176 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5).
+gpua012:2708176:2708255 [0] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0
+gpua012:2708176:2708255 [0] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics)
+gpua012:2708176:2708255 [0] NCCL INFO Using network AWS Libfabric
+gpua012:2708176:2708255 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpua012:2708176:2708255 [0] NCCL INFO NCCL_CROSS_NIC set by environment to 1.
+gpua012:2708176:2708255 [0] NCCL INFO Trees [0] 5/-1/-1->4->9 [1] 5/0/-1->4->12
+gpua012:2708176:2708255 [0] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [receive] via NET/AWS Libfabric/1
+gpua012:2708176:2708255 [0] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [receive] via NET/AWS Libfabric/1
+gpua012:2708176:2708255 [0] NCCL INFO Channel 00/0 : 4[7000] -> 5[46000] via P2P/IPC/read
+gpua012:2708176:2708255 [0] NCCL INFO Channel 01/0 : 4[7000] -> 5[46000] via P2P/IPC/read
+gpua012:2708176:2708255 [0] NCCL INFO Connected all rings
+gpua012:2708176:2708255 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [receive] via NET/AWS Libfabric/1
+gpua012:2708176:2708255 [0] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [send] via NET/AWS Libfabric/1
+gpua012:2708176:2708255 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [send] via NET/AWS Libfabric/1
+gpua012:2708176:2708255 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [receive] via NET/AWS Libfabric/1
+gpua012:2708176:2708255 [0] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [receive] via NET/AWS Libfabric/1
+gpua012:2708176:2708255 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [send] via NET/AWS Libfabric/1
+gpua012:2708176:2708255 [0] NCCL INFO Connected all trees
+gpua012:2708176:2708255 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua012:2708176:2708255 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua012:2708176:2708255 [0] NCCL INFO comm 0x55d9b7e2bca0 rank 4 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpua042:231581:231581 [1] NCCL INFO cudaDriverVersion 12020
+gpua042:231581:231581 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.42<0>
+gpua042:231581:231581 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol.
+gpua042:231581:231581 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5).
+gpua042:231581:231636 [1] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0
+gpua042:231581:231636 [1] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics)
+gpua042:231581:231636 [1] NCCL INFO Using network AWS Libfabric
+gpua042:231581:231636 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpua042:231581:231636 [1] NCCL INFO NCCL_CROSS_NIC set by environment to 1.
+gpua042:231581:231636 [1] NCCL INFO Trees [0] 30/-1/-1->29->28 [1] 30/44/-1->29->28
+gpua042:231581:231636 [1] NCCL INFO Channel 00/0 : 29[46000] -> 30[85000] via P2P/IPC/read
+gpua042:231581:231636 [1] NCCL INFO Channel 01/0 : 29[46000] -> 30[85000] via P2P/IPC/read
+gpua042:231581:231636 [1] NCCL INFO Connected all rings
+gpua012:2708177:2708177 [1] NCCL INFO cudaDriverVersion 12020
+gpua012:2708177:2708177 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.12<0>
+gpua012:2708177:2708177 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol.
+gpua012:2708177:2708177 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5).
+gpua012:2708177:2708253 [1] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0
+gpua012:2708177:2708253 [1] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics)
+gpua012:2708177:2708253 [1] NCCL INFO Using network AWS Libfabric
+gpua012:2708177:2708253 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpua012:2708177:2708253 [1] NCCL INFO NCCL_CROSS_NIC set by environment to 1.
+gpua012:2708177:2708253 [1] NCCL INFO Trees [0] 6/-1/-1->5->4 [1] 6/8/-1->5->4
+gpua012:2708177:2708253 [1] NCCL INFO Channel 00/0 : 5[46000] -> 6[85000] via P2P/IPC/read
+gpua012:2708177:2708253 [1] NCCL INFO Channel 01/0 : 5[46000] -> 6[85000] via P2P/IPC/read
+gpua089:835724:835724 [1] NCCL INFO cudaDriverVersion 12020
+gpua089:835724:835724 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.89<0>
+gpua089:835724:835724 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol.
+gpua089:835724:835724 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5).
+gpua089:835724:835782 [1] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0
+gpua089:835724:835782 [1] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics)
+gpua089:835724:835782 [1] NCCL INFO Using network AWS Libfabric
+gpua089:835724:835782 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpua089:835724:835782 [1] NCCL INFO NCCL_CROSS_NIC set by environment to 1.
+gpua089:835724:835782 [1] NCCL INFO Trees [0] 62/-1/-1->61->60 [1] 62/-1/-1->61->60
+gpua089:835724:835782 [1] NCCL INFO Channel 00/0 : 61[46000] -> 62[85000] via P2P/IPC/read
+gpua089:835724:835782 [1] NCCL INFO Channel 01/0 : 61[46000] -> 62[85000] via P2P/IPC/read
+gpua089:835724:835782 [1] NCCL INFO Connected all rings
+gpua038:164984:164984 [2] NCCL INFO cudaDriverVersion 12020
+gpua038:164984:164984 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.38<0>
+gpua038:164984:164984 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol.
+gpua038:164984:164984 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5).
+gpua038:164984:165043 [2] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0
+gpua038:164984:165043 [2] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics)
+gpua038:164984:165043 [2] NCCL INFO Using network AWS Libfabric
+gpua038:164984:165043 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpua038:164984:165043 [2] NCCL INFO NCCL_CROSS_NIC set by environment to 1.
+gpua038:164984:165043 [2] NCCL INFO Trees [0] 19/-1/-1->18->17 [1] 19/-1/-1->18->17
+gpua038:164984:165043 [2] NCCL INFO Channel 00/0 : 18[85000] -> 19[c7000] via P2P/IPC/read
+gpua038:164984:165043 [2] NCCL INFO Channel 01/0 : 18[85000] -> 19[c7000] via P2P/IPC/read
+gpua038:164984:165043 [2] NCCL INFO Connected all rings
+gpua042:231581:231636 [1] NCCL INFO Channel 01/0 : 29[46000] -> 44[7000] [send] via NET/AWS Libfabric/1
+gpua042:231581:231636 [1] NCCL INFO Channel 01/0 : 44[7000] -> 29[46000] [receive] via NET/AWS Libfabric/1
+gpua042:231581:231636 [1] NCCL INFO Channel 00/0 : 29[46000] -> 28[7000] via P2P/IPC/read
+gpua042:231581:231636 [1] NCCL INFO Channel 01/0 : 29[46000] -> 28[7000] via P2P/IPC/read
+gpua042:231581:231636 [1] NCCL INFO Connected all trees
+gpua042:231581:231636 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua042:231581:231636 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua042:231581:231636 [1] NCCL INFO comm 0x563f6d313830 rank 29 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpua039:3647429:3647429 [2] NCCL INFO cudaDriverVersion 12020
+gpua039:3647429:3647429 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.39<0>
+gpua039:3647429:3647429 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol.
+gpua039:3647429:3647429 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5).
+gpua039:3647429:3647493 [2] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0
+gpua039:3647429:3647493 [2] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics)
+gpua039:3647429:3647493 [2] NCCL INFO Using network AWS Libfabric
+gpua039:3647429:3647493 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpua039:3647429:3647493 [2] NCCL INFO NCCL_CROSS_NIC set by environment to 1.
+gpua039:3647429:3647493 [2] NCCL INFO Trees [0] 23/-1/-1->22->21 [1] 23/-1/-1->22->21
+gpua039:3647429:3647493 [2] NCCL INFO Channel 00/0 : 22[85000] -> 23[c7000] via P2P/IPC/read
+gpua039:3647429:3647493 [2] NCCL INFO Channel 01/0 : 22[85000] -> 23[c7000] via P2P/IPC/read
+gpua012:2708177:2708253 [1] NCCL INFO Connected all rings
+gpua012:2708177:2708253 [1] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [send] via NET/AWS Libfabric/1
+gpua012:2708177:2708253 [1] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [receive] via NET/AWS Libfabric/1
+gpua012:2708177:2708253 [1] NCCL INFO Channel 00/0 : 5[46000] -> 4[7000] via P2P/IPC/read
+gpua012:2708177:2708253 [1] NCCL INFO Channel 01/0 : 5[46000] -> 4[7000] via P2P/IPC/read
+gpua012:2708177:2708253 [1] NCCL INFO Connected all trees
+gpua012:2708177:2708253 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua012:2708177:2708253 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua012:2708177:2708253 [1] NCCL INFO comm 0x55f706e7dac0 rank 5 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpua089:835724:835782 [1] NCCL INFO Channel 00/0 : 61[46000] -> 60[7000] via P2P/IPC/read
+gpua089:835724:835782 [1] NCCL INFO Channel 01/0 : 61[46000] -> 60[7000] via P2P/IPC/read
+gpua089:835724:835782 [1] NCCL INFO Connected all trees
+gpua089:835724:835782 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua089:835724:835782 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua089:835724:835782 [1] NCCL INFO comm 0x564d0da8ae00 rank 61 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpua038:164984:165043 [2] NCCL INFO Channel 00/0 : 18[85000] -> 17[46000] via P2P/IPC/read
+gpua038:164984:165043 [2] NCCL INFO Channel 01/0 : 18[85000] -> 17[46000] via P2P/IPC/read
+gpua038:164984:165043 [2] NCCL INFO Connected all trees
+gpua038:164984:165043 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua038:164984:165043 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua038:164984:165043 [2] NCCL INFO comm 0x5655300f5e30 rank 18 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpua039:3647429:3647493 [2] NCCL INFO Connected all rings
+gpua039:3647429:3647493 [2] NCCL INFO Channel 00/0 : 22[85000] -> 21[46000] via P2P/IPC/read
+gpua039:3647429:3647493 [2] NCCL INFO Channel 01/0 : 22[85000] -> 21[46000] via P2P/IPC/read
+gpua039:3647429:3647493 [2] NCCL INFO Connected all trees
+gpua039:3647429:3647493 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua039:3647429:3647493 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua039:3647429:3647493 [2] NCCL INFO comm 0x563c541851b0 rank 22 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpua089:835726:835726 [3] NCCL INFO cudaDriverVersion 12020
+gpua089:835726:835726 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.89<0>
+gpua089:835726:835726 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol.
+gpua089:835726:835726 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5).
+gpua089:835726:835783 [3] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0
+gpua089:835726:835783 [3] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics)
+gpua089:835726:835783 [3] NCCL INFO Using network AWS Libfabric
+gpua089:835726:835783 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpua089:835726:835783 [3] NCCL INFO NCCL_CROSS_NIC set by environment to 1.
+gpua089:835726:835783 [3] NCCL INFO Trees [0] -1/-1/-1->63->62 [1] -1/-1/-1->63->62
+gpua089:835726:835783 [3] NCCL INFO Channel 00/0 : 63[c7000] -> 0[7000] [send] via NET/AWS Libfabric/1
+gpua089:835726:835783 [3] NCCL INFO Channel 01/0 : 63[c7000] -> 0[7000] [send] via NET/AWS Libfabric/1
+gpua049:4034215:4034215 [2] NCCL INFO cudaDriverVersion 12020
+gpua049:4034215:4034215 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.49<0>
+gpua049:4034215:4034215 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol.
+gpua049:4034215:4034215 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5).
+gpua049:4034215:4034285 [2] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0
+gpua049:4034215:4034285 [2] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics)
+gpua049:4034215:4034285 [2] NCCL INFO Using network AWS Libfabric
+gpua049:4034215:4034285 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpua049:4034215:4034285 [2] NCCL INFO NCCL_CROSS_NIC set by environment to 1.
+gpua049:4034215:4034285 [2] NCCL INFO Trees [0] 35/-1/-1->34->33 [1] 35/-1/-1->34->33
+gpua049:4034215:4034285 [2] NCCL INFO Channel 00/0 : 34[85000] -> 35[c7000] via P2P/IPC/read
+gpua049:4034215:4034285 [2] NCCL INFO Channel 01/0 : 34[85000] -> 35[c7000] via P2P/IPC/read
+gpua085:4003429:4003429 [1] NCCL INFO cudaDriverVersion 12020
+gpua085:4003429:4003429 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.85<0>
+gpua085:4003429:4003429 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol.
+gpua085:4003429:4003429 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5).
+gpua085:4003429:4003509 [1] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0
+gpua085:4003429:4003509 [1] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics)
+gpua085:4003429:4003509 [1] NCCL INFO Using network AWS Libfabric
+gpua085:4003429:4003509 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpua085:4003429:4003509 [1] NCCL INFO NCCL_CROSS_NIC set by environment to 1.
+gpua085:4003429:4003509 [1] NCCL INFO Trees [0] 58/52/-1->57->56 [1] 58/-1/-1->57->56
+gpua085:4003429:4003509 [1] NCCL INFO Channel 00/0 : 57[46000] -> 58[85000] via P2P/IPC/read
+gpua085:4003429:4003509 [1] NCCL INFO Channel 01/0 : 57[46000] -> 58[85000] via P2P/IPC/read
+gpua038:164985:164985 [3] NCCL INFO cudaDriverVersion 12020
+gpua038:164985:164985 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.38<0>
+gpua038:164985:164985 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol.
+gpua038:164985:164985 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5).
+gpua038:164985:165044 [3] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0
+gpua038:164985:165044 [3] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics)
+gpua038:164985:165044 [3] NCCL INFO Using network AWS Libfabric
+gpua038:164985:165044 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpua038:164985:165044 [3] NCCL INFO NCCL_CROSS_NIC set by environment to 1.
+gpua038:164985:165044 [3] NCCL INFO Trees [0] -1/-1/-1->19->18 [1] -1/-1/-1->19->18
+gpua038:164985:165044 [3] NCCL INFO Channel 00/0 : 19[c7000] -> 20[7000] [send] via NET/AWS Libfabric/1
+gpua038:164985:165044 [3] NCCL INFO Channel 01/0 : 19[c7000] -> 20[7000] [send] via NET/AWS Libfabric/1
+gpua042:231582:231582 [2] NCCL INFO cudaDriverVersion 12020
+gpua042:231582:231582 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.42<0>
+gpua042:231582:231582 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol.
+gpua042:231582:231582 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5).
+gpua042:231582:231635 [2] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0
+gpua042:231582:231635 [2] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics)
+gpua042:231582:231635 [2] NCCL INFO Using network AWS Libfabric
+gpua042:231582:231635 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpua042:231582:231635 [2] NCCL INFO NCCL_CROSS_NIC set by environment to 1.
+gpua042:231582:231635 [2] NCCL INFO Trees [0] 31/-1/-1->30->29 [1] 31/-1/-1->30->29
+gpua042:231582:231635 [2] NCCL INFO Channel 00/0 : 30[85000] -> 31[c7000] via P2P/IPC/read
+gpua042:231582:231635 [2] NCCL INFO Channel 01/0 : 30[85000] -> 31[c7000] via P2P/IPC/read
+gpua042:231582:231635 [2] NCCL INFO Connected all rings
+gpua039:3647428:3647428 [1] NCCL INFO cudaDriverVersion 12020
+gpua039:3647428:3647428 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.39<0>
+gpua039:3647428:3647428 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol.
+gpua039:3647428:3647428 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5).
+gpua039:3647428:3647495 [1] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0
+gpua039:3647428:3647495 [1] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics)
+gpua039:3647428:3647495 [1] NCCL INFO Using network AWS Libfabric
+gpua039:3647428:3647495 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpua039:3647428:3647495 [1] NCCL INFO NCCL_CROSS_NIC set by environment to 1.
+gpua039:3647428:3647495 [1] NCCL INFO Trees [0] 22/-1/-1->21->20 [1] 22/24/-1->21->20
+gpua039:3647428:3647495 [1] NCCL INFO Channel 00/0 : 21[46000] -> 22[85000] via P2P/IPC/read
+gpua039:3647428:3647495 [1] NCCL INFO Channel 01/0 : 21[46000] -> 22[85000] via P2P/IPC/read
+gpua089:835726:835783 [3] NCCL INFO Connected all rings
+gpua089:835726:835783 [3] NCCL INFO Channel 00/0 : 63[c7000] -> 62[85000] via P2P/IPC/read
+gpua089:835726:835783 [3] NCCL INFO Channel 01/0 : 63[c7000] -> 62[85000] via P2P/IPC/read
+gpua089:835726:835783 [3] NCCL INFO Connected all trees
+gpua089:835726:835783 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua089:835726:835783 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua089:835726:835783 [3] NCCL INFO comm 0x5647f1e460f0 rank 63 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpua080:3255808:3255808 [0] NCCL INFO cudaDriverVersion 12020
+gpua080:3255808:3255808 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.80<0>
+gpua080:3255808:3255808 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol.
+gpua080:3255808:3255808 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5).
+gpua080:3255808:3255935 [0] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0
+gpua080:3255808:3255935 [0] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics)
+gpua080:3255808:3255935 [0] NCCL INFO Using network AWS Libfabric
+gpua080:3255808:3255935 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpua080:3255808:3255935 [0] NCCL INFO NCCL_CROSS_NIC set by environment to 1.
+gpua080:3255808:3255935 [0] NCCL INFO Trees [0] 53/-1/-1->52->57 [1] 53/48/-1->52->45
+gpua080:3255808:3255935 [0] NCCL INFO Channel 00/0 : 51[c7000] -> 52[7000] [receive] via NET/AWS Libfabric/1
+gpua049:4034215:4034285 [2] NCCL INFO Connected all rings
+gpua049:4034215:4034285 [2] NCCL INFO Channel 00/0 : 34[85000] -> 33[46000] via P2P/IPC/read
+gpua049:4034215:4034285 [2] NCCL INFO Channel 01/0 : 34[85000] -> 33[46000] via P2P/IPC/read
+gpua049:4034215:4034285 [2] NCCL INFO Connected all trees
+gpua049:4034215:4034285 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua049:4034215:4034285 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua049:4034215:4034285 [2] NCCL INFO comm 0x55f8ef2a9840 rank 34 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpua085:4003429:4003509 [1] NCCL INFO Connected all rings
+gpua085:4003429:4003509 [1] NCCL INFO Channel 00/0 : 52[7000] -> 57[46000] [receive] via NET/AWS Libfabric/1
+gpua085:4003429:4003509 [1] NCCL INFO Channel 00/0 : 57[46000] -> 52[7000] [send] via NET/AWS Libfabric/1
+gpua085:4003429:4003509 [1] NCCL INFO Channel 00/0 : 57[46000] -> 56[7000] via P2P/IPC/read
+gpua085:4003429:4003509 [1] NCCL INFO Channel 01/0 : 57[46000] -> 56[7000] via P2P/IPC/read
+gpua085:4003429:4003509 [1] NCCL INFO Connected all trees
+gpua085:4003429:4003509 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua085:4003429:4003509 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua085:4003429:4003509 [1] NCCL INFO comm 0x55631a271a20 rank 57 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpua085:4003430:4003430 [2] NCCL INFO cudaDriverVersion 12020
+gpua085:4003430:4003430 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.85<0>
+gpua038:164985:165044 [3] NCCL INFO Connected all rings
+gpua038:164985:165044 [3] NCCL INFO Channel 00/0 : 19[c7000] -> 18[85000] via P2P/IPC/read
+gpua038:164985:165044 [3] NCCL INFO Channel 01/0 : 19[c7000] -> 18[85000] via P2P/IPC/read
+gpua038:164985:165044 [3] NCCL INFO Connected all trees
+gpua038:164985:165044 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua038:164985:165044 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua038:164985:165044 [3] NCCL INFO comm 0x5597249b37b0 rank 19 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpua042:231582:231635 [2] NCCL INFO Channel 00/0 : 30[85000] -> 29[46000] via P2P/IPC/read
+gpua042:231582:231635 [2] NCCL INFO Channel 01/0 : 30[85000] -> 29[46000] via P2P/IPC/read
+gpua042:231582:231635 [2] NCCL INFO Connected all trees
+gpua042:231582:231635 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua042:231582:231635 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua042:231582:231635 [2] NCCL INFO comm 0x56511f6e3ef0 rank 30 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpua042:231580:231580 [0] NCCL INFO cudaDriverVersion 12020
+gpua042:231580:231580 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.42<0>
+gpua042:231580:231580 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol.
+gpua042:231580:231580 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5).
+gpua042:231580:231638 [0] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0
+gpua042:231580:231638 [0] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics)
+gpua039:3647428:3647495 [1] NCCL INFO Connected all rings
+gpua039:3647428:3647495 [1] NCCL INFO Channel 01/0 : 21[46000] -> 24[7000] [send] via NET/AWS Libfabric/1
+gpua039:3647428:3647495 [1] NCCL INFO Channel 01/0 : 24[7000] -> 21[46000] [receive] via NET/AWS Libfabric/1
+gpua039:3647428:3647495 [1] NCCL INFO Channel 00/0 : 21[46000] -> 20[7000] via P2P/IPC/read
+gpua039:3647428:3647495 [1] NCCL INFO Channel 01/0 : 21[46000] -> 20[7000] via P2P/IPC/read
+gpua039:3647428:3647495 [1] NCCL INFO Connected all trees
+gpua039:3647428:3647495 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua039:3647428:3647495 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua039:3647428:3647495 [1] NCCL INFO comm 0x55d7a93589b0 rank 21 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpua089:835725:835725 [2] NCCL INFO cudaDriverVersion 12020
+gpua089:835725:835725 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.89<0>
+gpua089:835725:835725 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol.
+gpua089:835725:835725 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5).
+gpua089:835725:835784 [2] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0
+gpua089:835725:835784 [2] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics)
+gpua089:835725:835784 [2] NCCL INFO Using network AWS Libfabric
+gpua089:835725:835784 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpua089:835725:835784 [2] NCCL INFO NCCL_CROSS_NIC set by environment to 1.
+gpua089:835725:835784 [2] NCCL INFO Trees [0] 63/-1/-1->62->61 [1] 63/-1/-1->62->61
+gpua089:835725:835784 [2] NCCL INFO Channel 00/0 : 62[85000] -> 63[c7000] via P2P/IPC/read
+gpua089:835725:835784 [2] NCCL INFO Channel 01/0 : 62[85000] -> 63[c7000] via P2P/IPC/read
+gpua089:835725:835784 [2] NCCL INFO Connected all rings
+gpua080:3255808:3255935 [0] NCCL INFO Channel 01/0 : 51[c7000] -> 52[7000] [receive] via NET/AWS Libfabric/1
+gpua080:3255808:3255935 [0] NCCL INFO Channel 00/0 : 52[7000] -> 53[46000] via P2P/IPC/read
+gpua080:3255808:3255935 [0] NCCL INFO Channel 01/0 : 52[7000] -> 53[46000] via P2P/IPC/read
+gpua080:3255808:3255935 [0] NCCL INFO Connected all rings
+gpua080:3255808:3255935 [0] NCCL INFO Channel 01/0 : 48[7000] -> 52[7000] [receive] via NET/AWS Libfabric/1
+gpua080:3255808:3255935 [0] NCCL INFO Channel 00/0 : 52[7000] -> 57[46000] [send] via NET/AWS Libfabric/1
+gpua080:3255808:3255935 [0] NCCL INFO Channel 01/0 : 45[46000] -> 52[7000] [receive] via NET/AWS Libfabric/1
+gpua080:3255808:3255935 [0] NCCL INFO Channel 01/0 : 52[7000] -> 45[46000] [send] via NET/AWS Libfabric/1
+gpua080:3255808:3255935 [0] NCCL INFO Channel 00/0 : 57[46000] -> 52[7000] [receive] via NET/AWS Libfabric/1
+gpua080:3255808:3255935 [0] NCCL INFO Channel 01/0 : 52[7000] -> 48[7000] [send] via NET/AWS Libfabric/1
+gpua049:4034214:4034214 [1] NCCL INFO cudaDriverVersion 12020
+gpua049:4034214:4034214 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.49<0>
+gpua049:4034214:4034214 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol.
+gpua049:4034214:4034214 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5).
+gpua049:4034214:4034288 [1] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0
+gpua049:4034214:4034288 [1] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics)
+gpua049:4034214:4034288 [1] NCCL INFO Using network AWS Libfabric
+gpua049:4034214:4034288 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpua049:4034214:4034288 [1] NCCL INFO NCCL_CROSS_NIC set by environment to 1.
+gpua049:4034214:4034288 [1] NCCL INFO Trees [0] 34/16/-1->33->32 [1] 34/-1/-1->33->32
+gpua049:4034214:4034288 [1] NCCL INFO Channel 00/0 : 33[46000] -> 34[85000] via P2P/IPC/read
+gpua049:4034214:4034288 [1] NCCL INFO Channel 01/0 : 33[46000] -> 34[85000] via P2P/IPC/read
+gpua085:4003430:4003430 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol.
+gpua085:4003430:4003430 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5).
+gpua085:4003430:4003508 [2] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0
+gpua085:4003430:4003508 [2] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics)
+gpua085:4003430:4003508 [2] NCCL INFO Using network AWS Libfabric
+gpua085:4003430:4003508 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpua085:4003430:4003508 [2] NCCL INFO NCCL_CROSS_NIC set by environment to 1.
+gpua085:4003430:4003508 [2] NCCL INFO Trees [0] 59/-1/-1->58->57 [1] 59/-1/-1->58->57
+gpua085:4003430:4003508 [2] NCCL INFO Channel 00/0 : 58[85000] -> 59[c7000] via P2P/IPC/read
+gpua085:4003430:4003508 [2] NCCL INFO Channel 01/0 : 58[85000] -> 59[c7000] via P2P/IPC/read
+gpua085:4003430:4003508 [2] NCCL INFO Connected all rings
+gpua085:4003430:4003508 [2] NCCL INFO Channel 00/0 : 58[85000] -> 57[46000] via P2P/IPC/read
+gpua038:164983:164983 [1] NCCL INFO cudaDriverVersion 12020
+gpua038:164983:164983 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.38<0>
+gpua038:164983:164983 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol.
+gpua038:164983:164983 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5).
+gpua038:164983:165041 [1] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0
+gpua038:164983:165041 [1] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics)
+gpua038:164983:165041 [1] NCCL INFO Using network AWS Libfabric
+gpua038:164983:165041 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpua038:164983:165041 [1] NCCL INFO NCCL_CROSS_NIC set by environment to 1.
+gpua038:164983:165041 [1] NCCL INFO Trees [0] 18/8/-1->17->16 [1] 18/-1/-1->17->16
+gpua038:164983:165041 [1] NCCL INFO Channel 00/0 : 17[46000] -> 18[85000] via P2P/IPC/read
+gpua038:164983:165041 [1] NCCL INFO Channel 01/0 : 17[46000] -> 18[85000] via P2P/IPC/read
+gpua038:164983:165041 [1] NCCL INFO Connected all rings
+gpua042:231580:231638 [0] NCCL INFO Using network AWS Libfabric
+gpua042:231580:231638 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpua042:231580:231638 [0] NCCL INFO NCCL_CROSS_NIC set by environment to 1.
+gpua042:231580:231638 [0] NCCL INFO Trees [0] 29/-1/-1->28->24 [1] 29/12/-1->28->60
+gpua042:231580:231638 [0] NCCL INFO Channel 00/0 : 27[c7000] -> 28[7000] [receive] via NET/AWS Libfabric/1
+gpua042:231580:231638 [0] NCCL INFO Channel 01/0 : 27[c7000] -> 28[7000] [receive] via NET/AWS Libfabric/1
+gpua042:231580:231638 [0] NCCL INFO Channel 00/0 : 28[7000] -> 29[46000] via P2P/IPC/read
+gpua042:231580:231638 [0] NCCL INFO Channel 01/0 : 28[7000] -> 29[46000] via P2P/IPC/read
+gpua042:231580:231638 [0] NCCL INFO Connected all rings
+gpua042:231580:231638 [0] NCCL INFO Channel 00/0 : 24[7000] -> 28[7000] [receive] via NET/AWS Libfabric/1
+gpua042:231580:231638 [0] NCCL INFO Channel 01/0 : 12[7000] -> 28[7000] [receive] via NET/AWS Libfabric/1
+gpua039:3647427:3647427 [0] NCCL INFO cudaDriverVersion 12020
+gpua039:3647427:3647427 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.39<0>
+gpua039:3647427:3647427 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol.
+gpua039:3647427:3647427 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5).
+gpua039:3647427:3647492 [0] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0
+gpua039:3647427:3647492 [0] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics)
+gpua039:3647427:3647492 [0] NCCL INFO Using network AWS Libfabric
+gpua039:3647427:3647492 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpua039:3647427:3647492 [0] NCCL INFO NCCL_CROSS_NIC set by environment to 1.
+gpua039:3647427:3647492 [0] NCCL INFO Trees [0] 21/-1/-1->20->25 [1] 21/16/-1->20->13
+gpua039:3647427:3647492 [0] NCCL INFO Channel 00/0 : 19[c7000] -> 20[7000] [receive] via NET/AWS Libfabric/1
+gpua089:835725:835784 [2] NCCL INFO Channel 00/0 : 62[85000] -> 61[46000] via P2P/IPC/read
+gpua089:835725:835784 [2] NCCL INFO Channel 01/0 : 62[85000] -> 61[46000] via P2P/IPC/read
+gpua089:835725:835784 [2] NCCL INFO Connected all trees
+gpua089:835725:835784 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua089:835725:835784 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua089:835725:835784 [2] NCCL INFO comm 0x55eb7e3e0a50 rank 62 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpua080:3255808:3255935 [0] NCCL INFO Connected all trees
+gpua080:3255808:3255935 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua080:3255808:3255935 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua080:3255808:3255935 [0] NCCL INFO comm 0x55b3b6a468a0 rank 52 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpua049:4034214:4034288 [1] NCCL INFO Connected all rings
+gpua049:4034214:4034288 [1] NCCL INFO Channel 00/0 : 16[7000] -> 33[46000] [receive] via NET/AWS Libfabric/1
+gpua049:4034214:4034288 [1] NCCL INFO Channel 00/0 : 33[46000] -> 16[7000] [send] via NET/AWS Libfabric/1
+gpua049:4034214:4034288 [1] NCCL INFO Channel 00/0 : 33[46000] -> 32[7000] via P2P/IPC/read
+gpua049:4034214:4034288 [1] NCCL INFO Channel 01/0 : 33[46000] -> 32[7000] via P2P/IPC/read
+gpua049:4034214:4034288 [1] NCCL INFO Connected all trees
+gpua049:4034214:4034288 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua049:4034214:4034288 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua049:4034214:4034288 [1] NCCL INFO comm 0x5604a7e94210 rank 33 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpua085:4003430:4003508 [2] NCCL INFO Channel 01/0 : 58[85000] -> 57[46000] via P2P/IPC/read
+gpua085:4003430:4003508 [2] NCCL INFO Connected all trees
+gpua085:4003430:4003508 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua085:4003430:4003508 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua085:4003430:4003508 [2] NCCL INFO comm 0x55b616486f20 rank 58 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpua038:164983:165041 [1] NCCL INFO Channel 00/0 : 8[7000] -> 17[46000] [receive] via NET/AWS Libfabric/1
+gpua038:164983:165041 [1] NCCL INFO Channel 00/0 : 17[46000] -> 8[7000] [send] via NET/AWS Libfabric/1
+gpua038:164983:165041 [1] NCCL INFO Channel 00/0 : 17[46000] -> 16[7000] via P2P/IPC/read
+gpua038:164983:165041 [1] NCCL INFO Channel 01/0 : 17[46000] -> 16[7000] via P2P/IPC/read
+gpua038:164983:165041 [1] NCCL INFO Connected all trees
+gpua038:164983:165041 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua038:164983:165041 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua038:164983:165041 [1] NCCL INFO comm 0x562a6d9baa20 rank 17 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpua042:231580:231638 [0] NCCL INFO Channel 01/0 : 60[7000] -> 28[7000] [receive] via NET/AWS Libfabric/1
+gpua042:231580:231638 [0] NCCL INFO Channel 01/0 : 28[7000] -> 60[7000] [send] via NET/AWS Libfabric/1
+gpua042:231580:231638 [0] NCCL INFO Channel 01/0 : 28[7000] -> 12[7000] [send] via NET/AWS Libfabric/1
+gpua042:231580:231638 [0] NCCL INFO Channel 00/0 : 28[7000] -> 24[7000] [send] via NET/AWS Libfabric/1
+gpua042:231580:231638 [0] NCCL INFO Connected all trees
+gpua042:231580:231638 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua042:231580:231638 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua042:231580:231638 [0] NCCL INFO comm 0x55565577ef60 rank 28 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpua039:3647427:3647492 [0] NCCL INFO Channel 01/0 : 19[c7000] -> 20[7000] [receive] via NET/AWS Libfabric/1
+gpua039:3647427:3647492 [0] NCCL INFO Channel 00/0 : 20[7000] -> 21[46000] via P2P/IPC/read
+gpua039:3647427:3647492 [0] NCCL INFO Channel 01/0 : 20[7000] -> 21[46000] via P2P/IPC/read
+gpua039:3647427:3647492 [0] NCCL INFO Connected all rings
+gpua039:3647427:3647492 [0] NCCL INFO Channel 01/0 : 16[7000] -> 20[7000] [receive] via NET/AWS Libfabric/1
+gpua039:3647427:3647492 [0] NCCL INFO Channel 00/0 : 20[7000] -> 25[46000] [send] via NET/AWS Libfabric/1
+gpua039:3647427:3647492 [0] NCCL INFO Channel 01/0 : 13[46000] -> 20[7000] [receive] via NET/AWS Libfabric/1
+gpua039:3647427:3647492 [0] NCCL INFO Channel 01/0 : 20[7000] -> 13[46000] [send] via NET/AWS Libfabric/1
+gpua039:3647427:3647492 [0] NCCL INFO Channel 00/0 : 25[46000] -> 20[7000] [receive] via NET/AWS Libfabric/1
+gpua039:3647427:3647492 [0] NCCL INFO Channel 01/0 : 20[7000] -> 16[7000] [send] via NET/AWS Libfabric/1
+gpua080:3255810:3255810 [2] NCCL INFO cudaDriverVersion 12020
+gpua080:3255810:3255810 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.80<0>
+gpua080:3255810:3255810 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol.
+gpua080:3255810:3255810 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5).
+gpua080:3255810:3255938 [2] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0
+gpua080:3255810:3255938 [2] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics)
+gpua080:3255810:3255938 [2] NCCL INFO Using network AWS Libfabric
+gpua080:3255810:3255938 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpua080:3255810:3255938 [2] NCCL INFO NCCL_CROSS_NIC set by environment to 1.
+gpua080:3255810:3255938 [2] NCCL INFO Trees [0] 55/-1/-1->54->53 [1] 55/-1/-1->54->53
+gpua080:3255810:3255938 [2] NCCL INFO Channel 00/0 : 54[85000] -> 55[c7000] via P2P/IPC/read
+gpua080:3255810:3255938 [2] NCCL INFO Channel 01/0 : 54[85000] -> 55[c7000] via P2P/IPC/read
+gpua049:4034216:4034216 [3] NCCL INFO cudaDriverVersion 12020
+gpua049:4034216:4034216 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.49<0>
+gpua049:4034216:4034216 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol.
+gpua049:4034216:4034216 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5).
+gpua049:4034216:4034286 [3] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0
+gpua049:4034216:4034286 [3] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics)
+gpua049:4034216:4034286 [3] NCCL INFO Using network AWS Libfabric
+gpua049:4034216:4034286 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpua049:4034216:4034286 [3] NCCL INFO NCCL_CROSS_NIC set by environment to 1.
+gpua049:4034216:4034286 [3] NCCL INFO Trees [0] -1/-1/-1->35->34 [1] -1/-1/-1->35->34
+gpua049:4034216:4034286 [3] NCCL INFO Channel 00/0 : 35[c7000] -> 36[7000] [send] via NET/AWS Libfabric/1
+gpua049:4034216:4034286 [3] NCCL INFO Channel 01/0 : 35[c7000] -> 36[7000] [send] via NET/AWS Libfabric/1
+gpua085:4003431:4003431 [3] NCCL INFO cudaDriverVersion 12020
+gpua085:4003431:4003431 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.85<0>
+gpua085:4003431:4003431 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol.
+gpua085:4003431:4003431 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5).
+gpua085:4003431:4003507 [3] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0
+gpua085:4003431:4003507 [3] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics)
+gpua085:4003431:4003507 [3] NCCL INFO Using network AWS Libfabric
+gpua085:4003431:4003507 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpua085:4003431:4003507 [3] NCCL INFO NCCL_CROSS_NIC set by environment to 1.
+gpua085:4003431:4003507 [3] NCCL INFO Trees [0] -1/-1/-1->59->58 [1] -1/-1/-1->59->58
+gpua085:4003431:4003507 [3] NCCL INFO Channel 00/0 : 59[c7000] -> 60[7000] [send] via NET/AWS Libfabric/1
+gpua085:4003431:4003507 [3] NCCL INFO Channel 01/0 : 59[c7000] -> 60[7000] [send] via NET/AWS Libfabric/1
+gpua039:3647427:3647492 [0] NCCL INFO Connected all trees
+gpua039:3647427:3647492 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua039:3647427:3647492 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua039:3647427:3647492 [0] NCCL INFO comm 0x561fac7e8020 rank 20 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpua080:3255810:3255938 [2] NCCL INFO Connected all rings
+gpua080:3255810:3255938 [2] NCCL INFO Channel 00/0 : 54[85000] -> 53[46000] via P2P/IPC/read
+gpua080:3255810:3255938 [2] NCCL INFO Channel 01/0 : 54[85000] -> 53[46000] via P2P/IPC/read
+gpua080:3255810:3255938 [2] NCCL INFO Connected all trees
+gpua080:3255810:3255938 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua080:3255810:3255938 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua080:3255810:3255938 [2] NCCL INFO comm 0x55875bc62160 rank 54 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpua049:4034216:4034286 [3] NCCL INFO Connected all rings
+gpua049:4034216:4034286 [3] NCCL INFO Channel 00/0 : 35[c7000] -> 34[85000] via P2P/IPC/read
+gpua049:4034216:4034286 [3] NCCL INFO Channel 01/0 : 35[c7000] -> 34[85000] via P2P/IPC/read
+gpua049:4034216:4034286 [3] NCCL INFO Connected all trees
+gpua049:4034216:4034286 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua049:4034216:4034286 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua049:4034216:4034286 [3] NCCL INFO comm 0x5642d1d8d990 rank 35 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpua085:4003431:4003507 [3] NCCL INFO Connected all rings
+gpua085:4003431:4003507 [3] NCCL INFO Channel 00/0 : 59[c7000] -> 58[85000] via P2P/IPC/read
+gpua085:4003431:4003507 [3] NCCL INFO Channel 01/0 : 59[c7000] -> 58[85000] via P2P/IPC/read
+gpua085:4003431:4003507 [3] NCCL INFO Connected all trees
+gpua085:4003431:4003507 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua085:4003431:4003507 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua085:4003431:4003507 [3] NCCL INFO comm 0x55d953c95590 rank 59 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpua085:4003428:4003428 [0] NCCL INFO cudaDriverVersion 12020
+gpua085:4003428:4003428 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.85<0>
+gpua085:4003428:4003428 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol.
+gpua085:4003428:4003428 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5).
+gpua085:4003428:4003506 [0] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0
+gpua039:3647430:3647430 [3] NCCL INFO cudaDriverVersion 12020
+gpua039:3647430:3647430 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.39<0>
+gpua039:3647430:3647430 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol.
+gpua039:3647430:3647430 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5).
+gpua039:3647430:3647494 [3] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0
+gpua039:3647430:3647494 [3] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics)
+gpua039:3647430:3647494 [3] NCCL INFO Using network AWS Libfabric
+gpua039:3647430:3647494 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpua039:3647430:3647494 [3] NCCL INFO NCCL_CROSS_NIC set by environment to 1.
+gpua039:3647430:3647494 [3] NCCL INFO Trees [0] -1/-1/-1->23->22 [1] -1/-1/-1->23->22
+gpua039:3647430:3647494 [3] NCCL INFO Channel 00/0 : 23[c7000] -> 24[7000] [send] via NET/AWS Libfabric/1
+gpua039:3647430:3647494 [3] NCCL INFO Channel 01/0 : 23[c7000] -> 24[7000] [send] via NET/AWS Libfabric/1
+gpua080:3255811:3255811 [3] NCCL INFO cudaDriverVersion 12020
+gpua080:3255811:3255811 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.80<0>
+gpua080:3255811:3255811 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol.
+gpua080:3255811:3255811 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5).
+gpua080:3255811:3255937 [3] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0
+gpua080:3255811:3255937 [3] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics)
+gpua080:3255811:3255937 [3] NCCL INFO Using network AWS Libfabric
+gpua080:3255811:3255937 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpua080:3255811:3255937 [3] NCCL INFO NCCL_CROSS_NIC set by environment to 1.
+gpua080:3255811:3255937 [3] NCCL INFO Trees [0] -1/-1/-1->55->54 [1] -1/-1/-1->55->54
+gpua080:3255811:3255937 [3] NCCL INFO Channel 00/0 : 55[c7000] -> 56[7000] [send] via NET/AWS Libfabric/1
+gpua080:3255811:3255937 [3] NCCL INFO Channel 01/0 : 55[c7000] -> 56[7000] [send] via NET/AWS Libfabric/1
+gpua049:4034213:4034213 [0] NCCL INFO cudaDriverVersion 12020
+gpua049:4034213:4034213 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.49<0>
+gpua049:4034213:4034213 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol.
+gpua049:4034213:4034213 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5).
+gpua049:4034213:4034287 [0] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0
+gpua049:4034213:4034287 [0] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics)
+gpua049:4034213:4034287 [0] NCCL INFO Using network AWS Libfabric
+gpua049:4034213:4034287 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpua049:4034213:4034287 [0] NCCL INFO NCCL_CROSS_NIC set by environment to 1.
+gpua049:4034213:4034287 [0] NCCL INFO Trees [0] 33/48/-1->32->0 [1] 33/-1/-1->32->36
+gpua049:4034213:4034287 [0] NCCL INFO Channel 00/0 : 31[c7000] -> 32[7000] [receive] via NET/AWS Libfabric/1
+gpua085:4003428:4003506 [0] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics)
+gpua085:4003428:4003506 [0] NCCL INFO Using network AWS Libfabric
+gpua085:4003428:4003506 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpua085:4003428:4003506 [0] NCCL INFO NCCL_CROSS_NIC set by environment to 1.
+gpua085:4003428:4003506 [0] NCCL INFO Trees [0] 57/60/-1->56->48 [1] 57/-1/-1->56->53
+gpua085:4003428:4003506 [0] NCCL INFO Channel 00/0 : 55[c7000] -> 56[7000] [receive] via NET/AWS Libfabric/1
+gpua085:4003428:4003506 [0] NCCL INFO Channel 01/0 : 55[c7000] -> 56[7000] [receive] via NET/AWS Libfabric/1
+gpua085:4003428:4003506 [0] NCCL INFO Channel 00/0 : 56[7000] -> 57[46000] via P2P/IPC/read
+gpua085:4003428:4003506 [0] NCCL INFO Channel 01/0 : 56[7000] -> 57[46000] via P2P/IPC/read
+gpua085:4003428:4003506 [0] NCCL INFO Connected all rings
+gpua085:4003428:4003506 [0] NCCL INFO Channel 01/0 : 53[46000] -> 56[7000] [receive] via NET/AWS Libfabric/1
+gpua039:3647430:3647494 [3] NCCL INFO Connected all rings
+gpua039:3647430:3647494 [3] NCCL INFO Channel 00/0 : 23[c7000] -> 22[85000] via P2P/IPC/read
+gpua039:3647430:3647494 [3] NCCL INFO Channel 01/0 : 23[c7000] -> 22[85000] via P2P/IPC/read
+gpua039:3647430:3647494 [3] NCCL INFO Connected all trees
+gpua039:3647430:3647494 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua039:3647430:3647494 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua039:3647430:3647494 [3] NCCL INFO comm 0x55c66c5f9560 rank 23 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpua080:3255811:3255937 [3] NCCL INFO Connected all rings
+gpua080:3255811:3255937 [3] NCCL INFO Channel 00/0 : 55[c7000] -> 54[85000] via P2P/IPC/read
+gpua080:3255811:3255937 [3] NCCL INFO Channel 01/0 : 55[c7000] -> 54[85000] via P2P/IPC/read
+gpua080:3255811:3255937 [3] NCCL INFO Connected all trees
+gpua080:3255811:3255937 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua080:3255811:3255937 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua080:3255811:3255937 [3] NCCL INFO comm 0x55ffd8082bc0 rank 55 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpua049:4034213:4034287 [0] NCCL INFO Channel 01/0 : 31[c7000] -> 32[7000] [receive] via NET/AWS Libfabric/1
+gpua049:4034213:4034287 [0] NCCL INFO Channel 00/0 : 32[7000] -> 33[46000] via P2P/IPC/read
+gpua049:4034213:4034287 [0] NCCL INFO Channel 01/0 : 32[7000] -> 33[46000] via P2P/IPC/read
+gpua049:4034213:4034287 [0] NCCL INFO Connected all rings
+gpua049:4034213:4034287 [0] NCCL INFO Channel 01/0 : 32[7000] -> 36[7000] [send] via NET/AWS Libfabric/1
+gpua049:4034213:4034287 [0] NCCL INFO Channel 00/0 : 32[7000] -> 48[7000] [send] via NET/AWS Libfabric/1
+gpua049:4034213:4034287 [0] NCCL INFO Channel 00/0 : 0[7000] -> 32[7000] [receive] via NET/AWS Libfabric/1
+gpua049:4034213:4034287 [0] NCCL INFO Channel 00/0 : 32[7000] -> 0[7000] [send] via NET/AWS Libfabric/1
+gpua049:4034213:4034287 [0] NCCL INFO Channel 00/0 : 48[7000] -> 32[7000] [receive] via NET/AWS Libfabric/1
+gpua049:4034213:4034287 [0] NCCL INFO Channel 01/0 : 36[7000] -> 32[7000] [receive] via NET/AWS Libfabric/1
+gpua085:4003428:4003506 [0] NCCL INFO Channel 00/0 : 56[7000] -> 60[7000] [send] via NET/AWS Libfabric/1
+gpua085:4003428:4003506 [0] NCCL INFO Channel 00/0 : 48[7000] -> 56[7000] [receive] via NET/AWS Libfabric/1
+gpua085:4003428:4003506 [0] NCCL INFO Channel 00/0 : 56[7000] -> 48[7000] [send] via NET/AWS Libfabric/1
+gpua085:4003428:4003506 [0] NCCL INFO Channel 00/0 : 60[7000] -> 56[7000] [receive] via NET/AWS Libfabric/1
+gpua085:4003428:4003506 [0] NCCL INFO Channel 01/0 : 56[7000] -> 53[46000] [send] via NET/AWS Libfabric/1
+gpua085:4003428:4003506 [0] NCCL INFO Connected all trees
+gpua085:4003428:4003506 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua085:4003428:4003506 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua085:4003428:4003506 [0] NCCL INFO comm 0x55a4b56b2040 rank 56 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpua038:164982:164982 [0] NCCL INFO cudaDriverVersion 12020
+gpua038:164982:164982 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.38<0>
+gpua038:164982:164982 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol.
+gpua038:164982:164982 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5).
+gpua038:164982:165042 [0] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0
+gpua038:164982:165042 [0] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics)
+gpua038:164982:165042 [0] NCCL INFO Using network AWS Libfabric
+gpua038:164982:165042 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpua038:164982:165042 [0] NCCL INFO NCCL_CROSS_NIC set by environment to 1.
+gpua038:164982:165042 [0] NCCL INFO Trees [0] 17/24/-1->16->33 [1] 17/-1/-1->16->20
+gpua038:164982:165042 [0] NCCL INFO Channel 00/0 : 15[c7000] -> 16[7000] [receive] via NET/AWS Libfabric/1
+gpua038:164982:165042 [0] NCCL INFO Channel 01/0 : 15[c7000] -> 16[7000] [receive] via NET/AWS Libfabric/1
+gpua080:3255809:3255809 [1] NCCL INFO cudaDriverVersion 12020
+gpua080:3255809:3255809 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.80<0>
+gpua080:3255809:3255809 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol.
+gpua080:3255809:3255809 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5).
+gpua080:3255809:3255936 [1] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0
+gpua080:3255809:3255936 [1] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics)
+gpua080:3255809:3255936 [1] NCCL INFO Using network AWS Libfabric
+gpua080:3255809:3255936 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpua080:3255809:3255936 [1] NCCL INFO NCCL_CROSS_NIC set by environment to 1.
+gpua080:3255809:3255936 [1] NCCL INFO Trees [0] 54/-1/-1->53->52 [1] 54/56/-1->53->52
+gpua080:3255809:3255936 [1] NCCL INFO Channel 00/0 : 53[46000] -> 54[85000] via P2P/IPC/read
+gpua080:3255809:3255936 [1] NCCL INFO Channel 01/0 : 53[46000] -> 54[85000] via P2P/IPC/read
+gpua049:4034213:4034287 [0] NCCL INFO Connected all trees
+gpua049:4034213:4034287 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua049:4034213:4034287 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua049:4034213:4034287 [0] NCCL INFO comm 0x556fa0bc0680 rank 32 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpua038:164982:165042 [0] NCCL INFO Channel 00/0 : 16[7000] -> 17[46000] via P2P/IPC/read
+gpua038:164982:165042 [0] NCCL INFO Channel 01/0 : 16[7000] -> 17[46000] via P2P/IPC/read
+gpua038:164982:165042 [0] NCCL INFO Connected all rings
+gpua038:164982:165042 [0] NCCL INFO Channel 01/0 : 16[7000] -> 20[7000] [send] via NET/AWS Libfabric/1
+gpua038:164982:165042 [0] NCCL INFO Channel 00/0 : 16[7000] -> 24[7000] [send] via NET/AWS Libfabric/1
+gpua038:164982:165042 [0] NCCL INFO Channel 00/0 : 16[7000] -> 33[46000] [send] via NET/AWS Libfabric/1
+gpua038:164982:165042 [0] NCCL INFO Channel 00/0 : 33[46000] -> 16[7000] [receive] via NET/AWS Libfabric/1
+gpua038:164982:165042 [0] NCCL INFO Channel 00/0 : 24[7000] -> 16[7000] [receive] via NET/AWS Libfabric/1
+gpua038:164982:165042 [0] NCCL INFO Channel 01/0 : 20[7000] -> 16[7000] [receive] via NET/AWS Libfabric/1
+gpua038:164982:165042 [0] NCCL INFO Connected all trees
+gpua038:164982:165042 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua016:567904:567904 [0] NCCL INFO cudaDriverVersion 12020
+gpua016:567904:567904 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.16<0>
+gpua016:567904:567904 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol.
+gpua016:567904:567904 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5).
+gpua016:567904:567988 [0] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0
+gpua016:567904:567988 [0] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics)
+gpua016:567904:567988 [0] NCCL INFO Using network AWS Libfabric
+gpua016:567904:567988 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpua016:567904:567988 [0] NCCL INFO NCCL_CROSS_NIC set by environment to 1.
+gpua016:567904:567988 [0] NCCL INFO Trees [0] 9/12/-1->8->17 [1] 9/-1/-1->8->5
+gpua016:567904:567988 [0] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [receive] via NET/AWS Libfabric/1
+gpua016:567904:567988 [0] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [receive] via NET/AWS Libfabric/1
+gpua080:3255809:3255936 [1] NCCL INFO Connected all rings
+gpua080:3255809:3255936 [1] NCCL INFO Channel 01/0 : 53[46000] -> 56[7000] [send] via NET/AWS Libfabric/1
+gpua080:3255809:3255936 [1] NCCL INFO Channel 01/0 : 56[7000] -> 53[46000] [receive] via NET/AWS Libfabric/1
+gpua080:3255809:3255936 [1] NCCL INFO Channel 00/0 : 53[46000] -> 52[7000] via P2P/IPC/read
+gpua080:3255809:3255936 [1] NCCL INFO Channel 01/0 : 53[46000] -> 52[7000] via P2P/IPC/read
+gpua080:3255809:3255936 [1] NCCL INFO Connected all trees
+gpua080:3255809:3255936 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua080:3255809:3255936 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua080:3255809:3255936 [1] NCCL INFO comm 0x55d27075e050 rank 53 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpua038:164982:165042 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua038:164982:165042 [0] NCCL INFO comm 0x55eb966de240 rank 16 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpua016:567904:567988 [0] NCCL INFO Channel 00/0 : 8[7000] -> 9[46000] via P2P/IPC/read
+gpua016:567904:567988 [0] NCCL INFO Channel 01/0 : 8[7000] -> 9[46000] via P2P/IPC/read
+gpua016:567904:567988 [0] NCCL INFO Connected all rings
+gpua016:567904:567988 [0] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [receive] via NET/AWS Libfabric/1
+gpua016:567904:567988 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [send] via NET/AWS Libfabric/1
+gpua016:567904:567988 [0] NCCL INFO Channel 00/0 : 8[7000] -> 17[46000] [send] via NET/AWS Libfabric/1
+gpua016:567904:567988 [0] NCCL INFO Channel 00/0 : 17[46000] -> 8[7000] [receive] via NET/AWS Libfabric/1
+gpua016:567904:567988 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [receive] via NET/AWS Libfabric/1
+gpua016:567904:567988 [0] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [send] via NET/AWS Libfabric/1
+gpua016:567904:567988 [0] NCCL INFO Connected all trees
+gpua016:567904:567988 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua016:567904:567988 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua016:567904:567988 [0] NCCL INFO comm 0x55cd179a0660 rank 8 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpua042:231583:231583 [3] NCCL INFO cudaDriverVersion 12020
+gpua042:231583:231583 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.42<0>
+gpua042:231583:231583 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol.
+gpua042:231583:231583 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5).
+gpua042:231583:231637 [3] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0
+gpua042:231583:231637 [3] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics)
+gpua042:231583:231637 [3] NCCL INFO Using network AWS Libfabric
+gpua042:231583:231637 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpua042:231583:231637 [3] NCCL INFO NCCL_CROSS_NIC set by environment to 1.
+gpua042:231583:231637 [3] NCCL INFO Trees [0] -1/-1/-1->31->30 [1] -1/-1/-1->31->30
+gpua042:231583:231637 [3] NCCL INFO Channel 00/0 : 31[c7000] -> 32[7000] [send] via NET/AWS Libfabric/1
+gpua042:231583:231637 [3] NCCL INFO Channel 01/0 : 31[c7000] -> 32[7000] [send] via NET/AWS Libfabric/1
+gpua042:231583:231637 [3] NCCL INFO Connected all rings
+gpua042:231583:231637 [3] NCCL INFO Channel 00/0 : 31[c7000] -> 30[85000] via P2P/IPC/read
+gpua042:231583:231637 [3] NCCL INFO Channel 01/0 : 31[c7000] -> 30[85000] via P2P/IPC/read
+gpua042:231583:231637 [3] NCCL INFO Connected all trees
+gpua042:231583:231637 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua042:231583:231637 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua042:231583:231637 [3] NCCL INFO comm 0x564c9e4e0330 rank 31 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpua016:567907:567907 [3] NCCL INFO cudaDriverVersion 12020
+gpua016:567907:567907 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.16<0>
+gpua016:567907:567907 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol.
+gpua016:567907:567907 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5).
+gpua016:567907:567986 [3] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0
+gpua016:567907:567986 [3] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics)
+gpua016:567907:567986 [3] NCCL INFO Using network AWS Libfabric
+gpua016:567907:567986 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpua016:567907:567986 [3] NCCL INFO NCCL_CROSS_NIC set by environment to 1.
+gpua016:567907:567986 [3] NCCL INFO Trees [0] -1/-1/-1->11->10 [1] -1/-1/-1->11->10
+gpua016:567907:567986 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [send] via NET/AWS Libfabric/1
+gpua016:567907:567986 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [send] via NET/AWS Libfabric/1
+gpua089:835723:835723 [0] NCCL INFO cudaDriverVersion 12020
+gpua089:835723:835723 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.89<0>
+gpua089:835723:835723 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol.
+gpua089:835723:835723 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5).
+gpua089:835723:835781 [0] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0
+gpua089:835723:835781 [0] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics)
+gpua089:835723:835781 [0] NCCL INFO Using network AWS Libfabric
+gpua089:835723:835781 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpua089:835723:835781 [0] NCCL INFO NCCL_CROSS_NIC set by environment to 1.
+gpua089:835723:835781 [0] NCCL INFO Trees [0] 61/-1/-1->60->56 [1] 61/28/-1->60->-1
+gpua089:835723:835781 [0] NCCL INFO Channel 00/0 : 59[c7000] -> 60[7000] [receive] via NET/AWS Libfabric/1
+gpua089:835723:835781 [0] NCCL INFO Channel 01/0 : 59[c7000] -> 60[7000] [receive] via NET/AWS Libfabric/1
+gpua016:567907:567986 [3] NCCL INFO Connected all rings
+gpua016:567907:567986 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 10[85000] via P2P/IPC/read
+gpua016:567907:567986 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 10[85000] via P2P/IPC/read
+gpua016:567907:567986 [3] NCCL INFO Connected all trees
+gpua016:567907:567986 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua016:567907:567986 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua016:567907:567986 [3] NCCL INFO comm 0x5644e38cda30 rank 11 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpua089:835723:835781 [0] NCCL INFO Channel 00/0 : 60[7000] -> 61[46000] via P2P/IPC/read
+gpua089:835723:835781 [0] NCCL INFO Channel 01/0 : 60[7000] -> 61[46000] via P2P/IPC/read
+gpua089:835723:835781 [0] NCCL INFO Connected all rings
+gpua089:835723:835781 [0] NCCL INFO Channel 00/0 : 56[7000] -> 60[7000] [receive] via NET/AWS Libfabric/1
+gpua089:835723:835781 [0] NCCL INFO Channel 01/0 : 28[7000] -> 60[7000] [receive] via NET/AWS Libfabric/1
+gpua089:835723:835781 [0] NCCL INFO Channel 01/0 : 60[7000] -> 28[7000] [send] via NET/AWS Libfabric/1
+gpua089:835723:835781 [0] NCCL INFO Channel 00/0 : 60[7000] -> 56[7000] [send] via NET/AWS Libfabric/1
+gpua089:835723:835781 [0] NCCL INFO Connected all trees
+gpua089:835723:835781 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua089:835723:835781 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua089:835723:835781 [0] NCCL INFO comm 0x55fcb9326ab0 rank 60 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpua055:105498:105498 [3] NCCL INFO cudaDriverVersion 12020
+gpua055:105498:105498 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.55<0>
+gpua055:105498:105498 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol.
+gpua055:105498:105498 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5).
+gpua055:105498:105561 [3] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0
+gpua055:105498:105561 [3] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics)
+gpua055:105498:105561 [3] NCCL INFO Using network AWS Libfabric
+gpua055:105498:105561 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpua055:105498:105561 [3] NCCL INFO NCCL_CROSS_NIC set by environment to 1.
+gpua055:105498:105561 [3] NCCL INFO Trees [0] -1/-1/-1->43->42 [1] -1/-1/-1->43->42
+gpua055:105498:105561 [3] NCCL INFO Channel 00/0 : 43[c7000] -> 44[7000] [send] via NET/AWS Libfabric/1
+gpua055:105498:105561 [3] NCCL INFO Channel 01/0 : 43[c7000] -> 44[7000] [send] via NET/AWS Libfabric/1
+gpua055:105498:105561 [3] NCCL INFO Connected all rings
+gpua055:105498:105561 [3] NCCL INFO Channel 00/0 : 43[c7000] -> 42[85000] via P2P/IPC/read
+gpua055:105498:105561 [3] NCCL INFO Channel 01/0 : 43[c7000] -> 42[85000] via P2P/IPC/read
+gpua055:105498:105561 [3] NCCL INFO Connected all trees
+gpua055:105498:105561 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua055:105498:105561 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua055:105498:105561 [3] NCCL INFO comm 0x5649fc48fed0 rank 43 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpua055:105497:105497 [2] NCCL INFO cudaDriverVersion 12020
+gpua055:105497:105497 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.55<0>
+gpua055:105497:105497 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol.
+gpua055:105497:105497 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5).
+gpua055:105497:105560 [2] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0
+gpua055:105497:105560 [2] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics)
+gpua055:105497:105560 [2] NCCL INFO Using network AWS Libfabric
+gpua055:105497:105560 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpua055:105497:105560 [2] NCCL INFO NCCL_CROSS_NIC set by environment to 1.
+gpua055:105497:105560 [2] NCCL INFO Trees [0] 43/-1/-1->42->41 [1] 43/-1/-1->42->41
+gpua055:105497:105560 [2] NCCL INFO Channel 00/0 : 42[85000] -> 43[c7000] via P2P/IPC/read
+gpua055:105497:105560 [2] NCCL INFO Channel 01/0 : 42[85000] -> 43[c7000] via P2P/IPC/read
+gpua055:105497:105560 [2] NCCL INFO Connected all rings
+gpua055:105497:105560 [2] NCCL INFO Channel 00/0 : 42[85000] -> 41[46000] via P2P/IPC/read
+gpua055:105497:105560 [2] NCCL INFO Channel 01/0 : 42[85000] -> 41[46000] via P2P/IPC/read
+gpua055:105497:105560 [2] NCCL INFO Connected all trees
+gpua055:105497:105560 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua055:105497:105560 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua055:105497:105560 [2] NCCL INFO comm 0x564ad4b17ef0 rank 42 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpua040:3844987:3844987 [1] NCCL INFO cudaDriverVersion 12020
+gpua040:3844987:3844987 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.40<0>
+gpua040:3844987:3844987 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol.
+gpua040:3844987:3844987 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5).
+gpua040:3844987:3845043 [1] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0
+gpua040:3844987:3845043 [1] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics)
+gpua040:3844987:3845043 [1] NCCL INFO Using network AWS Libfabric
+gpua040:3844987:3845043 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpua040:3844987:3845043 [1] NCCL INFO NCCL_CROSS_NIC set by environment to 1.
+gpua040:3844987:3845043 [1] NCCL INFO Trees [0] 26/20/-1->25->24 [1] 26/-1/-1->25->24
+gpua040:3844987:3845043 [1] NCCL INFO Channel 00/0 : 25[46000] -> 26[85000] via P2P/IPC/read
+gpua040:3844987:3845043 [1] NCCL INFO Channel 01/0 : 25[46000] -> 26[85000] via P2P/IPC/read
+gpua040:3844987:3845043 [1] NCCL INFO Connected all rings
+gpua040:3844987:3845043 [1] NCCL INFO Channel 00/0 : 20[7000] -> 25[46000] [receive] via NET/AWS Libfabric/1
+gpua040:3844987:3845043 [1] NCCL INFO Channel 00/0 : 25[46000] -> 20[7000] [send] via NET/AWS Libfabric/1
+gpua040:3844987:3845043 [1] NCCL INFO Channel 00/0 : 25[46000] -> 24[7000] via P2P/IPC/read
+gpua040:3844987:3845043 [1] NCCL INFO Channel 01/0 : 25[46000] -> 24[7000] via P2P/IPC/read
+gpua040:3844987:3845043 [1] NCCL INFO Connected all trees
+gpua040:3844987:3845043 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua040:3844987:3845043 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua040:3844987:3845043 [1] NCCL INFO comm 0x56075b52ece0 rank 25 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpua054:406314:406314 [3] NCCL INFO cudaDriverVersion 12020
+gpua054:406314:406314 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.54<0>
+gpua054:406314:406314 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol.
+gpua054:406314:406314 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5).
+gpua054:406314:406380 [3] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0
+gpua054:406314:406380 [3] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics)
+gpua054:406314:406380 [3] NCCL INFO Using network AWS Libfabric
+gpua054:406314:406380 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpua054:406314:406380 [3] NCCL INFO NCCL_CROSS_NIC set by environment to 1.
+gpua054:406314:406380 [3] NCCL INFO Trees [0] -1/-1/-1->39->38 [1] -1/-1/-1->39->38
+gpua054:406314:406380 [3] NCCL INFO Channel 00/0 : 39[c7000] -> 40[7000] [send] via NET/AWS Libfabric/1
+gpua054:406314:406380 [3] NCCL INFO Channel 01/0 : 39[c7000] -> 40[7000] [send] via NET/AWS Libfabric/1
+gpua054:406314:406380 [3] NCCL INFO Connected all rings
+gpua054:406314:406380 [3] NCCL INFO Channel 00/0 : 39[c7000] -> 38[85000] via P2P/IPC/read
+gpua054:406314:406380 [3] NCCL INFO Channel 01/0 : 39[c7000] -> 38[85000] via P2P/IPC/read
+gpua054:406314:406380 [3] NCCL INFO Connected all trees
+gpua054:406314:406380 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua054:406314:406380 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua054:406314:406380 [3] NCCL INFO comm 0x5590472c48f0 rank 39 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpua016:567906:567906 [2] NCCL INFO cudaDriverVersion 12020
+gpua016:567906:567906 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.16<0>
+gpua016:567906:567906 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol.
+gpua016:567906:567906 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5).
+gpua016:567906:567985 [2] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0
+gpua016:567906:567985 [2] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics)
+gpua016:567906:567985 [2] NCCL INFO Using network AWS Libfabric
+gpua016:567906:567985 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpua016:567906:567985 [2] NCCL INFO NCCL_CROSS_NIC set by environment to 1.
+gpua016:567906:567985 [2] NCCL INFO Trees [0] 11/-1/-1->10->9 [1] 11/-1/-1->10->9
+gpua016:567906:567985 [2] NCCL INFO Channel 00/0 : 10[85000] -> 11[c7000] via P2P/IPC/read
+gpua016:567906:567985 [2] NCCL INFO Channel 01/0 : 10[85000] -> 11[c7000] via P2P/IPC/read
+gpua016:567906:567985 [2] NCCL INFO Connected all rings
+gpua055:105495:105495 [0] NCCL INFO cudaDriverVersion 12020
+gpua055:105495:105495 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.55<0>
+gpua055:105495:105495 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol.
+gpua055:105495:105495 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5).
+gpua055:105495:105563 [0] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0
+gpua055:105495:105563 [0] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics)
+gpua055:105495:105563 [0] NCCL INFO Using network AWS Libfabric
+gpua055:105495:105563 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpua055:105495:105563 [0] NCCL INFO NCCL_CROSS_NIC set by environment to 1.
+gpua055:105495:105563 [0] NCCL INFO Trees [0] 41/44/-1->40->49 [1] 41/-1/-1->40->37
+gpua055:105495:105563 [0] NCCL INFO Channel 00/0 : 39[c7000] -> 40[7000] [receive] via NET/AWS Libfabric/1
+gpua055:105495:105563 [0] NCCL INFO Channel 01/0 : 39[c7000] -> 40[7000] [receive] via NET/AWS Libfabric/1
+gpua079:3679372:3679372 [3] NCCL INFO cudaDriverVersion 12020
+gpua079:3679372:3679372 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.79<0>
+gpua079:3679372:3679372 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol.
+gpua079:3679372:3679372 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5).
+gpua079:3679372:3679431 [3] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0
+gpua079:3679372:3679431 [3] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics)
+gpua079:3679372:3679431 [3] NCCL INFO Using network AWS Libfabric
+gpua079:3679372:3679431 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpua079:3679372:3679431 [3] NCCL INFO NCCL_CROSS_NIC set by environment to 1.
+gpua079:3679372:3679431 [3] NCCL INFO Trees [0] -1/-1/-1->51->50 [1] -1/-1/-1->51->50
+gpua079:3679372:3679431 [3] NCCL INFO Channel 00/0 : 51[c7000] -> 52[7000] [send] via NET/AWS Libfabric/1
+gpua079:3679372:3679431 [3] NCCL INFO Channel 01/0 : 51[c7000] -> 52[7000] [send] via NET/AWS Libfabric/1
+gpua054:406313:406313 [2] NCCL INFO cudaDriverVersion 12020
+gpua054:406313:406313 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.54<0>
+gpua054:406313:406313 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol.
+gpua054:406313:406313 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5).
+gpua054:406313:406382 [2] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0
+gpua054:406313:406382 [2] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics)
+gpua054:406313:406382 [2] NCCL INFO Using network AWS Libfabric
+gpua054:406313:406382 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpua054:406313:406382 [2] NCCL INFO NCCL_CROSS_NIC set by environment to 1.
+gpua054:406313:406382 [2] NCCL INFO Trees [0] 39/-1/-1->38->37 [1] 39/-1/-1->38->37
+gpua054:406313:406382 [2] NCCL INFO Channel 00/0 : 38[85000] -> 39[c7000] via P2P/IPC/read
+gpua054:406313:406382 [2] NCCL INFO Channel 01/0 : 38[85000] -> 39[c7000] via P2P/IPC/read
+gpua054:406313:406382 [2] NCCL INFO Connected all rings
+gpua040:3844986:3844986 [0] NCCL INFO cudaDriverVersion 12020
+gpua040:3844986:3844986 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.40<0>
+gpua040:3844986:3844986 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol.
+gpua040:3844986:3844986 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5).
+gpua040:3844986:3845046 [0] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0
+gpua040:3844986:3845046 [0] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics)
+gpua040:3844986:3845046 [0] NCCL INFO Using network AWS Libfabric
+gpua040:3844986:3845046 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpua040:3844986:3845046 [0] NCCL INFO NCCL_CROSS_NIC set by environment to 1.
+gpua040:3844986:3845046 [0] NCCL INFO Trees [0] 25/28/-1->24->16 [1] 25/-1/-1->24->21
+gpua040:3844986:3845046 [0] NCCL INFO Channel 00/0 : 23[c7000] -> 24[7000] [receive] via NET/AWS Libfabric/1
+gpua057:3871123:3871123 [0] NCCL INFO cudaDriverVersion 12020
+gpua057:3871123:3871123 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.57<0>
+gpua057:3871123:3871123 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol.
+gpua057:3871123:3871123 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5).
+gpua057:3871123:3871192 [0] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0
+gpua057:3871123:3871192 [0] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics)
+gpua057:3871123:3871192 [0] NCCL INFO Using network AWS Libfabric
+gpua057:3871123:3871192 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpua057:3871123:3871192 [0] NCCL INFO NCCL_CROSS_NIC set by environment to 1.
+gpua057:3871123:3871192 [0] NCCL INFO Trees [0] 45/-1/-1->44->40 [1] 45/36/-1->44->29
+gpua057:3871123:3871192 [0] NCCL INFO Channel 00/0 : 43[c7000] -> 44[7000] [receive] via NET/AWS Libfabric/1
+gpua016:567906:567985 [2] NCCL INFO Channel 00/0 : 10[85000] -> 9[46000] via P2P/IPC/read
+gpua016:567906:567985 [2] NCCL INFO Channel 01/0 : 10[85000] -> 9[46000] via P2P/IPC/read
+gpua016:567906:567985 [2] NCCL INFO Connected all trees
+gpua016:567906:567985 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua016:567906:567985 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua016:567906:567985 [2] NCCL INFO comm 0x5578fac02ae0 rank 10 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpua055:105495:105563 [0] NCCL INFO Channel 00/0 : 40[7000] -> 41[46000] via P2P/IPC/read
+gpua055:105495:105563 [0] NCCL INFO Channel 01/0 : 40[7000] -> 41[46000] via P2P/IPC/read
+gpua055:105495:105563 [0] NCCL INFO Connected all rings
+gpua055:105495:105563 [0] NCCL INFO Channel 01/0 : 37[46000] -> 40[7000] [receive] via NET/AWS Libfabric/1
+gpua055:105495:105563 [0] NCCL INFO Channel 00/0 : 40[7000] -> 44[7000] [send] via NET/AWS Libfabric/1
+gpua055:105495:105563 [0] NCCL INFO Channel 00/0 : 40[7000] -> 49[46000] [send] via NET/AWS Libfabric/1
+gpua055:105495:105563 [0] NCCL INFO Channel 00/0 : 49[46000] -> 40[7000] [receive] via NET/AWS Libfabric/1
+gpua055:105495:105563 [0] NCCL INFO Channel 00/0 : 44[7000] -> 40[7000] [receive] via NET/AWS Libfabric/1
+gpua055:105495:105563 [0] NCCL INFO Channel 01/0 : 40[7000] -> 37[46000] [send] via NET/AWS Libfabric/1
+gpua055:105495:105563 [0] NCCL INFO Connected all trees
+gpua055:105495:105563 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua079:3679372:3679431 [3] NCCL INFO Connected all rings
+gpua079:3679372:3679431 [3] NCCL INFO Channel 00/0 : 51[c7000] -> 50[85000] via P2P/IPC/read
+gpua079:3679372:3679431 [3] NCCL INFO Channel 01/0 : 51[c7000] -> 50[85000] via P2P/IPC/read
+gpua079:3679372:3679431 [3] NCCL INFO Connected all trees
+gpua079:3679372:3679431 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua079:3679372:3679431 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua079:3679372:3679431 [3] NCCL INFO comm 0x5628fe7a85a0 rank 51 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpua054:406313:406382 [2] NCCL INFO Channel 00/0 : 38[85000] -> 37[46000] via P2P/IPC/read
+gpua054:406313:406382 [2] NCCL INFO Channel 01/0 : 38[85000] -> 37[46000] via P2P/IPC/read
+gpua054:406313:406382 [2] NCCL INFO Connected all trees
+gpua054:406313:406382 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua054:406313:406382 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua054:406313:406382 [2] NCCL INFO comm 0x56455d500980 rank 38 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpua040:3844986:3845046 [0] NCCL INFO Channel 01/0 : 23[c7000] -> 24[7000] [receive] via NET/AWS Libfabric/1
+gpua040:3844986:3845046 [0] NCCL INFO Channel 00/0 : 24[7000] -> 25[46000] via P2P/IPC/read
+gpua040:3844986:3845046 [0] NCCL INFO Channel 01/0 : 24[7000] -> 25[46000] via P2P/IPC/read
+gpua040:3844986:3845046 [0] NCCL INFO Connected all rings
+gpua040:3844986:3845046 [0] NCCL INFO Channel 01/0 : 21[46000] -> 24[7000] [receive] via NET/AWS Libfabric/1
+gpua040:3844986:3845046 [0] NCCL INFO Channel 00/0 : 24[7000] -> 28[7000] [send] via NET/AWS Libfabric/1
+gpua040:3844986:3845046 [0] NCCL INFO Channel 00/0 : 16[7000] -> 24[7000] [receive] via NET/AWS Libfabric/1
+gpua040:3844986:3845046 [0] NCCL INFO Channel 00/0 : 24[7000] -> 16[7000] [send] via NET/AWS Libfabric/1
+gpua040:3844986:3845046 [0] NCCL INFO Channel 00/0 : 28[7000] -> 24[7000] [receive] via NET/AWS Libfabric/1
+gpua040:3844986:3845046 [0] NCCL INFO Channel 01/0 : 24[7000] -> 21[46000] [send] via NET/AWS Libfabric/1
+gpua057:3871123:3871192 [0] NCCL INFO Channel 01/0 : 43[c7000] -> 44[7000] [receive] via NET/AWS Libfabric/1
+gpua057:3871123:3871192 [0] NCCL INFO Channel 00/0 : 44[7000] -> 45[46000] via P2P/IPC/read
+gpua057:3871123:3871192 [0] NCCL INFO Channel 01/0 : 44[7000] -> 45[46000] via P2P/IPC/read
+gpua057:3871123:3871192 [0] NCCL INFO Connected all rings
+gpua057:3871123:3871192 [0] NCCL INFO Channel 00/0 : 40[7000] -> 44[7000] [receive] via NET/AWS Libfabric/1
+gpua057:3871123:3871192 [0] NCCL INFO Channel 01/0 : 36[7000] -> 44[7000] [receive] via NET/AWS Libfabric/1
+gpua057:3871123:3871192 [0] NCCL INFO Channel 01/0 : 29[46000] -> 44[7000] [receive] via NET/AWS Libfabric/1
+gpua057:3871123:3871192 [0] NCCL INFO Channel 01/0 : 44[7000] -> 29[46000] [send] via NET/AWS Libfabric/1
+gpua057:3871123:3871192 [0] NCCL INFO Channel 01/0 : 44[7000] -> 36[7000] [send] via NET/AWS Libfabric/1
+gpua057:3871123:3871192 [0] NCCL INFO Channel 00/0 : 44[7000] -> 40[7000] [send] via NET/AWS Libfabric/1
+gpua016:567905:567905 [1] NCCL INFO cudaDriverVersion 12020
+gpua016:567905:567905 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.16<0>
+gpua016:567905:567905 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol.
+gpua016:567905:567905 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5).
+gpua016:567905:567987 [1] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0
+gpua016:567905:567987 [1] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics)
+gpua016:567905:567987 [1] NCCL INFO Using network AWS Libfabric
+gpua016:567905:567987 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpua016:567905:567987 [1] NCCL INFO NCCL_CROSS_NIC set by environment to 1.
+gpua016:567905:567987 [1] NCCL INFO Trees [0] 10/4/-1->9->8 [1] 10/-1/-1->9->8
+gpua016:567905:567987 [1] NCCL INFO Channel 00/0 : 9[46000] -> 10[85000] via P2P/IPC/read
+gpua016:567905:567987 [1] NCCL INFO Channel 01/0 : 9[46000] -> 10[85000] via P2P/IPC/read
+gpua016:567905:567987 [1] NCCL INFO Connected all rings
+gpua055:105495:105563 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua055:105495:105563 [0] NCCL INFO comm 0x563edc869a60 rank 40 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpua079:3679370:3679370 [1] NCCL INFO cudaDriverVersion 12020
+gpua079:3679370:3679370 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.79<0>
+gpua079:3679370:3679370 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol.
+gpua079:3679370:3679370 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5).
+gpua079:3679370:3679432 [1] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0
+gpua079:3679370:3679432 [1] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics)
+gpua079:3679370:3679432 [1] NCCL INFO Using network AWS Libfabric
+gpua079:3679370:3679432 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpua079:3679370:3679432 [1] NCCL INFO NCCL_CROSS_NIC set by environment to 1.
+gpua079:3679370:3679432 [1] NCCL INFO Trees [0] 50/40/-1->49->48 [1] 50/-1/-1->49->48
+gpua079:3679370:3679432 [1] NCCL INFO Channel 00/0 : 49[46000] -> 50[85000] via P2P/IPC/read
+gpua079:3679370:3679432 [1] NCCL INFO Channel 01/0 : 49[46000] -> 50[85000] via P2P/IPC/read
+gpua054:406312:406312 [1] NCCL INFO cudaDriverVersion 12020
+gpua054:406312:406312 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.54<0>
+gpua054:406312:406312 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol.
+gpua054:406312:406312 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5).
+gpua054:406312:406379 [1] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0
+gpua054:406312:406379 [1] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics)
+gpua054:406312:406379 [1] NCCL INFO Using network AWS Libfabric
+gpua054:406312:406379 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpua054:406312:406379 [1] NCCL INFO NCCL_CROSS_NIC set by environment to 1.
+gpua054:406312:406379 [1] NCCL INFO Trees [0] 38/-1/-1->37->36 [1] 38/40/-1->37->36
+gpua054:406312:406379 [1] NCCL INFO Channel 00/0 : 37[46000] -> 38[85000] via P2P/IPC/read
+gpua054:406312:406379 [1] NCCL INFO Channel 01/0 : 37[46000] -> 38[85000] via P2P/IPC/read
+gpua054:406312:406379 [1] NCCL INFO Connected all rings
+gpua040:3844986:3845046 [0] NCCL INFO Connected all trees
+gpua040:3844986:3845046 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua040:3844986:3845046 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua040:3844986:3845046 [0] NCCL INFO comm 0x555884607890 rank 24 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpua057:3871123:3871192 [0] NCCL INFO Connected all trees
+gpua057:3871123:3871192 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua057:3871123:3871192 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua057:3871123:3871192 [0] NCCL INFO comm 0x558f47af8e20 rank 44 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpua057:3871124:3871124 [1] NCCL INFO cudaDriverVersion 12020
+gpua057:3871124:3871124 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.57<0>
+gpua057:3871124:3871124 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol.
+gpua057:3871124:3871124 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5).
+gpua057:3871124:3871193 [1] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0
+gpua057:3871124:3871193 [1] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics)
+gpua057:3871124:3871193 [1] NCCL INFO Using network AWS Libfabric
+gpua057:3871124:3871193 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpua016:567905:567987 [1] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [receive] via NET/AWS Libfabric/1
+gpua016:567905:567987 [1] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [send] via NET/AWS Libfabric/1
+gpua016:567905:567987 [1] NCCL INFO Channel 00/0 : 9[46000] -> 8[7000] via P2P/IPC/read
+gpua016:567905:567987 [1] NCCL INFO Channel 01/0 : 9[46000] -> 8[7000] via P2P/IPC/read
+gpua016:567905:567987 [1] NCCL INFO Connected all trees
+gpua016:567905:567987 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua016:567905:567987 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua016:567905:567987 [1] NCCL INFO comm 0x55abdc2f7020 rank 9 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpua055:105496:105496 [1] NCCL INFO cudaDriverVersion 12020
+gpua055:105496:105496 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.55<0>
+gpua055:105496:105496 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol.
+gpua055:105496:105496 [1] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5).
+gpua055:105496:105562 [1] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0
+gpua055:105496:105562 [1] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics)
+gpua055:105496:105562 [1] NCCL INFO Using network AWS Libfabric
+gpua055:105496:105562 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpua055:105496:105562 [1] NCCL INFO NCCL_CROSS_NIC set by environment to 1.
+gpua055:105496:105562 [1] NCCL INFO Trees [0] 42/36/-1->41->40 [1] 42/-1/-1->41->40
+gpua055:105496:105562 [1] NCCL INFO Channel 00/0 : 41[46000] -> 42[85000] via P2P/IPC/read
+gpua055:105496:105562 [1] NCCL INFO Channel 01/0 : 41[46000] -> 42[85000] via P2P/IPC/read
+gpua055:105496:105562 [1] NCCL INFO Connected all rings
+gpua079:3679370:3679432 [1] NCCL INFO Connected all rings
+gpua079:3679370:3679432 [1] NCCL INFO Channel 00/0 : 40[7000] -> 49[46000] [receive] via NET/AWS Libfabric/1
+gpua079:3679370:3679432 [1] NCCL INFO Channel 00/0 : 49[46000] -> 40[7000] [send] via NET/AWS Libfabric/1
+gpua079:3679370:3679432 [1] NCCL INFO Channel 00/0 : 49[46000] -> 48[7000] via P2P/IPC/read
+gpua079:3679370:3679432 [1] NCCL INFO Channel 01/0 : 49[46000] -> 48[7000] via P2P/IPC/read
+gpua079:3679370:3679432 [1] NCCL INFO Connected all trees
+gpua079:3679370:3679432 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua079:3679370:3679432 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua079:3679370:3679432 [1] NCCL INFO comm 0x55b06ddcaf50 rank 49 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpua054:406312:406379 [1] NCCL INFO Channel 01/0 : 37[46000] -> 40[7000] [send] via NET/AWS Libfabric/1
+gpua054:406312:406379 [1] NCCL INFO Channel 01/0 : 40[7000] -> 37[46000] [receive] via NET/AWS Libfabric/1
+gpua054:406312:406379 [1] NCCL INFO Channel 00/0 : 37[46000] -> 36[7000] via P2P/IPC/read
+gpua054:406312:406379 [1] NCCL INFO Channel 01/0 : 37[46000] -> 36[7000] via P2P/IPC/read
+gpua054:406312:406379 [1] NCCL INFO Connected all trees
+gpua054:406312:406379 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua054:406312:406379 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua054:406312:406379 [1] NCCL INFO comm 0x55763f547160 rank 37 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpua040:3844988:3844988 [2] NCCL INFO cudaDriverVersion 12020
+gpua040:3844988:3844988 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.40<0>
+gpua040:3844988:3844988 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol.
+gpua040:3844988:3844988 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5).
+gpua040:3844988:3845045 [2] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0
+gpua040:3844988:3845045 [2] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics)
+gpua040:3844988:3845045 [2] NCCL INFO Using network AWS Libfabric
+gpua040:3844988:3845045 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpua040:3844988:3845045 [2] NCCL INFO NCCL_CROSS_NIC set by environment to 1.
+gpua040:3844988:3845045 [2] NCCL INFO Trees [0] 27/-1/-1->26->25 [1] 27/-1/-1->26->25
+gpua040:3844988:3845045 [2] NCCL INFO Channel 00/0 : 26[85000] -> 27[c7000] via P2P/IPC/read
+gpua040:3844988:3845045 [2] NCCL INFO Channel 01/0 : 26[85000] -> 27[c7000] via P2P/IPC/read
+gpua057:3871124:3871193 [1] NCCL INFO NCCL_CROSS_NIC set by environment to 1.
+gpua057:3871124:3871193 [1] NCCL INFO Trees [0] 46/-1/-1->45->44 [1] 46/52/-1->45->44
+gpua057:3871124:3871193 [1] NCCL INFO Channel 00/0 : 45[46000] -> 46[85000] via P2P/IPC/read
+gpua057:3871124:3871193 [1] NCCL INFO Channel 01/0 : 45[46000] -> 46[85000] via P2P/IPC/read
+gpua057:3871124:3871193 [1] NCCL INFO Connected all rings
+gpua057:3871124:3871193 [1] NCCL INFO Channel 01/0 : 45[46000] -> 52[7000] [send] via NET/AWS Libfabric/1
+gpua057:3871124:3871193 [1] NCCL INFO Channel 01/0 : 52[7000] -> 45[46000] [receive] via NET/AWS Libfabric/1
+gpua057:3871124:3871193 [1] NCCL INFO Channel 00/0 : 45[46000] -> 44[7000] via P2P/IPC/read
+gpua057:3871124:3871193 [1] NCCL INFO Channel 01/0 : 45[46000] -> 44[7000] via P2P/IPC/read
+gpua057:3871124:3871193 [1] NCCL INFO Connected all trees
+gpua057:3871124:3871193 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua055:105496:105562 [1] NCCL INFO Channel 00/0 : 36[7000] -> 41[46000] [receive] via NET/AWS Libfabric/1
+gpua055:105496:105562 [1] NCCL INFO Channel 00/0 : 41[46000] -> 36[7000] [send] via NET/AWS Libfabric/1
+gpua055:105496:105562 [1] NCCL INFO Channel 00/0 : 41[46000] -> 40[7000] via P2P/IPC/read
+gpua055:105496:105562 [1] NCCL INFO Channel 01/0 : 41[46000] -> 40[7000] via P2P/IPC/read
+gpua055:105496:105562 [1] NCCL INFO Connected all trees
+gpua055:105496:105562 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua055:105496:105562 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua055:105496:105562 [1] NCCL INFO comm 0x55e8a054e0d0 rank 41 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpua079:3679369:3679369 [0] NCCL INFO cudaDriverVersion 12020
+gpua079:3679369:3679369 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.79<0>
+gpua079:3679369:3679369 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol.
+gpua079:3679369:3679369 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5).
+gpua079:3679369:3679429 [0] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0
+gpua079:3679369:3679429 [0] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics)
+gpua079:3679369:3679429 [0] NCCL INFO Using network AWS Libfabric
+gpua079:3679369:3679429 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpua079:3679369:3679429 [0] NCCL INFO NCCL_CROSS_NIC set by environment to 1.
+gpua079:3679369:3679429 [0] NCCL INFO Trees [0] 49/56/-1->48->32 [1] 49/-1/-1->48->52
+gpua079:3679369:3679429 [0] NCCL INFO Channel 00/0 : 47[c7000] -> 48[7000] [receive] via NET/AWS Libfabric/1
+gpua054:406311:406311 [0] NCCL INFO cudaDriverVersion 12020
+gpua054:406311:406311 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.54<0>
+gpua054:406311:406311 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol.
+gpua054:406311:406311 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5).
+gpua054:406311:406381 [0] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0
+gpua054:406311:406381 [0] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics)
+gpua054:406311:406381 [0] NCCL INFO Using network AWS Libfabric
+gpua054:406311:406381 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpua054:406311:406381 [0] NCCL INFO NCCL_CROSS_NIC set by environment to 1.
+gpua054:406311:406381 [0] NCCL INFO Trees [0] 37/-1/-1->36->41 [1] 37/32/-1->36->44
+gpua054:406311:406381 [0] NCCL INFO Channel 00/0 : 35[c7000] -> 36[7000] [receive] via NET/AWS Libfabric/1
+gpua054:406311:406381 [0] NCCL INFO Channel 01/0 : 35[c7000] -> 36[7000] [receive] via NET/AWS Libfabric/1
+gpua040:3844988:3845045 [2] NCCL INFO Connected all rings
+gpua040:3844988:3845045 [2] NCCL INFO Channel 00/0 : 26[85000] -> 25[46000] via P2P/IPC/read
+gpua040:3844988:3845045 [2] NCCL INFO Channel 01/0 : 26[85000] -> 25[46000] via P2P/IPC/read
+gpua040:3844988:3845045 [2] NCCL INFO Connected all trees
+gpua040:3844988:3845045 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua040:3844988:3845045 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua040:3844988:3845045 [2] NCCL INFO comm 0x55e588b90a70 rank 26 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpua040:3844989:3844989 [3] NCCL INFO cudaDriverVersion 12020
+gpua040:3844989:3844989 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.40<0>
+gpua040:3844989:3844989 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol.
+gpua040:3844989:3844989 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5).
+gpua040:3844989:3845044 [3] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0
+gpua006:662233:662292 [0] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0
+gpua006:662233:662292 [0] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics)
+gpua006:662233:662292 [0] NCCL INFO Using network AWS Libfabric
+gpua006:662233:662292 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpua006:662233:662292 [0] NCCL INFO NCCL_CROSS_NIC set by environment to 1.
+gpua006:662233:662292 [0] NCCL INFO Channel 00/02 :    0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18  19
+gpua006:662233:662292 [0] NCCL INFO Channel 01/02 :    0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18  19
+gpua006:662233:662292 [0] NCCL INFO Trees [0] 1/32/-1->0->-1 [1] 1/-1/-1->0->4
+gpua006:662233:662292 [0] NCCL INFO Channel 00/0 : 63[c7000] -> 0[7000] [receive] via NET/AWS Libfabric/1
+gpua006:662233:662292 [0] NCCL INFO Channel 01/0 : 63[c7000] -> 0[7000] [receive] via NET/AWS Libfabric/1
+gpua006:662233:662292 [0] NCCL INFO Channel 00/0 : 0[7000] -> 1[46000] via P2P/IPC/read
+gpua057:3871124:3871193 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua057:3871124:3871193 [1] NCCL INFO comm 0x556bd6505b20 rank 45 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpua079:3679369:3679429 [0] NCCL INFO Channel 01/0 : 47[c7000] -> 48[7000] [receive] via NET/AWS Libfabric/1
+gpua079:3679369:3679429 [0] NCCL INFO Channel 00/0 : 48[7000] -> 49[46000] via P2P/IPC/read
+gpua079:3679369:3679429 [0] NCCL INFO Channel 01/0 : 48[7000] -> 49[46000] via P2P/IPC/read
+gpua079:3679369:3679429 [0] NCCL INFO Connected all rings
+gpua079:3679369:3679429 [0] NCCL INFO Channel 01/0 : 48[7000] -> 52[7000] [send] via NET/AWS Libfabric/1
+gpua079:3679369:3679429 [0] NCCL INFO Channel 00/0 : 48[7000] -> 56[7000] [send] via NET/AWS Libfabric/1
+gpua079:3679369:3679429 [0] NCCL INFO Channel 00/0 : 32[7000] -> 48[7000] [receive] via NET/AWS Libfabric/1
+gpua079:3679369:3679429 [0] NCCL INFO Channel 00/0 : 48[7000] -> 32[7000] [send] via NET/AWS Libfabric/1
+gpua079:3679369:3679429 [0] NCCL INFO Channel 00/0 : 56[7000] -> 48[7000] [receive] via NET/AWS Libfabric/1
+gpua079:3679369:3679429 [0] NCCL INFO Channel 01/0 : 52[7000] -> 48[7000] [receive] via NET/AWS Libfabric/1
+gpua054:406311:406381 [0] NCCL INFO Channel 00/0 : 36[7000] -> 37[46000] via P2P/IPC/read
+gpua054:406311:406381 [0] NCCL INFO Channel 01/0 : 36[7000] -> 37[46000] via P2P/IPC/read
+gpua054:406311:406381 [0] NCCL INFO Connected all rings
+gpua054:406311:406381 [0] NCCL INFO Channel 01/0 : 32[7000] -> 36[7000] [receive] via NET/AWS Libfabric/1
+gpua054:406311:406381 [0] NCCL INFO Channel 00/0 : 36[7000] -> 41[46000] [send] via NET/AWS Libfabric/1
+gpua054:406311:406381 [0] NCCL INFO Channel 01/0 : 36[7000] -> 44[7000] [send] via NET/AWS Libfabric/1
+gpua054:406311:406381 [0] NCCL INFO Channel 01/0 : 44[7000] -> 36[7000] [receive] via NET/AWS Libfabric/1
+gpua054:406311:406381 [0] NCCL INFO Channel 00/0 : 41[46000] -> 36[7000] [receive] via NET/AWS Libfabric/1
+gpua054:406311:406381 [0] NCCL INFO Channel 01/0 : 36[7000] -> 32[7000] [send] via NET/AWS Libfabric/1
+gpua054:406311:406381 [0] NCCL INFO Connected all trees
+gpua054:406311:406381 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua040:3844989:3845044 [3] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics)
+gpua040:3844989:3845044 [3] NCCL INFO Using network AWS Libfabric
+gpua040:3844989:3845044 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpua040:3844989:3845044 [3] NCCL INFO NCCL_CROSS_NIC set by environment to 1.
+gpua040:3844989:3845044 [3] NCCL INFO Trees [0] -1/-1/-1->27->26 [1] -1/-1/-1->27->26
+gpua040:3844989:3845044 [3] NCCL INFO Channel 00/0 : 27[c7000] -> 28[7000] [send] via NET/AWS Libfabric/1
+gpua040:3844989:3845044 [3] NCCL INFO Channel 01/0 : 27[c7000] -> 28[7000] [send] via NET/AWS Libfabric/1
+gpua040:3844989:3845044 [3] NCCL INFO Connected all rings
+gpua040:3844989:3845044 [3] NCCL INFO Channel 00/0 : 27[c7000] -> 26[85000] via P2P/IPC/read
+gpua040:3844989:3845044 [3] NCCL INFO Channel 01/0 : 27[c7000] -> 26[85000] via P2P/IPC/read
+gpua040:3844989:3845044 [3] NCCL INFO Connected all trees
+gpua040:3844989:3845044 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua006:662233:662292 [0] NCCL INFO Channel 01/0 : 0[7000] -> 1[46000] via P2P/IPC/read
+gpua006:662233:662292 [0] NCCL INFO Connected all rings
+gpua006:662233:662292 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [send] via NET/AWS Libfabric/1
+gpua006:662233:662292 [0] NCCL INFO Channel 00/0 : 32[7000] -> 0[7000] [receive] via NET/AWS Libfabric/1
+gpua006:662233:662292 [0] NCCL INFO Channel 00/0 : 0[7000] -> 32[7000] [send] via NET/AWS Libfabric/1
+gpua006:662233:662292 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [receive] via NET/AWS Libfabric/1
+gpua006:662233:662292 [0] NCCL INFO Connected all trees
+gpua006:662233:662292 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua006:662233:662292 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua006:662233:662292 [0] NCCL INFO comm 0x5595593f8220 rank 0 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpua057:3871125:3871125 [2] NCCL INFO cudaDriverVersion 12020
+gpua057:3871125:3871125 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.57<0>
+gpua057:3871125:3871125 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol.
+gpua057:3871125:3871125 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5).
+gpua057:3871125:3871194 [2] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0
+gpua057:3871125:3871194 [2] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics)
+gpua057:3871125:3871194 [2] NCCL INFO Using network AWS Libfabric
+gpua057:3871125:3871194 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpua057:3871125:3871194 [2] NCCL INFO NCCL_CROSS_NIC set by environment to 1.
+gpua057:3871125:3871194 [2] NCCL INFO Trees [0] 47/-1/-1->46->45 [1] 47/-1/-1->46->45
+gpua057:3871125:3871194 [2] NCCL INFO Channel 00/0 : 46[85000] -> 47[c7000] via P2P/IPC/read
+gpua057:3871125:3871194 [2] NCCL INFO Channel 01/0 : 46[85000] -> 47[c7000] via P2P/IPC/read
+gpua079:3679369:3679429 [0] NCCL INFO Connected all trees
+gpua079:3679369:3679429 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua079:3679369:3679429 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua079:3679369:3679429 [0] NCCL INFO comm 0x5555cab53770 rank 48 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpua054:406311:406381 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua054:406311:406381 [0] NCCL INFO comm 0x555aaa214860 rank 36 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpua040:3844989:3845044 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua040:3844989:3845044 [3] NCCL INFO comm 0x5623d61ff7b0 rank 27 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpua006:662235:662235 [2] NCCL INFO cudaDriverVersion 12020
+gpua006:662235:662235 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.6<0>
+gpua006:662235:662235 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol.
+gpua006:662235:662235 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5).
+gpua006:662235:662293 [2] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0
+gpua006:662235:662293 [2] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics)
+gpua006:662235:662293 [2] NCCL INFO Using network AWS Libfabric
+gpua006:662235:662293 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpua006:662235:662293 [2] NCCL INFO NCCL_CROSS_NIC set by environment to 1.
+gpua006:662235:662293 [2] NCCL INFO Trees [0] 3/-1/-1->2->1 [1] 3/-1/-1->2->1
+gpua006:662235:662293 [2] NCCL INFO Channel 00/0 : 2[85000] -> 3[c7000] via P2P/IPC/read
+gpua006:662235:662293 [2] NCCL INFO Channel 01/0 : 2[85000] -> 3[c7000] via P2P/IPC/read
+gpua006:662235:662293 [2] NCCL INFO Connected all rings
+gpua057:3871125:3871194 [2] NCCL INFO Connected all rings
+gpua057:3871125:3871194 [2] NCCL INFO Channel 00/0 : 46[85000] -> 45[46000] via P2P/IPC/read
+gpua057:3871125:3871194 [2] NCCL INFO Channel 01/0 : 46[85000] -> 45[46000] via P2P/IPC/read
+gpua057:3871125:3871194 [2] NCCL INFO Connected all trees
+gpua057:3871125:3871194 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua057:3871125:3871194 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua057:3871125:3871194 [2] NCCL INFO comm 0x562fd25846c0 rank 46 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpua012:2708178:2708178 [2] NCCL INFO cudaDriverVersion 12020
+gpua012:2708178:2708178 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.12<0>
+gpua012:2708178:2708178 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol.
+gpua012:2708178:2708178 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5).
+gpua012:2708178:2708256 [2] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0
+gpua012:2708178:2708256 [2] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics)
+gpua012:2708178:2708256 [2] NCCL INFO Using network AWS Libfabric
+gpua012:2708178:2708256 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpua012:2708178:2708256 [2] NCCL INFO NCCL_CROSS_NIC set by environment to 1.
+gpua012:2708178:2708256 [2] NCCL INFO Trees [0] 7/-1/-1->6->5 [1] 7/-1/-1->6->5
+gpua012:2708178:2708256 [2] NCCL INFO Channel 00/0 : 6[85000] -> 7[c7000] via P2P/IPC/read
+gpua012:2708178:2708256 [2] NCCL INFO Channel 01/0 : 6[85000] -> 7[c7000] via P2P/IPC/read
+gpua079:3679371:3679371 [2] NCCL INFO cudaDriverVersion 12020
+gpua079:3679371:3679371 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.79<0>
+gpua079:3679371:3679371 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol.
+gpua079:3679371:3679371 [2] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5).
+gpua079:3679371:3679430 [2] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0
+gpua079:3679371:3679430 [2] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics)
+gpua079:3679371:3679430 [2] NCCL INFO Using network AWS Libfabric
+gpua079:3679371:3679430 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpua079:3679371:3679430 [2] NCCL INFO NCCL_CROSS_NIC set by environment to 1.
+gpua079:3679371:3679430 [2] NCCL INFO Trees [0] 51/-1/-1->50->49 [1] 51/-1/-1->50->49
+gpua079:3679371:3679430 [2] NCCL INFO Channel 00/0 : 50[85000] -> 51[c7000] via P2P/IPC/read
+gpua079:3679371:3679430 [2] NCCL INFO Channel 01/0 : 50[85000] -> 51[c7000] via P2P/IPC/read
+gpua006:662235:662293 [2] NCCL INFO Channel 00/0 : 2[85000] -> 1[46000] via P2P/IPC/read
+gpua006:662235:662293 [2] NCCL INFO Channel 01/0 : 2[85000] -> 1[46000] via P2P/IPC/read
+gpua006:662235:662293 [2] NCCL INFO Connected all trees
+gpua006:662235:662293 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua006:662235:662293 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua006:662235:662293 [2] NCCL INFO comm 0x55ea3c17e9e0 rank 2 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpua057:3871126:3871126 [3] NCCL INFO cudaDriverVersion 12020
+gpua057:3871126:3871126 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.57<0>
+gpua057:3871126:3871126 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol.
+gpua057:3871126:3871126 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5).
+gpua057:3871126:3871191 [3] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0
+gpua057:3871126:3871191 [3] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics)
+gpua057:3871126:3871191 [3] NCCL INFO Using network AWS Libfabric
+gpua057:3871126:3871191 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpua057:3871126:3871191 [3] NCCL INFO NCCL_CROSS_NIC set by environment to 1.
+gpua057:3871126:3871191 [3] NCCL INFO Trees [0] -1/-1/-1->47->46 [1] -1/-1/-1->47->46
+gpua057:3871126:3871191 [3] NCCL INFO Channel 00/0 : 47[c7000] -> 48[7000] [send] via NET/AWS Libfabric/1
+gpua057:3871126:3871191 [3] NCCL INFO Channel 01/0 : 47[c7000] -> 48[7000] [send] via NET/AWS Libfabric/1
+gpua012:2708178:2708256 [2] NCCL INFO Connected all rings
+gpua012:2708178:2708256 [2] NCCL INFO Channel 00/0 : 6[85000] -> 5[46000] via P2P/IPC/read
+gpua012:2708178:2708256 [2] NCCL INFO Channel 01/0 : 6[85000] -> 5[46000] via P2P/IPC/read
+gpua012:2708178:2708256 [2] NCCL INFO Connected all trees
+gpua012:2708178:2708256 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua012:2708178:2708256 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua012:2708178:2708256 [2] NCCL INFO comm 0x564c7e8ac3e0 rank 6 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpua079:3679371:3679430 [2] NCCL INFO Connected all rings
+gpua079:3679371:3679430 [2] NCCL INFO Channel 00/0 : 50[85000] -> 49[46000] via P2P/IPC/read
+gpua079:3679371:3679430 [2] NCCL INFO Channel 01/0 : 50[85000] -> 49[46000] via P2P/IPC/read
+gpua079:3679371:3679430 [2] NCCL INFO Connected all trees
+gpua079:3679371:3679430 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua079:3679371:3679430 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua079:3679371:3679430 [2] NCCL INFO comm 0x56462bb5c7f0 rank 50 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpua006:662236:662236 [3] NCCL INFO cudaDriverVersion 12020
+gpua006:662236:662236 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.6<0>
+gpua006:662236:662236 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol.
+gpua006:662236:662236 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5).
+gpua006:662236:662294 [3] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0
+gpua006:662236:662294 [3] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics)
+gpua006:662236:662294 [3] NCCL INFO Using network AWS Libfabric
+gpua006:662236:662294 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpua006:662236:662294 [3] NCCL INFO NCCL_CROSS_NIC set by environment to 1.
+gpua006:662236:662294 [3] NCCL INFO Trees [0] -1/-1/-1->3->2 [1] -1/-1/-1->3->2
+gpua006:662236:662294 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [send] via NET/AWS Libfabric/1
+gpua006:662236:662294 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [send] via NET/AWS Libfabric/1
+gpua057:3871126:3871191 [3] NCCL INFO Connected all rings
+gpua057:3871126:3871191 [3] NCCL INFO Channel 00/0 : 47[c7000] -> 46[85000] via P2P/IPC/read
+gpua057:3871126:3871191 [3] NCCL INFO Channel 01/0 : 47[c7000] -> 46[85000] via P2P/IPC/read
+gpua057:3871126:3871191 [3] NCCL INFO Connected all trees
+gpua057:3871126:3871191 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua057:3871126:3871191 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua057:3871126:3871191 [3] NCCL INFO comm 0x5650e1a41bf0 rank 47 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpua012:2708179:2708179 [3] NCCL INFO cudaDriverVersion 12020
+gpua012:2708179:2708179 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.12<0>
+gpua012:2708179:2708179 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol.
+gpua012:2708179:2708179 [3] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin symbol (v4 or v5).
+gpua012:2708179:2708254 [3] NCCL INFO NET/OFI Using aws-ofi-nccl 1.6.0
+gpua012:2708179:2708254 [3] NCCL INFO NET/OFI Selected Provider is cxi (found 2 nics)
+gpua012:2708179:2708254 [3] NCCL INFO Using network AWS Libfabric
+gpua012:2708179:2708254 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpua012:2708179:2708254 [3] NCCL INFO NCCL_CROSS_NIC set by environment to 1.
+gpua012:2708179:2708254 [3] NCCL INFO Trees [0] -1/-1/-1->7->6 [1] -1/-1/-1->7->6
+gpua012:2708179:2708254 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [send] via NET/AWS Libfabric/1
+gpua012:2708179:2708254 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [send] via NET/AWS Libfabric/1
+gpua006:662236:662294 [3] NCCL INFO Connected all rings
+gpua006:662236:662294 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 2[85000] via P2P/IPC/read
+gpua006:662236:662294 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 2[85000] via P2P/IPC/read
+gpua006:662236:662294 [3] NCCL INFO Connected all trees
+gpua006:662236:662294 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua006:662236:662294 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua006:662236:662294 [3] NCCL INFO comm 0x560201c8b1f0 rank 3 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpua012:2708179:2708254 [3] NCCL INFO Connected all rings
+gpua012:2708179:2708254 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 6[85000] via P2P/IPC/read
+gpua012:2708179:2708254 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 6[85000] via P2P/IPC/read
+gpua012:2708179:2708254 [3] NCCL INFO Connected all trees
+gpua012:2708179:2708254 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua012:2708179:2708254 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua012:2708179:2708254 [3] NCCL INFO comm 0x563252e0cdb0 rank 7 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+[gpua006:0/64] 2024-02-10 14:29:25,848 (distributed:1027) INFO: Reducer buckets have been rebuilt in this iteration.
+[gpua006:0/64] 2024-02-10 14:29:41,140 (trainer:687) WARNING: The grad norm is nan. Skipping updating the model.
+[gpua006:0/64] 2024-02-10 14:31:22,293 (trainer:756) INFO: 28epoch:train:1-100batch: iter_time=1.279, forward_time=0.247, loss_ctc=81.381, loss_interctc_layer6=89.775, loss_interctc_layer12=75.974, loss_interctc_layer15=70.330, loss_interctc_layer21=83.994, loss=80.291, backward_time=0.224, grad_norm=72.379, clip=100.000, loss_scale=2.356e+31, optim_step_time=0.139, optim0_lr0=7.698e-05, train_time=8.050
+[gpua006:0/64] 2024-02-10 14:33:10,319 (trainer:756) INFO: 28epoch:train:101-200batch: iter_time=8.523e-05, forward_time=0.143, loss_ctc=89.937, loss_interctc_layer6=98.713, loss_interctc_layer12=83.156, loss_interctc_layer15=77.075, loss_interctc_layer21=93.318, loss=88.440, backward_time=0.209, grad_norm=98.311, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.137, optim0_lr0=7.697e-05, train_time=1.082
+[gpua006:0/64] 2024-02-10 14:35:32,127 (trainer:756) INFO: 28epoch:train:201-300batch: iter_time=9.793e-05, forward_time=0.143, loss_ctc=76.991, loss_interctc_layer6=87.769, loss_interctc_layer12=73.616, loss_interctc_layer15=67.861, loss_interctc_layer21=79.551, loss=77.158, backward_time=0.206, grad_norm=93.721, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.137, optim0_lr0=7.696e-05, train_time=1.418
+[gpua006:0/64] 2024-02-10 14:38:29,578 (trainer:756) INFO: 28epoch:train:301-400batch: iter_time=1.179e-04, forward_time=0.141, loss_ctc=76.707, loss_interctc_layer6=85.847, loss_interctc_layer12=71.479, loss_interctc_layer15=65.776, loss_interctc_layer21=79.397, loss=75.841, backward_time=0.207, grad_norm=87.337, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.138, optim0_lr0=7.695e-05, train_time=1.775
+[gpua006:0/64] 2024-02-10 14:42:50,676 (trainer:756) INFO: 28epoch:train:401-500batch: iter_time=1.126e-04, forward_time=0.142, loss_ctc=105.452, loss_interctc_layer6=105.517, loss_interctc_layer12=87.733, loss_interctc_layer15=80.927, loss_interctc_layer21=109.251, loss=97.776, backward_time=0.208, grad_norm=86.110, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.138, optim0_lr0=7.695e-05, train_time=2.611
+[gpua006:0/64] 2024-02-10 14:46:00,279 (trainer:756) INFO: 28epoch:train:501-600batch: iter_time=1.055e-04, forward_time=0.142, loss_ctc=88.342, loss_interctc_layer6=95.209, loss_interctc_layer12=79.809, loss_interctc_layer15=73.536, loss_interctc_layer21=91.141, loss=85.608, backward_time=0.205, grad_norm=74.501, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.137, optim0_lr0=7.694e-05, train_time=1.896
+[gpua006:0/64] 2024-02-10 14:48:20,502 (trainer:756) INFO: 28epoch:train:601-700batch: iter_time=1.034e-04, forward_time=0.141, loss_ctc=70.042, loss_interctc_layer6=78.215, loss_interctc_layer12=64.853, loss_interctc_layer15=59.306, loss_interctc_layer21=72.513, loss=68.986, backward_time=0.205, grad_norm=64.398, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.138, optim0_lr0=7.693e-05, train_time=1.402
+[gpua006:0/64] 2024-02-10 14:50:37,639 (trainer:756) INFO: 28epoch:train:701-800batch: iter_time=1.061e-04, forward_time=0.140, loss_ctc=69.890, loss_interctc_layer6=79.077, loss_interctc_layer12=66.032, loss_interctc_layer15=60.730, loss_interctc_layer21=72.198, loss=69.585, backward_time=0.207, grad_norm=119.616, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.138, optim0_lr0=7.692e-05, train_time=1.371
+[gpua006:0/64] 2024-02-10 14:52:44,133 (trainer:756) INFO: 28epoch:train:801-900batch: iter_time=1.172e-04, forward_time=0.148, loss_ctc=81.248, loss_interctc_layer6=83.208, loss_interctc_layer12=69.135, loss_interctc_layer15=63.418, loss_interctc_layer21=84.250, loss=76.252, backward_time=0.221, grad_norm=72.651, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.138, optim0_lr0=7.691e-05, train_time=1.265
+[gpua006:0/64] 2024-02-10 14:55:20,886 (trainer:756) INFO: 28epoch:train:901-1000batch: iter_time=3.645e-04, forward_time=0.156, loss_ctc=63.953, loss_interctc_layer6=76.393, loss_interctc_layer12=64.165, loss_interctc_layer15=59.093, loss_interctc_layer21=66.034, loss=65.928, backward_time=0.227, grad_norm=72.273, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.140, optim0_lr0=7.690e-05, train_time=1.567
+[gpua006:0/64] 2024-02-10 14:56:05,644 (trainer:687) WARNING: The grad norm is nan. Skipping updating the model.
+[gpua006:0/64] 2024-02-10 14:57:33,096 (trainer:756) INFO: 28epoch:train:1001-1100batch: iter_time=1.071e-04, forward_time=0.176, loss_ctc=70.614, loss_interctc_layer6=78.637, loss_interctc_layer12=65.207, loss_interctc_layer15=59.728, loss_interctc_layer21=72.885, loss=69.414, backward_time=0.232, grad_norm=213.681, clip=100.000, loss_scale=1.455e+31, optim_step_time=0.139, optim0_lr0=7.689e-05, train_time=1.322
+[gpua006:0/64] 2024-02-10 14:59:35,713 (trainer:756) INFO: 28epoch:train:1101-1200batch: iter_time=9.832e-05, forward_time=0.140, loss_ctc=69.022, loss_interctc_layer6=82.722, loss_interctc_layer12=68.850, loss_interctc_layer15=63.290, loss_interctc_layer21=71.178, loss=71.012, backward_time=0.206, grad_norm=75.536, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.137, optim0_lr0=7.688e-05, train_time=1.226
+[gpua006:0/64] 2024-02-10 15:01:11,315 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
+[gpua006:0/64] 2024-02-10 15:01:30,137 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua006:0/64] 2024-02-10 15:01:33,699 (abs_task:1660) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TCTCPreprocessor object at 0x7fbda67cbd30>)
+[gpua006:0/64] 2024-02-10 15:01:33,699 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, 
+[gpua006:0/64] 2024-02-10 15:01:33,702 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua006:0/64] 2024-02-10 15:13:03,355 (trainer:756) INFO: 28epoch:train:1201-1300batch: iter_time=6.746, forward_time=0.206, loss_ctc=70.488, loss_interctc_layer6=79.592, loss_interctc_layer12=66.390, loss_interctc_layer15=60.908, loss_interctc_layer21=72.741, loss=70.024, backward_time=0.226, grad_norm=67.971, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.139, optim0_lr0=7.687e-05, train_time=8.076
+[gpua006:0/64] 2024-02-10 15:15:12,725 (trainer:756) INFO: 28epoch:train:1301-1400batch: iter_time=9.918e-05, forward_time=0.146, loss_ctc=92.777, loss_interctc_layer6=100.504, loss_interctc_layer12=85.146, loss_interctc_layer15=78.985, loss_interctc_layer21=95.678, loss=90.618, backward_time=0.208, grad_norm=115.573, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.137, optim0_lr0=7.686e-05, train_time=1.294
+[gpua006:0/64] 2024-02-10 15:17:26,032 (trainer:756) INFO: 28epoch:train:1401-1500batch: iter_time=1.165e-04, forward_time=0.166, loss_ctc=75.450, loss_interctc_layer6=88.470, loss_interctc_layer12=73.805, loss_interctc_layer15=67.945, loss_interctc_layer21=77.980, loss=76.730, backward_time=0.216, grad_norm=107.617, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.139, optim0_lr0=7.685e-05, train_time=1.332
+[gpua006:0/64] 2024-02-10 15:19:37,310 (trainer:756) INFO: 28epoch:train:1501-1600batch: iter_time=1.268e-04, forward_time=0.226, loss_ctc=76.881, loss_interctc_layer6=82.709, loss_interctc_layer12=68.644, loss_interctc_layer15=63.030, loss_interctc_layer21=79.699, loss=74.193, backward_time=0.248, grad_norm=76.324, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.140, optim0_lr0=7.684e-05, train_time=1.312
+[gpua006:0/64] 2024-02-10 15:21:34,037 (trainer:756) INFO: 28epoch:train:1601-1700batch: iter_time=1.175e-04, forward_time=0.145, loss_ctc=91.549, loss_interctc_layer6=102.507, loss_interctc_layer12=85.475, loss_interctc_layer15=78.788, loss_interctc_layer21=94.600, loss=90.584, backward_time=0.208, grad_norm=122.469, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.138, optim0_lr0=7.683e-05, train_time=1.168
+[gpua006:0/64] 2024-02-10 15:24:21,639 (trainer:756) INFO: 28epoch:train:1701-1800batch: iter_time=1.284e-04, forward_time=0.144, loss_ctc=93.186, loss_interctc_layer6=91.660, loss_interctc_layer12=76.013, loss_interctc_layer15=69.784, loss_interctc_layer21=96.425, loss=85.414, backward_time=0.207, grad_norm=75.532, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.138, optim0_lr0=7.682e-05, train_time=1.676
+[gpua006:0/64] 2024-02-10 15:27:01,988 (trainer:756) INFO: 28epoch:train:1801-1900batch: iter_time=1.170e-04, forward_time=0.153, loss_ctc=85.243, loss_interctc_layer6=92.728, loss_interctc_layer12=77.227, loss_interctc_layer15=71.000, loss_interctc_layer21=88.028, loss=82.845, backward_time=0.207, grad_norm=119.839, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.138, optim0_lr0=7.681e-05, train_time=1.603
+[gpua006:0/64] 2024-02-10 15:29:32,402 (trainer:756) INFO: 28epoch:train:1901-2000batch: iter_time=3.728e-04, forward_time=0.273, loss_ctc=71.685, loss_interctc_layer6=78.945, loss_interctc_layer12=65.539, loss_interctc_layer15=60.126, loss_interctc_layer21=73.906, loss=70.040, backward_time=0.280, grad_norm=84.391, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.143, optim0_lr0=7.680e-05, train_time=1.503
+[gpua006:0/64] 2024-02-10 15:32:27,468 (trainer:756) INFO: 28epoch:train:2001-2100batch: iter_time=9.585e-05, forward_time=0.142, loss_ctc=64.616, loss_interctc_layer6=71.388, loss_interctc_layer12=59.324, loss_interctc_layer15=54.381, loss_interctc_layer21=66.681, loss=63.278, backward_time=0.208, grad_norm=66.522, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.137, optim0_lr0=7.679e-05, train_time=1.752
+[gpua006:0/64] 2024-02-10 15:34:21,556 (trainer:756) INFO: 28epoch:train:2101-2200batch: iter_time=1.012e-04, forward_time=0.143, loss_ctc=76.553, loss_interctc_layer6=80.511, loss_interctc_layer12=66.614, loss_interctc_layer15=60.971, loss_interctc_layer21=79.398, loss=72.809, backward_time=0.210, grad_norm=65.507, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.137, optim0_lr0=7.678e-05, train_time=1.141
+[gpua006:0/64] 2024-02-10 15:37:03,075 (trainer:756) INFO: 28epoch:train:2201-2300batch: iter_time=1.017e-04, forward_time=0.143, loss_ctc=66.333, loss_interctc_layer6=81.761, loss_interctc_layer12=68.550, loss_interctc_layer15=63.028, loss_interctc_layer21=68.481, loss=69.631, backward_time=0.210, grad_norm=73.298, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.137, optim0_lr0=7.677e-05, train_time=1.615
+[gpua006:0/64] 2024-02-10 15:40:06,657 (trainer:756) INFO: 28epoch:train:2301-2400batch: iter_time=2.999e-04, forward_time=0.211, loss_ctc=69.128, loss_interctc_layer6=77.798, loss_interctc_layer12=64.697, loss_interctc_layer15=59.324, loss_interctc_layer21=71.440, loss=68.477, backward_time=0.289, grad_norm=58.666, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.141, optim0_lr0=7.677e-05, train_time=1.835
+[gpua006:0/64] 2024-02-10 15:42:32,886 (trainer:756) INFO: 28epoch:train:2401-2500batch: iter_time=9.036e-05, forward_time=0.143, loss_ctc=71.855, loss_interctc_layer6=82.088, loss_interctc_layer12=68.219, loss_interctc_layer15=62.476, loss_interctc_layer21=74.366, loss=71.801, backward_time=0.208, grad_norm=68.903, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.138, optim0_lr0=7.676e-05, train_time=1.462
+[gpua006:0/64] 2024-02-10 15:42:53,103 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpua006:0/64] 2024-02-10 15:43:11,866 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua006:0/64] 2024-02-10 15:43:15,407 (abs_task:1660) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TCTCPreprocessor object at 0x7fc6738b1db0>)
+[gpua006:0/64] 2024-02-10 15:43:15,407 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, 
+[gpua006:0/64] 2024-02-10 15:43:15,411 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua006:0/64] 2024-02-10 15:54:32,594 (trainer:756) INFO: 28epoch:train:2501-2600batch: iter_time=2.846, forward_time=0.143, loss_ctc=79.857, loss_interctc_layer6=88.965, loss_interctc_layer12=74.702, loss_interctc_layer15=68.894, loss_interctc_layer21=82.356, loss=78.955, backward_time=0.210, grad_norm=72.766, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.138, optim0_lr0=7.675e-05, train_time=7.198
+[gpua006:0/64] 2024-02-10 15:56:31,776 (trainer:756) INFO: 28epoch:train:2601-2700batch: iter_time=9.873e-05, forward_time=0.144, loss_ctc=86.216, loss_interctc_layer6=96.143, loss_interctc_layer12=80.583, loss_interctc_layer15=74.411, loss_interctc_layer21=89.194, loss=85.309, backward_time=0.209, grad_norm=87.649, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.138, optim0_lr0=7.674e-05, train_time=1.191
+[gpua006:0/64] 2024-02-10 16:00:13,074 (trainer:756) INFO: 28epoch:train:2701-2800batch: iter_time=1.077e-04, forward_time=0.144, loss_ctc=75.430, loss_interctc_layer6=86.305, loss_interctc_layer12=72.329, loss_interctc_layer15=66.661, loss_interctc_layer21=77.955, loss=75.736, backward_time=0.207, grad_norm=62.264, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.138, optim0_lr0=7.673e-05, train_time=2.213
+[gpua006:0/64] 2024-02-10 16:02:41,071 (trainer:756) INFO: 28epoch:train:2801-2900batch: iter_time=1.016e-04, forward_time=0.239, loss_ctc=75.001, loss_interctc_layer6=85.126, loss_interctc_layer12=70.711, loss_interctc_layer15=64.896, loss_interctc_layer21=77.551, loss=74.657, backward_time=0.256, grad_norm=69.661, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.143, optim0_lr0=7.672e-05, train_time=1.479
+[gpua006:0/64] 2024-02-10 16:05:09,977 (trainer:756) INFO: 28epoch:train:2901-3000batch: iter_time=1.066e-04, forward_time=0.144, loss_ctc=102.994, loss_interctc_layer6=103.253, loss_interctc_layer12=85.746, loss_interctc_layer15=78.688, loss_interctc_layer21=106.461, loss=95.428, backward_time=0.207, grad_norm=93.919, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.137, optim0_lr0=7.671e-05, train_time=1.488
+[gpua006:0/64] 2024-02-10 16:08:31,900 (trainer:756) INFO: 28epoch:train:3001-3100batch: iter_time=1.166e-04, forward_time=0.144, loss_ctc=87.895, loss_interctc_layer6=95.002, loss_interctc_layer12=79.503, loss_interctc_layer15=73.160, loss_interctc_layer21=90.846, loss=85.281, backward_time=0.207, grad_norm=70.063, clip=100.000, loss_scale=1.582e+31, optim_step_time=0.137, optim0_lr0=7.670e-05, train_time=2.021
+[gpua006:0/64] 2024-02-10 16:10:34,460 (trainer:756) INFO: 28epoch:train:3101-3200batch: iter_time=1.192e-04, forward_time=0.142, loss_ctc=68.933, loss_interctc_layer6=76.315, loss_interctc_layer12=63.060, loss_interctc_layer15=57.665, loss_interctc_layer21=71.137, loss=67.422, backward_time=0.207, grad_norm=62.562, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.138, optim0_lr0=7.669e-05, train_time=1.225
+[gpua006:0/64] 2024-02-10 16:14:15,173 (trainer:756) INFO: 28epoch:train:3201-3300batch: iter_time=1.101e-04, forward_time=0.144, loss_ctc=69.526, loss_interctc_layer6=77.957, loss_interctc_layer12=64.941, loss_interctc_layer15=60.054, loss_interctc_layer21=71.792, loss=68.854, backward_time=0.205, grad_norm=77.890, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.137, optim0_lr0=7.668e-05, train_time=2.207
+[gpua006:0/64] 2024-02-10 16:16:36,990 (trainer:756) INFO: 28epoch:train:3301-3400batch: iter_time=1.095e-04, forward_time=0.142, loss_ctc=80.622, loss_interctc_layer6=82.741, loss_interctc_layer12=68.590, loss_interctc_layer15=62.865, loss_interctc_layer21=83.681, loss=75.700, backward_time=0.207, grad_norm=66.167, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.137, optim0_lr0=7.667e-05, train_time=1.418
+[gpua006:0/64] 2024-02-10 16:18:45,834 (trainer:756) INFO: 28epoch:train:3401-3500batch: iter_time=1.175e-04, forward_time=0.142, loss_ctc=63.183, loss_interctc_layer6=75.215, loss_interctc_layer12=62.628, loss_interctc_layer15=57.516, loss_interctc_layer21=65.369, loss=64.782, backward_time=0.208, grad_norm=55.016, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.137, optim0_lr0=7.666e-05, train_time=1.288
+[gpua006:0/64] 2024-02-10 16:21:09,042 (trainer:756) INFO: 28epoch:train:3501-3600batch: iter_time=1.126e-04, forward_time=0.142, loss_ctc=68.869, loss_interctc_layer6=77.257, loss_interctc_layer12=63.884, loss_interctc_layer15=58.392, loss_interctc_layer21=71.451, loss=67.971, backward_time=0.206, grad_norm=60.248, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.137, optim0_lr0=7.665e-05, train_time=1.432
+[gpua006:0/64] 2024-02-10 16:23:43,378 (trainer:756) INFO: 28epoch:train:3601-3700batch: iter_time=1.111e-04, forward_time=0.145, loss_ctc=68.019, loss_interctc_layer6=82.118, loss_interctc_layer12=68.320, loss_interctc_layer15=62.832, loss_interctc_layer21=70.237, loss=70.305, backward_time=0.206, grad_norm=107.677, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.138, optim0_lr0=7.664e-05, train_time=1.543
+[gpua006:0/64] 2024-02-10 16:25:49,041 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpua006:0/64] 2024-02-10 16:26:07,795 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua006:0/64] 2024-02-10 16:26:11,326 (abs_task:1660) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TCTCPreprocessor object at 0x7fbab01ffe20>)
+[gpua006:0/64] 2024-02-10 16:26:11,326 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, 
+[gpua006:0/64] 2024-02-10 16:26:11,329 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua006:0/64] 2024-02-10 16:32:02,291 (trainer:756) INFO: 28epoch:train:3701-3800batch: iter_time=3.327, forward_time=0.253, loss_ctc=70.235, loss_interctc_layer6=79.457, loss_interctc_layer12=66.100, loss_interctc_layer15=60.635, loss_interctc_layer21=72.615, loss=69.808, backward_time=0.235, grad_norm=59.252, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.145, optim0_lr0=7.663e-05, train_time=4.989
+[gpua006:0/64] 2024-02-10 16:34:35,807 (trainer:756) INFO: 28epoch:train:3801-3900batch: iter_time=1.066e-04, forward_time=0.145, loss_ctc=92.351, loss_interctc_layer6=99.114, loss_interctc_layer12=83.554, loss_interctc_layer15=77.545, loss_interctc_layer21=95.347, loss=89.582, backward_time=0.209, grad_norm=92.377, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.138, optim0_lr0=7.662e-05, train_time=1.535
+[gpua006:0/64] 2024-02-10 16:37:19,968 (trainer:756) INFO: 28epoch:train:3901-4000batch: iter_time=1.131e-04, forward_time=0.144, loss_ctc=74.141, loss_interctc_layer6=87.261, loss_interctc_layer12=72.779, loss_interctc_layer15=66.761, loss_interctc_layer21=76.703, loss=75.529, backward_time=0.207, grad_norm=81.457, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.138, optim0_lr0=7.662e-05, train_time=1.639
+[gpua006:0/64] 2024-02-10 16:39:38,838 (trainer:756) INFO: 28epoch:train:4001-4100batch: iter_time=9.602e-05, forward_time=0.143, loss_ctc=76.466, loss_interctc_layer6=82.171, loss_interctc_layer12=68.072, loss_interctc_layer15=62.403, loss_interctc_layer21=79.214, loss=73.665, backward_time=0.209, grad_norm=89.850, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.139, optim0_lr0=7.661e-05, train_time=1.391
+[gpua006:0/64] 2024-02-10 16:42:05,641 (trainer:756) INFO: 28epoch:train:4101-4200batch: iter_time=1.019e-04, forward_time=0.144, loss_ctc=91.793, loss_interctc_layer6=101.994, loss_interctc_layer12=85.094, loss_interctc_layer15=78.368, loss_interctc_layer21=94.951, loss=90.440, backward_time=0.208, grad_norm=113.426, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.138, optim0_lr0=7.660e-05, train_time=1.468
+[gpua006:0/64] 2024-02-10 16:44:09,076 (trainer:756) INFO: 28epoch:train:4201-4300batch: iter_time=1.008e-04, forward_time=0.144, loss_ctc=92.049, loss_interctc_layer6=91.534, loss_interctc_layer12=75.878, loss_interctc_layer15=69.570, loss_interctc_layer21=95.320, loss=84.870, backward_time=0.210, grad_norm=75.223, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.139, optim0_lr0=7.659e-05, train_time=1.234
+[gpua006:0/64] 2024-02-10 16:46:16,177 (trainer:756) INFO: 28epoch:train:4301-4400batch: iter_time=1.130e-04, forward_time=0.144, loss_ctc=84.697, loss_interctc_layer6=92.502, loss_interctc_layer12=77.018, loss_interctc_layer15=70.821, loss_interctc_layer21=87.547, loss=82.517, backward_time=0.211, grad_norm=73.419, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.139, optim0_lr0=7.658e-05, train_time=1.271
+[gpua006:0/64] 2024-02-10 16:48:59,642 (trainer:756) INFO: 28epoch:train:4401-4500batch: iter_time=1.006e-04, forward_time=0.142, loss_ctc=69.535, loss_interctc_layer6=78.207, loss_interctc_layer12=65.001, loss_interctc_layer15=59.391, loss_interctc_layer21=71.697, loss=68.766, backward_time=0.207, grad_norm=65.667, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.138, optim0_lr0=7.657e-05, train_time=1.634
+[gpua006:0/64] 2024-02-10 16:51:08,899 (trainer:756) INFO: 28epoch:train:4501-4600batch: iter_time=9.519e-05, forward_time=0.143, loss_ctc=63.909, loss_interctc_layer6=71.077, loss_interctc_layer12=58.801, loss_interctc_layer15=53.719, loss_interctc_layer21=66.090, loss=62.719, backward_time=0.208, grad_norm=62.249, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.138, optim0_lr0=7.656e-05, train_time=1.292
+[gpua006:0/64] 2024-02-10 16:53:03,021 (trainer:756) INFO: 28epoch:train:4601-4700batch: iter_time=9.632e-05, forward_time=0.143, loss_ctc=75.654, loss_interctc_layer6=79.806, loss_interctc_layer12=66.029, loss_interctc_layer15=60.431, loss_interctc_layer21=78.527, loss=72.090, backward_time=0.208, grad_norm=63.121, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.138, optim0_lr0=7.655e-05, train_time=1.141
+[gpua006:0/64] 2024-02-10 16:55:15,964 (trainer:756) INFO: 28epoch:train:4701-4800batch: iter_time=3.613e-04, forward_time=0.208, loss_ctc=65.637, loss_interctc_layer6=80.812, loss_interctc_layer12=67.426, loss_interctc_layer15=62.260, loss_interctc_layer21=67.686, loss=68.764, backward_time=0.216, grad_norm=81.538, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.141, optim0_lr0=7.654e-05, train_time=1.329
+[gpua006:0/64] 2024-02-10 16:57:27,813 (trainer:756) INFO: 28epoch:train:4801-4900batch: iter_time=1.118e-04, forward_time=0.191, loss_ctc=69.574, loss_interctc_layer6=78.042, loss_interctc_layer12=64.854, loss_interctc_layer15=59.438, loss_interctc_layer21=72.044, loss=68.790, backward_time=0.225, grad_norm=64.665, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.139, optim0_lr0=7.653e-05, train_time=1.318
+[gpua006:0/64] 2024-02-10 17:00:19,995 (trainer:756) INFO: 28epoch:train:4901-5000batch: iter_time=1.120e-04, forward_time=0.146, loss_ctc=71.312, loss_interctc_layer6=81.693, loss_interctc_layer12=67.725, loss_interctc_layer15=62.038, loss_interctc_layer21=73.664, loss=71.287, backward_time=0.210, grad_norm=75.273, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.139, optim0_lr0=7.652e-05, train_time=1.722
+[gpua006:0/64] 2024-02-10 17:00:40,024 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpua006:0/64] 2024-02-10 17:00:58,980 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua006:0/64] 2024-02-10 17:01:02,432 (abs_task:1660) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TCTCPreprocessor object at 0x7fbdb21b7760>)
+[gpua006:0/64] 2024-02-10 17:01:02,432 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, 
+[gpua006:0/64] 2024-02-10 17:01:02,435 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua006:0/64] 2024-02-10 17:07:26,844 (trainer:756) INFO: 28epoch:train:5001-5100batch: iter_time=3.054, forward_time=0.229, loss_ctc=80.213, loss_interctc_layer6=89.401, loss_interctc_layer12=75.045, loss_interctc_layer15=69.279, loss_interctc_layer21=82.896, loss=79.367, backward_time=0.228, grad_norm=88.645, clip=100.000, loss_scale=3.164e+31, optim_step_time=0.140, optim0_lr0=7.651e-05, train_time=4.268
+[gpua006:0/64] 2024-02-10 17:09:22,102 (trainer:756) INFO: 28epoch:train:5101-5200batch: iter_time=1.061e-04, forward_time=0.144, loss_ctc=87.544, loss_interctc_layer6=96.586, loss_interctc_layer12=80.963, loss_interctc_layer15=74.750, loss_interctc_layer21=90.085, loss=85.986, backward_time=0.211, grad_norm=74.287, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.139, optim0_lr0=7.650e-05, train_time=1.153
+[gpua006:0/64] 2024-02-10 17:11:33,409 (trainer:756) INFO: 28epoch:train:5201-5300batch: iter_time=9.758e-05, forward_time=0.156, loss_ctc=75.067, loss_interctc_layer6=85.835, loss_interctc_layer12=71.842, loss_interctc_layer15=66.126, loss_interctc_layer21=77.563, loss=75.287, backward_time=0.212, grad_norm=70.125, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.138, optim0_lr0=7.649e-05, train_time=1.313
+[gpua006:0/64] 2024-02-10 17:11:57,880 (trainer:687) WARNING: The grad norm is nan. Skipping updating the model.
+[gpua006:0/64] 2024-02-10 17:13:11,420 (trainer:756) INFO: 28epoch:train:5301-5400batch: iter_time=1.080e-04, forward_time=0.143, loss_ctc=74.332, loss_interctc_layer6=84.170, loss_interctc_layer12=69.668, loss_interctc_layer15=63.886, loss_interctc_layer21=76.962, loss=73.804, backward_time=0.210, grad_norm=65.078, clip=100.000, loss_scale=2.479e+31, optim_step_time=0.138, optim0_lr0=7.648e-05, train_time=0.980
+[gpua006:0/64] 2024-02-10 17:15:52,227 (trainer:756) INFO: 28epoch:train:5401-5500batch: iter_time=1.012e-04, forward_time=0.169, loss_ctc=103.208, loss_interctc_layer6=103.656, loss_interctc_layer12=85.947, loss_interctc_layer15=78.878, loss_interctc_layer21=106.821, loss=95.702, backward_time=0.212, grad_norm=79.172, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.140, optim0_lr0=7.648e-05, train_time=1.607
+[gpua006:0/64] 2024-02-10 17:18:31,217 (trainer:756) INFO: 28epoch:train:5501-5600batch: iter_time=9.827e-05, forward_time=0.209, loss_ctc=86.360, loss_interctc_layer6=94.091, loss_interctc_layer12=78.236, loss_interctc_layer15=71.902, loss_interctc_layer21=89.270, loss=83.972, backward_time=0.236, grad_norm=82.990, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.141, optim0_lr0=7.647e-05, train_time=1.590
+[gpua006:0/64] 2024-02-10 17:21:22,137 (trainer:756) INFO: 28epoch:train:5601-5700batch: iter_time=9.996e-05, forward_time=0.142, loss_ctc=68.169, loss_interctc_layer6=75.785, loss_interctc_layer12=62.509, loss_interctc_layer15=57.080, loss_interctc_layer21=70.458, loss=66.800, backward_time=0.209, grad_norm=59.381, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.139, optim0_lr0=7.646e-05, train_time=1.709
+[gpua006:0/64] 2024-02-10 17:23:55,388 (trainer:756) INFO: 28epoch:train:5701-5800batch: iter_time=9.261e-05, forward_time=0.141, loss_ctc=68.365, loss_interctc_layer6=77.292, loss_interctc_layer12=64.066, loss_interctc_layer15=59.128, loss_interctc_layer21=70.697, loss=67.910, backward_time=0.206, grad_norm=127.113, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.138, optim0_lr0=7.645e-05, train_time=1.532
+[gpua006:0/64] 2024-02-10 17:27:50,483 (trainer:756) INFO: 28epoch:train:5801-5900batch: iter_time=9.424e-05, forward_time=0.146, loss_ctc=79.132, loss_interctc_layer6=81.441, loss_interctc_layer12=67.301, loss_interctc_layer15=61.608, loss_interctc_layer21=82.026, loss=74.302, backward_time=0.208, grad_norm=71.927, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.138, optim0_lr0=7.644e-05, train_time=2.351
+[gpua006:0/64] 2024-02-10 17:30:01,569 (trainer:756) INFO: 28epoch:train:5901-6000batch: iter_time=9.551e-05, forward_time=0.150, loss_ctc=63.681, loss_interctc_layer6=75.047, loss_interctc_layer12=62.411, loss_interctc_layer15=57.321, loss_interctc_layer21=65.767, loss=64.845, backward_time=0.208, grad_norm=81.688, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.138, optim0_lr0=7.643e-05, train_time=1.311
+[gpua006:0/64] 2024-02-10 17:31:42,293 (trainer:756) INFO: 28epoch:train:6001-6100batch: iter_time=9.187e-05, forward_time=0.143, loss_ctc=68.792, loss_interctc_layer6=77.347, loss_interctc_layer12=63.991, loss_interctc_layer15=58.406, loss_interctc_layer21=71.251, loss=67.958, backward_time=0.209, grad_norm=64.978, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.139, optim0_lr0=7.642e-05, train_time=1.007
+[gpua006:0/64] 2024-02-10 17:33:51,088 (trainer:756) INFO: 28epoch:train:6101-6200batch: iter_time=9.792e-05, forward_time=0.146, loss_ctc=67.769, loss_interctc_layer6=81.590, loss_interctc_layer12=67.769, loss_interctc_layer15=62.252, loss_interctc_layer21=69.960, loss=69.868, backward_time=0.209, grad_norm=70.620, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.140, optim0_lr0=7.641e-05, train_time=1.288
+[gpua006:0/64] 2024-02-10 17:35:01,214 (multiple_iter_factory:32) INFO: Building 5th iter-factory...
+[gpua006:0/64] 2024-02-10 17:35:19,876 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua006:0/64] 2024-02-10 17:35:23,397 (abs_task:1660) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TCTCPreprocessor object at 0x7fbe2e163fd0>)
+[gpua006:0/64] 2024-02-10 17:35:23,397 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, 
+[gpua006:0/64] 2024-02-10 17:35:23,401 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua006:0/64] 2024-02-10 17:43:40,656 (trainer:756) INFO: 28epoch:train:6201-6300batch: iter_time=2.924, forward_time=0.198, loss_ctc=71.974, loss_interctc_layer6=79.877, loss_interctc_layer12=66.303, loss_interctc_layer15=60.840, loss_interctc_layer21=74.523, loss=70.703, backward_time=0.221, grad_norm=60.905, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.140, optim0_lr0=7.640e-05, train_time=5.895
+[gpua006:0/64] 2024-02-10 17:45:14,979 (trainer:756) INFO: 28epoch:train:6301-6400batch: iter_time=8.923e-05, forward_time=0.144, loss_ctc=99.089, loss_interctc_layer6=99.784, loss_interctc_layer12=84.132, loss_interctc_layer15=78.201, loss_interctc_layer21=102.675, loss=92.776, backward_time=0.209, grad_norm=82.297, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.137, optim0_lr0=7.639e-05, train_time=0.943
+[gpua006:0/64] 2024-02-10 17:47:04,991 (trainer:756) INFO: 28epoch:train:6401-6500batch: iter_time=9.556e-05, forward_time=0.225, loss_ctc=80.257, loss_interctc_layer6=87.247, loss_interctc_layer12=72.704, loss_interctc_layer15=66.830, loss_interctc_layer21=82.956, loss=77.999, backward_time=0.259, grad_norm=73.375, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.142, optim0_lr0=7.638e-05, train_time=1.099
+[gpua006:0/64] 2024-02-10 17:49:57,820 (trainer:756) INFO: 28epoch:train:6501-6600batch: iter_time=0.044, forward_time=0.166, loss_ctc=85.907, loss_interctc_layer6=81.460, loss_interctc_layer12=67.347, loss_interctc_layer15=61.516, loss_interctc_layer21=89.259, loss=77.098, backward_time=0.209, grad_norm=73.952, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.137, optim0_lr0=7.637e-05, train_time=1.729
+[gpua006:0/64] 2024-02-10 17:51:45,162 (trainer:756) INFO: 28epoch:train:6601-6700batch: iter_time=1.308e-04, forward_time=0.143, loss_ctc=100.102, loss_interctc_layer6=101.675, loss_interctc_layer12=84.825, loss_interctc_layer15=77.972, loss_interctc_layer21=103.804, loss=93.676, backward_time=0.208, grad_norm=79.576, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.138, optim0_lr0=7.636e-05, train_time=1.073
+[gpua006:0/64] 2024-02-10 17:54:17,189 (trainer:756) INFO: 28epoch:train:6701-6800batch: iter_time=1.229e-04, forward_time=0.143, loss_ctc=96.584, loss_interctc_layer6=91.192, loss_interctc_layer12=75.594, loss_interctc_layer15=69.195, loss_interctc_layer21=100.011, loss=86.515, backward_time=0.207, grad_norm=74.064, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.138, optim0_lr0=7.635e-05, train_time=1.520
+[gpua006:0/64] 2024-02-10 17:56:40,901 (trainer:756) INFO: 28epoch:train:6801-6900batch: iter_time=1.165e-04, forward_time=0.144, loss_ctc=88.132, loss_interctc_layer6=91.111, loss_interctc_layer12=75.574, loss_interctc_layer15=69.376, loss_interctc_layer21=91.137, loss=83.066, backward_time=0.207, grad_norm=73.687, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.138, optim0_lr0=7.634e-05, train_time=1.437
+[gpua006:0/64] 2024-02-10 17:58:37,908 (trainer:756) INFO: 28epoch:train:6901-7000batch: iter_time=1.079e-04, forward_time=0.142, loss_ctc=74.457, loss_interctc_layer6=78.033, loss_interctc_layer12=64.538, loss_interctc_layer15=59.117, loss_interctc_layer21=76.821, loss=70.593, backward_time=0.207, grad_norm=69.914, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.137, optim0_lr0=7.634e-05, train_time=1.170
+[gpua006:0/64] 2024-02-10 18:00:51,995 (trainer:756) INFO: 28epoch:train:7001-7100batch: iter_time=2.454e-04, forward_time=0.230, loss_ctc=66.289, loss_interctc_layer6=70.688, loss_interctc_layer12=58.471, loss_interctc_layer15=53.767, loss_interctc_layer21=68.544, loss=63.552, backward_time=0.247, grad_norm=84.519, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.142, optim0_lr0=7.633e-05, train_time=1.340
+[gpua006:0/64] 2024-02-10 18:03:18,972 (trainer:756) INFO: 28epoch:train:7101-7200batch: iter_time=1.125e-04, forward_time=0.143, loss_ctc=81.979, loss_interctc_layer6=79.970, loss_interctc_layer12=66.107, loss_interctc_layer15=60.409, loss_interctc_layer21=85.051, loss=74.703, backward_time=0.206, grad_norm=61.453, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.138, optim0_lr0=7.632e-05, train_time=1.471
+[gpua006:0/64] 2024-02-10 18:05:11,706 (trainer:756) INFO: 28epoch:train:7201-7300batch: iter_time=1.079e-04, forward_time=0.143, loss_ctc=68.605, loss_interctc_layer6=80.420, loss_interctc_layer12=67.346, loss_interctc_layer15=61.942, loss_interctc_layer21=70.834, loss=69.830, backward_time=0.208, grad_norm=68.377, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.138, optim0_lr0=7.631e-05, train_time=1.127
+[gpua006:0/64] 2024-02-10 18:07:27,837 (trainer:756) INFO: 28epoch:train:7301-7400batch: iter_time=1.116e-04, forward_time=0.142, loss_ctc=73.241, loss_interctc_layer6=77.510, loss_interctc_layer12=64.396, loss_interctc_layer15=59.008, loss_interctc_layer21=75.871, loss=70.005, backward_time=0.208, grad_norm=57.558, clip=100.000, loss_scale=3.590e+31, optim_step_time=0.138, optim0_lr0=7.630e-05, train_time=1.361
+[gpua006:0/64] 2024-02-10 18:09:39,961 (trainer:756) INFO: 28epoch:train:7401-7500batch: iter_time=1.131e-04, forward_time=0.263, loss_ctc=74.607, loss_interctc_layer6=81.150, loss_interctc_layer12=67.122, loss_interctc_layer15=61.414, loss_interctc_layer21=77.224, loss=72.303, backward_time=0.239, grad_norm=66.138, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.142, optim0_lr0=7.629e-05, train_time=1.321
+[gpua006:0/64] 2024-02-10 18:09:59,990 (multiple_iter_factory:32) INFO: Building 6th iter-factory...
+[gpua006:0/64] 2024-02-10 18:10:18,920 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua006:0/64] 2024-02-10 18:10:22,440 (abs_task:1660) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TCTCPreprocessor object at 0x7fc6738b23b0>)
+[gpua006:0/64] 2024-02-10 18:10:22,440 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, 
+[gpua006:0/64] 2024-02-10 18:10:22,444 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua006:0/64] 2024-02-10 18:17:18,884 (trainer:756) INFO: 28epoch:train:7501-7600batch: iter_time=3.143, forward_time=0.147, loss_ctc=82.675, loss_interctc_layer6=88.940, loss_interctc_layer12=74.491, loss_interctc_layer15=68.650, loss_interctc_layer21=85.272, loss=80.006, backward_time=0.209, grad_norm=72.420, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.137, optim0_lr0=7.628e-05, train_time=4.589
+[gpua006:0/64] 2024-02-10 18:20:10,179 (trainer:756) INFO: 28epoch:train:7601-7700batch: iter_time=1.154e-04, forward_time=0.224, loss_ctc=93.951, loss_interctc_layer6=96.532, loss_interctc_layer12=80.833, loss_interctc_layer15=74.552, loss_interctc_layer21=97.229, loss=88.619, backward_time=0.274, grad_norm=83.039, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.145, optim0_lr0=7.627e-05, train_time=1.711
+[gpua006:0/64] 2024-02-10 18:22:34,197 (trainer:756) INFO: 28epoch:train:7701-7800batch: iter_time=1.114e-04, forward_time=0.144, loss_ctc=85.066, loss_interctc_layer6=85.979, loss_interctc_layer12=72.074, loss_interctc_layer15=66.281, loss_interctc_layer21=87.967, loss=79.473, backward_time=0.207, grad_norm=78.786, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.139, optim0_lr0=7.626e-05, train_time=1.442
+[gpua006:0/64] 2024-02-10 18:24:44,067 (trainer:756) INFO: 28epoch:train:7801-7900batch: iter_time=1.112e-04, forward_time=0.143, loss_ctc=80.467, loss_interctc_layer6=84.931, loss_interctc_layer12=70.455, loss_interctc_layer15=64.592, loss_interctc_layer21=83.334, loss=76.756, backward_time=0.208, grad_norm=70.695, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.138, optim0_lr0=7.625e-05, train_time=1.298
+[gpua006:0/64] 2024-02-10 18:27:57,889 (trainer:756) INFO: 28epoch:train:7901-8000batch: iter_time=1.173e-04, forward_time=0.145, loss_ctc=110.482, loss_interctc_layer6=103.237, loss_interctc_layer12=85.634, loss_interctc_layer15=78.509, loss_interctc_layer21=114.680, loss=98.508, backward_time=0.208, grad_norm=88.734, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.138, optim0_lr0=7.624e-05, train_time=1.939
+[gpua006:0/64] 2024-02-10 18:33:15,665 (trainer:756) INFO: 28epoch:train:8001-8100batch: iter_time=1.167e-04, forward_time=0.486, loss_ctc=91.794, loss_interctc_layer6=93.661, loss_interctc_layer12=77.947, loss_interctc_layer15=71.652, loss_interctc_layer21=94.874, loss=85.986, backward_time=0.848, grad_norm=71.739, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.147, optim0_lr0=7.623e-05, train_time=3.177
+[gpua006:0/64] 2024-02-10 18:35:28,392 (trainer:756) INFO: 28epoch:train:8101-8200batch: iter_time=1.085e-04, forward_time=0.160, loss_ctc=70.932, loss_interctc_layer6=75.659, loss_interctc_layer12=62.255, loss_interctc_layer15=56.776, loss_interctc_layer21=73.270, loss=67.779, backward_time=0.208, grad_norm=60.237, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.138, optim0_lr0=7.622e-05, train_time=1.327
+[gpua006:0/64] 2024-02-10 18:37:09,380 (trainer:756) INFO: 28epoch:train:8201-8300batch: iter_time=1.042e-04, forward_time=0.143, loss_ctc=72.660, loss_interctc_layer6=76.985, loss_interctc_layer12=63.979, loss_interctc_layer15=58.474, loss_interctc_layer21=75.224, loss=69.464, backward_time=0.210, grad_norm=59.516, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.139, optim0_lr0=7.622e-05, train_time=1.010
+[gpua006:0/64] 2024-02-10 18:39:23,546 (trainer:756) INFO: 28epoch:train:8301-8400batch: iter_time=9.165e-05, forward_time=0.143, loss_ctc=85.516, loss_interctc_layer6=81.742, loss_interctc_layer12=67.653, loss_interctc_layer15=61.810, loss_interctc_layer21=88.871, loss=77.119, backward_time=0.211, grad_norm=95.944, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.139, optim0_lr0=7.621e-05, train_time=1.341
+[gpua006:0/64] 2024-02-10 18:42:19,133 (trainer:756) INFO: 28epoch:train:8401-8500batch: iter_time=9.345e-05, forward_time=0.143, loss_ctc=65.653, loss_interctc_layer6=74.212, loss_interctc_layer12=61.836, loss_interctc_layer15=56.642, loss_interctc_layer21=67.985, loss=65.266, backward_time=0.208, grad_norm=63.809, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.138, optim0_lr0=7.620e-05, train_time=1.755
+[gpua006:0/64] 2024-02-10 18:44:38,283 (trainer:756) INFO: 28epoch:train:8501-8600batch: iter_time=9.238e-05, forward_time=0.227, loss_ctc=71.836, loss_interctc_layer6=76.618, loss_interctc_layer12=63.304, loss_interctc_layer15=57.793, loss_interctc_layer21=74.430, loss=68.796, backward_time=0.230, grad_norm=69.045, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.139, optim0_lr0=7.619e-05, train_time=1.392
+[gpua006:0/64] 2024-02-10 18:46:39,780 (trainer:756) INFO: 28epoch:train:8601-8700batch: iter_time=1.014e-04, forward_time=0.158, loss_ctc=70.999, loss_interctc_layer6=81.610, loss_interctc_layer12=67.991, loss_interctc_layer15=62.370, loss_interctc_layer21=73.300, loss=71.254, backward_time=0.215, grad_norm=106.043, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.140, optim0_lr0=7.618e-05, train_time=1.214
+[gpua006:0/64] 2024-02-10 18:48:14,586 (multiple_iter_factory:32) INFO: Building 7th iter-factory...
+[gpua006:0/64] 2024-02-10 18:48:32,865 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua006:0/64] 2024-02-10 18:48:36,315 (abs_task:1660) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TCTCPreprocessor object at 0x7fbabd487220>)
+[gpua006:0/64] 2024-02-10 18:48:36,315 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, 
+[gpua006:0/64] 2024-02-10 18:48:36,318 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua006:0/64] 2024-02-10 18:54:31,873 (trainer:756) INFO: 28epoch:train:8701-8800batch: iter_time=3.158, forward_time=0.191, loss_ctc=72.712, loss_interctc_layer6=79.044, loss_interctc_layer12=65.619, loss_interctc_layer15=60.162, loss_interctc_layer21=75.139, loss=70.535, backward_time=0.216, grad_norm=105.759, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.139, optim0_lr0=7.617e-05, train_time=4.721
+[gpua006:0/64] 2024-02-10 18:56:29,954 (trainer:756) INFO: 28epoch:train:8801-8900batch: iter_time=1.118e-04, forward_time=0.145, loss_ctc=96.439, loss_interctc_layer6=98.448, loss_interctc_layer12=82.869, loss_interctc_layer15=76.797, loss_interctc_layer21=99.637, loss=90.838, backward_time=0.211, grad_norm=82.342, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.138, optim0_lr0=7.616e-05, train_time=1.181
+[gpua006:0/64] 2024-02-10 18:58:39,664 (trainer:756) INFO: 28epoch:train:8901-9000batch: iter_time=1.011e-04, forward_time=0.144, loss_ctc=79.574, loss_interctc_layer6=87.518, loss_interctc_layer12=73.064, loss_interctc_layer15=67.004, loss_interctc_layer21=82.213, loss=77.875, backward_time=0.210, grad_norm=89.536, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.138, optim0_lr0=7.615e-05, train_time=1.297
+[gpua006:0/64] 2024-02-10 19:00:59,709 (trainer:756) INFO: 28epoch:train:9001-9100batch: iter_time=1.101e-04, forward_time=0.145, loss_ctc=84.820, loss_interctc_layer6=81.227, loss_interctc_layer12=67.046, loss_interctc_layer15=61.267, loss_interctc_layer21=88.071, loss=76.486, backward_time=0.210, grad_norm=91.620, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.137, optim0_lr0=7.614e-05, train_time=1.400
+[gpua006:0/64] 2024-02-10 19:03:05,388 (trainer:756) INFO: 28epoch:train:9101-9200batch: iter_time=1.167e-04, forward_time=0.190, loss_ctc=98.899, loss_interctc_layer6=100.636, loss_interctc_layer12=83.746, loss_interctc_layer15=77.092, loss_interctc_layer21=102.433, loss=92.561, backward_time=0.228, grad_norm=92.653, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.139, optim0_lr0=7.613e-05, train_time=1.257
+[gpua006:0/64] 2024-02-10 19:04:57,707 (trainer:756) INFO: 28epoch:train:9201-9300batch: iter_time=1.028e-04, forward_time=0.163, loss_ctc=95.693, loss_interctc_layer6=91.104, loss_interctc_layer12=75.432, loss_interctc_layer15=69.120, loss_interctc_layer21=99.062, loss=86.082, backward_time=0.215, grad_norm=62.247, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.138, optim0_lr0=7.612e-05, train_time=1.123
+[gpua006:0/64] 2024-02-10 19:07:06,552 (trainer:756) INFO: 28epoch:train:9301-9400batch: iter_time=9.435e-05, forward_time=0.167, loss_ctc=88.479, loss_interctc_layer6=91.220, loss_interctc_layer12=75.716, loss_interctc_layer15=69.465, loss_interctc_layer21=91.565, loss=83.289, backward_time=0.212, grad_norm=77.092, clip=100.000, loss_scale=7.180e+31, optim_step_time=0.139, optim0_lr0=7.611e-05, train_time=1.288
+[gpua006:0/64] 2024-02-10 19:09:34,305 (trainer:756) INFO: 28epoch:train:9401-9500batch: iter_time=9.501e-05, forward_time=0.143, loss_ctc=74.341, loss_interctc_layer6=78.131, loss_interctc_layer12=64.613, loss_interctc_layer15=59.009, loss_interctc_layer21=77.141, loss=70.647, backward_time=0.208, grad_norm=71.943, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.138, optim0_lr0=7.611e-05, train_time=1.478
+[gpua006:0/64] 2024-02-10 19:10:17,960 (trainer:687) WARNING: The grad norm is nan. Skipping updating the model.
+[gpua006:0/64] 2024-02-10 19:11:28,896 (trainer:756) INFO: 28epoch:train:9501-9600batch: iter_time=1.005e-04, forward_time=0.142, loss_ctc=66.401, loss_interctc_layer6=70.476, loss_interctc_layer12=58.185, loss_interctc_layer15=53.246, loss_interctc_layer21=68.928, loss=63.447, backward_time=0.208, grad_norm=108.197, clip=100.000, loss_scale=5.695e+31, optim_step_time=0.137, optim0_lr0=7.610e-05, train_time=1.146
+[gpua006:0/64] 2024-02-10 19:13:43,754 (trainer:756) INFO: 28epoch:train:9601-9700batch: iter_time=9.942e-05, forward_time=0.164, loss_ctc=82.539, loss_interctc_layer6=79.754, loss_interctc_layer12=65.810, loss_interctc_layer15=60.191, loss_interctc_layer21=85.639, loss=74.787, backward_time=0.210, grad_norm=91.324, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.138, optim0_lr0=7.609e-05, train_time=1.348
+[gpua006:0/64] 2024-02-10 19:16:25,409 (trainer:756) INFO: 28epoch:train:9701-9800batch: iter_time=1.057e-04, forward_time=0.186, loss_ctc=67.350, loss_interctc_layer6=80.262, loss_interctc_layer12=67.032, loss_interctc_layer15=61.500, loss_interctc_layer21=69.440, loss=69.117, backward_time=0.223, grad_norm=60.539, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.139, optim0_lr0=7.608e-05, train_time=1.616
+[gpua006:0/64] 2024-02-10 19:18:36,066 (trainer:756) INFO: 28epoch:train:9801-9900batch: iter_time=8.387e-05, forward_time=0.158, loss_ctc=72.418, loss_interctc_layer6=77.314, loss_interctc_layer12=64.213, loss_interctc_layer15=58.777, loss_interctc_layer21=74.931, loss=69.530, backward_time=0.224, grad_norm=64.017, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.140, optim0_lr0=7.607e-05, train_time=1.306
+[gpua006:0/64] 2024-02-10 19:20:42,152 (trainer:756) INFO: 28epoch:train:9901-10000batch: iter_time=9.833e-05, forward_time=0.143, loss_ctc=75.037, loss_interctc_layer6=81.468, loss_interctc_layer12=67.521, loss_interctc_layer15=61.691, loss_interctc_layer21=77.602, loss=72.664, backward_time=0.207, grad_norm=90.314, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.138, optim0_lr0=7.606e-05, train_time=1.261
+[gpua006:0/64] 2024-02-10 19:21:02,181 (multiple_iter_factory:32) INFO: Building 8th iter-factory...
+[gpua006:0/64] 2024-02-10 19:21:21,311 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua006:0/64] 2024-02-10 19:21:24,848 (abs_task:1660) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TCTCPreprocessor object at 0x7fbae9588b20>)
+[gpua006:0/64] 2024-02-10 19:21:24,848 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, 
+[gpua006:0/64] 2024-02-10 19:21:24,851 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua006:0/64] 2024-02-10 19:27:24,829 (trainer:756) INFO: 28epoch:train:10001-10100batch: iter_time=3.035, forward_time=0.173, loss_ctc=79.981, loss_interctc_layer6=88.442, loss_interctc_layer12=74.034, loss_interctc_layer15=68.053, loss_interctc_layer21=82.549, loss=78.612, backward_time=0.216, grad_norm=72.221, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.138, optim0_lr0=7.605e-05, train_time=4.026
+[gpua006:0/64] 2024-02-10 19:28:59,033 (trainer:756) INFO: 28epoch:train:10101-10200batch: iter_time=9.575e-05, forward_time=0.143, loss_ctc=86.435, loss_interctc_layer6=95.405, loss_interctc_layer12=79.761, loss_interctc_layer15=73.435, loss_interctc_layer21=89.665, loss=84.940, backward_time=0.211, grad_norm=80.640, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.138, optim0_lr0=7.604e-05, train_time=0.942
+[gpua006:0/64] 2024-02-10 19:31:09,448 (trainer:756) INFO: 28epoch:train:10201-10300batch: iter_time=9.500e-05, forward_time=0.142, loss_ctc=74.785, loss_interctc_layer6=85.726, loss_interctc_layer12=71.551, loss_interctc_layer15=65.667, loss_interctc_layer21=77.542, loss=75.054, backward_time=0.210, grad_norm=101.888, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.138, optim0_lr0=7.603e-05, train_time=1.304
+[gpua006:0/64] 2024-02-10 19:33:29,584 (trainer:756) INFO: 28epoch:train:10301-10400batch: iter_time=1.280e-04, forward_time=0.142, loss_ctc=74.294, loss_interctc_layer6=84.152, loss_interctc_layer12=69.624, loss_interctc_layer15=63.777, loss_interctc_layer21=77.062, loss=73.782, backward_time=0.209, grad_norm=70.591, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.138, optim0_lr0=7.602e-05, train_time=1.401
+[gpua006:0/64] 2024-02-10 19:35:54,060 (trainer:756) INFO: 28epoch:train:10401-10500batch: iter_time=1.140e-04, forward_time=0.144, loss_ctc=103.025, loss_interctc_layer6=103.098, loss_interctc_layer12=85.453, loss_interctc_layer15=78.678, loss_interctc_layer21=106.658, loss=95.383, backward_time=0.209, grad_norm=108.465, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.138, optim0_lr0=7.601e-05, train_time=1.445
+[gpua006:0/64] 2024-02-10 19:37:56,408 (trainer:687) WARNING: The grad norm is nan. Skipping updating the model.
+[gpua006:0/64] 2024-02-10 19:38:01,264 (trainer:756) INFO: 28epoch:train:10501-10600batch: iter_time=0.002, forward_time=0.240, loss_ctc=86.183, loss_interctc_layer6=93.094, loss_interctc_layer12=77.344, loss_interctc_layer15=71.091, loss_interctc_layer21=89.150, loss=83.373, backward_time=0.245, grad_norm=62.586, clip=100.000, loss_scale=3.954e+31, optim_step_time=0.141, optim0_lr0=7.600e-05, train_time=1.271
+[gpua006:0/64] 2024-02-10 19:40:09,881 (trainer:756) INFO: 28epoch:train:10601-10700batch: iter_time=9.642e-05, forward_time=0.142, loss_ctc=68.368, loss_interctc_layer6=75.600, loss_interctc_layer12=62.333, loss_interctc_layer15=56.858, loss_interctc_layer21=70.712, loss=66.774, backward_time=0.210, grad_norm=67.644, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.138, optim0_lr0=7.600e-05, train_time=1.287
+[gpua006:0/64] 2024-02-10 19:42:27,948 (trainer:756) INFO: 28epoch:train:10701-10800batch: iter_time=1.066e-04, forward_time=0.141, loss_ctc=67.803, loss_interctc_layer6=76.423, loss_interctc_layer12=63.129, loss_interctc_layer15=58.108, loss_interctc_layer21=70.230, loss=67.139, backward_time=0.207, grad_norm=65.405, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.138, optim0_lr0=7.599e-05, train_time=1.380
+[gpua006:0/64] 2024-02-10 19:44:22,208 (trainer:756) INFO: 28epoch:train:10801-10900batch: iter_time=1.059e-04, forward_time=0.145, loss_ctc=79.610, loss_interctc_layer6=81.254, loss_interctc_layer12=67.074, loss_interctc_layer15=61.301, loss_interctc_layer21=82.667, loss=74.381, backward_time=0.209, grad_norm=67.578, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.138, optim0_lr0=7.598e-05, train_time=1.142
+[gpua006:0/64] 2024-02-10 19:46:21,714 (trainer:756) INFO: 28epoch:train:10901-11000batch: iter_time=9.894e-05, forward_time=0.142, loss_ctc=62.620, loss_interctc_layer6=74.272, loss_interctc_layer12=61.973, loss_interctc_layer15=56.754, loss_interctc_layer21=64.705, loss=64.065, backward_time=0.209, grad_norm=55.510, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.138, optim0_lr0=7.597e-05, train_time=1.195
+[gpua006:0/64] 2024-02-10 19:48:24,442 (trainer:756) INFO: 28epoch:train:11001-11100batch: iter_time=9.489e-05, forward_time=0.142, loss_ctc=69.071, loss_interctc_layer6=77.116, loss_interctc_layer12=63.782, loss_interctc_layer15=58.197, loss_interctc_layer21=71.590, loss=67.951, backward_time=0.210, grad_norm=80.685, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.138, optim0_lr0=7.596e-05, train_time=1.227
+[gpua006:0/64] 2024-02-10 19:50:14,591 (trainer:756) INFO: 28epoch:train:11101-11200batch: iter_time=4.636e-04, forward_time=0.159, loss_ctc=67.190, loss_interctc_layer6=81.165, loss_interctc_layer12=67.341, loss_interctc_layer15=61.753, loss_interctc_layer21=69.494, loss=69.389, backward_time=0.212, grad_norm=69.675, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.139, optim0_lr0=7.595e-05, train_time=1.101
+[gpua006:0/64] 2024-02-10 19:51:48,459 (multiple_iter_factory:32) INFO: Building 9th iter-factory...
+[gpua006:0/64] 2024-02-10 19:52:07,251 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua006:0/64] 2024-02-10 19:52:10,805 (abs_task:1660) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TCTCPreprocessor object at 0x7fbb1646b9a0>)
+[gpua006:0/64] 2024-02-10 19:52:10,805 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, 
+[gpua006:0/64] 2024-02-10 19:52:10,808 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua006:0/64] 2024-02-10 19:58:50,572 (trainer:756) INFO: 28epoch:train:11201-11300batch: iter_time=3.157, forward_time=0.245, loss_ctc=71.073, loss_interctc_layer6=79.270, loss_interctc_layer12=65.792, loss_interctc_layer15=60.241, loss_interctc_layer21=73.571, loss=69.990, backward_time=0.257, grad_norm=56.429, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.141, optim0_lr0=7.594e-05, train_time=5.160
+[gpua006:0/64] 2024-02-10 20:00:28,561 (trainer:756) INFO: 28epoch:train:11301-11400batch: iter_time=8.717e-05, forward_time=0.144, loss_ctc=95.977, loss_interctc_layer6=98.503, loss_interctc_layer12=82.711, loss_interctc_layer15=76.568, loss_interctc_layer21=99.390, loss=90.630, backward_time=0.209, grad_norm=86.672, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.137, optim0_lr0=7.593e-05, train_time=0.979
+[gpua006:0/64] 2024-02-10 20:02:23,830 (trainer:756) INFO: 28epoch:train:11401-11500batch: iter_time=9.708e-05, forward_time=0.144, loss_ctc=79.422, loss_interctc_layer6=87.138, loss_interctc_layer12=72.651, loss_interctc_layer15=66.661, loss_interctc_layer21=82.119, loss=77.598, backward_time=0.210, grad_norm=70.060, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.138, optim0_lr0=7.592e-05, train_time=1.153
+[gpua006:0/64] 2024-02-10 20:04:35,416 (trainer:756) INFO: 28epoch:train:11501-11600batch: iter_time=1.153e-04, forward_time=0.145, loss_ctc=85.669, loss_interctc_layer6=81.378, loss_interctc_layer12=67.252, loss_interctc_layer15=61.469, loss_interctc_layer21=88.837, loss=76.921, backward_time=0.210, grad_norm=63.995, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.138, optim0_lr0=7.591e-05, train_time=1.316
+[gpua006:0/64] 2024-02-10 20:07:30,337 (trainer:756) INFO: 28epoch:train:11601-11700batch: iter_time=1.215e-04, forward_time=0.210, loss_ctc=98.422, loss_interctc_layer6=100.472, loss_interctc_layer12=83.483, loss_interctc_layer15=76.811, loss_interctc_layer21=101.864, loss=92.210, backward_time=0.260, grad_norm=90.579, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.143, optim0_lr0=7.590e-05, train_time=1.749
+[gpua006:0/64] 2024-02-10 20:09:52,343 (trainer:756) INFO: 28epoch:train:11701-11800batch: iter_time=1.193e-04, forward_time=0.144, loss_ctc=95.055, loss_interctc_layer6=90.575, loss_interctc_layer12=74.815, loss_interctc_layer15=68.483, loss_interctc_layer21=98.420, loss=85.470, backward_time=0.209, grad_norm=68.513, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.139, optim0_lr0=7.589e-05, train_time=1.420
+[gpua006:0/64] 2024-02-10 20:11:46,412 (trainer:756) INFO: 28epoch:train:11801-11900batch: iter_time=1.090e-04, forward_time=0.144, loss_ctc=89.042, loss_interctc_layer6=91.371, loss_interctc_layer12=75.722, loss_interctc_layer15=69.467, loss_interctc_layer21=92.040, loss=83.528, backward_time=0.210, grad_norm=79.191, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.139, optim0_lr0=7.589e-05, train_time=1.140
+[gpua006:0/64] 2024-02-10 20:14:12,498 (trainer:756) INFO: 28epoch:train:11901-12000batch: iter_time=1.172e-04, forward_time=0.143, loss_ctc=73.197, loss_interctc_layer6=76.797, loss_interctc_layer12=63.461, loss_interctc_layer15=57.947, loss_interctc_layer21=75.683, loss=69.417, backward_time=0.207, grad_norm=92.167, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.139, optim0_lr0=7.588e-05, train_time=1.461
+[gpua006:0/64] 2024-02-10 20:16:08,031 (trainer:756) INFO: 28epoch:train:12001-12100batch: iter_time=1.187e-04, forward_time=0.143, loss_ctc=66.043, loss_interctc_layer6=70.272, loss_interctc_layer12=58.034, loss_interctc_layer15=53.064, loss_interctc_layer21=68.390, loss=63.161, backward_time=0.211, grad_norm=63.940, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.139, optim0_lr0=7.587e-05, train_time=1.154
+[gpua006:0/64] 2024-02-10 20:18:22,807 (trainer:756) INFO: 28epoch:train:12101-12200batch: iter_time=1.211e-04, forward_time=0.144, loss_ctc=81.798, loss_interctc_layer6=79.435, loss_interctc_layer12=65.438, loss_interctc_layer15=59.732, loss_interctc_layer21=84.942, loss=74.269, backward_time=0.209, grad_norm=68.459, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.138, optim0_lr0=7.586e-05, train_time=1.349
+[gpua006:0/64] 2024-02-10 20:21:07,318 (trainer:756) INFO: 28epoch:train:12201-12300batch: iter_time=1.209e-04, forward_time=0.167, loss_ctc=66.920, loss_interctc_layer6=80.017, loss_interctc_layer12=66.680, loss_interctc_layer15=61.131, loss_interctc_layer21=69.174, loss=68.785, backward_time=0.214, grad_norm=59.471, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.140, optim0_lr0=7.585e-05, train_time=1.645
+[gpua006:0/64] 2024-02-10 20:23:21,756 (trainer:756) INFO: 28epoch:train:12301-12400batch: iter_time=1.139e-04, forward_time=0.201, loss_ctc=72.420, loss_interctc_layer6=76.831, loss_interctc_layer12=63.646, loss_interctc_layer15=58.283, loss_interctc_layer21=74.945, loss=69.225, backward_time=0.242, grad_norm=68.773, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.140, optim0_lr0=7.584e-05, train_time=1.344
+[gpua006:0/64] 2024-02-10 20:25:33,020 (trainer:756) INFO: 28epoch:train:12401-12500batch: iter_time=1.036e-04, forward_time=0.143, loss_ctc=74.683, loss_interctc_layer6=81.532, loss_interctc_layer12=67.494, loss_interctc_layer15=61.757, loss_interctc_layer21=77.256, loss=72.544, backward_time=0.208, grad_norm=72.356, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.137, optim0_lr0=7.583e-05, train_time=1.313
+[gpua006:0/64] 2024-02-10 20:25:53,049 (multiple_iter_factory:32) INFO: Building 10th iter-factory...
+[gpua006:0/64] 2024-02-10 20:26:11,812 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua006:0/64] 2024-02-10 20:26:15,367 (abs_task:1660) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TCTCPreprocessor object at 0x7fbace499c60>)
+[gpua006:0/64] 2024-02-10 20:26:15,368 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, 
+[gpua006:0/64] 2024-02-10 20:26:15,371 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua006:0/64] 2024-02-10 20:40:16,239 (trainer:756) INFO: 28epoch:train:12501-12600batch: iter_time=7.836, forward_time=0.179, loss_ctc=81.687, loss_interctc_layer6=87.592, loss_interctc_layer12=73.010, loss_interctc_layer15=67.278, loss_interctc_layer21=84.297, loss=78.773, backward_time=0.214, grad_norm=64.546, clip=100.000, loss_scale=2.130e+31, optim_step_time=0.140, optim0_lr0=7.582e-05, train_time=8.832
+[gpua006:0/64] 2024-02-10 20:42:33,036 (trainer:756) INFO: 28epoch:train:12601-12700batch: iter_time=1.079e-04, forward_time=0.144, loss_ctc=92.979, loss_interctc_layer6=95.621, loss_interctc_layer12=79.680, loss_interctc_layer15=73.516, loss_interctc_layer21=96.158, loss=87.591, backward_time=0.210, grad_norm=84.566, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.138, optim0_lr0=7.581e-05, train_time=1.368
+[gpua006:0/64] 2024-02-10 20:45:05,150 (trainer:756) INFO: 28epoch:train:12701-12800batch: iter_time=4.049e-04, forward_time=0.166, loss_ctc=83.340, loss_interctc_layer6=85.156, loss_interctc_layer12=71.100, loss_interctc_layer15=65.311, loss_interctc_layer21=86.338, loss=78.249, backward_time=0.210, grad_norm=94.122, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.139, optim0_lr0=7.580e-05, train_time=1.520
+[gpua006:0/64] 2024-02-10 20:47:25,657 (trainer:756) INFO: 28epoch:train:12801-12900batch: iter_time=8.711e-05, forward_time=0.185, loss_ctc=79.733, loss_interctc_layer6=84.225, loss_interctc_layer12=69.748, loss_interctc_layer15=63.905, loss_interctc_layer21=82.665, loss=76.055, backward_time=0.286, grad_norm=66.182, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.142, optim0_lr0=7.579e-05, train_time=1.406
+[gpua006:0/64] 2024-02-10 20:49:30,569 (trainer:756) INFO: 28epoch:train:12901-13000batch: iter_time=8.751e-05, forward_time=0.144, loss_ctc=110.055, loss_interctc_layer6=102.111, loss_interctc_layer12=84.619, loss_interctc_layer15=77.562, loss_interctc_layer21=114.045, loss=97.678, backward_time=0.208, grad_norm=77.397, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.138, optim0_lr0=7.579e-05, train_time=1.249
+[gpua006:0/64] 2024-02-10 20:51:28,367 (trainer:756) INFO: 28epoch:train:13001-13100batch: iter_time=8.721e-05, forward_time=0.143, loss_ctc=89.551, loss_interctc_layer6=93.039, loss_interctc_layer12=77.241, loss_interctc_layer15=70.841, loss_interctc_layer21=92.494, loss=84.633, backward_time=0.208, grad_norm=91.628, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.138, optim0_lr0=7.578e-05, train_time=1.178
+[gpua006:0/64] 2024-02-10 20:53:34,597 (trainer:756) INFO: 28epoch:train:13101-13200batch: iter_time=9.877e-05, forward_time=0.142, loss_ctc=70.067, loss_interctc_layer6=75.841, loss_interctc_layer12=62.401, loss_interctc_layer15=56.901, loss_interctc_layer21=72.416, loss=67.525, backward_time=0.208, grad_norm=71.074, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.138, optim0_lr0=7.577e-05, train_time=1.262
+[gpua006:0/64] 2024-02-10 20:56:36,688 (trainer:756) INFO: 28epoch:train:13201-13300batch: iter_time=0.001, forward_time=0.276, loss_ctc=72.160, loss_interctc_layer6=76.038, loss_interctc_layer12=63.209, loss_interctc_layer15=57.696, loss_interctc_layer21=74.800, loss=68.781, backward_time=0.242, grad_norm=80.476, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.145, optim0_lr0=7.576e-05, train_time=1.819
+[gpua006:0/64] 2024-02-10 20:58:26,318 (trainer:756) INFO: 28epoch:train:13301-13400batch: iter_time=1.129e-04, forward_time=0.144, loss_ctc=84.330, loss_interctc_layer6=81.272, loss_interctc_layer12=67.023, loss_interctc_layer15=61.291, loss_interctc_layer21=87.513, loss=76.286, backward_time=0.209, grad_norm=68.390, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.139, optim0_lr0=7.575e-05, train_time=1.097
+[gpua006:0/64] 2024-02-10 21:01:10,580 (trainer:756) INFO: 28epoch:train:13401-13500batch: iter_time=9.002e-05, forward_time=0.142, loss_ctc=64.821, loss_interctc_layer6=74.151, loss_interctc_layer12=61.601, loss_interctc_layer15=56.471, loss_interctc_layer21=66.979, loss=64.805, backward_time=0.206, grad_norm=70.205, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.139, optim0_lr0=7.574e-05, train_time=1.642
+[gpua006:0/64] 2024-02-10 21:03:15,981 (trainer:756) INFO: 28epoch:train:13501-13600batch: iter_time=8.274e-05, forward_time=0.142, loss_ctc=72.852, loss_interctc_layer6=76.770, loss_interctc_layer12=63.430, loss_interctc_layer15=57.850, loss_interctc_layer21=75.414, loss=69.263, backward_time=0.207, grad_norm=58.625, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.138, optim0_lr0=7.573e-05, train_time=1.254
+[gpua006:0/64] 2024-02-10 21:05:36,732 (trainer:756) INFO: 28epoch:train:13601-13700batch: iter_time=8.616e-05, forward_time=0.254, loss_ctc=69.875, loss_interctc_layer6=80.838, loss_interctc_layer12=67.096, loss_interctc_layer15=61.504, loss_interctc_layer21=72.271, loss=70.317, backward_time=0.229, grad_norm=71.536, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.142, optim0_lr0=7.572e-05, train_time=1.407
+[gpua006:0/64] 2024-02-10 21:06:51,500 (multiple_iter_factory:32) INFO: Building 11th iter-factory...
+[gpua006:0/64] 2024-02-10 21:07:10,535 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua006:0/64] 2024-02-10 21:07:13,992 (abs_task:1660) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TCTCPreprocessor object at 0x7fbdb1d4b8e0>)
+[gpua006:0/64] 2024-02-10 21:07:13,992 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, 
+[gpua006:0/64] 2024-02-10 21:07:13,996 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua006:0/64] 2024-02-10 21:15:42,561 (trainer:756) INFO: 28epoch:train:13701-13800batch: iter_time=2.804, forward_time=0.157, loss_ctc=73.179, loss_interctc_layer6=79.465, loss_interctc_layer12=65.856, loss_interctc_layer15=60.350, loss_interctc_layer21=75.775, loss=70.925, backward_time=0.212, grad_norm=60.925, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.139, optim0_lr0=7.571e-05, train_time=6.058
+[gpua006:0/64] 2024-02-10 21:17:54,574 (trainer:756) INFO: 28epoch:train:13801-13900batch: iter_time=7.401e-05, forward_time=0.144, loss_ctc=95.820, loss_interctc_layer6=97.797, loss_interctc_layer12=81.909, loss_interctc_layer15=75.863, loss_interctc_layer21=98.812, loss=90.040, backward_time=0.210, grad_norm=81.974, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.139, optim0_lr0=7.570e-05, train_time=1.320
+[gpua006:0/64] 2024-02-10 21:21:05,192 (trainer:756) INFO: 28epoch:train:13901-14000batch: iter_time=8.071e-05, forward_time=0.142, loss_ctc=78.845, loss_interctc_layer6=86.829, loss_interctc_layer12=72.173, loss_interctc_layer15=66.345, loss_interctc_layer21=81.398, loss=77.118, backward_time=0.208, grad_norm=73.164, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.139, optim0_lr0=7.570e-05, train_time=1.906
+[gpua006:0/64] 2024-02-10 21:23:29,584 (trainer:756) INFO: 28epoch:train:14001-14100batch: iter_time=7.862e-05, forward_time=0.142, loss_ctc=84.306, loss_interctc_layer6=80.920, loss_interctc_layer12=66.679, loss_interctc_layer15=60.958, loss_interctc_layer21=87.443, loss=76.061, backward_time=0.209, grad_norm=79.599, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.139, optim0_lr0=7.569e-05, train_time=1.444
+[gpua006:0/64] 2024-02-10 21:25:56,211 (trainer:756) INFO: 28epoch:train:14101-14200batch: iter_time=8.104e-05, forward_time=0.143, loss_ctc=97.713, loss_interctc_layer6=100.501, loss_interctc_layer12=83.594, loss_interctc_layer15=76.922, loss_interctc_layer21=101.209, loss=91.988, backward_time=0.208, grad_norm=116.750, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.139, optim0_lr0=7.568e-05, train_time=1.466
+[gpua006:0/64] 2024-02-10 21:28:33,560 (trainer:756) INFO: 28epoch:train:14201-14300batch: iter_time=2.436e-04, forward_time=0.273, loss_ctc=94.975, loss_interctc_layer6=91.088, loss_interctc_layer12=75.239, loss_interctc_layer15=68.966, loss_interctc_layer21=98.375, loss=85.729, backward_time=0.236, grad_norm=80.880, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.142, optim0_lr0=7.567e-05, train_time=1.573
+[gpua006:0/64] 2024-02-10 21:31:06,605 (trainer:756) INFO: 28epoch:train:14301-14400batch: iter_time=8.048e-05, forward_time=0.162, loss_ctc=87.931, loss_interctc_layer6=90.785, loss_interctc_layer12=75.254, loss_interctc_layer15=68.937, loss_interctc_layer21=90.928, loss=82.767, backward_time=0.213, grad_norm=75.546, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.139, optim0_lr0=7.566e-05, train_time=1.530
+[gpua006:0/64] 2024-02-10 21:33:29,667 (trainer:756) INFO: 28epoch:train:14401-14500batch: iter_time=7.835e-05, forward_time=0.141, loss_ctc=73.410, loss_interctc_layer6=77.315, loss_interctc_layer12=63.884, loss_interctc_layer15=58.311, loss_interctc_layer21=76.018, loss=69.788, backward_time=0.208, grad_norm=57.899, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.138, optim0_lr0=7.565e-05, train_time=1.430
+[gpua006:0/64] 2024-02-10 21:35:38,605 (trainer:756) INFO: 28epoch:train:14501-14600batch: iter_time=7.727e-05, forward_time=0.142, loss_ctc=65.071, loss_interctc_layer6=69.849, loss_interctc_layer12=57.559, loss_interctc_layer15=52.607, loss_interctc_layer21=67.549, loss=62.527, backward_time=0.209, grad_norm=55.090, clip=100.000, loss_scale=4.259e+31, optim_step_time=0.138, optim0_lr0=7.564e-05, train_time=1.290
+[gpua006:0/64] 2024-02-10 21:38:13,311 (trainer:756) INFO: 28epoch:train:14601-14700batch: iter_time=8.250e-05, forward_time=0.143, loss_ctc=80.665, loss_interctc_layer6=79.299, loss_interctc_layer12=65.339, loss_interctc_layer15=59.681, loss_interctc_layer21=83.832, loss=73.763, backward_time=0.209, grad_norm=64.804, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.138, optim0_lr0=7.563e-05, train_time=1.547
+[gpua006:0/64] 2024-02-10 21:40:26,773 (trainer:756) INFO: 28epoch:train:14701-14800batch: iter_time=7.893e-05, forward_time=0.142, loss_ctc=67.601, loss_interctc_layer6=80.196, loss_interctc_layer12=66.663, loss_interctc_layer15=61.253, loss_interctc_layer21=69.785, loss=69.100, backward_time=0.208, grad_norm=72.142, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.138, optim0_lr0=7.562e-05, train_time=1.334
+[gpua006:0/64] 2024-02-10 21:42:53,312 (trainer:756) INFO: 28epoch:train:14801-14900batch: iter_time=7.790e-05, forward_time=0.141, loss_ctc=71.885, loss_interctc_layer6=77.008, loss_interctc_layer12=63.883, loss_interctc_layer15=58.518, loss_interctc_layer21=74.423, loss=69.143, backward_time=0.209, grad_norm=57.080, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.138, optim0_lr0=7.561e-05, train_time=1.465
+[gpua006:0/64] 2024-02-10 21:45:32,466 (trainer:756) INFO: 28epoch:train:14901-15000batch: iter_time=8.570e-05, forward_time=0.142, loss_ctc=73.819, loss_interctc_layer6=80.906, loss_interctc_layer12=66.882, loss_interctc_layer15=61.159, loss_interctc_layer21=76.336, loss=71.821, backward_time=0.207, grad_norm=72.513, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.138, optim0_lr0=7.561e-05, train_time=1.591
+[gpua006:0/64] 2024-02-10 22:17:09,492 (trainer:355) INFO: 28epoch results: [train] iter_time=0.289, forward_time=0.164, loss_ctc=79.197, loss_interctc_layer6=84.899, loss_interctc_layer12=70.588, loss_interctc_layer15=64.801, loss_interctc_layer21=81.941, loss=76.285, backward_time=0.220, grad_norm=77.835, clip=100.000, loss_scale=2.869e+31, optim_step_time=0.139, optim0_lr0=7.629e-05, train_time=1.790, time=7 hours, 27 minutes and 59.99 seconds, total_count=420000, gpu_max_cached_mem_GB=33.436, [valid] loss_ctc=44.683, cer_ctc=0.206, loss_interctc_layer6=50.428, cer_interctc_layer6=0.222, loss_interctc_layer12=37.599, cer_interctc_layer12=0.156, loss_interctc_layer15=33.213, cer_interctc_layer15=0.131, loss_interctc_layer21=47.233, cer_interctc_layer21=0.218, loss=42.631, time=31 minutes and 12.45 seconds, total_count=130788, gpu_max_cached_mem_GB=33.436
+[gpua006:0/64] 2024-02-10 22:17:31,952 (trainer:410) INFO: The best model has been updated: valid.total_count
+[gpua006:0/64] 2024-02-10 22:17:32,009 (trainer:289) INFO: 29/45epoch started. Estimated time to finish: 5 days, 15 hours and 52 minutes
+[gpua006:0/64] 2024-02-10 22:17:32,041 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpua006:0/64] 2024-02-10 22:17:49,841 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua006:0/64] 2024-02-10 22:17:53,176 (abs_task:1660) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TCTCPreprocessor object at 0x7fc8348ad240>)
+[gpua006:0/64] 2024-02-10 22:17:53,177 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, 
+[gpua006:0/64] 2024-02-10 22:17:53,180 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua006:0/64] 2024-02-10 22:25:48,782 (trainer:756) INFO: 29epoch:train:1-100batch: iter_time=2.135, forward_time=0.166, loss_ctc=56.644, loss_interctc_layer6=65.188, loss_interctc_layer12=53.851, loss_interctc_layer15=49.279, loss_interctc_layer21=58.499, loss=56.692, backward_time=0.218, grad_norm=46.923, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.139, optim0_lr0=7.560e-05, train_time=4.967
+[gpua006:0/64] 2024-02-10 22:27:32,650 (trainer:756) INFO: 29epoch:train:101-200batch: iter_time=9.356e-05, forward_time=0.143, loss_ctc=84.346, loss_interctc_layer6=87.904, loss_interctc_layer12=73.366, loss_interctc_layer15=67.340, loss_interctc_layer21=87.289, loss=80.049, backward_time=0.209, grad_norm=70.116, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.139, optim0_lr0=7.559e-05, train_time=1.039
+[gpua006:0/64] 2024-02-10 22:27:51,488 (trainer:687) WARNING: The grad norm is nan. Skipping updating the model.
+[gpua006:0/64] 2024-02-10 22:29:19,488 (trainer:756) INFO: 29epoch:train:201-300batch: iter_time=9.414e-05, forward_time=0.143, loss_ctc=78.937, loss_interctc_layer6=84.289, loss_interctc_layer12=70.984, loss_interctc_layer15=65.839, loss_interctc_layer21=81.544, loss=76.318, backward_time=0.209, grad_norm=76.452, clip=100.000, loss_scale=4.589e+31, optim_step_time=0.139, optim0_lr0=7.558e-05, train_time=1.068
+[gpua006:0/64] 2024-02-10 22:30:44,031 (trainer:687) WARNING: The grad norm is nan. Skipping updating the model.
+[gpua006:0/64] 2024-02-10 22:31:21,196 (trainer:756) INFO: 29epoch:train:301-400batch: iter_time=1.073e-04, forward_time=0.143, loss_ctc=88.301, loss_interctc_layer6=87.343, loss_interctc_layer12=72.413, loss_interctc_layer15=66.490, loss_interctc_layer21=91.197, loss=81.149, backward_time=0.208, grad_norm=74.327, clip=100.000, loss_scale=3.483e+31, optim_step_time=0.139, optim0_lr0=7.557e-05, train_time=1.217
+[gpua006:0/64] 2024-02-10 22:33:29,626 (trainer:756) INFO: 29epoch:train:401-500batch: iter_time=9.978e-05, forward_time=0.158, loss_ctc=71.902, loss_interctc_layer6=80.918, loss_interctc_layer12=67.990, loss_interctc_layer15=62.872, loss_interctc_layer21=74.121, loss=71.561, backward_time=0.211, grad_norm=86.731, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.139, optim0_lr0=7.556e-05, train_time=1.284
+[gpua006:0/64] 2024-02-10 22:35:38,135 (trainer:756) INFO: 29epoch:train:501-600batch: iter_time=9.866e-05, forward_time=0.160, loss_ctc=91.472, loss_interctc_layer6=90.043, loss_interctc_layer12=75.680, loss_interctc_layer15=69.751, loss_interctc_layer21=94.409, loss=84.271, backward_time=0.208, grad_norm=81.081, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.139, optim0_lr0=7.555e-05, train_time=1.285
+[gpua006:0/64] 2024-02-10 22:37:53,429 (trainer:756) INFO: 29epoch:train:601-700batch: iter_time=1.074e-04, forward_time=0.161, loss_ctc=85.988, loss_interctc_layer6=84.255, loss_interctc_layer12=69.998, loss_interctc_layer15=64.255, loss_interctc_layer21=88.995, loss=78.698, backward_time=0.207, grad_norm=68.287, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.139, optim0_lr0=7.554e-05, train_time=1.353
+[gpua006:0/64] 2024-02-10 22:40:08,861 (trainer:756) INFO: 29epoch:train:701-800batch: iter_time=1.191e-04, forward_time=0.150, loss_ctc=87.957, loss_interctc_layer6=86.966, loss_interctc_layer12=72.382, loss_interctc_layer15=66.589, loss_interctc_layer21=90.944, loss=80.968, backward_time=0.207, grad_norm=75.741, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.139, optim0_lr0=7.553e-05, train_time=1.353
+[gpua006:0/64] 2024-02-10 22:42:47,527 (trainer:756) INFO: 29epoch:train:801-900batch: iter_time=9.964e-05, forward_time=0.159, loss_ctc=103.866, loss_interctc_layer6=99.872, loss_interctc_layer12=82.868, loss_interctc_layer15=76.269, loss_interctc_layer21=107.326, loss=94.040, backward_time=0.216, grad_norm=89.376, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.139, optim0_lr0=7.552e-05, train_time=1.587
+[gpua006:0/64] 2024-02-10 22:44:57,327 (trainer:756) INFO: 29epoch:train:901-1000batch: iter_time=9.651e-05, forward_time=0.143, loss_ctc=75.335, loss_interctc_layer6=77.780, loss_interctc_layer12=64.689, loss_interctc_layer15=59.307, loss_interctc_layer21=78.214, loss=71.065, backward_time=0.209, grad_norm=58.594, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.138, optim0_lr0=7.552e-05, train_time=1.298
+[gpua006:0/64] 2024-02-10 22:47:36,964 (trainer:756) INFO: 29epoch:train:1001-1100batch: iter_time=1.059e-04, forward_time=0.144, loss_ctc=100.826, loss_interctc_layer6=94.477, loss_interctc_layer12=78.339, loss_interctc_layer15=71.827, loss_interctc_layer21=104.414, loss=89.977, backward_time=0.207, grad_norm=84.778, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.139, optim0_lr0=7.551e-05, train_time=1.596
+[gpua006:0/64] 2024-02-10 22:49:54,858 (trainer:756) INFO: 29epoch:train:1101-1200batch: iter_time=9.780e-05, forward_time=0.154, loss_ctc=77.797, loss_interctc_layer6=78.701, loss_interctc_layer12=65.755, loss_interctc_layer15=60.441, loss_interctc_layer21=80.445, loss=72.628, backward_time=0.210, grad_norm=83.147, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.139, optim0_lr0=7.550e-05, train_time=1.379
+[gpua006:0/64] 2024-02-10 22:51:15,267 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
+[gpua006:0/64] 2024-02-10 22:51:33,666 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua006:0/64] 2024-02-10 22:51:37,108 (abs_task:1660) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TCTCPreprocessor object at 0x7fbda576c070>)
+[gpua006:0/64] 2024-02-10 22:51:37,108 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, 
+[gpua006:0/64] 2024-02-10 22:51:37,222 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua006:0/64] 2024-02-10 22:58:08,618 (trainer:756) INFO: 29epoch:train:1201-1300batch: iter_time=2.038, forward_time=0.156, loss_ctc=65.500, loss_interctc_layer6=73.583, loss_interctc_layer12=60.879, loss_interctc_layer15=55.690, loss_interctc_layer21=67.723, loss=64.675, backward_time=0.219, grad_norm=72.452, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.140, optim0_lr0=7.549e-05, train_time=4.937
+[gpua006:0/64] 2024-02-10 22:59:49,503 (trainer:756) INFO: 29epoch:train:1301-1400batch: iter_time=8.524e-05, forward_time=0.148, loss_ctc=67.091, loss_interctc_layer6=73.608, loss_interctc_layer12=60.943, loss_interctc_layer15=55.878, loss_interctc_layer21=69.545, loss=65.413, backward_time=0.209, grad_norm=57.522, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.139, optim0_lr0=7.548e-05, train_time=1.009
+[gpua006:0/64] 2024-02-10 23:01:59,728 (trainer:756) INFO: 29epoch:train:1401-1500batch: iter_time=0.043, forward_time=0.142, loss_ctc=82.166, loss_interctc_layer6=90.300, loss_interctc_layer12=75.574, loss_interctc_layer15=69.799, loss_interctc_layer21=84.997, loss=80.567, backward_time=0.209, grad_norm=74.841, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.138, optim0_lr0=7.547e-05, train_time=1.302
+[gpua006:0/64] 2024-02-10 23:04:36,821 (trainer:756) INFO: 29epoch:train:1501-1600batch: iter_time=8.459e-05, forward_time=0.142, loss_ctc=79.755, loss_interctc_layer6=87.468, loss_interctc_layer12=72.825, loss_interctc_layer15=66.947, loss_interctc_layer21=82.295, loss=77.858, backward_time=0.208, grad_norm=66.521, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.138, optim0_lr0=7.546e-05, train_time=1.569
+[gpua006:0/64] 2024-02-10 23:10:48,616 (trainer:756) INFO: 29epoch:train:1601-1700batch: iter_time=1.000e-04, forward_time=0.142, loss_ctc=66.163, loss_interctc_layer6=71.382, loss_interctc_layer12=59.161, loss_interctc_layer15=54.290, loss_interctc_layer21=68.596, loss=63.918, backward_time=0.207, grad_norm=62.143, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.139, optim0_lr0=7.545e-05, train_time=3.720
+[gpua006:0/64] 2024-02-10 23:13:04,895 (trainer:756) INFO: 29epoch:train:1701-1800batch: iter_time=1.115e-04, forward_time=0.144, loss_ctc=75.350, loss_interctc_layer6=87.943, loss_interctc_layer12=73.659, loss_interctc_layer15=67.987, loss_interctc_layer21=77.817, loss=76.551, backward_time=0.209, grad_norm=66.299, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.139, optim0_lr0=7.544e-05, train_time=1.362
+[gpua006:0/64] 2024-02-10 23:15:00,905 (trainer:756) INFO: 29epoch:train:1801-1900batch: iter_time=8.722e-05, forward_time=0.143, loss_ctc=92.763, loss_interctc_layer6=90.909, loss_interctc_layer12=75.633, loss_interctc_layer15=69.433, loss_interctc_layer21=96.026, loss=84.953, backward_time=0.207, grad_norm=75.725, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.138, optim0_lr0=7.543e-05, train_time=1.160
+[gpua006:0/64] 2024-02-10 23:16:49,083 (trainer:756) INFO: 29epoch:train:1901-2000batch: iter_time=1.031e-04, forward_time=0.142, loss_ctc=75.512, loss_interctc_layer6=80.387, loss_interctc_layer12=66.545, loss_interctc_layer15=61.026, loss_interctc_layer21=78.240, loss=72.342, backward_time=0.207, grad_norm=89.118, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.138, optim0_lr0=7.543e-05, train_time=1.082
+[gpua006:0/64] 2024-02-10 23:18:56,877 (trainer:756) INFO: 29epoch:train:2001-2100batch: iter_time=1.010e-04, forward_time=0.182, loss_ctc=101.675, loss_interctc_layer6=95.384, loss_interctc_layer12=78.914, loss_interctc_layer15=72.207, loss_interctc_layer21=105.128, loss=90.662, backward_time=0.208, grad_norm=77.674, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.138, optim0_lr0=7.542e-05, train_time=1.278
+[gpua006:0/64] 2024-02-10 23:21:05,583 (trainer:756) INFO: 29epoch:train:2101-2200batch: iter_time=8.373e-05, forward_time=0.161, loss_ctc=81.008, loss_interctc_layer6=92.131, loss_interctc_layer12=76.486, loss_interctc_layer15=70.331, loss_interctc_layer21=83.600, loss=80.711, backward_time=0.209, grad_norm=79.184, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.138, optim0_lr0=7.541e-05, train_time=1.287
+[gpua006:0/64] 2024-02-10 23:23:09,227 (trainer:756) INFO: 29epoch:train:2201-2300batch: iter_time=8.730e-05, forward_time=0.143, loss_ctc=86.375, loss_interctc_layer6=82.668, loss_interctc_layer12=68.216, loss_interctc_layer15=62.215, loss_interctc_layer21=89.603, loss=77.815, backward_time=0.210, grad_norm=62.446, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.138, optim0_lr0=7.540e-05, train_time=1.236
+[gpua006:0/64] 2024-02-10 23:25:11,503 (trainer:756) INFO: 29epoch:train:2301-2400batch: iter_time=9.320e-05, forward_time=0.142, loss_ctc=73.255, loss_interctc_layer6=79.827, loss_interctc_layer12=66.111, loss_interctc_layer15=60.567, loss_interctc_layer21=75.964, loss=71.145, backward_time=0.207, grad_norm=60.380, clip=100.000, loss_scale=2.596e+31, optim_step_time=0.138, optim0_lr0=7.539e-05, train_time=1.223
+[gpua006:0/64] 2024-02-10 23:27:10,245 (trainer:756) INFO: 29epoch:train:2401-2500batch: iter_time=8.484e-05, forward_time=0.143, loss_ctc=76.882, loss_interctc_layer6=80.870, loss_interctc_layer12=67.191, loss_interctc_layer15=61.643, loss_interctc_layer21=79.461, loss=73.209, backward_time=0.207, grad_norm=67.950, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.138, optim0_lr0=7.538e-05, train_time=1.187
+[gpua006:0/64] 2024-02-10 23:27:21,346 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpua006:0/64] 2024-02-10 23:27:40,012 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua006:0/64] 2024-02-10 23:27:43,446 (abs_task:1660) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TCTCPreprocessor object at 0x7fbdaae920e0>)
+[gpua006:0/64] 2024-02-10 23:27:43,446 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, 
+[gpua006:0/64] 2024-02-10 23:27:43,449 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua006:0/64] 2024-02-10 23:40:20,179 (trainer:756) INFO: 29epoch:train:2501-2600batch: iter_time=2.223, forward_time=0.171, loss_ctc=56.231, loss_interctc_layer6=64.593, loss_interctc_layer12=53.181, loss_interctc_layer15=48.600, loss_interctc_layer21=58.079, loss=56.137, backward_time=0.213, grad_norm=47.108, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.139, optim0_lr0=7.537e-05, train_time=7.899
+[gpua006:0/64] 2024-02-10 23:41:56,147 (trainer:756) INFO: 29epoch:train:2601-2700batch: iter_time=8.299e-05, forward_time=0.143, loss_ctc=83.538, loss_interctc_layer6=87.212, loss_interctc_layer12=72.532, loss_interctc_layer15=66.572, loss_interctc_layer21=86.469, loss=79.264, backward_time=0.209, grad_norm=99.442, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.138, optim0_lr0=7.536e-05, train_time=0.959
+[gpua006:0/64] 2024-02-10 23:43:30,276 (trainer:756) INFO: 29epoch:train:2701-2800batch: iter_time=8.549e-05, forward_time=0.142, loss_ctc=77.443, loss_interctc_layer6=82.903, loss_interctc_layer12=69.534, loss_interctc_layer15=64.464, loss_interctc_layer21=79.819, loss=74.833, backward_time=0.208, grad_norm=67.320, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.138, optim0_lr0=7.535e-05, train_time=0.941
+[gpua006:0/64] 2024-02-10 23:47:05,509 (trainer:756) INFO: 29epoch:train:2801-2900batch: iter_time=9.019e-05, forward_time=0.142, loss_ctc=87.834, loss_interctc_layer6=87.045, loss_interctc_layer12=71.943, loss_interctc_layer15=66.038, loss_interctc_layer21=90.649, loss=80.702, backward_time=0.207, grad_norm=72.093, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.138, optim0_lr0=7.535e-05, train_time=2.152
+[gpua006:0/64] 2024-02-10 23:49:07,926 (trainer:756) INFO: 29epoch:train:2901-3000batch: iter_time=8.606e-05, forward_time=0.142, loss_ctc=71.124, loss_interctc_layer6=80.590, loss_interctc_layer12=67.569, loss_interctc_layer15=62.348, loss_interctc_layer21=73.383, loss=71.003, backward_time=0.208, grad_norm=75.200, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.138, optim0_lr0=7.534e-05, train_time=1.224
+[gpua006:0/64] 2024-02-10 23:51:13,296 (trainer:756) INFO: 29epoch:train:3001-3100batch: iter_time=8.402e-05, forward_time=0.145, loss_ctc=90.334, loss_interctc_layer6=88.360, loss_interctc_layer12=73.709, loss_interctc_layer15=67.812, loss_interctc_layer21=93.382, loss=82.719, backward_time=0.209, grad_norm=92.879, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.138, optim0_lr0=7.533e-05, train_time=1.253
+[gpua006:0/64] 2024-02-10 23:53:34,898 (trainer:756) INFO: 29epoch:train:3101-3200batch: iter_time=8.154e-05, forward_time=0.150, loss_ctc=84.688, loss_interctc_layer6=83.870, loss_interctc_layer12=69.535, loss_interctc_layer15=63.729, loss_interctc_layer21=87.770, loss=77.919, backward_time=0.226, grad_norm=67.185, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.142, optim0_lr0=7.532e-05, train_time=1.414
+[gpua006:0/64] 2024-02-10 23:55:48,180 (trainer:756) INFO: 29epoch:train:3201-3300batch: iter_time=8.362e-05, forward_time=0.143, loss_ctc=85.714, loss_interctc_layer6=84.891, loss_interctc_layer12=70.042, loss_interctc_layer15=64.293, loss_interctc_layer21=88.701, loss=78.728, backward_time=0.209, grad_norm=63.093, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.138, optim0_lr0=7.531e-05, train_time=1.335
+[gpua006:0/64] 2024-02-10 23:57:53,726 (trainer:756) INFO: 29epoch:train:3301-3400batch: iter_time=8.626e-05, forward_time=0.143, loss_ctc=100.697, loss_interctc_layer6=96.565, loss_interctc_layer12=79.915, loss_interctc_layer15=73.338, loss_interctc_layer21=104.274, loss=90.958, backward_time=0.208, grad_norm=71.897, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.138, optim0_lr0=7.530e-05, train_time=1.255
+[gpua006:0/64] 2024-02-11 00:00:33,620 (trainer:756) INFO: 29epoch:train:3401-3500batch: iter_time=8.458e-05, forward_time=0.142, loss_ctc=73.115, loss_interctc_layer6=76.504, loss_interctc_layer12=63.050, loss_interctc_layer15=57.562, loss_interctc_layer21=75.889, loss=69.224, backward_time=0.207, grad_norm=83.840, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.138, optim0_lr0=7.529e-05, train_time=1.599
+[gpua006:0/64] 2024-02-11 00:02:50,234 (trainer:756) INFO: 29epoch:train:3501-3600batch: iter_time=8.717e-05, forward_time=0.142, loss_ctc=100.315, loss_interctc_layer6=94.219, loss_interctc_layer12=77.938, loss_interctc_layer15=71.447, loss_interctc_layer21=103.846, loss=89.553, backward_time=0.207, grad_norm=73.287, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.138, optim0_lr0=7.528e-05, train_time=1.366
+[gpua006:0/64] 2024-02-11 00:04:48,163 (trainer:756) INFO: 29epoch:train:3601-3700batch: iter_time=8.456e-05, forward_time=0.161, loss_ctc=76.924, loss_interctc_layer6=77.532, loss_interctc_layer12=64.637, loss_interctc_layer15=59.262, loss_interctc_layer21=79.548, loss=71.581, backward_time=0.211, grad_norm=65.612, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.139, optim0_lr0=7.527e-05, train_time=1.179
+[gpua006:0/64] 2024-02-11 00:06:15,345 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpua006:0/64] 2024-02-11 00:06:34,141 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua006:0/64] 2024-02-11 00:06:37,549 (abs_task:1660) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TCTCPreprocessor object at 0x7fbdc471d090>)
+[gpua006:0/64] 2024-02-11 00:06:37,549 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, 
+[gpua006:0/64] 2024-02-11 00:06:37,552 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua006:0/64] 2024-02-11 00:11:47,715 (trainer:756) INFO: 29epoch:train:3701-3800batch: iter_time=2.065, forward_time=0.194, loss_ctc=65.243, loss_interctc_layer6=73.718, loss_interctc_layer12=60.929, loss_interctc_layer15=55.693, loss_interctc_layer21=67.575, loss=64.632, backward_time=0.221, grad_norm=65.417, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.140, optim0_lr0=7.527e-05, train_time=4.195
+[gpua006:0/64] 2024-02-11 00:13:26,517 (trainer:756) INFO: 29epoch:train:3801-3900batch: iter_time=7.613e-05, forward_time=0.142, loss_ctc=66.206, loss_interctc_layer6=72.836, loss_interctc_layer12=60.331, loss_interctc_layer15=55.305, loss_interctc_layer21=68.643, loss=64.664, backward_time=0.211, grad_norm=55.976, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.138, optim0_lr0=7.526e-05, train_time=0.988
+[gpua006:0/64] 2024-02-11 00:15:46,307 (trainer:756) INFO: 29epoch:train:3901-4000batch: iter_time=7.829e-05, forward_time=0.151, loss_ctc=81.668, loss_interctc_layer6=89.200, loss_interctc_layer12=74.805, loss_interctc_layer15=69.021, loss_interctc_layer21=84.353, loss=79.809, backward_time=0.222, grad_norm=71.529, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.140, optim0_lr0=7.525e-05, train_time=1.398
+[gpua006:0/64] 2024-02-11 00:17:51,948 (trainer:756) INFO: 29epoch:train:4001-4100batch: iter_time=8.303e-05, forward_time=0.142, loss_ctc=77.929, loss_interctc_layer6=85.733, loss_interctc_layer12=71.212, loss_interctc_layer15=65.316, loss_interctc_layer21=80.341, loss=76.106, backward_time=0.210, grad_norm=75.567, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.139, optim0_lr0=7.524e-05, train_time=1.256
+[gpua006:0/64] 2024-02-11 00:19:51,628 (trainer:756) INFO: 29epoch:train:4101-4200batch: iter_time=8.992e-05, forward_time=0.149, loss_ctc=65.888, loss_interctc_layer6=71.035, loss_interctc_layer12=58.844, loss_interctc_layer15=53.949, loss_interctc_layer21=68.397, loss=63.623, backward_time=0.219, grad_norm=65.266, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.141, optim0_lr0=7.523e-05, train_time=1.197
+[gpua006:0/64] 2024-02-11 00:20:05,515 (trainer:687) WARNING: The grad norm is nan. Skipping updating the model.
+[gpua006:0/64] 2024-02-11 00:22:32,027 (trainer:756) INFO: 29epoch:train:4201-4300batch: iter_time=8.442e-05, forward_time=0.142, loss_ctc=74.296, loss_interctc_layer6=87.345, loss_interctc_layer12=72.937, loss_interctc_layer15=67.161, loss_interctc_layer21=76.748, loss=75.697, backward_time=0.210, grad_norm=78.363, clip=100.000, loss_scale=2.233e+31, optim_step_time=0.139, optim0_lr0=7.522e-05, train_time=1.601
+[gpua006:0/64] 2024-02-11 00:24:54,024 (trainer:756) INFO: 29epoch:train:4301-4400batch: iter_time=9.786e-05, forward_time=0.144, loss_ctc=91.917, loss_interctc_layer6=90.137, loss_interctc_layer12=74.879, loss_interctc_layer15=68.757, loss_interctc_layer21=95.145, loss=84.167, backward_time=0.209, grad_norm=66.897, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.138, optim0_lr0=7.521e-05, train_time=1.422
+[gpua006:0/64] 2024-02-11 00:26:59,523 (trainer:756) INFO: 29epoch:train:4401-4500batch: iter_time=1.012e-04, forward_time=0.142, loss_ctc=73.702, loss_interctc_layer6=78.682, loss_interctc_layer12=64.865, loss_interctc_layer15=59.497, loss_interctc_layer21=76.405, loss=70.630, backward_time=0.208, grad_norm=73.426, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.138, optim0_lr0=7.520e-05, train_time=1.255
+[gpua006:0/64] 2024-02-11 00:28:55,312 (trainer:756) INFO: 29epoch:train:4501-4600batch: iter_time=8.960e-05, forward_time=0.143, loss_ctc=101.116, loss_interctc_layer6=95.443, loss_interctc_layer12=78.658, loss_interctc_layer15=71.973, loss_interctc_layer21=104.687, loss=90.376, backward_time=0.210, grad_norm=68.096, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.138, optim0_lr0=7.519e-05, train_time=1.158
+[gpua006:0/64] 2024-02-11 00:29:38,760 (trainer:687) WARNING: The grad norm is nan. Skipping updating the model.
+[gpua006:0/64] 2024-02-11 00:30:55,908 (trainer:756) INFO: 29epoch:train:4601-4700batch: iter_time=7.905e-05, forward_time=0.159, loss_ctc=79.805, loss_interctc_layer6=90.213, loss_interctc_layer12=75.051, loss_interctc_layer15=68.980, loss_interctc_layer21=82.598, loss=79.330, backward_time=0.224, grad_norm=71.965, clip=100.000, loss_scale=1.414e+31, optim_step_time=0.140, optim0_lr0=7.519e-05, train_time=1.206
+[gpua006:0/64] 2024-02-11 00:32:40,818 (trainer:756) INFO: 29epoch:train:4701-4800batch: iter_time=8.421e-05, forward_time=0.148, loss_ctc=85.749, loss_interctc_layer6=81.859, loss_interctc_layer12=67.223, loss_interctc_layer15=61.329, loss_interctc_layer21=88.918, loss=77.015, backward_time=0.211, grad_norm=65.935, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.140, optim0_lr0=7.518e-05, train_time=1.049
+[gpua006:0/64] 2024-02-11 00:34:45,450 (trainer:756) INFO: 29epoch:train:4801-4900batch: iter_time=8.469e-05, forward_time=0.159, loss_ctc=73.230, loss_interctc_layer6=80.114, loss_interctc_layer12=66.389, loss_interctc_layer15=60.885, loss_interctc_layer21=75.902, loss=71.304, backward_time=0.219, grad_norm=65.011, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.138, optim0_lr0=7.517e-05, train_time=1.246
+[gpua006:0/64] 2024-02-11 00:36:35,455 (trainer:756) INFO: 29epoch:train:4901-5000batch: iter_time=8.220e-05, forward_time=0.148, loss_ctc=76.319, loss_interctc_layer6=80.102, loss_interctc_layer12=66.410, loss_interctc_layer15=60.756, loss_interctc_layer21=78.952, loss=72.508, backward_time=0.211, grad_norm=103.118, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.139, optim0_lr0=7.516e-05, train_time=1.100
+[gpua006:0/64] 2024-02-11 00:36:55,486 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpua006:0/64] 2024-02-11 00:37:14,473 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua006:0/64] 2024-02-11 00:37:17,873 (abs_task:1660) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TCTCPreprocessor object at 0x7fbde7bc1810>)
+[gpua006:0/64] 2024-02-11 00:37:17,873 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, 
+[gpua006:0/64] 2024-02-11 00:37:17,879 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua006:0/64] 2024-02-11 00:43:20,315 (trainer:756) INFO: 29epoch:train:5001-5100batch: iter_time=2.868, forward_time=0.145, loss_ctc=56.235, loss_interctc_layer6=64.993, loss_interctc_layer12=53.602, loss_interctc_layer15=48.989, loss_interctc_layer21=58.164, loss=56.397, backward_time=0.212, grad_norm=99.604, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.139, optim0_lr0=7.515e-05, train_time=4.048
+[gpua006:0/64] 2024-02-11 00:44:58,969 (trainer:756) INFO: 29epoch:train:5101-5200batch: iter_time=8.291e-05, forward_time=0.143, loss_ctc=76.498, loss_interctc_layer6=86.226, loss_interctc_layer12=71.564, loss_interctc_layer15=65.695, loss_interctc_layer21=79.123, loss=75.821, backward_time=0.212, grad_norm=67.995, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.139, optim0_lr0=7.514e-05, train_time=0.987
+[gpua006:0/64] 2024-02-11 00:47:09,324 (trainer:756) INFO: 29epoch:train:5201-5300batch: iter_time=8.150e-05, forward_time=0.151, loss_ctc=75.185, loss_interctc_layer6=81.797, loss_interctc_layer12=68.768, loss_interctc_layer15=63.601, loss_interctc_layer21=77.453, loss=73.361, backward_time=0.218, grad_norm=74.841, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.140, optim0_lr0=7.513e-05, train_time=1.303
+[gpua006:0/64] 2024-02-11 00:49:38,394 (trainer:756) INFO: 29epoch:train:5301-5400batch: iter_time=7.901e-04, forward_time=0.146, loss_ctc=83.473, loss_interctc_layer6=85.923, loss_interctc_layer12=71.034, loss_interctc_layer15=65.062, loss_interctc_layer21=86.334, loss=78.365, backward_time=0.215, grad_norm=68.103, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.139, optim0_lr0=7.512e-05, train_time=1.490
+[gpua006:0/64] 2024-02-11 00:51:34,282 (trainer:756) INFO: 29epoch:train:5401-5500batch: iter_time=8.831e-05, forward_time=0.143, loss_ctc=67.425, loss_interctc_layer6=78.844, loss_interctc_layer12=65.798, loss_interctc_layer15=60.654, loss_interctc_layer21=69.662, loss=68.477, backward_time=0.210, grad_norm=61.254, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.138, optim0_lr0=7.512e-05, train_time=1.159
+[gpua006:0/64] 2024-02-11 00:53:29,482 (trainer:756) INFO: 29epoch:train:5501-5600batch: iter_time=8.540e-05, forward_time=0.143, loss_ctc=86.512, loss_interctc_layer6=88.521, loss_interctc_layer12=73.675, loss_interctc_layer15=67.722, loss_interctc_layer21=89.446, loss=81.175, backward_time=0.209, grad_norm=70.346, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.138, optim0_lr0=7.511e-05, train_time=1.152
+[gpua006:0/64] 2024-02-11 00:55:26,182 (trainer:756) INFO: 29epoch:train:5601-5700batch: iter_time=9.299e-05, forward_time=0.173, loss_ctc=79.085, loss_interctc_layer6=83.656, loss_interctc_layer12=69.285, loss_interctc_layer15=63.482, loss_interctc_layer21=81.913, loss=75.484, backward_time=0.211, grad_norm=77.420, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.138, optim0_lr0=7.510e-05, train_time=1.167
+[gpua006:0/64] 2024-02-11 00:57:37,667 (trainer:756) INFO: 29epoch:train:5701-5800batch: iter_time=1.030e-04, forward_time=0.142, loss_ctc=81.494, loss_interctc_layer6=84.546, loss_interctc_layer12=69.814, loss_interctc_layer15=64.009, loss_interctc_layer21=84.471, loss=76.867, backward_time=0.208, grad_norm=128.217, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.138, optim0_lr0=7.509e-05, train_time=1.315
+[gpua006:0/64] 2024-02-11 00:59:46,716 (trainer:756) INFO: 29epoch:train:5801-5900batch: iter_time=1.058e-04, forward_time=0.186, loss_ctc=95.523, loss_interctc_layer6=96.187, loss_interctc_layer12=79.420, loss_interctc_layer15=72.797, loss_interctc_layer21=98.915, loss=88.568, backward_time=0.210, grad_norm=90.762, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.138, optim0_lr0=7.508e-05, train_time=1.290
+[gpua006:0/64] 2024-02-11 01:01:34,768 (trainer:756) INFO: 29epoch:train:5901-6000batch: iter_time=9.813e-05, forward_time=0.143, loss_ctc=71.704, loss_interctc_layer6=76.249, loss_interctc_layer12=62.857, loss_interctc_layer15=57.435, loss_interctc_layer21=74.362, loss=68.521, backward_time=0.211, grad_norm=58.638, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.138, optim0_lr0=7.507e-05, train_time=1.080
+[gpua006:0/64] 2024-02-11 01:03:55,298 (trainer:756) INFO: 29epoch:train:6001-6100batch: iter_time=9.875e-05, forward_time=0.171, loss_ctc=94.571, loss_interctc_layer6=93.282, loss_interctc_layer12=77.059, loss_interctc_layer15=70.579, loss_interctc_layer21=97.926, loss=86.683, backward_time=0.217, grad_norm=67.556, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.139, optim0_lr0=7.506e-05, train_time=1.403
+[gpua006:0/64] 2024-02-11 01:05:50,290 (trainer:756) INFO: 29epoch:train:6101-6200batch: iter_time=9.806e-05, forward_time=0.176, loss_ctc=75.535, loss_interctc_layer6=77.481, loss_interctc_layer12=64.458, loss_interctc_layer15=59.163, loss_interctc_layer21=78.269, loss=70.981, backward_time=0.212, grad_norm=67.945, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.138, optim0_lr0=7.505e-05, train_time=1.151
+[gpua006:0/64] 2024-02-11 01:07:09,747 (multiple_iter_factory:32) INFO: Building 5th iter-factory...
+[gpua006:0/64] 2024-02-11 01:07:28,575 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua006:0/64] 2024-02-11 01:07:32,237 (abs_task:1660) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TCTCPreprocessor object at 0x7fbdb45410c0>)
+[gpua006:0/64] 2024-02-11 01:07:32,237 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, 
+[gpua006:0/64] 2024-02-11 01:07:32,241 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua006:0/64] 2024-02-11 01:13:59,827 (trainer:756) INFO: 29epoch:train:6201-6300batch: iter_time=3.020, forward_time=0.155, loss_ctc=63.830, loss_interctc_layer6=73.575, loss_interctc_layer12=60.766, loss_interctc_layer15=55.540, loss_interctc_layer21=66.060, loss=63.954, backward_time=0.216, grad_norm=62.697, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.141, optim0_lr0=7.504e-05, train_time=4.895
+[gpua006:0/64] 2024-02-11 01:15:34,032 (trainer:756) INFO: 29epoch:train:6301-6400batch: iter_time=8.562e-05, forward_time=0.142, loss_ctc=65.817, loss_interctc_layer6=72.732, loss_interctc_layer12=60.165, loss_interctc_layer15=55.102, loss_interctc_layer21=68.138, loss=64.391, backward_time=0.211, grad_norm=66.765, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.140, optim0_lr0=7.504e-05, train_time=0.942
+[gpua006:0/64] 2024-02-11 01:17:50,970 (trainer:756) INFO: 29epoch:train:6401-6500batch: iter_time=8.941e-05, forward_time=0.141, loss_ctc=79.300, loss_interctc_layer6=88.502, loss_interctc_layer12=74.114, loss_interctc_layer15=68.449, loss_interctc_layer21=81.935, loss=78.460, backward_time=0.208, grad_norm=69.725, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.139, optim0_lr0=7.503e-05, train_time=1.369
+[gpua006:0/64] 2024-02-11 01:20:11,692 (trainer:756) INFO: 29epoch:train:6501-6600batch: iter_time=9.109e-05, forward_time=0.142, loss_ctc=77.337, loss_interctc_layer6=85.678, loss_interctc_layer12=71.033, loss_interctc_layer15=65.078, loss_interctc_layer21=80.011, loss=75.828, backward_time=0.208, grad_norm=78.615, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.140, optim0_lr0=7.502e-05, train_time=1.407
+[gpua006:0/64] 2024-02-11 01:21:52,176 (trainer:756) INFO: 29epoch:train:6601-6700batch: iter_time=8.134e-05, forward_time=0.152, loss_ctc=64.376, loss_interctc_layer6=70.383, loss_interctc_layer12=58.189, loss_interctc_layer15=53.323, loss_interctc_layer21=66.617, loss=62.578, backward_time=0.214, grad_norm=62.446, clip=100.000, loss_scale=1.623e+31, optim_step_time=0.140, optim0_lr0=7.501e-05, train_time=1.005
+[gpua006:0/64] 2024-02-11 01:24:19,820 (trainer:756) INFO: 29epoch:train:6701-6800batch: iter_time=8.191e-05, forward_time=0.143, loss_ctc=73.707, loss_interctc_layer6=86.931, loss_interctc_layer12=72.536, loss_interctc_layer15=66.901, loss_interctc_layer21=76.078, loss=75.230, backward_time=0.209, grad_norm=132.880, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.140, optim0_lr0=7.500e-05, train_time=1.476
+[gpua006:0/64] 2024-02-11 01:26:13,814 (trainer:756) INFO: 29epoch:train:6801-6900batch: iter_time=9.412e-05, forward_time=0.173, loss_ctc=91.040, loss_interctc_layer6=89.932, loss_interctc_layer12=74.647, loss_interctc_layer15=68.506, loss_interctc_layer21=94.137, loss=83.652, backward_time=0.212, grad_norm=75.362, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.140, optim0_lr0=7.499e-05, train_time=1.137
+[gpua006:0/64] 2024-02-11 01:28:31,369 (trainer:756) INFO: 29epoch:train:6901-7000batch: iter_time=8.505e-05, forward_time=0.142, loss_ctc=73.975, loss_interctc_layer6=79.004, loss_interctc_layer12=65.285, loss_interctc_layer15=59.842, loss_interctc_layer21=76.595, loss=70.940, backward_time=0.208, grad_norm=64.471, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.139, optim0_lr0=7.498e-05, train_time=1.376
+[gpua006:0/64] 2024-02-11 01:30:56,128 (trainer:756) INFO: 29epoch:train:7001-7100batch: iter_time=8.130e-05, forward_time=0.153, loss_ctc=101.498, loss_interctc_layer6=95.641, loss_interctc_layer12=78.712, loss_interctc_layer15=72.077, loss_interctc_layer21=105.006, loss=90.587, backward_time=0.209, grad_norm=107.831, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.140, optim0_lr0=7.497e-05, train_time=1.449
+[gpua006:0/64] 2024-02-11 01:33:08,194 (trainer:756) INFO: 29epoch:train:7101-7200batch: iter_time=9.266e-05, forward_time=0.185, loss_ctc=79.002, loss_interctc_layer6=88.989, loss_interctc_layer12=73.965, loss_interctc_layer15=67.973, loss_interctc_layer21=81.836, loss=78.353, backward_time=0.218, grad_norm=68.404, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.140, optim0_lr0=7.497e-05, train_time=1.320
+[gpua006:0/64] 2024-02-11 01:35:08,239 (trainer:756) INFO: 29epoch:train:7201-7300batch: iter_time=9.378e-05, forward_time=0.145, loss_ctc=85.616, loss_interctc_layer6=81.502, loss_interctc_layer12=67.098, loss_interctc_layer15=61.115, loss_interctc_layer21=88.686, loss=76.804, backward_time=0.210, grad_norm=102.610, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.139, optim0_lr0=7.496e-05, train_time=1.200
+[gpua006:0/64] 2024-02-11 01:37:28,046 (trainer:756) INFO: 29epoch:train:7301-7400batch: iter_time=9.381e-05, forward_time=0.155, loss_ctc=72.808, loss_interctc_layer6=80.062, loss_interctc_layer12=66.439, loss_interctc_layer15=60.841, loss_interctc_layer21=75.394, loss=71.109, backward_time=0.222, grad_norm=63.081, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.139, optim0_lr0=7.495e-05, train_time=1.398
+[gpua006:0/64] 2024-02-11 01:39:10,246 (trainer:756) INFO: 29epoch:train:7401-7500batch: iter_time=7.967e-05, forward_time=0.145, loss_ctc=76.534, loss_interctc_layer6=80.289, loss_interctc_layer12=66.617, loss_interctc_layer15=61.005, loss_interctc_layer21=79.180, loss=72.725, backward_time=0.212, grad_norm=66.810, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.140, optim0_lr0=7.494e-05, train_time=1.022
+[gpua006:0/64] 2024-02-11 01:39:30,276 (multiple_iter_factory:32) INFO: Building 6th iter-factory...
+[gpua006:0/64] 2024-02-11 01:39:49,214 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua006:0/64] 2024-02-11 01:39:52,615 (abs_task:1660) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TCTCPreprocessor object at 0x7fbacf3ab5b0>)
+[gpua006:0/64] 2024-02-11 01:39:52,615 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, 
+[gpua006:0/64] 2024-02-11 01:39:52,618 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua006:0/64] 2024-02-11 01:46:42,230 (trainer:756) INFO: 29epoch:train:7501-7600batch: iter_time=3.129, forward_time=0.157, loss_ctc=56.445, loss_interctc_layer6=64.988, loss_interctc_layer12=53.472, loss_interctc_layer15=48.800, loss_interctc_layer21=58.359, loss=56.413, backward_time=0.210, grad_norm=69.474, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.137, optim0_lr0=7.493e-05, train_time=4.518
+[gpua006:0/64] 2024-02-11 01:48:34,146 (trainer:756) INFO: 29epoch:train:7601-7700batch: iter_time=8.315e-05, forward_time=0.143, loss_ctc=75.564, loss_interctc_layer6=86.375, loss_interctc_layer12=71.622, loss_interctc_layer15=65.564, loss_interctc_layer21=78.157, loss=75.456, backward_time=0.210, grad_norm=81.191, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.138, optim0_lr0=7.492e-05, train_time=1.120
+[gpua006:0/64] 2024-02-11 01:50:36,757 (trainer:756) INFO: 29epoch:train:7701-7800batch: iter_time=8.814e-05, forward_time=0.142, loss_ctc=74.569, loss_interctc_layer6=81.548, loss_interctc_layer12=68.460, loss_interctc_layer15=63.506, loss_interctc_layer21=76.574, loss=72.932, backward_time=0.208, grad_norm=77.383, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.137, optim0_lr0=7.491e-05, train_time=1.226
+[gpua006:0/64] 2024-02-11 01:52:30,198 (trainer:756) INFO: 29epoch:train:7801-7900batch: iter_time=7.805e-05, forward_time=0.155, loss_ctc=82.381, loss_interctc_layer6=85.581, loss_interctc_layer12=70.760, loss_interctc_layer15=64.734, loss_interctc_layer21=85.113, loss=77.714, backward_time=0.214, grad_norm=91.866, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.139, optim0_lr0=7.490e-05, train_time=1.134
+[gpua006:0/64] 2024-02-11 01:54:46,593 (trainer:756) INFO: 29epoch:train:7901-8000batch: iter_time=8.602e-05, forward_time=0.154, loss_ctc=66.713, loss_interctc_layer6=78.914, loss_interctc_layer12=65.730, loss_interctc_layer15=60.553, loss_interctc_layer21=69.024, loss=68.187, backward_time=0.213, grad_norm=67.733, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.139, optim0_lr0=7.490e-05, train_time=1.363
+[gpua006:0/64] 2024-02-11 01:56:57,284 (trainer:756) INFO: 29epoch:train:8001-8100batch: iter_time=8.857e-05, forward_time=0.143, loss_ctc=86.064, loss_interctc_layer6=88.521, loss_interctc_layer12=73.716, loss_interctc_layer15=67.666, loss_interctc_layer21=89.055, loss=81.004, backward_time=0.209, grad_norm=80.548, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.140, optim0_lr0=7.489e-05, train_time=1.307
+[gpua006:0/64] 2024-02-11 01:59:01,250 (trainer:756) INFO: 29epoch:train:8101-8200batch: iter_time=8.213e-05, forward_time=0.180, loss_ctc=78.976, loss_interctc_layer6=83.447, loss_interctc_layer12=69.020, loss_interctc_layer15=63.180, loss_interctc_layer21=81.835, loss=75.292, backward_time=0.222, grad_norm=62.236, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.141, optim0_lr0=7.488e-05, train_time=1.239
+[gpua006:0/64] 2024-02-11 02:01:27,619 (trainer:756) INFO: 29epoch:train:8201-8300batch: iter_time=9.174e-05, forward_time=0.166, loss_ctc=82.212, loss_interctc_layer6=85.373, loss_interctc_layer12=70.503, loss_interctc_layer15=64.650, loss_interctc_layer21=85.072, loss=77.562, backward_time=0.210, grad_norm=68.701, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.140, optim0_lr0=7.487e-05, train_time=1.463
+[gpua006:0/64] 2024-02-11 02:03:46,621 (trainer:756) INFO: 29epoch:train:8301-8400batch: iter_time=8.692e-05, forward_time=0.144, loss_ctc=94.667, loss_interctc_layer6=94.626, loss_interctc_layer12=77.945, loss_interctc_layer15=71.523, loss_interctc_layer21=97.978, loss=87.348, backward_time=0.209, grad_norm=119.675, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.140, optim0_lr0=7.486e-05, train_time=1.390
+[gpua006:0/64] 2024-02-11 02:05:45,382 (trainer:756) INFO: 29epoch:train:8401-8500batch: iter_time=8.778e-05, forward_time=0.161, loss_ctc=71.832, loss_interctc_layer6=75.624, loss_interctc_layer12=62.349, loss_interctc_layer15=56.938, loss_interctc_layer21=74.407, loss=68.230, backward_time=0.223, grad_norm=66.219, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.143, optim0_lr0=7.485e-05, train_time=1.187
+[gpua006:0/64] 2024-02-11 02:07:56,619 (trainer:756) INFO: 29epoch:train:8501-8600batch: iter_time=7.810e-05, forward_time=0.147, loss_ctc=94.992, loss_interctc_layer6=92.988, loss_interctc_layer12=76.699, loss_interctc_layer15=70.230, loss_interctc_layer21=98.415, loss=86.665, backward_time=0.212, grad_norm=82.836, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.140, optim0_lr0=7.484e-05, train_time=1.311
+[gpua006:0/64] 2024-02-11 02:09:54,957 (trainer:756) INFO: 29epoch:train:8601-8700batch: iter_time=9.418e-05, forward_time=0.152, loss_ctc=74.472, loss_interctc_layer6=76.816, loss_interctc_layer12=63.812, loss_interctc_layer15=58.506, loss_interctc_layer21=77.169, loss=70.155, backward_time=0.216, grad_norm=64.918, clip=100.000, loss_scale=3.245e+31, optim_step_time=0.139, optim0_lr0=7.483e-05, train_time=1.184
+[gpua006:0/64] 2024-02-11 02:11:15,929 (multiple_iter_factory:32) INFO: Building 7th iter-factory...
+[gpua006:0/64] 2024-02-11 02:11:34,725 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua006:0/64] 2024-02-11 02:11:38,132 (abs_task:1660) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TCTCPreprocessor object at 0x7fbdff877c10>)
+[gpua006:0/64] 2024-02-11 02:11:38,132 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, 
+[gpua006:0/64] 2024-02-11 02:11:38,151 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua006:0/64] 2024-02-11 02:20:10,004 (trainer:756) INFO: 29epoch:train:8701-8800batch: iter_time=2.933, forward_time=0.160, loss_ctc=63.605, loss_interctc_layer6=72.807, loss_interctc_layer12=60.029, loss_interctc_layer15=54.873, loss_interctc_layer21=65.923, loss=63.447, backward_time=0.208, grad_norm=58.723, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.139, optim0_lr0=7.483e-05, train_time=6.150
+[gpua006:0/64] 2024-02-11 02:21:47,194 (trainer:756) INFO: 29epoch:train:8801-8900batch: iter_time=8.832e-05, forward_time=0.143, loss_ctc=65.730, loss_interctc_layer6=72.681, loss_interctc_layer12=60.094, loss_interctc_layer15=55.000, loss_interctc_layer21=68.058, loss=64.313, backward_time=0.211, grad_norm=58.426, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.139, optim0_lr0=7.482e-05, train_time=0.972
+[gpua006:0/64] 2024-02-11 02:23:30,652 (trainer:756) INFO: 29epoch:train:8901-9000batch: iter_time=8.120e-05, forward_time=0.144, loss_ctc=79.202, loss_interctc_layer6=88.525, loss_interctc_layer12=74.119, loss_interctc_layer15=68.343, loss_interctc_layer21=82.399, loss=78.518, backward_time=0.209, grad_norm=81.145, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.138, optim0_lr0=7.481e-05, train_time=1.034
+[gpua006:0/64] 2024-02-11 02:25:35,033 (trainer:756) INFO: 29epoch:train:9001-9100batch: iter_time=8.419e-05, forward_time=0.179, loss_ctc=77.293, loss_interctc_layer6=85.973, loss_interctc_layer12=71.452, loss_interctc_layer15=65.571, loss_interctc_layer21=79.897, loss=76.037, backward_time=0.230, grad_norm=85.384, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.139, optim0_lr0=7.480e-05, train_time=1.243
+[gpua006:0/64] 2024-02-11 02:27:44,050 (trainer:756) INFO: 29epoch:train:9101-9200batch: iter_time=9.070e-05, forward_time=0.142, loss_ctc=64.448, loss_interctc_layer6=70.144, loss_interctc_layer12=58.025, loss_interctc_layer15=53.178, loss_interctc_layer21=66.803, loss=62.520, backward_time=0.207, grad_norm=71.214, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.138, optim0_lr0=7.479e-05, train_time=1.290
+[gpua006:0/64] 2024-02-11 02:29:21,736 (trainer:756) INFO: 29epoch:train:9201-9300batch: iter_time=8.564e-05, forward_time=0.143, loss_ctc=73.669, loss_interctc_layer6=86.697, loss_interctc_layer12=72.304, loss_interctc_layer15=66.495, loss_interctc_layer21=75.997, loss=75.032, backward_time=0.210, grad_norm=79.853, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.138, optim0_lr0=7.478e-05, train_time=0.977
+[gpua006:0/64] 2024-02-11 02:31:55,051 (trainer:756) INFO: 29epoch:train:9301-9400batch: iter_time=8.304e-05, forward_time=0.151, loss_ctc=90.331, loss_interctc_layer6=89.514, loss_interctc_layer12=74.403, loss_interctc_layer15=68.246, loss_interctc_layer21=93.631, loss=83.225, backward_time=0.222, grad_norm=68.454, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.143, optim0_lr0=7.477e-05, train_time=1.533
+[gpua006:0/64] 2024-02-11 02:34:49,430 (trainer:756) INFO: 29epoch:train:9401-9500batch: iter_time=8.988e-05, forward_time=0.144, loss_ctc=73.017, loss_interctc_layer6=78.541, loss_interctc_layer12=64.826, loss_interctc_layer15=59.345, loss_interctc_layer21=75.501, loss=70.246, backward_time=0.212, grad_norm=59.545, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.139, optim0_lr0=7.476e-05, train_time=1.742
+[gpua006:0/64] 2024-02-11 02:37:13,642 (trainer:756) INFO: 29epoch:train:9501-9600batch: iter_time=8.617e-05, forward_time=0.164, loss_ctc=100.498, loss_interctc_layer6=95.128, loss_interctc_layer12=78.479, loss_interctc_layer15=71.736, loss_interctc_layer21=103.997, loss=89.968, backward_time=0.212, grad_norm=84.222, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.138, optim0_lr0=7.476e-05, train_time=1.443
+[gpua006:0/64] 2024-02-11 02:39:19,057 (trainer:756) INFO: 29epoch:train:9601-9700batch: iter_time=8.578e-05, forward_time=0.158, loss_ctc=78.793, loss_interctc_layer6=88.326, loss_interctc_layer12=73.588, loss_interctc_layer15=67.398, loss_interctc_layer21=81.511, loss=77.923, backward_time=0.210, grad_norm=74.622, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.139, optim0_lr0=7.475e-05, train_time=1.254
+[gpua006:0/64] 2024-02-11 02:41:21,583 (trainer:756) INFO: 29epoch:train:9701-9800batch: iter_time=8.158e-05, forward_time=0.180, loss_ctc=85.010, loss_interctc_layer6=81.245, loss_interctc_layer12=66.732, loss_interctc_layer15=60.810, loss_interctc_layer21=88.283, loss=76.416, backward_time=0.209, grad_norm=92.706, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.138, optim0_lr0=7.474e-05, train_time=1.224
+[gpua006:0/64] 2024-02-11 02:43:28,679 (trainer:756) INFO: 29epoch:train:9801-9900batch: iter_time=8.287e-05, forward_time=0.142, loss_ctc=72.321, loss_interctc_layer6=79.696, loss_interctc_layer12=65.952, loss_interctc_layer15=60.337, loss_interctc_layer21=75.014, loss=70.664, backward_time=0.208, grad_norm=81.107, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.138, optim0_lr0=7.473e-05, train_time=1.272
+[gpua006:0/64] 2024-02-11 02:45:25,750 (trainer:756) INFO: 29epoch:train:9901-10000batch: iter_time=8.309e-05, forward_time=0.142, loss_ctc=75.888, loss_interctc_layer6=79.820, loss_interctc_layer12=66.049, loss_interctc_layer15=60.383, loss_interctc_layer21=78.620, loss=72.152, backward_time=0.208, grad_norm=82.636, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.138, optim0_lr0=7.472e-05, train_time=1.170
+[gpua006:0/64] 2024-02-11 02:45:45,780 (multiple_iter_factory:32) INFO: Building 8th iter-factory...
+[gpua006:0/64] 2024-02-11 02:46:04,353 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua006:0/64] 2024-02-11 02:46:07,777 (abs_task:1660) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TCTCPreprocessor object at 0x7fbade9744f0>)
+[gpua006:0/64] 2024-02-11 02:46:07,777 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, 
+[gpua006:0/64] 2024-02-11 02:46:07,790 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua006:0/64] 2024-02-11 02:52:26,606 (trainer:756) INFO: 29epoch:train:10001-10100batch: iter_time=2.971, forward_time=0.178, loss_ctc=56.263, loss_interctc_layer6=64.054, loss_interctc_layer12=52.790, loss_interctc_layer15=48.175, loss_interctc_layer21=58.261, loss=55.909, backward_time=0.220, grad_norm=54.918, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.140, optim0_lr0=7.471e-05, train_time=4.208
+[gpua006:0/64] 2024-02-11 02:54:28,330 (trainer:756) INFO: 29epoch:train:10101-10200batch: iter_time=8.231e-05, forward_time=0.143, loss_ctc=82.795, loss_interctc_layer6=86.084, loss_interctc_layer12=71.463, loss_interctc_layer15=65.387, loss_interctc_layer21=85.654, loss=78.277, backward_time=0.210, grad_norm=64.521, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.139, optim0_lr0=7.470e-05, train_time=1.217
+[gpua006:0/64] 2024-02-11 02:56:38,895 (trainer:756) INFO: 29epoch:train:10201-10300batch: iter_time=9.594e-05, forward_time=0.145, loss_ctc=77.350, loss_interctc_layer6=81.001, loss_interctc_layer12=67.953, loss_interctc_layer15=62.598, loss_interctc_layer21=79.915, loss=73.763, backward_time=0.208, grad_norm=129.614, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.139, optim0_lr0=7.469e-05, train_time=1.305
+[gpua006:0/64] 2024-02-11 02:58:48,821 (trainer:756) INFO: 29epoch:train:10301-10400batch: iter_time=1.058e-04, forward_time=0.156, loss_ctc=86.625, loss_interctc_layer6=85.419, loss_interctc_layer12=70.519, loss_interctc_layer15=64.449, loss_interctc_layer21=89.808, loss=79.364, backward_time=0.210, grad_norm=123.359, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.140, optim0_lr0=7.469e-05, train_time=1.299
+[gpua006:0/64] 2024-02-11 03:01:07,505 (trainer:756) INFO: 29epoch:train:10401-10500batch: iter_time=8.823e-05, forward_time=0.157, loss_ctc=70.964, loss_interctc_layer6=78.413, loss_interctc_layer12=65.273, loss_interctc_layer15=60.169, loss_interctc_layer21=73.685, loss=69.701, backward_time=0.214, grad_norm=81.546, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.141, optim0_lr0=7.468e-05, train_time=1.387
+[gpua006:0/64] 2024-02-11 03:03:46,364 (trainer:756) INFO: 29epoch:train:10501-10600batch: iter_time=8.502e-05, forward_time=0.184, loss_ctc=89.831, loss_interctc_layer6=87.675, loss_interctc_layer12=72.919, loss_interctc_layer15=66.934, loss_interctc_layer21=92.999, loss=82.072, backward_time=0.209, grad_norm=66.457, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.138, optim0_lr0=7.467e-05, train_time=1.588
+[gpua006:0/64] 2024-02-11 03:06:03,678 (trainer:756) INFO: 29epoch:train:10601-10700batch: iter_time=8.868e-05, forward_time=0.142, loss_ctc=83.946, loss_interctc_layer6=82.702, loss_interctc_layer12=68.356, loss_interctc_layer15=62.496, loss_interctc_layer21=87.078, loss=76.916, backward_time=0.208, grad_norm=108.428, clip=100.000, loss_scale=6.490e+31, optim_step_time=0.138, optim0_lr0=7.466e-05, train_time=1.373
+[gpua006:0/64] 2024-02-11 03:07:55,097 (trainer:756) INFO: 29epoch:train:10701-10800batch: iter_time=9.118e-05, forward_time=0.142, loss_ctc=85.688, loss_interctc_layer6=84.434, loss_interctc_layer12=69.641, loss_interctc_layer15=63.789, loss_interctc_layer21=88.838, loss=78.478, backward_time=0.209, grad_norm=95.859, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.138, optim0_lr0=7.465e-05, train_time=1.114
+[gpua006:0/64] 2024-02-11 03:10:28,401 (trainer:756) INFO: 29epoch:train:10801-10900batch: iter_time=9.254e-05, forward_time=0.150, loss_ctc=98.852, loss_interctc_layer6=95.323, loss_interctc_layer12=78.660, loss_interctc_layer15=72.058, loss_interctc_layer21=102.403, loss=89.459, backward_time=0.210, grad_norm=77.894, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.139, optim0_lr0=7.464e-05, train_time=1.533
+[gpua006:0/64] 2024-02-11 03:13:00,190 (trainer:756) INFO: 29epoch:train:10901-11000batch: iter_time=8.414e-05, forward_time=0.171, loss_ctc=72.166, loss_interctc_layer6=75.089, loss_interctc_layer12=61.689, loss_interctc_layer15=56.236, loss_interctc_layer21=74.910, loss=68.018, backward_time=0.223, grad_norm=75.055, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.140, optim0_lr0=7.463e-05, train_time=1.518
+[gpua006:0/64] 2024-02-11 03:14:55,798 (trainer:756) INFO: 29epoch:train:11001-11100batch: iter_time=8.653e-05, forward_time=0.160, loss_ctc=97.977, loss_interctc_layer6=92.818, loss_interctc_layer12=76.537, loss_interctc_layer15=69.886, loss_interctc_layer21=101.576, loss=87.759, backward_time=0.214, grad_norm=63.473, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.140, optim0_lr0=7.463e-05, train_time=1.156
+[gpua006:0/64] 2024-02-11 03:16:51,586 (trainer:756) INFO: 29epoch:train:11101-11200batch: iter_time=9.023e-05, forward_time=0.148, loss_ctc=76.775, loss_interctc_layer6=76.666, loss_interctc_layer12=63.663, loss_interctc_layer15=58.352, loss_interctc_layer21=79.539, loss=70.999, backward_time=0.218, grad_norm=71.386, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.138, optim0_lr0=7.462e-05, train_time=1.158
+[gpua006:0/64] 2024-02-11 03:18:00,630 (multiple_iter_factory:32) INFO: Building 9th iter-factory...
+[gpua006:0/64] 2024-02-11 03:18:19,068 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua006:0/64] 2024-02-11 03:18:22,568 (abs_task:1660) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TCTCPreprocessor object at 0x7fbad1e9f6a0>)
+[gpua006:0/64] 2024-02-11 03:18:22,568 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, 
+[gpua006:0/64] 2024-02-11 03:18:22,571 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua006:0/64] 2024-02-11 03:24:05,374 (trainer:687) WARNING: The grad norm is nan. Skipping updating the model.
+[gpua006:0/64] 2024-02-11 03:24:48,837 (trainer:756) INFO: 29epoch:train:11201-11300batch: iter_time=3.124, forward_time=0.173, loss_ctc=65.286, loss_interctc_layer6=72.770, loss_interctc_layer12=59.927, loss_interctc_layer15=54.719, loss_interctc_layer21=67.586, loss=64.058, backward_time=0.210, grad_norm=61.203, clip=100.000, loss_scale=7.498e+31, optim_step_time=0.140, optim0_lr0=7.461e-05, train_time=4.772
+[gpua006:0/64] 2024-02-11 03:26:44,065 (trainer:756) INFO: 29epoch:train:11301-11400batch: iter_time=8.061e-05, forward_time=0.142, loss_ctc=70.794, loss_interctc_layer6=72.896, loss_interctc_layer12=60.222, loss_interctc_layer15=55.145, loss_interctc_layer21=73.226, loss=66.456, backward_time=0.210, grad_norm=75.480, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.139, optim0_lr0=7.460e-05, train_time=1.153
+[gpua006:0/64] 2024-02-11 03:26:52,726 (trainer:687) WARNING: The grad norm is nan. Skipping updating the model.
+[gpua006:0/64] 2024-02-11 03:29:28,908 (trainer:756) INFO: 29epoch:train:11401-11500batch: iter_time=8.905e-05, forward_time=0.142, loss_ctc=83.747, loss_interctc_layer6=88.081, loss_interctc_layer12=74.348, loss_interctc_layer15=68.229, loss_interctc_layer21=86.704, loss=80.222, backward_time=0.210, grad_norm=90.255, clip=100.000, loss_scale=2.192e+31, optim_step_time=0.139, optim0_lr0=7.459e-05, train_time=1.648
+[gpua006:0/64] 2024-02-11 03:31:54,927 (trainer:756) INFO: 29epoch:train:11501-11600batch: iter_time=9.268e-05, forward_time=0.142, loss_ctc=77.735, loss_interctc_layer6=84.635, loss_interctc_layer12=70.078, loss_interctc_layer15=64.326, loss_interctc_layer21=80.058, loss=75.366, backward_time=0.209, grad_norm=74.275, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.138, optim0_lr0=7.458e-05, train_time=1.460
+[gpua006:0/64] 2024-02-11 03:34:45,920 (trainer:756) INFO: 29epoch:train:11601-11700batch: iter_time=8.900e-05, forward_time=0.148, loss_ctc=69.594, loss_interctc_layer6=69.963, loss_interctc_layer12=57.648, loss_interctc_layer15=52.848, loss_interctc_layer21=72.050, loss=64.421, backward_time=0.212, grad_norm=67.306, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.139, optim0_lr0=7.457e-05, train_time=1.710
+[gpua006:0/64] 2024-02-11 03:37:16,969 (trainer:756) INFO: 29epoch:train:11701-11800batch: iter_time=0.002, forward_time=0.180, loss_ctc=77.077, loss_interctc_layer6=87.285, loss_interctc_layer12=72.735, loss_interctc_layer15=67.005, loss_interctc_layer21=79.527, loss=76.726, backward_time=0.233, grad_norm=75.429, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.141, optim0_lr0=7.457e-05, train_time=1.510
+[gpua006:0/64] 2024-02-11 03:39:56,527 (trainer:756) INFO: 29epoch:train:11801-11900batch: iter_time=9.976e-05, forward_time=0.143, loss_ctc=95.167, loss_interctc_layer6=89.334, loss_interctc_layer12=74.123, loss_interctc_layer15=67.957, loss_interctc_layer21=98.588, loss=85.034, backward_time=0.209, grad_norm=62.141, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.139, optim0_lr0=7.456e-05, train_time=1.596
+[gpua006:0/64] 2024-02-11 03:42:53,228 (trainer:756) INFO: 29epoch:train:11901-12000batch: iter_time=7.995e-05, forward_time=0.151, loss_ctc=76.589, loss_interctc_layer6=77.683, loss_interctc_layer12=63.997, loss_interctc_layer15=58.566, loss_interctc_layer21=79.441, loss=71.255, backward_time=0.219, grad_norm=79.741, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.139, optim0_lr0=7.455e-05, train_time=1.766
+[gpua006:0/64] 2024-02-11 03:45:28,753 (trainer:756) INFO: 29epoch:train:12001-12100batch: iter_time=7.885e-05, forward_time=0.143, loss_ctc=105.476, loss_interctc_layer6=95.455, loss_interctc_layer12=78.648, loss_interctc_layer15=71.906, loss_interctc_layer21=109.144, loss=92.126, backward_time=0.210, grad_norm=99.422, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.139, optim0_lr0=7.454e-05, train_time=1.556
+[gpua006:0/64] 2024-02-11 03:48:30,994 (trainer:756) INFO: 29epoch:train:12101-12200batch: iter_time=7.735e-05, forward_time=0.186, loss_ctc=81.594, loss_interctc_layer6=88.968, loss_interctc_layer12=73.863, loss_interctc_layer15=67.836, loss_interctc_layer21=84.478, loss=79.348, backward_time=0.211, grad_norm=80.343, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.139, optim0_lr0=7.453e-05, train_time=1.822
+[gpua006:0/64] 2024-02-11 03:51:20,989 (trainer:756) INFO: 29epoch:train:12201-12300batch: iter_time=8.342e-05, forward_time=0.145, loss_ctc=87.450, loss_interctc_layer6=80.887, loss_interctc_layer12=66.261, loss_interctc_layer15=60.236, loss_interctc_layer21=90.668, loss=77.100, backward_time=0.211, grad_norm=58.504, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.139, optim0_lr0=7.452e-05, train_time=1.700
+[gpua006:0/64] 2024-02-11 03:53:56,768 (trainer:756) INFO: 29epoch:train:12301-12400batch: iter_time=8.239e-05, forward_time=0.141, loss_ctc=74.476, loss_interctc_layer6=79.408, loss_interctc_layer12=65.645, loss_interctc_layer15=59.966, loss_interctc_layer21=77.270, loss=71.353, backward_time=0.206, grad_norm=72.877, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.138, optim0_lr0=7.451e-05, train_time=1.558
+[gpua006:0/64] 2024-02-11 03:56:27,404 (trainer:756) INFO: 29epoch:train:12401-12500batch: iter_time=8.140e-05, forward_time=0.146, loss_ctc=79.000, loss_interctc_layer6=80.422, loss_interctc_layer12=66.685, loss_interctc_layer15=61.025, loss_interctc_layer21=81.762, loss=73.779, backward_time=0.210, grad_norm=89.789, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.139, optim0_lr0=7.450e-05, train_time=1.506
+[gpua006:0/64] 2024-02-11 03:56:47,433 (multiple_iter_factory:32) INFO: Building 10th iter-factory...
+[gpua006:0/64] 2024-02-11 03:57:06,017 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua006:0/64] 2024-02-11 03:57:09,446 (abs_task:1660) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TCTCPreprocessor object at 0x7fbdfdc5be80>)
+[gpua006:0/64] 2024-02-11 03:57:09,447 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, 
+[gpua006:0/64] 2024-02-11 03:57:09,450 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua006:0/64] 2024-02-11 04:10:03,603 (trainer:756) INFO: 29epoch:train:12501-12600batch: iter_time=3.035, forward_time=0.173, loss_ctc=56.214, loss_interctc_layer6=64.133, loss_interctc_layer12=52.813, loss_interctc_layer15=48.231, loss_interctc_layer21=58.059, loss=55.890, backward_time=0.217, grad_norm=55.421, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.140, optim0_lr0=7.450e-05, train_time=8.162
+[gpua006:0/64] 2024-02-11 04:12:06,308 (trainer:756) INFO: 29epoch:train:12601-12700batch: iter_time=8.586e-05, forward_time=0.143, loss_ctc=81.964, loss_interctc_layer6=85.957, loss_interctc_layer12=71.282, loss_interctc_layer15=65.253, loss_interctc_layer21=84.831, loss=77.857, backward_time=0.210, grad_norm=68.512, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.138, optim0_lr0=7.449e-05, train_time=1.227
+[gpua006:0/64] 2024-02-11 04:14:06,785 (trainer:756) INFO: 29epoch:train:12701-12800batch: iter_time=8.110e-05, forward_time=0.144, loss_ctc=76.625, loss_interctc_layer6=81.208, loss_interctc_layer12=68.205, loss_interctc_layer15=62.928, loss_interctc_layer21=79.186, loss=73.630, backward_time=0.208, grad_norm=72.922, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.138, optim0_lr0=7.448e-05, train_time=1.205
+[gpua006:0/64] 2024-02-11 04:16:04,286 (trainer:756) INFO: 29epoch:train:12801-12900batch: iter_time=8.235e-05, forward_time=0.142, loss_ctc=86.035, loss_interctc_layer6=84.968, loss_interctc_layer12=69.943, loss_interctc_layer15=63.880, loss_interctc_layer21=88.985, loss=78.762, backward_time=0.208, grad_norm=81.099, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.138, optim0_lr0=7.447e-05, train_time=1.174
+[gpua006:0/64] 2024-02-11 04:18:08,944 (trainer:756) INFO: 29epoch:train:12901-13000batch: iter_time=9.038e-05, forward_time=0.143, loss_ctc=71.241, loss_interctc_layer6=79.326, loss_interctc_layer12=66.135, loss_interctc_layer15=60.928, loss_interctc_layer21=73.566, loss=70.239, backward_time=0.209, grad_norm=116.386, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.138, optim0_lr0=7.446e-05, train_time=1.248
+[gpua006:0/64] 2024-02-11 04:20:34,356 (trainer:756) INFO: 29epoch:train:13001-13100batch: iter_time=4.060e-04, forward_time=0.165, loss_ctc=89.595, loss_interctc_layer6=88.045, loss_interctc_layer12=73.201, loss_interctc_layer15=67.162, loss_interctc_layer21=92.704, loss=82.141, backward_time=0.216, grad_norm=71.615, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.140, optim0_lr0=7.445e-05, train_time=1.454
+[gpua006:0/64] 2024-02-11 04:22:42,608 (trainer:756) INFO: 29epoch:train:13101-13200batch: iter_time=8.772e-05, forward_time=0.142, loss_ctc=83.910, loss_interctc_layer6=82.759, loss_interctc_layer12=68.338, loss_interctc_layer15=62.481, loss_interctc_layer21=87.042, loss=76.906, backward_time=0.207, grad_norm=72.332, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.139, optim0_lr0=7.444e-05, train_time=1.282
+[gpua006:0/64] 2024-02-11 04:24:45,354 (trainer:756) INFO: 29epoch:train:13201-13300batch: iter_time=8.957e-05, forward_time=0.143, loss_ctc=84.607, loss_interctc_layer6=84.123, loss_interctc_layer12=69.299, loss_interctc_layer15=63.507, loss_interctc_layer21=87.633, loss=77.834, backward_time=0.209, grad_norm=64.965, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.139, optim0_lr0=7.444e-05, train_time=1.228
+[gpua006:0/64] 2024-02-11 04:26:49,617 (trainer:756) INFO: 29epoch:train:13301-13400batch: iter_time=8.303e-05, forward_time=0.145, loss_ctc=98.963, loss_interctc_layer6=94.802, loss_interctc_layer12=78.302, loss_interctc_layer15=71.760, loss_interctc_layer21=102.357, loss=89.237, backward_time=0.211, grad_norm=115.287, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.139, optim0_lr0=7.443e-05, train_time=1.242
+[gpua006:0/64] 2024-02-11 04:29:42,935 (trainer:756) INFO: 29epoch:train:13401-13500batch: iter_time=8.279e-05, forward_time=0.188, loss_ctc=71.916, loss_interctc_layer6=75.291, loss_interctc_layer12=61.762, loss_interctc_layer15=56.223, loss_interctc_layer21=74.598, loss=67.958, backward_time=0.230, grad_norm=60.352, clip=100.000, loss_scale=3.874e+31, optim_step_time=0.142, optim0_lr0=7.442e-05, train_time=1.733
+[gpua006:0/64] 2024-02-11 04:31:52,907 (trainer:756) INFO: 29epoch:train:13501-13600batch: iter_time=7.958e-05, forward_time=0.145, loss_ctc=98.494, loss_interctc_layer6=92.845, loss_interctc_layer12=76.401, loss_interctc_layer15=69.717, loss_interctc_layer21=102.049, loss=87.901, backward_time=0.210, grad_norm=63.510, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.139, optim0_lr0=7.441e-05, train_time=1.299
+[gpua006:0/64] 2024-02-11 04:33:33,733 (trainer:756) INFO: 29epoch:train:13601-13700batch: iter_time=8.582e-05, forward_time=0.142, loss_ctc=76.065, loss_interctc_layer6=76.226, loss_interctc_layer12=63.166, loss_interctc_layer15=57.785, loss_interctc_layer21=78.717, loss=70.392, backward_time=0.209, grad_norm=71.711, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.139, optim0_lr0=7.440e-05, train_time=1.008
+[gpua006:0/64] 2024-02-11 04:34:48,962 (multiple_iter_factory:32) INFO: Building 11th iter-factory...
+[gpua006:0/64] 2024-02-11 04:35:07,629 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua006:0/64] 2024-02-11 04:35:11,064 (abs_task:1660) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TCTCPreprocessor object at 0x7fbb22916800>)
+[gpua006:0/64] 2024-02-11 04:35:11,064 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, 
+[gpua006:0/64] 2024-02-11 04:35:11,070 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua006:0/64] 2024-02-11 04:40:49,582 (trainer:756) INFO: 29epoch:train:13701-13800batch: iter_time=3.214, forward_time=0.165, loss_ctc=65.065, loss_interctc_layer6=72.810, loss_interctc_layer12=59.976, loss_interctc_layer15=54.817, loss_interctc_layer21=67.309, loss=63.996, backward_time=0.209, grad_norm=70.243, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.138, optim0_lr0=7.439e-05, train_time=4.358
+[gpua006:0/64] 2024-02-11 04:42:23,536 (trainer:756) INFO: 29epoch:train:13801-13900batch: iter_time=9.134e-05, forward_time=0.142, loss_ctc=71.037, loss_interctc_layer6=72.691, loss_interctc_layer12=60.103, loss_interctc_layer15=55.015, loss_interctc_layer21=73.570, loss=66.483, backward_time=0.210, grad_norm=76.216, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.138, optim0_lr0=7.438e-05, train_time=0.940
+[gpua006:0/64] 2024-02-11 04:43:23,285 (trainer:687) WARNING: The grad norm is nan. Skipping updating the model.
+[gpua006:0/64] 2024-02-11 04:44:08,043 (trainer:756) INFO: 29epoch:train:13901-14000batch: iter_time=8.719e-05, forward_time=0.142, loss_ctc=83.093, loss_interctc_layer6=88.749, loss_interctc_layer12=74.176, loss_interctc_layer15=68.352, loss_interctc_layer21=85.928, loss=80.060, backward_time=0.209, grad_norm=82.194, clip=100.000, loss_scale=3.237e+31, optim_step_time=0.138, optim0_lr0=7.438e-05, train_time=1.045
+[gpua006:0/64] 2024-02-11 04:45:57,353 (trainer:756) INFO: 29epoch:train:14001-14100batch: iter_time=9.233e-05, forward_time=0.149, loss_ctc=78.270, loss_interctc_layer6=85.132, loss_interctc_layer12=70.691, loss_interctc_layer15=65.054, loss_interctc_layer21=80.829, loss=75.995, backward_time=0.209, grad_norm=95.374, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.139, optim0_lr0=7.437e-05, train_time=1.093
+[gpua006:0/64] 2024-02-11 04:48:38,225 (trainer:756) INFO: 29epoch:train:14101-14200batch: iter_time=1.006e-04, forward_time=0.179, loss_ctc=69.688, loss_interctc_layer6=69.882, loss_interctc_layer12=57.601, loss_interctc_layer15=52.782, loss_interctc_layer21=72.196, loss=64.429, backward_time=0.265, grad_norm=71.743, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.141, optim0_lr0=7.436e-05, train_time=1.609
+[gpua006:0/64] 2024-02-11 04:51:07,500 (trainer:756) INFO: 29epoch:train:14201-14300batch: iter_time=9.829e-05, forward_time=0.143, loss_ctc=76.684, loss_interctc_layer6=86.689, loss_interctc_layer12=72.252, loss_interctc_layer15=66.618, loss_interctc_layer21=78.904, loss=76.230, backward_time=0.208, grad_norm=154.517, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.138, optim0_lr0=7.435e-05, train_time=1.492
+[gpua006:0/64] 2024-02-11 04:52:54,081 (trainer:756) INFO: 29epoch:train:14301-14400batch: iter_time=1.046e-04, forward_time=0.143, loss_ctc=94.415, loss_interctc_layer6=89.040, loss_interctc_layer12=73.826, loss_interctc_layer15=67.698, loss_interctc_layer21=97.971, loss=84.590, backward_time=0.210, grad_norm=69.812, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.138, optim0_lr0=7.434e-05, train_time=1.066
+[gpua006:0/64] 2024-02-11 04:55:01,200 (trainer:756) INFO: 29epoch:train:14401-14500batch: iter_time=9.665e-05, forward_time=0.142, loss_ctc=77.036, loss_interctc_layer6=78.078, loss_interctc_layer12=64.521, loss_interctc_layer15=59.122, loss_interctc_layer21=79.804, loss=71.712, backward_time=0.209, grad_norm=60.124, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.137, optim0_lr0=7.433e-05, train_time=1.271
+[gpua006:0/64] 2024-02-11 04:58:04,624 (trainer:756) INFO: 29epoch:train:14501-14600batch: iter_time=1.060e-04, forward_time=0.186, loss_ctc=103.927, loss_interctc_layer6=94.152, loss_interctc_layer12=77.549, loss_interctc_layer15=70.868, loss_interctc_layer21=107.516, loss=90.802, backward_time=0.207, grad_norm=72.720, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.138, optim0_lr0=7.432e-05, train_time=1.832
+[gpua006:0/64] 2024-02-11 05:00:07,224 (trainer:756) INFO: 29epoch:train:14601-14700batch: iter_time=9.947e-05, forward_time=0.143, loss_ctc=82.466, loss_interctc_layer6=88.443, loss_interctc_layer12=73.487, loss_interctc_layer15=67.466, loss_interctc_layer21=85.199, loss=79.412, backward_time=0.209, grad_norm=72.956, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.138, optim0_lr0=7.432e-05, train_time=1.227
+[gpua006:0/64] 2024-02-11 05:02:15,340 (trainer:756) INFO: 29epoch:train:14701-14800batch: iter_time=9.756e-05, forward_time=0.147, loss_ctc=85.605, loss_interctc_layer6=80.978, loss_interctc_layer12=66.395, loss_interctc_layer15=60.399, loss_interctc_layer21=88.746, loss=76.425, backward_time=0.212, grad_norm=63.652, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.139, optim0_lr0=7.431e-05, train_time=1.281
+[gpua006:0/64] 2024-02-11 05:04:37,661 (trainer:756) INFO: 29epoch:train:14801-14900batch: iter_time=8.984e-05, forward_time=0.187, loss_ctc=74.967, loss_interctc_layer6=79.376, loss_interctc_layer12=65.500, loss_interctc_layer15=59.950, loss_interctc_layer21=77.674, loss=71.493, backward_time=0.257, grad_norm=74.877, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.143, optim0_lr0=7.430e-05, train_time=1.423
+[gpua006:0/64] 2024-02-11 05:06:46,503 (trainer:756) INFO: 29epoch:train:14901-15000batch: iter_time=9.617e-05, forward_time=0.143, loss_ctc=79.387, loss_interctc_layer6=79.678, loss_interctc_layer12=65.825, loss_interctc_layer15=60.170, loss_interctc_layer21=82.218, loss=73.456, backward_time=0.208, grad_norm=69.506, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.138, optim0_lr0=7.429e-05, train_time=1.288
+[gpua006:0/64] 2024-02-11 05:37:50,327 (trainer:355) INFO: 29epoch results: [train] iter_time=0.219, forward_time=0.152, loss_ctc=79.701, loss_interctc_layer6=82.884, loss_interctc_layer12=68.720, loss_interctc_layer15=63.022, loss_interctc_layer21=82.481, loss=75.362, backward_time=0.212, grad_norm=76.172, clip=100.000, loss_scale=2.872e+31, optim_step_time=0.139, optim0_lr0=7.494e-05, train_time=1.637, time=6 hours, 49 minutes and 38.76 seconds, total_count=435000, gpu_max_cached_mem_GB=33.436, [valid] loss_ctc=43.172, cer_ctc=0.202, loss_interctc_layer6=48.214, cer_interctc_layer6=0.218, loss_interctc_layer12=35.756, cer_interctc_layer12=0.151, loss_interctc_layer15=31.669, cer_interctc_layer15=0.127, loss_interctc_layer21=45.605, cer_interctc_layer21=0.214, loss=40.883, time=30 minutes and 39.48 seconds, total_count=135459, gpu_max_cached_mem_GB=33.436
+[gpua006:0/64] 2024-02-11 05:38:08,838 (trainer:410) INFO: The best model has been updated: valid.cer_ctc, valid.loss_ctc, valid.total_count
+[gpua006:0/64] 2024-02-11 05:38:08,854 (trainer:289) INFO: 30/45epoch started. Estimated time to finish: 5 days, 2 hours and 41 minutes
+[gpua006:0/64] 2024-02-11 05:38:08,869 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpua006:0/64] 2024-02-11 05:38:26,695 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua006:0/64] 2024-02-11 05:38:30,025 (abs_task:1660) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TCTCPreprocessor object at 0x7fbe2ee7f130>)
+[gpua006:0/64] 2024-02-11 05:38:30,025 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, 
+[gpua006:0/64] 2024-02-11 05:38:30,028 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua006:0/64] 2024-02-11 05:44:48,532 (trainer:756) INFO: 30epoch:train:1-100batch: iter_time=2.550, forward_time=0.169, loss_ctc=83.886, loss_interctc_layer6=86.143, loss_interctc_layer12=71.416, loss_interctc_layer15=65.595, loss_interctc_layer21=87.018, loss=78.811, backward_time=0.216, grad_norm=76.394, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.139, optim0_lr0=7.428e-05, train_time=3.996
+[gpua006:0/64] 2024-02-11 05:46:24,362 (trainer:756) INFO: 30epoch:train:101-200batch: iter_time=9.450e-05, forward_time=0.142, loss_ctc=77.079, loss_interctc_layer6=83.877, loss_interctc_layer12=69.487, loss_interctc_layer15=63.723, loss_interctc_layer21=79.589, loss=74.751, backward_time=0.209, grad_norm=66.154, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.138, optim0_lr0=7.427e-05, train_time=0.958
+[gpua006:0/64] 2024-02-11 05:48:06,032 (trainer:756) INFO: 30epoch:train:201-300batch: iter_time=8.435e-05, forward_time=0.142, loss_ctc=79.679, loss_interctc_layer6=94.692, loss_interctc_layer12=79.061, loss_interctc_layer15=72.977, loss_interctc_layer21=81.973, loss=81.676, backward_time=0.208, grad_norm=76.661, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.138, optim0_lr0=7.426e-05, train_time=1.016
+[gpua006:0/64] 2024-02-11 05:49:51,385 (trainer:756) INFO: 30epoch:train:301-400batch: iter_time=8.504e-05, forward_time=0.156, loss_ctc=63.314, loss_interctc_layer6=76.185, loss_interctc_layer12=62.941, loss_interctc_layer15=57.520, loss_interctc_layer21=65.586, loss=65.109, backward_time=0.214, grad_norm=79.891, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.138, optim0_lr0=7.426e-05, train_time=1.053
+[gpua006:0/64] 2024-02-11 05:51:45,086 (trainer:756) INFO: 30epoch:train:401-500batch: iter_time=8.575e-05, forward_time=0.157, loss_ctc=79.043, loss_interctc_layer6=84.685, loss_interctc_layer12=70.962, loss_interctc_layer15=65.445, loss_interctc_layer21=81.544, loss=76.336, backward_time=0.216, grad_norm=86.354, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.139, optim0_lr0=7.425e-05, train_time=1.137
+[gpua006:0/64] 2024-02-11 05:53:41,779 (trainer:756) INFO: 30epoch:train:501-600batch: iter_time=8.673e-05, forward_time=0.144, loss_ctc=72.271, loss_interctc_layer6=83.011, loss_interctc_layer12=68.951, loss_interctc_layer15=63.229, loss_interctc_layer21=74.702, loss=72.433, backward_time=0.215, grad_norm=85.128, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.139, optim0_lr0=7.424e-05, train_time=1.166
+[gpua006:0/64] 2024-02-11 05:55:54,982 (trainer:756) INFO: 30epoch:train:601-700batch: iter_time=8.519e-05, forward_time=0.145, loss_ctc=74.429, loss_interctc_layer6=86.794, loss_interctc_layer12=72.021, loss_interctc_layer15=66.153, loss_interctc_layer21=76.970, loss=75.273, backward_time=0.210, grad_norm=81.222, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.139, optim0_lr0=7.423e-05, train_time=1.332
+[gpua006:0/64] 2024-02-11 05:57:40,569 (trainer:756) INFO: 30epoch:train:701-800batch: iter_time=8.562e-05, forward_time=0.141, loss_ctc=70.823, loss_interctc_layer6=81.544, loss_interctc_layer12=67.578, loss_interctc_layer15=61.951, loss_interctc_layer21=73.196, loss=71.018, backward_time=0.207, grad_norm=71.925, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.139, optim0_lr0=7.422e-05, train_time=1.056
+[gpua006:0/64] 2024-02-11 05:59:40,552 (trainer:756) INFO: 30epoch:train:801-900batch: iter_time=8.884e-05, forward_time=0.146, loss_ctc=77.422, loss_interctc_layer6=82.801, loss_interctc_layer12=68.691, loss_interctc_layer15=62.995, loss_interctc_layer21=80.244, loss=74.430, backward_time=0.214, grad_norm=120.425, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.140, optim0_lr0=7.421e-05, train_time=1.200
+[gpua006:0/64] 2024-02-11 06:02:06,012 (trainer:756) INFO: 30epoch:train:901-1000batch: iter_time=8.077e-05, forward_time=0.166, loss_ctc=85.304, loss_interctc_layer6=84.038, loss_interctc_layer12=69.850, loss_interctc_layer15=64.205, loss_interctc_layer21=88.208, loss=78.321, backward_time=0.225, grad_norm=65.482, clip=100.000, loss_scale=2.840e+31, optim_step_time=0.142, optim0_lr0=7.421e-05, train_time=1.454
+[gpua006:0/64] 2024-02-11 06:04:19,950 (trainer:756) INFO: 30epoch:train:1001-1100batch: iter_time=8.244e-05, forward_time=0.154, loss_ctc=75.193, loss_interctc_layer6=77.882, loss_interctc_layer12=64.412, loss_interctc_layer15=58.974, loss_interctc_layer21=77.842, loss=70.861, backward_time=0.206, grad_norm=71.430, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.140, optim0_lr0=7.420e-05, train_time=1.339
+[gpua006:0/64] 2024-02-11 06:06:15,638 (trainer:756) INFO: 30epoch:train:1101-1200batch: iter_time=8.471e-05, forward_time=0.153, loss_ctc=84.159, loss_interctc_layer6=85.094, loss_interctc_layer12=71.495, loss_interctc_layer15=66.002, loss_interctc_layer21=87.208, loss=78.791, backward_time=0.207, grad_norm=91.098, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.141, optim0_lr0=7.419e-05, train_time=1.157
+[gpua006:0/64] 2024-02-11 06:07:27,408 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
+[gpua006:0/64] 2024-02-11 06:07:45,711 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua006:0/64] 2024-02-11 06:07:49,156 (abs_task:1660) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TCTCPreprocessor object at 0x7fbb1391e7a0>)
+[gpua006:0/64] 2024-02-11 06:07:49,156 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, 
+[gpua006:0/64] 2024-02-11 06:07:49,159 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua006:0/64] 2024-02-11 06:12:31,917 (trainer:756) INFO: 30epoch:train:1201-1300batch: iter_time=2.597, forward_time=0.146, loss_ctc=80.037, loss_interctc_layer6=89.985, loss_interctc_layer12=74.954, loss_interctc_layer15=69.059, loss_interctc_layer21=82.751, loss=79.357, backward_time=0.208, grad_norm=83.031, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.139, optim0_lr0=7.418e-05, train_time=3.763
+[gpua006:0/64] 2024-02-11 06:14:11,565 (trainer:756) INFO: 30epoch:train:1301-1400batch: iter_time=7.762e-05, forward_time=0.153, loss_ctc=91.967, loss_interctc_layer6=85.720, loss_interctc_layer12=70.929, loss_interctc_layer15=64.970, loss_interctc_layer21=95.217, loss=81.761, backward_time=0.225, grad_norm=68.856, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.139, optim0_lr0=7.417e-05, train_time=0.996
+[gpua006:0/64] 2024-02-11 06:15:58,502 (trainer:756) INFO: 30epoch:train:1401-1500batch: iter_time=8.037e-05, forward_time=0.142, loss_ctc=75.275, loss_interctc_layer6=82.318, loss_interctc_layer12=68.399, loss_interctc_layer15=62.728, loss_interctc_layer21=77.771, loss=73.298, backward_time=0.208, grad_norm=68.011, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.138, optim0_lr0=7.416e-05, train_time=1.070
+[gpua006:0/64] 2024-02-11 06:17:48,067 (trainer:756) INFO: 30epoch:train:1501-1600batch: iter_time=8.359e-05, forward_time=0.142, loss_ctc=79.371, loss_interctc_layer6=87.665, loss_interctc_layer12=72.785, loss_interctc_layer15=66.834, loss_interctc_layer21=81.850, loss=77.701, backward_time=0.208, grad_norm=73.143, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.138, optim0_lr0=7.415e-05, train_time=1.095
+[gpua006:0/64] 2024-02-11 06:19:39,632 (trainer:756) INFO: 30epoch:train:1601-1700batch: iter_time=8.116e-05, forward_time=0.142, loss_ctc=80.149, loss_interctc_layer6=84.932, loss_interctc_layer12=71.029, loss_interctc_layer15=65.413, loss_interctc_layer21=82.905, loss=76.886, backward_time=0.209, grad_norm=96.460, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.139, optim0_lr0=7.415e-05, train_time=1.115
+[gpua006:0/64] 2024-02-11 06:21:45,312 (trainer:756) INFO: 30epoch:train:1701-1800batch: iter_time=8.374e-05, forward_time=0.152, loss_ctc=84.467, loss_interctc_layer6=83.964, loss_interctc_layer12=69.749, loss_interctc_layer15=64.023, loss_interctc_layer21=87.434, loss=77.927, backward_time=0.211, grad_norm=79.364, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.139, optim0_lr0=7.414e-05, train_time=1.257
+[gpua006:0/64] 2024-02-11 06:24:03,105 (trainer:756) INFO: 30epoch:train:1801-1900batch: iter_time=8.369e-05, forward_time=0.148, loss_ctc=66.076, loss_interctc_layer6=79.087, loss_interctc_layer12=65.685, loss_interctc_layer15=60.404, loss_interctc_layer21=68.041, loss=67.859, backward_time=0.213, grad_norm=67.693, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.139, optim0_lr0=7.413e-05, train_time=1.378
+[gpua006:0/64] 2024-02-11 06:26:08,432 (trainer:756) INFO: 30epoch:train:1901-2000batch: iter_time=7.897e-05, forward_time=0.169, loss_ctc=82.813, loss_interctc_layer6=90.188, loss_interctc_layer12=74.849, loss_interctc_layer15=68.792, loss_interctc_layer21=85.686, loss=80.465, backward_time=0.219, grad_norm=86.778, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.143, optim0_lr0=7.412e-05, train_time=1.253
+[gpua006:0/64] 2024-02-11 06:28:07,889 (trainer:756) INFO: 30epoch:train:2001-2100batch: iter_time=8.396e-05, forward_time=0.161, loss_ctc=68.900, loss_interctc_layer6=72.395, loss_interctc_layer12=59.725, loss_interctc_layer15=54.539, loss_interctc_layer21=71.387, loss=65.389, backward_time=0.211, grad_norm=65.247, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.139, optim0_lr0=7.411e-05, train_time=1.194
+[gpua006:0/64] 2024-02-11 06:30:07,211 (trainer:756) INFO: 30epoch:train:2101-2200batch: iter_time=8.719e-05, forward_time=0.143, loss_ctc=98.382, loss_interctc_layer6=90.379, loss_interctc_layer12=74.917, loss_interctc_layer15=68.908, loss_interctc_layer21=101.969, loss=86.911, backward_time=0.209, grad_norm=69.568, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.139, optim0_lr0=7.410e-05, train_time=1.193
+[gpua006:0/64] 2024-02-11 06:32:10,113 (trainer:756) INFO: 30epoch:train:2201-2300batch: iter_time=8.404e-05, forward_time=0.160, loss_ctc=71.355, loss_interctc_layer6=77.724, loss_interctc_layer12=64.267, loss_interctc_layer15=58.847, loss_interctc_layer21=73.639, loss=69.167, backward_time=0.209, grad_norm=66.740, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.138, optim0_lr0=7.409e-05, train_time=1.229
+[gpua006:0/64] 2024-02-11 06:34:05,562 (trainer:756) INFO: 30epoch:train:2301-2400batch: iter_time=7.779e-05, forward_time=0.148, loss_ctc=86.326, loss_interctc_layer6=82.754, loss_interctc_layer12=68.349, loss_interctc_layer15=62.435, loss_interctc_layer21=89.290, loss=77.831, backward_time=0.209, grad_norm=85.615, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.139, optim0_lr0=7.409e-05, train_time=1.154
+[gpua006:0/64] 2024-02-11 06:35:52,623 (trainer:756) INFO: 30epoch:train:2401-2500batch: iter_time=7.590e-05, forward_time=0.169, loss_ctc=82.272, loss_interctc_layer6=86.153, loss_interctc_layer12=72.110, loss_interctc_layer15=66.322, loss_interctc_layer21=84.509, loss=78.273, backward_time=0.218, grad_norm=107.036, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.140, optim0_lr0=7.408e-05, train_time=1.070
+[gpua006:0/64] 2024-02-11 06:36:12,652 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpua006:0/64] 2024-02-11 06:36:31,309 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua006:0/64] 2024-02-11 06:36:34,712 (abs_task:1660) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TCTCPreprocessor object at 0x7fbdb48d5d50>)
+[gpua006:0/64] 2024-02-11 06:36:34,712 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, 
+[gpua006:0/64] 2024-02-11 06:36:34,716 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua006:0/64] 2024-02-11 06:42:35,011 (trainer:756) INFO: 30epoch:train:2501-2600batch: iter_time=2.884, forward_time=0.144, loss_ctc=82.488, loss_interctc_layer6=85.374, loss_interctc_layer12=70.683, loss_interctc_layer15=64.963, loss_interctc_layer21=85.373, loss=77.776, backward_time=0.210, grad_norm=74.230, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.140, optim0_lr0=7.407e-05, train_time=4.023
+[gpua006:0/64] 2024-02-11 06:44:49,113 (trainer:756) INFO: 30epoch:train:2601-2700batch: iter_time=8.854e-05, forward_time=0.144, loss_ctc=75.766, loss_interctc_layer6=82.922, loss_interctc_layer12=68.536, loss_interctc_layer15=62.813, loss_interctc_layer21=78.408, loss=73.689, backward_time=0.210, grad_norm=60.102, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.140, optim0_lr0=7.406e-05, train_time=1.341
+[gpua006:0/64] 2024-02-11 06:46:46,867 (trainer:756) INFO: 30epoch:train:2701-2800batch: iter_time=8.734e-04, forward_time=0.159, loss_ctc=79.255, loss_interctc_layer6=94.500, loss_interctc_layer12=78.820, loss_interctc_layer15=72.744, loss_interctc_layer21=81.820, loss=81.428, backward_time=0.213, grad_norm=73.901, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.142, optim0_lr0=7.405e-05, train_time=1.177
+[gpua006:0/64] 2024-02-11 06:48:41,513 (trainer:756) INFO: 30epoch:train:2801-2900batch: iter_time=9.464e-05, forward_time=0.142, loss_ctc=62.492, loss_interctc_layer6=75.507, loss_interctc_layer12=62.246, loss_interctc_layer15=56.864, loss_interctc_layer21=64.780, loss=64.378, backward_time=0.209, grad_norm=63.177, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.139, optim0_lr0=7.404e-05, train_time=1.146
+[gpua006:0/64] 2024-02-11 06:50:00,171 (trainer:687) WARNING: The grad norm is nan. Skipping updating the model.
+[gpua006:0/64] 2024-02-11 06:50:37,547 (trainer:756) INFO: 30epoch:train:2901-3000batch: iter_time=8.807e-05, forward_time=0.149, loss_ctc=77.619, loss_interctc_layer6=83.296, loss_interctc_layer12=69.601, loss_interctc_layer15=64.126, loss_interctc_layer21=80.012, loss=74.931, backward_time=0.213, grad_norm=71.330, clip=100.000, loss_scale=4.138e+31, optim_step_time=0.144, optim0_lr0=7.404e-05, train_time=1.160
+[gpua006:0/64] 2024-02-11 06:52:33,929 (trainer:756) INFO: 30epoch:train:3001-3100batch: iter_time=8.909e-05, forward_time=0.161, loss_ctc=71.357, loss_interctc_layer6=83.005, loss_interctc_layer12=68.826, loss_interctc_layer15=63.300, loss_interctc_layer21=73.647, loss=72.027, backward_time=0.219, grad_norm=73.260, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.141, optim0_lr0=7.403e-05, train_time=1.164
+[gpua006:0/64] 2024-02-11 06:54:40,734 (trainer:756) INFO: 30epoch:train:3101-3200batch: iter_time=8.802e-05, forward_time=0.173, loss_ctc=73.641, loss_interctc_layer6=85.972, loss_interctc_layer12=71.148, loss_interctc_layer15=65.303, loss_interctc_layer21=76.324, loss=74.477, backward_time=0.210, grad_norm=65.363, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.139, optim0_lr0=7.402e-05, train_time=1.268
+[gpua006:0/64] 2024-02-11 06:56:39,710 (trainer:756) INFO: 30epoch:train:3201-3300batch: iter_time=8.750e-05, forward_time=0.161, loss_ctc=69.644, loss_interctc_layer6=80.950, loss_interctc_layer12=66.923, loss_interctc_layer15=61.349, loss_interctc_layer21=72.081, loss=70.189, backward_time=0.210, grad_norm=58.655, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.139, optim0_lr0=7.401e-05, train_time=1.190
+[gpua006:0/64] 2024-02-11 06:58:23,109 (trainer:756) INFO: 30epoch:train:3301-3400batch: iter_time=7.224e-05, forward_time=0.145, loss_ctc=76.706, loss_interctc_layer6=82.221, loss_interctc_layer12=67.905, loss_interctc_layer15=62.264, loss_interctc_layer21=79.467, loss=73.713, backward_time=0.211, grad_norm=84.174, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.140, optim0_lr0=7.400e-05, train_time=1.034
+[gpua006:0/64] 2024-02-11 07:00:29,092 (trainer:756) INFO: 30epoch:train:3401-3500batch: iter_time=4.993e-04, forward_time=0.160, loss_ctc=83.824, loss_interctc_layer6=83.173, loss_interctc_layer12=68.807, loss_interctc_layer15=63.085, loss_interctc_layer21=86.821, loss=77.142, backward_time=0.225, grad_norm=68.856, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.139, optim0_lr0=7.399e-05, train_time=1.260
+[gpua006:0/64] 2024-02-11 07:02:49,968 (trainer:756) INFO: 30epoch:train:3501-3600batch: iter_time=8.738e-05, forward_time=0.151, loss_ctc=74.537, loss_interctc_layer6=77.199, loss_interctc_layer12=63.779, loss_interctc_layer15=58.388, loss_interctc_layer21=77.332, loss=70.247, backward_time=0.215, grad_norm=67.324, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.138, optim0_lr0=7.398e-05, train_time=1.409
+[gpua006:0/64] 2024-02-11 07:04:51,226 (trainer:756) INFO: 30epoch:train:3601-3700batch: iter_time=8.413e-05, forward_time=0.144, loss_ctc=81.693, loss_interctc_layer6=83.657, loss_interctc_layer12=69.704, loss_interctc_layer15=64.284, loss_interctc_layer21=84.523, loss=76.772, backward_time=0.211, grad_norm=91.930, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.139, optim0_lr0=7.398e-05, train_time=1.212
+[gpua006:0/64] 2024-02-11 07:06:02,756 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpua006:0/64] 2024-02-11 07:06:21,478 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua006:0/64] 2024-02-11 07:06:24,907 (abs_task:1660) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TCTCPreprocessor object at 0x7fc8348c5870>)
+[gpua006:0/64] 2024-02-11 07:06:24,907 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, 
+[gpua006:0/64] 2024-02-11 07:06:24,910 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua006:0/64] 2024-02-11 07:11:18,753 (trainer:756) INFO: 30epoch:train:3701-3800batch: iter_time=2.698, forward_time=0.177, loss_ctc=76.028, loss_interctc_layer6=88.256, loss_interctc_layer12=73.450, loss_interctc_layer15=67.413, loss_interctc_layer21=78.763, loss=76.782, backward_time=0.218, grad_norm=69.536, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.140, optim0_lr0=7.397e-05, train_time=3.875
+[gpua006:0/64] 2024-02-11 07:12:58,381 (trainer:756) INFO: 30epoch:train:3801-3900batch: iter_time=8.270e-05, forward_time=0.143, loss_ctc=83.825, loss_interctc_layer6=84.617, loss_interctc_layer12=69.918, loss_interctc_layer15=64.025, loss_interctc_layer21=86.670, loss=77.811, backward_time=0.211, grad_norm=58.344, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.139, optim0_lr0=7.396e-05, train_time=0.996
+[gpua006:0/64] 2024-02-11 07:14:41,553 (trainer:756) INFO: 30epoch:train:3901-4000batch: iter_time=8.622e-05, forward_time=0.143, loss_ctc=68.872, loss_interctc_layer6=82.190, loss_interctc_layer12=68.327, loss_interctc_layer15=62.815, loss_interctc_layer21=71.229, loss=70.687, backward_time=0.209, grad_norm=63.107, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.139, optim0_lr0=7.395e-05, train_time=1.031
+[gpua006:0/64] 2024-02-11 07:16:25,764 (trainer:756) INFO: 30epoch:train:4001-4100batch: iter_time=8.587e-05, forward_time=0.143, loss_ctc=73.129, loss_interctc_layer6=87.114, loss_interctc_layer12=72.121, loss_interctc_layer15=66.116, loss_interctc_layer21=75.489, loss=74.794, backward_time=0.209, grad_norm=66.688, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.138, optim0_lr0=7.394e-05, train_time=1.042
+[gpua006:0/64] 2024-02-11 07:18:32,221 (trainer:756) INFO: 30epoch:train:4101-4200batch: iter_time=8.643e-05, forward_time=0.153, loss_ctc=74.492, loss_interctc_layer6=85.092, loss_interctc_layer12=70.853, loss_interctc_layer15=65.105, loss_interctc_layer21=77.017, loss=74.512, backward_time=0.211, grad_norm=75.730, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.139, optim0_lr0=7.393e-05, train_time=1.264
+[gpua006:0/64] 2024-02-11 07:20:32,634 (trainer:756) INFO: 30epoch:train:4201-4300batch: iter_time=8.622e-05, forward_time=0.159, loss_ctc=78.948, loss_interctc_layer6=84.138, loss_interctc_layer12=69.843, loss_interctc_layer15=64.052, loss_interctc_layer21=81.600, loss=75.716, backward_time=0.221, grad_norm=81.480, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.138, optim0_lr0=7.393e-05, train_time=1.204
+[gpua006:0/64] 2024-02-11 07:22:46,560 (trainer:756) INFO: 30epoch:train:4301-4400batch: iter_time=8.606e-05, forward_time=0.146, loss_ctc=61.072, loss_interctc_layer6=78.135, loss_interctc_layer12=64.620, loss_interctc_layer15=59.290, loss_interctc_layer21=62.926, loss=65.209, backward_time=0.211, grad_norm=94.762, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.138, optim0_lr0=7.392e-05, train_time=1.339
+[gpua006:0/64] 2024-02-11 07:24:57,110 (trainer:756) INFO: 30epoch:train:4401-4500batch: iter_time=8.655e-05, forward_time=0.144, loss_ctc=77.801, loss_interctc_layer6=89.092, loss_interctc_layer12=73.739, loss_interctc_layer15=67.649, loss_interctc_layer21=80.441, loss=77.744, backward_time=0.208, grad_norm=84.871, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.138, optim0_lr0=7.391e-05, train_time=1.304
+[gpua006:0/64] 2024-02-11 07:26:52,617 (trainer:756) INFO: 30epoch:train:4501-4600batch: iter_time=8.432e-05, forward_time=0.146, loss_ctc=66.257, loss_interctc_layer6=72.231, loss_interctc_layer12=59.582, loss_interctc_layer15=54.356, loss_interctc_layer21=68.766, loss=64.238, backward_time=0.216, grad_norm=61.139, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.138, optim0_lr0=7.390e-05, train_time=1.156
+[gpua006:0/64] 2024-02-11 07:29:08,606 (trainer:756) INFO: 30epoch:train:4601-4700batch: iter_time=7.632e-04, forward_time=0.175, loss_ctc=91.721, loss_interctc_layer6=89.328, loss_interctc_layer12=73.908, loss_interctc_layer15=67.890, loss_interctc_layer21=95.004, loss=83.570, backward_time=0.236, grad_norm=111.211, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.143, optim0_lr0=7.389e-05, train_time=1.359
+[gpua006:0/64] 2024-02-11 07:30:16,880 (trainer:687) WARNING: The grad norm is nan. Skipping updating the model.
+[gpua006:0/64] 2024-02-11 07:31:06,308 (trainer:756) INFO: 30epoch:train:4701-4800batch: iter_time=8.526e-05, forward_time=0.143, loss_ctc=69.149, loss_interctc_layer6=76.866, loss_interctc_layer12=63.740, loss_interctc_layer15=58.263, loss_interctc_layer21=71.297, loss=67.863, backward_time=0.209, grad_norm=80.784, clip=100.000, loss_scale=3.217e+31, optim_step_time=0.138, optim0_lr0=7.388e-05, train_time=1.177
+[gpua006:0/64] 2024-02-11 07:32:58,637 (trainer:756) INFO: 30epoch:train:4801-4900batch: iter_time=8.504e-05, forward_time=0.143, loss_ctc=82.522, loss_interctc_layer6=82.910, loss_interctc_layer12=68.260, loss_interctc_layer15=62.453, loss_interctc_layer21=85.507, loss=76.331, backward_time=0.211, grad_norm=97.806, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.138, optim0_lr0=7.388e-05, train_time=1.123
+[gpua006:0/64] 2024-02-11 07:34:49,276 (trainer:756) INFO: 30epoch:train:4901-5000batch: iter_time=8.240e-05, forward_time=0.144, loss_ctc=75.888, loss_interctc_layer6=86.115, loss_interctc_layer12=71.932, loss_interctc_layer15=66.342, loss_interctc_layer21=78.596, loss=75.775, backward_time=0.209, grad_norm=80.064, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.139, optim0_lr0=7.387e-05, train_time=1.106
+[gpua006:0/64] 2024-02-11 07:35:09,305 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpua006:0/64] 2024-02-11 07:35:27,697 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua006:0/64] 2024-02-11 07:35:31,112 (abs_task:1660) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TCTCPreprocessor object at 0x7fbddf5df760>)
+[gpua006:0/64] 2024-02-11 07:35:31,112 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, 
+[gpua006:0/64] 2024-02-11 07:35:31,115 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua006:0/64] 2024-02-11 07:41:34,512 (trainer:756) INFO: 30epoch:train:5001-5100batch: iter_time=2.535, forward_time=0.165, loss_ctc=91.213, loss_interctc_layer6=84.989, loss_interctc_layer12=70.429, loss_interctc_layer15=64.628, loss_interctc_layer21=94.560, loss=81.164, backward_time=0.215, grad_norm=81.163, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.139, optim0_lr0=7.386e-05, train_time=4.052
+[gpua006:0/64] 2024-02-11 07:43:22,438 (trainer:756) INFO: 30epoch:train:5101-5200batch: iter_time=9.228e-05, forward_time=0.144, loss_ctc=81.641, loss_interctc_layer6=83.094, loss_interctc_layer12=68.617, loss_interctc_layer15=62.754, loss_interctc_layer21=84.602, loss=76.142, backward_time=0.210, grad_norm=85.234, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.140, optim0_lr0=7.385e-05, train_time=1.079
+[gpua006:0/64] 2024-02-11 07:44:44,562 (trainer:687) WARNING: The grad norm is nan. Skipping updating the model.
+[gpua006:0/64] 2024-02-11 07:45:42,832 (trainer:756) INFO: 30epoch:train:5201-5300batch: iter_time=9.760e-05, forward_time=0.151, loss_ctc=84.932, loss_interctc_layer6=93.758, loss_interctc_layer12=78.026, loss_interctc_layer15=71.836, loss_interctc_layer21=87.605, loss=83.231, backward_time=0.211, grad_norm=71.104, clip=100.000, loss_scale=1.608e+31, optim_step_time=0.139, optim0_lr0=7.384e-05, train_time=1.404
+[gpua006:0/64] 2024-02-11 07:47:47,528 (trainer:756) INFO: 30epoch:train:5301-5400batch: iter_time=9.608e-05, forward_time=0.144, loss_ctc=67.949, loss_interctc_layer6=75.153, loss_interctc_layer12=61.889, loss_interctc_layer15=56.487, loss_interctc_layer21=70.394, loss=66.374, backward_time=0.208, grad_norm=55.932, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.138, optim0_lr0=7.383e-05, train_time=1.246
+[gpua006:0/64] 2024-02-11 07:49:47,060 (trainer:756) INFO: 30epoch:train:5401-5500batch: iter_time=1.119e-04, forward_time=0.143, loss_ctc=78.716, loss_interctc_layer6=82.140, loss_interctc_layer12=68.588, loss_interctc_layer15=63.170, loss_interctc_layer21=81.185, loss=74.760, backward_time=0.209, grad_norm=87.228, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.139, optim0_lr0=7.383e-05, train_time=1.196
+[gpua006:0/64] 2024-02-11 07:51:39,761 (trainer:756) INFO: 30epoch:train:5501-5600batch: iter_time=1.116e-04, forward_time=0.178, loss_ctc=76.729, loss_interctc_layer6=82.569, loss_interctc_layer12=68.442, loss_interctc_layer15=62.746, loss_interctc_layer21=79.421, loss=73.981, backward_time=0.218, grad_norm=76.214, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.141, optim0_lr0=7.382e-05, train_time=1.127
+[gpua006:0/64] 2024-02-11 07:53:53,730 (trainer:756) INFO: 30epoch:train:5601-5700batch: iter_time=9.301e-05, forward_time=0.161, loss_ctc=80.326, loss_interctc_layer6=86.124, loss_interctc_layer12=71.271, loss_interctc_layer15=65.276, loss_interctc_layer21=83.251, loss=77.250, backward_time=0.226, grad_norm=84.093, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.141, optim0_lr0=7.381e-05, train_time=1.338
+[gpua006:0/64] 2024-02-11 07:56:17,702 (trainer:756) INFO: 30epoch:train:5701-5800batch: iter_time=9.525e-05, forward_time=0.143, loss_ctc=72.645, loss_interctc_layer6=80.550, loss_interctc_layer12=66.577, loss_interctc_layer15=60.960, loss_interctc_layer21=75.207, loss=71.188, backward_time=0.208, grad_norm=68.467, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.138, optim0_lr0=7.380e-05, train_time=1.441
+[gpua006:0/64] 2024-02-11 07:58:46,114 (trainer:756) INFO: 30epoch:train:5801-5900batch: iter_time=9.703e-05, forward_time=0.143, loss_ctc=80.740, loss_interctc_layer6=81.211, loss_interctc_layer12=67.050, loss_interctc_layer15=61.325, loss_interctc_layer21=83.779, loss=74.821, backward_time=0.207, grad_norm=68.313, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.139, optim0_lr0=7.379e-05, train_time=1.484
+[gpua006:0/64] 2024-02-11 08:01:05,434 (trainer:756) INFO: 30epoch:train:5901-6000batch: iter_time=8.624e-05, forward_time=0.146, loss_ctc=87.170, loss_interctc_layer6=82.603, loss_interctc_layer12=68.345, loss_interctc_layer15=62.609, loss_interctc_layer21=90.302, loss=78.206, backward_time=0.209, grad_norm=76.583, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.139, optim0_lr0=7.378e-05, train_time=1.393
+[gpua006:0/64] 2024-02-11 08:03:00,425 (trainer:756) INFO: 30epoch:train:6001-6100batch: iter_time=8.228e-05, forward_time=0.143, loss_ctc=78.074, loss_interctc_layer6=76.857, loss_interctc_layer12=63.408, loss_interctc_layer15=57.979, loss_interctc_layer21=80.931, loss=71.450, backward_time=0.209, grad_norm=71.148, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.139, optim0_lr0=7.377e-05, train_time=1.150
+[gpua006:0/64] 2024-02-11 08:05:07,184 (trainer:756) INFO: 30epoch:train:6101-6200batch: iter_time=3.064e-04, forward_time=0.181, loss_ctc=83.721, loss_interctc_layer6=83.074, loss_interctc_layer12=68.956, loss_interctc_layer15=63.387, loss_interctc_layer21=86.508, loss=77.129, backward_time=0.216, grad_norm=72.288, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.141, optim0_lr0=7.377e-05, train_time=1.267
+[gpua006:0/64] 2024-02-11 08:06:50,034 (multiple_iter_factory:32) INFO: Building 5th iter-factory...
+[gpua006:0/64] 2024-02-11 08:07:08,855 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua006:0/64] 2024-02-11 08:07:12,283 (abs_task:1660) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TCTCPreprocessor object at 0x7fbaee5404f0>)
+[gpua006:0/64] 2024-02-11 08:07:12,283 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, 
+[gpua006:0/64] 2024-02-11 08:07:12,302 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua006:0/64] 2024-02-11 08:12:27,843 (trainer:756) INFO: 30epoch:train:6201-6300batch: iter_time=2.958, forward_time=0.153, loss_ctc=80.045, loss_interctc_layer6=87.713, loss_interctc_layer12=72.822, loss_interctc_layer15=66.866, loss_interctc_layer21=83.149, loss=78.119, backward_time=0.217, grad_norm=78.262, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.140, optim0_lr0=7.376e-05, train_time=4.406
+[gpua006:0/64] 2024-02-11 08:14:06,089 (trainer:756) INFO: 30epoch:train:6301-6400batch: iter_time=8.405e-05, forward_time=0.143, loss_ctc=85.242, loss_interctc_layer6=84.820, loss_interctc_layer12=70.053, loss_interctc_layer15=64.169, loss_interctc_layer21=88.365, loss=78.530, backward_time=0.211, grad_norm=137.145, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.139, optim0_lr0=7.375e-05, train_time=0.982
+[gpua006:0/64] 2024-02-11 08:15:41,508 (trainer:756) INFO: 30epoch:train:6401-6500batch: iter_time=8.191e-05, forward_time=0.142, loss_ctc=69.302, loss_interctc_layer6=82.789, loss_interctc_layer12=68.726, loss_interctc_layer15=63.170, loss_interctc_layer21=71.771, loss=71.151, backward_time=0.211, grad_norm=72.629, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.139, optim0_lr0=7.374e-05, train_time=0.954
+[gpua006:0/64] 2024-02-11 08:18:05,286 (trainer:756) INFO: 30epoch:train:6501-6600batch: iter_time=8.453e-05, forward_time=0.173, loss_ctc=73.241, loss_interctc_layer6=87.218, loss_interctc_layer12=72.075, loss_interctc_layer15=65.983, loss_interctc_layer21=75.628, loss=74.829, backward_time=0.226, grad_norm=72.789, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.141, optim0_lr0=7.373e-05, train_time=1.437
+[gpua006:0/64] 2024-02-11 08:20:08,447 (trainer:756) INFO: 30epoch:train:6601-6700batch: iter_time=8.662e-05, forward_time=0.143, loss_ctc=74.230, loss_interctc_layer6=84.182, loss_interctc_layer12=70.239, loss_interctc_layer15=64.422, loss_interctc_layer21=76.869, loss=73.988, backward_time=0.210, grad_norm=67.849, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.139, optim0_lr0=7.372e-05, train_time=1.231
+[gpua006:0/64] 2024-02-11 08:22:15,359 (trainer:756) INFO: 30epoch:train:6701-6800batch: iter_time=8.246e-05, forward_time=0.143, loss_ctc=77.602, loss_interctc_layer6=83.193, loss_interctc_layer12=68.880, loss_interctc_layer15=63.019, loss_interctc_layer21=80.394, loss=74.618, backward_time=0.210, grad_norm=68.478, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.139, optim0_lr0=7.372e-05, train_time=1.269
+[gpua006:0/64] 2024-02-11 08:24:45,111 (trainer:756) INFO: 30epoch:train:6801-6900batch: iter_time=8.310e-05, forward_time=0.142, loss_ctc=60.516, loss_interctc_layer6=77.714, loss_interctc_layer12=64.318, loss_interctc_layer15=58.916, loss_interctc_layer21=62.447, loss=64.782, backward_time=0.208, grad_norm=71.172, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.139, optim0_lr0=7.371e-05, train_time=1.497
+[gpua006:0/64] 2024-02-11 08:26:46,112 (trainer:756) INFO: 30epoch:train:6901-7000batch: iter_time=8.420e-05, forward_time=0.143, loss_ctc=78.290, loss_interctc_layer6=88.883, loss_interctc_layer12=73.576, loss_interctc_layer15=67.475, loss_interctc_layer21=80.995, loss=77.844, backward_time=0.210, grad_norm=66.654, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.139, optim0_lr0=7.370e-05, train_time=1.210
+[gpua006:0/64] 2024-02-11 08:28:58,204 (trainer:756) INFO: 30epoch:train:7001-7100batch: iter_time=7.753e-05, forward_time=0.143, loss_ctc=65.619, loss_interctc_layer6=71.712, loss_interctc_layer12=59.057, loss_interctc_layer15=53.803, loss_interctc_layer21=68.088, loss=63.656, backward_time=0.209, grad_norm=63.227, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.139, optim0_lr0=7.369e-05, train_time=1.321
+[gpua006:0/64] 2024-02-11 08:30:55,638 (trainer:756) INFO: 30epoch:train:7101-7200batch: iter_time=8.269e-05, forward_time=0.155, loss_ctc=91.932, loss_interctc_layer6=88.904, loss_interctc_layer12=73.491, loss_interctc_layer15=67.420, loss_interctc_layer21=95.500, loss=83.450, backward_time=0.211, grad_norm=71.951, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.139, optim0_lr0=7.368e-05, train_time=1.174
+[gpua006:0/64] 2024-02-11 08:33:02,566 (trainer:756) INFO: 30epoch:train:7201-7300batch: iter_time=8.435e-05, forward_time=0.159, loss_ctc=69.595, loss_interctc_layer6=77.287, loss_interctc_layer12=64.000, loss_interctc_layer15=58.584, loss_interctc_layer21=72.026, loss=68.298, backward_time=0.215, grad_norm=75.812, clip=100.000, loss_scale=1.430e+31, optim_step_time=0.139, optim0_lr0=7.367e-05, train_time=1.269
+[gpua006:0/64] 2024-02-11 08:35:24,589 (trainer:756) INFO: 30epoch:train:7301-7400batch: iter_time=8.479e-05, forward_time=0.151, loss_ctc=82.344, loss_interctc_layer6=82.259, loss_interctc_layer12=67.544, loss_interctc_layer15=61.789, loss_interctc_layer21=85.490, loss=75.885, backward_time=0.227, grad_norm=92.020, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.141, optim0_lr0=7.367e-05, train_time=1.420
+[gpua006:0/64] 2024-02-11 08:37:09,238 (trainer:756) INFO: 30epoch:train:7401-7500batch: iter_time=8.149e-05, forward_time=0.141, loss_ctc=75.366, loss_interctc_layer6=85.153, loss_interctc_layer12=71.306, loss_interctc_layer15=65.615, loss_interctc_layer21=78.214, loss=75.131, backward_time=0.209, grad_norm=82.449, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.139, optim0_lr0=7.366e-05, train_time=1.047
+[gpua006:0/64] 2024-02-11 08:37:29,268 (multiple_iter_factory:32) INFO: Building 6th iter-factory...
+[gpua006:0/64] 2024-02-11 08:37:47,963 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua006:0/64] 2024-02-11 08:37:51,392 (abs_task:1660) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TCTCPreprocessor object at 0x7fbafa025ff0>)
+[gpua006:0/64] 2024-02-11 08:37:51,392 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, 
+[gpua006:0/64] 2024-02-11 08:37:51,395 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua006:0/64] 2024-02-11 08:45:08,950 (trainer:756) INFO: 30epoch:train:7501-7600batch: iter_time=2.716, forward_time=0.173, loss_ctc=89.026, loss_interctc_layer6=84.804, loss_interctc_layer12=70.177, loss_interctc_layer15=64.293, loss_interctc_layer21=92.186, loss=80.097, backward_time=0.215, grad_norm=76.868, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.141, optim0_lr0=7.365e-05, train_time=4.797
+[gpua006:0/64] 2024-02-11 08:46:44,774 (trainer:756) INFO: 30epoch:train:7601-7700batch: iter_time=8.082e-05, forward_time=0.143, loss_ctc=80.479, loss_interctc_layer6=82.314, loss_interctc_layer12=67.866, loss_interctc_layer15=62.008, loss_interctc_layer21=83.367, loss=75.207, backward_time=0.209, grad_norm=113.742, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.139, optim0_lr0=7.364e-05, train_time=0.958
+[gpua006:0/64] 2024-02-11 08:48:51,996 (trainer:756) INFO: 30epoch:train:7701-7800batch: iter_time=8.828e-05, forward_time=0.144, loss_ctc=84.748, loss_interctc_layer6=93.894, loss_interctc_layer12=78.138, loss_interctc_layer15=71.892, loss_interctc_layer21=87.303, loss=83.195, backward_time=0.210, grad_norm=79.922, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.139, optim0_lr0=7.363e-05, train_time=1.272
+[gpua006:0/64] 2024-02-11 08:50:53,148 (trainer:756) INFO: 30epoch:train:7801-7900batch: iter_time=9.234e-05, forward_time=0.142, loss_ctc=67.472, loss_interctc_layer6=74.465, loss_interctc_layer12=61.146, loss_interctc_layer15=55.778, loss_interctc_layer21=69.885, loss=65.749, backward_time=0.209, grad_norm=75.154, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.139, optim0_lr0=7.362e-05, train_time=1.211
+[gpua006:0/64] 2024-02-11 08:52:59,664 (trainer:756) INFO: 30epoch:train:7901-8000batch: iter_time=9.596e-05, forward_time=0.143, loss_ctc=78.535, loss_interctc_layer6=82.291, loss_interctc_layer12=68.449, loss_interctc_layer15=62.843, loss_interctc_layer21=81.262, loss=74.676, backward_time=0.209, grad_norm=77.347, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.139, optim0_lr0=7.362e-05, train_time=1.265
+[gpua006:0/64] 2024-02-11 08:55:05,678 (trainer:756) INFO: 30epoch:train:8001-8100batch: iter_time=9.194e-05, forward_time=0.143, loss_ctc=76.692, loss_interctc_layer6=82.552, loss_interctc_layer12=68.354, loss_interctc_layer15=62.613, loss_interctc_layer21=79.379, loss=73.918, backward_time=0.210, grad_norm=82.347, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.139, optim0_lr0=7.361e-05, train_time=1.260
+[gpua006:0/64] 2024-02-11 08:57:23,577 (trainer:756) INFO: 30epoch:train:8101-8200batch: iter_time=8.129e-05, forward_time=0.145, loss_ctc=79.945, loss_interctc_layer6=85.282, loss_interctc_layer12=70.527, loss_interctc_layer15=64.590, loss_interctc_layer21=82.755, loss=76.620, backward_time=0.209, grad_norm=94.790, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.138, optim0_lr0=7.360e-05, train_time=1.379
+[gpua006:0/64] 2024-02-11 08:59:28,153 (trainer:756) INFO: 30epoch:train:8201-8300batch: iter_time=8.718e-05, forward_time=0.170, loss_ctc=71.985, loss_interctc_layer6=80.971, loss_interctc_layer12=67.026, loss_interctc_layer15=61.327, loss_interctc_layer21=74.417, loss=71.145, backward_time=0.215, grad_norm=74.798, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.138, optim0_lr0=7.359e-05, train_time=1.246
+[gpua006:0/64] 2024-02-11 09:01:38,854 (trainer:756) INFO: 30epoch:train:8301-8400batch: iter_time=9.060e-05, forward_time=0.159, loss_ctc=80.671, loss_interctc_layer6=81.170, loss_interctc_layer12=67.064, loss_interctc_layer15=61.290, loss_interctc_layer21=83.766, loss=74.792, backward_time=0.216, grad_norm=83.200, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.139, optim0_lr0=7.358e-05, train_time=1.307
+[gpua006:0/64] 2024-02-11 09:03:37,538 (trainer:756) INFO: 30epoch:train:8401-8500batch: iter_time=8.449e-05, forward_time=0.150, loss_ctc=86.286, loss_interctc_layer6=82.513, loss_interctc_layer12=68.211, loss_interctc_layer15=62.408, loss_interctc_layer21=89.329, loss=77.749, backward_time=0.210, grad_norm=71.436, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.138, optim0_lr0=7.357e-05, train_time=1.186
+[gpua006:0/64] 2024-02-11 09:05:35,768 (trainer:756) INFO: 30epoch:train:8501-8600batch: iter_time=5.864e-04, forward_time=0.176, loss_ctc=77.634, loss_interctc_layer6=76.568, loss_interctc_layer12=63.081, loss_interctc_layer15=57.579, loss_interctc_layer21=80.384, loss=71.049, backward_time=0.232, grad_norm=81.986, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.141, optim0_lr0=7.357e-05, train_time=1.182
+[gpua006:0/64] 2024-02-11 09:07:39,528 (trainer:756) INFO: 30epoch:train:8601-8700batch: iter_time=8.702e-05, forward_time=0.144, loss_ctc=82.278, loss_interctc_layer6=82.738, loss_interctc_layer12=68.591, loss_interctc_layer15=63.065, loss_interctc_layer21=85.245, loss=76.383, backward_time=0.211, grad_norm=73.911, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.138, optim0_lr0=7.356e-05, train_time=1.237
+[gpua006:0/64] 2024-02-11 09:09:01,405 (multiple_iter_factory:32) INFO: Building 7th iter-factory...
+[gpua006:0/64] 2024-02-11 09:09:20,124 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua006:0/64] 2024-02-11 09:09:23,563 (abs_task:1660) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TCTCPreprocessor object at 0x7fbac3d91780>)
+[gpua006:0/64] 2024-02-11 09:09:23,563 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, 
+[gpua006:0/64] 2024-02-11 09:09:23,567 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua006:0/64] 2024-02-11 09:14:31,604 (trainer:756) INFO: 30epoch:train:8701-8800batch: iter_time=2.886, forward_time=0.143, loss_ctc=80.172, loss_interctc_layer6=87.443, loss_interctc_layer12=72.655, loss_interctc_layer15=66.708, loss_interctc_layer21=82.795, loss=77.955, backward_time=0.209, grad_norm=83.009, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.137, optim0_lr0=7.355e-05, train_time=4.121
+[gpua006:0/64] 2024-02-11 09:16:29,217 (trainer:756) INFO: 30epoch:train:8801-8900batch: iter_time=7.831e-05, forward_time=0.143, loss_ctc=84.577, loss_interctc_layer6=84.239, loss_interctc_layer12=69.378, loss_interctc_layer15=63.440, loss_interctc_layer21=87.292, loss=77.785, backward_time=0.211, grad_norm=68.845, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.138, optim0_lr0=7.354e-05, train_time=1.176
+[gpua006:0/64] 2024-02-11 09:16:44,155 (trainer:687) WARNING: The grad norm is nan. Skipping updating the model.
+[gpua006:0/64] 2024-02-11 09:18:30,124 (trainer:756) INFO: 30epoch:train:8901-9000batch: iter_time=7.891e-05, forward_time=0.142, loss_ctc=68.769, loss_interctc_layer6=82.128, loss_interctc_layer12=68.196, loss_interctc_layer15=62.528, loss_interctc_layer21=71.182, loss=70.561, backward_time=0.210, grad_norm=82.918, clip=100.000, loss_scale=1.158e+31, optim_step_time=0.138, optim0_lr0=7.353e-05, train_time=1.209
+[gpua006:0/64] 2024-02-11 09:20:29,125 (trainer:756) INFO: 30epoch:train:9001-9100batch: iter_time=8.201e-05, forward_time=0.162, loss_ctc=73.614, loss_interctc_layer6=87.192, loss_interctc_layer12=72.237, loss_interctc_layer15=66.206, loss_interctc_layer21=76.115, loss=75.073, backward_time=0.218, grad_norm=169.800, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.139, optim0_lr0=7.353e-05, train_time=1.190
+[gpua006:0/64] 2024-02-11 09:22:53,493 (trainer:756) INFO: 30epoch:train:9101-9200batch: iter_time=8.788e-05, forward_time=0.155, loss_ctc=73.500, loss_interctc_layer6=83.781, loss_interctc_layer12=69.658, loss_interctc_layer15=63.928, loss_interctc_layer21=76.034, loss=73.380, backward_time=0.211, grad_norm=89.005, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.139, optim0_lr0=7.352e-05, train_time=1.443
+[gpua006:0/64] 2024-02-11 09:24:38,882 (trainer:756) INFO: 30epoch:train:9201-9300batch: iter_time=8.168e-05, forward_time=0.159, loss_ctc=77.929, loss_interctc_layer6=83.229, loss_interctc_layer12=69.026, loss_interctc_layer15=63.193, loss_interctc_layer21=80.638, loss=74.803, backward_time=0.218, grad_norm=63.917, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.140, optim0_lr0=7.351e-05, train_time=1.054
+[gpua006:0/64] 2024-02-11 09:27:01,630 (trainer:756) INFO: 30epoch:train:9301-9400batch: iter_time=8.299e-05, forward_time=0.147, loss_ctc=60.597, loss_interctc_layer6=77.515, loss_interctc_layer12=64.254, loss_interctc_layer15=58.842, loss_interctc_layer21=62.616, loss=64.765, backward_time=0.212, grad_norm=63.760, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.139, optim0_lr0=7.350e-05, train_time=1.427
+[gpua006:0/64] 2024-02-11 09:29:03,695 (trainer:756) INFO: 30epoch:train:9401-9500batch: iter_time=7.891e-05, forward_time=0.150, loss_ctc=78.022, loss_interctc_layer6=88.738, loss_interctc_layer12=73.611, loss_interctc_layer15=67.495, loss_interctc_layer21=80.810, loss=77.735, backward_time=0.215, grad_norm=71.701, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.139, optim0_lr0=7.349e-05, train_time=1.220
+[gpua006:0/64] 2024-02-11 09:33:01,905 (trainer:756) INFO: 30epoch:train:9501-9600batch: iter_time=8.046e-05, forward_time=0.183, loss_ctc=65.613, loss_interctc_layer6=71.735, loss_interctc_layer12=58.973, loss_interctc_layer15=53.691, loss_interctc_layer21=68.216, loss=63.645, backward_time=0.214, grad_norm=64.749, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.140, optim0_lr0=7.348e-05, train_time=2.381
+[gpua006:0/64] 2024-02-11 09:35:07,847 (trainer:756) INFO: 30epoch:train:9601-9700batch: iter_time=8.683e-05, forward_time=0.143, loss_ctc=90.210, loss_interctc_layer6=87.880, loss_interctc_layer12=72.713, loss_interctc_layer15=66.642, loss_interctc_layer21=93.753, loss=82.240, backward_time=0.209, grad_norm=81.251, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.139, optim0_lr0=7.348e-05, train_time=1.260
+[gpua006:0/64] 2024-02-11 09:37:23,753 (trainer:756) INFO: 30epoch:train:9701-9800batch: iter_time=8.261e-05, forward_time=0.142, loss_ctc=69.091, loss_interctc_layer6=76.598, loss_interctc_layer12=63.412, loss_interctc_layer15=57.968, loss_interctc_layer21=71.346, loss=67.683, backward_time=0.209, grad_norm=94.871, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.139, optim0_lr0=7.347e-05, train_time=1.360
+[gpua006:0/64] 2024-02-11 09:39:19,683 (trainer:756) INFO: 30epoch:train:9801-9900batch: iter_time=8.634e-05, forward_time=0.142, loss_ctc=82.403, loss_interctc_layer6=82.641, loss_interctc_layer12=67.910, loss_interctc_layer15=61.980, loss_interctc_layer21=85.500, loss=76.087, backward_time=0.209, grad_norm=66.467, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.139, optim0_lr0=7.346e-05, train_time=1.159
+[gpua006:0/64] 2024-02-11 09:39:47,754 (trainer:687) WARNING: The grad norm is nan. Skipping updating the model.
+[gpua006:0/64] 2024-02-11 09:41:04,054 (trainer:756) INFO: 30epoch:train:9901-10000batch: iter_time=8.391e-05, forward_time=0.141, loss_ctc=75.199, loss_interctc_layer6=84.620, loss_interctc_layer12=70.606, loss_interctc_layer15=64.994, loss_interctc_layer21=77.874, loss=74.658, backward_time=0.210, grad_norm=93.500, clip=100.000, loss_scale=6.351e+30, optim_step_time=0.139, optim0_lr0=7.345e-05, train_time=1.043
+[gpua006:0/64] 2024-02-11 09:41:24,093 (multiple_iter_factory:32) INFO: Building 8th iter-factory...
+[gpua006:0/64] 2024-02-11 09:41:42,706 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua006:0/64] 2024-02-11 09:41:46,063 (abs_task:1660) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TCTCPreprocessor object at 0x7fbaa6cf39a0>)
+[gpua006:0/64] 2024-02-11 09:41:46,063 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, 
+[gpua006:0/64] 2024-02-11 09:41:46,115 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua006:0/64] 2024-02-11 09:47:31,709 (trainer:756) INFO: 30epoch:train:10001-10100batch: iter_time=2.773, forward_time=0.166, loss_ctc=90.157, loss_interctc_layer6=84.785, loss_interctc_layer12=69.981, loss_interctc_layer15=63.983, loss_interctc_layer21=93.455, loss=80.472, backward_time=0.214, grad_norm=100.817, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.140, optim0_lr0=7.344e-05, train_time=3.876
+[gpua006:0/64] 2024-02-11 09:49:05,898 (trainer:756) INFO: 30epoch:train:10101-10200batch: iter_time=8.027e-05, forward_time=0.143, loss_ctc=80.650, loss_interctc_layer6=82.432, loss_interctc_layer12=67.971, loss_interctc_layer15=62.130, loss_interctc_layer21=83.471, loss=75.331, backward_time=0.212, grad_norm=84.033, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.139, optim0_lr0=7.343e-05, train_time=0.942
+[gpua006:0/64] 2024-02-11 09:50:53,639 (trainer:756) INFO: 30epoch:train:10201-10300batch: iter_time=8.107e-05, forward_time=0.144, loss_ctc=85.096, loss_interctc_layer6=94.497, loss_interctc_layer12=78.677, loss_interctc_layer15=72.314, loss_interctc_layer21=87.743, loss=83.665, backward_time=0.210, grad_norm=82.036, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.139, optim0_lr0=7.343e-05, train_time=1.077
+[gpua006:0/64] 2024-02-11 09:52:46,079 (trainer:756) INFO: 30epoch:train:10301-10400batch: iter_time=9.011e-05, forward_time=0.142, loss_ctc=67.278, loss_interctc_layer6=74.689, loss_interctc_layer12=61.407, loss_interctc_layer15=55.936, loss_interctc_layer21=69.679, loss=65.798, backward_time=0.208, grad_norm=59.395, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.138, optim0_lr0=7.342e-05, train_time=1.124
+[gpua006:0/64] 2024-02-11 09:54:55,256 (trainer:756) INFO: 30epoch:train:10401-10500batch: iter_time=9.260e-05, forward_time=0.195, loss_ctc=78.551, loss_interctc_layer6=82.114, loss_interctc_layer12=68.351, loss_interctc_layer15=62.773, loss_interctc_layer21=81.213, loss=74.600, backward_time=0.217, grad_norm=75.115, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.139, optim0_lr0=7.341e-05, train_time=1.291
+[gpua006:0/64] 2024-02-11 09:56:47,704 (trainer:756) INFO: 30epoch:train:10501-10600batch: iter_time=8.605e-05, forward_time=0.145, loss_ctc=75.077, loss_interctc_layer6=80.899, loss_interctc_layer12=66.875, loss_interctc_layer15=61.220, loss_interctc_layer21=77.644, loss=72.343, backward_time=0.211, grad_norm=72.327, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.138, optim0_lr0=7.340e-05, train_time=1.124
+[gpua006:0/64] 2024-02-11 10:00:04,288 (trainer:756) INFO: 30epoch:train:10601-10700batch: iter_time=8.668e-05, forward_time=0.146, loss_ctc=79.957, loss_interctc_layer6=85.696, loss_interctc_layer12=70.861, loss_interctc_layer15=64.952, loss_interctc_layer21=82.917, loss=76.877, backward_time=0.208, grad_norm=63.638, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.138, optim0_lr0=7.339e-05, train_time=1.966
+[gpua006:0/64] 2024-02-11 10:02:22,055 (trainer:756) INFO: 30epoch:train:10701-10800batch: iter_time=8.343e-05, forward_time=0.143, loss_ctc=72.864, loss_interctc_layer6=80.723, loss_interctc_layer12=66.777, loss_interctc_layer15=61.078, loss_interctc_layer21=75.427, loss=71.374, backward_time=0.208, grad_norm=76.283, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.138, optim0_lr0=7.339e-05, train_time=1.378
+[gpua006:0/64] 2024-02-11 10:04:29,914 (trainer:756) INFO: 30epoch:train:10801-10900batch: iter_time=8.536e-05, forward_time=0.143, loss_ctc=80.854, loss_interctc_layer6=81.317, loss_interctc_layer12=67.271, loss_interctc_layer15=61.523, loss_interctc_layer21=83.801, loss=74.953, backward_time=0.208, grad_norm=81.213, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.138, optim0_lr0=7.338e-05, train_time=1.278
+[gpua006:0/64] 2024-02-11 10:06:44,287 (trainer:756) INFO: 30epoch:train:10901-11000batch: iter_time=8.755e-05, forward_time=0.144, loss_ctc=86.602, loss_interctc_layer6=82.612, loss_interctc_layer12=68.325, loss_interctc_layer15=62.516, loss_interctc_layer21=89.753, loss=77.962, backward_time=0.209, grad_norm=77.054, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.138, optim0_lr0=7.337e-05, train_time=1.344
+[gpua006:0/64] 2024-02-11 10:08:41,450 (trainer:756) INFO: 30epoch:train:11001-11100batch: iter_time=8.202e-05, forward_time=0.150, loss_ctc=77.318, loss_interctc_layer6=76.416, loss_interctc_layer12=63.026, loss_interctc_layer15=57.647, loss_interctc_layer21=80.208, loss=70.923, backward_time=0.217, grad_norm=78.018, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.138, optim0_lr0=7.336e-05, train_time=1.171
+[gpua006:0/64] 2024-02-11 10:10:35,755 (trainer:756) INFO: 30epoch:train:11101-11200batch: iter_time=8.917e-05, forward_time=0.169, loss_ctc=83.060, loss_interctc_layer6=82.439, loss_interctc_layer12=68.465, loss_interctc_layer15=62.782, loss_interctc_layer21=86.194, loss=76.588, backward_time=0.221, grad_norm=78.574, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.139, optim0_lr0=7.335e-05, train_time=1.143
+[gpua006:0/64] 2024-02-11 10:11:51,740 (multiple_iter_factory:32) INFO: Building 9th iter-factory...
+[gpua006:0/64] 2024-02-11 10:12:10,176 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua006:0/64] 2024-02-11 10:12:13,617 (abs_task:1660) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TCTCPreprocessor object at 0x7fbaf54b3d00>)
+[gpua006:0/64] 2024-02-11 10:12:13,617 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, 
+[gpua006:0/64] 2024-02-11 10:12:13,643 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua006:0/64] 2024-02-11 10:17:11,274 (trainer:756) INFO: 30epoch:train:11201-11300batch: iter_time=2.800, forward_time=0.164, loss_ctc=82.535, loss_interctc_layer6=87.384, loss_interctc_layer12=72.391, loss_interctc_layer15=66.345, loss_interctc_layer21=85.469, loss=78.825, backward_time=0.212, grad_norm=85.860, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.138, optim0_lr0=7.334e-05, train_time=3.955
+[gpua006:0/64] 2024-02-11 10:19:00,183 (trainer:756) INFO: 30epoch:train:11301-11400batch: iter_time=7.929e-05, forward_time=0.143, loss_ctc=91.119, loss_interctc_layer6=84.487, loss_interctc_layer12=69.626, loss_interctc_layer15=63.584, loss_interctc_layer21=94.360, loss=80.635, backward_time=0.209, grad_norm=111.284, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.137, optim0_lr0=7.334e-05, train_time=1.089
+[gpua006:0/64] 2024-02-11 10:20:37,227 (trainer:756) INFO: 30epoch:train:11401-11500batch: iter_time=8.395e-05, forward_time=0.143, loss_ctc=74.840, loss_interctc_layer6=82.385, loss_interctc_layer12=68.370, loss_interctc_layer15=62.707, loss_interctc_layer21=77.486, loss=73.158, backward_time=0.209, grad_norm=62.539, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.138, optim0_lr0=7.333e-05, train_time=0.970
+[gpua006:0/64] 2024-02-11 10:22:44,865 (trainer:756) INFO: 30epoch:train:11501-11600batch: iter_time=9.306e-05, forward_time=0.143, loss_ctc=78.562, loss_interctc_layer6=86.482, loss_interctc_layer12=71.463, loss_interctc_layer15=65.468, loss_interctc_layer21=81.094, loss=76.614, backward_time=0.210, grad_norm=73.354, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.138, optim0_lr0=7.332e-05, train_time=1.277
+[gpua006:0/64] 2024-02-11 10:25:32,583 (trainer:756) INFO: 30epoch:train:11601-11700batch: iter_time=9.103e-05, forward_time=0.143, loss_ctc=78.219, loss_interctc_layer6=84.437, loss_interctc_layer12=70.211, loss_interctc_layer15=64.427, loss_interctc_layer21=80.825, loss=75.624, backward_time=0.208, grad_norm=69.100, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.138, optim0_lr0=7.331e-05, train_time=1.677
+[gpua006:0/64] 2024-02-11 10:27:42,126 (trainer:756) INFO: 30epoch:train:11701-11800batch: iter_time=9.309e-05, forward_time=0.144, loss_ctc=84.299, loss_interctc_layer6=83.397, loss_interctc_layer12=68.996, loss_interctc_layer15=63.116, loss_interctc_layer21=87.295, loss=77.421, backward_time=0.210, grad_norm=67.074, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.138, optim0_lr0=7.330e-05, train_time=1.295
+[gpua006:0/64] 2024-02-11 10:29:49,036 (trainer:756) INFO: 30epoch:train:11801-11900batch: iter_time=9.281e-05, forward_time=0.161, loss_ctc=65.442, loss_interctc_layer6=78.249, loss_interctc_layer12=64.905, loss_interctc_layer15=59.423, loss_interctc_layer21=67.562, loss=67.116, backward_time=0.213, grad_norm=63.556, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.139, optim0_lr0=7.329e-05, train_time=1.269
+[gpua006:0/64] 2024-02-11 10:31:59,015 (trainer:756) INFO: 30epoch:train:11901-12000batch: iter_time=7.929e-05, forward_time=0.151, loss_ctc=81.647, loss_interctc_layer6=89.300, loss_interctc_layer12=73.883, loss_interctc_layer15=67.713, loss_interctc_layer21=84.426, loss=79.394, backward_time=0.209, grad_norm=66.652, clip=100.000, loss_scale=8.823e+30, optim_step_time=0.138, optim0_lr0=7.329e-05, train_time=1.300
+[gpua006:0/64] 2024-02-11 10:34:19,829 (trainer:756) INFO: 30epoch:train:12001-12100batch: iter_time=7.927e-05, forward_time=0.168, loss_ctc=67.797, loss_interctc_layer6=71.299, loss_interctc_layer12=58.698, loss_interctc_layer15=53.498, loss_interctc_layer21=70.452, loss=64.349, backward_time=0.214, grad_norm=88.483, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.138, optim0_lr0=7.328e-05, train_time=1.408
+[gpua006:0/64] 2024-02-11 10:36:06,730 (trainer:756) INFO: 30epoch:train:12101-12200batch: iter_time=1.867e-04, forward_time=0.185, loss_ctc=96.335, loss_interctc_layer6=88.641, loss_interctc_layer12=73.258, loss_interctc_layer15=67.129, loss_interctc_layer21=100.012, loss=85.075, backward_time=0.224, grad_norm=86.046, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.139, optim0_lr0=7.327e-05, train_time=1.068
+[gpua006:0/64] 2024-02-11 10:38:32,449 (trainer:756) INFO: 30epoch:train:12201-12300batch: iter_time=8.982e-05, forward_time=0.142, loss_ctc=70.180, loss_interctc_layer6=75.980, loss_interctc_layer12=62.931, loss_interctc_layer15=57.532, loss_interctc_layer21=72.677, loss=67.860, backward_time=0.209, grad_norm=64.964, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.138, optim0_lr0=7.326e-05, train_time=1.457
+[gpua006:0/64] 2024-02-11 10:40:52,065 (trainer:756) INFO: 30epoch:train:12301-12400batch: iter_time=8.895e-05, forward_time=0.143, loss_ctc=86.256, loss_interctc_layer6=81.947, loss_interctc_layer12=67.316, loss_interctc_layer15=61.381, loss_interctc_layer21=89.504, loss=77.281, backward_time=0.210, grad_norm=78.343, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.138, optim0_lr0=7.325e-05, train_time=1.396
+[gpua006:0/64] 2024-02-11 10:42:35,742 (trainer:756) INFO: 30epoch:train:12401-12500batch: iter_time=8.446e-05, forward_time=0.142, loss_ctc=79.353, loss_interctc_layer6=83.874, loss_interctc_layer12=69.784, loss_interctc_layer15=64.239, loss_interctc_layer21=82.242, loss=75.898, backward_time=0.209, grad_norm=84.387, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.138, optim0_lr0=7.325e-05, train_time=1.036
+[gpua006:0/64] 2024-02-11 10:42:55,771 (multiple_iter_factory:32) INFO: Building 10th iter-factory...
+[gpua006:0/64] 2024-02-11 10:43:14,980 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua006:0/64] 2024-02-11 10:43:18,412 (abs_task:1660) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TCTCPreprocessor object at 0x7fbaee670130>)
+[gpua006:0/64] 2024-02-11 10:43:18,412 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, 
+[gpua006:0/64] 2024-02-11 10:43:18,415 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua006:0/64] 2024-02-11 10:49:21,618 (trainer:756) INFO: 30epoch:train:12501-12600batch: iter_time=2.977, forward_time=0.166, loss_ctc=81.879, loss_interctc_layer6=84.421, loss_interctc_layer12=69.749, loss_interctc_layer15=63.731, loss_interctc_layer21=84.918, loss=76.940, backward_time=0.216, grad_norm=78.131, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.138, optim0_lr0=7.324e-05, train_time=4.059
+[gpua006:0/64] 2024-02-11 10:50:59,529 (trainer:756) INFO: 30epoch:train:12601-12700batch: iter_time=8.721e-05, forward_time=0.144, loss_ctc=74.960, loss_interctc_layer6=82.273, loss_interctc_layer12=67.680, loss_interctc_layer15=61.729, loss_interctc_layer21=77.703, loss=72.869, backward_time=0.210, grad_norm=68.433, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.139, optim0_lr0=7.323e-05, train_time=0.979
+[gpua006:0/64] 2024-02-11 10:53:58,343 (trainer:756) INFO: 30epoch:train:12701-12800batch: iter_time=9.306e-05, forward_time=0.143, loss_ctc=78.949, loss_interctc_layer6=94.029, loss_interctc_layer12=78.176, loss_interctc_layer15=71.898, loss_interctc_layer21=81.232, loss=80.857, backward_time=0.208, grad_norm=73.278, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.138, optim0_lr0=7.322e-05, train_time=1.788
+[gpua006:0/64] 2024-02-11 10:57:58,408 (trainer:756) INFO: 30epoch:train:12801-12900batch: iter_time=9.124e-05, forward_time=0.142, loss_ctc=62.128, loss_interctc_layer6=74.926, loss_interctc_layer12=61.540, loss_interctc_layer15=56.015, loss_interctc_layer21=64.242, loss=63.770, backward_time=0.208, grad_norm=71.714, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.138, optim0_lr0=7.321e-05, train_time=2.400
+[gpua006:0/64] 2024-02-11 10:59:55,949 (trainer:756) INFO: 30epoch:train:12901-13000batch: iter_time=9.820e-05, forward_time=0.193, loss_ctc=75.995, loss_interctc_layer6=81.714, loss_interctc_layer12=67.992, loss_interctc_layer15=62.439, loss_interctc_layer21=78.442, loss=73.316, backward_time=0.222, grad_norm=81.969, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.140, optim0_lr0=7.320e-05, train_time=1.175
+[gpua006:0/64] 2024-02-11 11:01:33,509 (trainer:756) INFO: 30epoch:train:13001-13100batch: iter_time=9.000e-05, forward_time=0.143, loss_ctc=70.076, loss_interctc_layer6=81.238, loss_interctc_layer12=67.230, loss_interctc_layer15=61.450, loss_interctc_layer21=72.510, loss=70.501, backward_time=0.210, grad_norm=66.067, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.138, optim0_lr0=7.320e-05, train_time=0.976
+[gpua006:0/64] 2024-02-11 11:04:12,322 (trainer:756) INFO: 30epoch:train:13101-13200batch: iter_time=8.980e-05, forward_time=0.143, loss_ctc=72.823, loss_interctc_layer6=85.010, loss_interctc_layer12=70.206, loss_interctc_layer15=64.220, loss_interctc_layer21=75.580, loss=73.568, backward_time=0.209, grad_norm=65.378, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.138, optim0_lr0=7.319e-05, train_time=1.587
+[gpua006:0/64] 2024-02-11 11:06:08,323 (trainer:756) INFO: 30epoch:train:13201-13300batch: iter_time=8.427e-05, forward_time=0.145, loss_ctc=68.593, loss_interctc_layer6=80.164, loss_interctc_layer12=66.179, loss_interctc_layer15=60.432, loss_interctc_layer21=71.001, loss=69.274, backward_time=0.210, grad_norm=55.383, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.138, optim0_lr0=7.318e-05, train_time=1.161
+[gpua006:0/64] 2024-02-11 11:08:26,423 (trainer:756) INFO: 30epoch:train:13301-13400batch: iter_time=5.206e-04, forward_time=0.171, loss_ctc=75.131, loss_interctc_layer6=81.039, loss_interctc_layer12=66.859, loss_interctc_layer15=61.022, loss_interctc_layer21=77.865, loss=72.383, backward_time=0.229, grad_norm=73.410, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.143, optim0_lr0=7.317e-05, train_time=1.381
+[gpua006:0/64] 2024-02-11 11:12:54,038 (trainer:756) INFO: 30epoch:train:13401-13500batch: iter_time=0.031, forward_time=0.144, loss_ctc=82.987, loss_interctc_layer6=82.304, loss_interctc_layer12=68.006, loss_interctc_layer15=62.185, loss_interctc_layer21=86.021, loss=76.300, backward_time=0.209, grad_norm=57.068, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.138, optim0_lr0=7.316e-05, train_time=2.676
+[gpua006:0/64] 2024-02-11 11:14:59,725 (trainer:756) INFO: 30epoch:train:13501-13600batch: iter_time=8.916e-05, forward_time=0.143, loss_ctc=73.180, loss_interctc_layer6=76.067, loss_interctc_layer12=62.663, loss_interctc_layer15=57.175, loss_interctc_layer21=75.975, loss=69.012, backward_time=0.209, grad_norm=148.635, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.138, optim0_lr0=7.316e-05, train_time=1.257
+[gpua006:0/64] 2024-02-11 11:16:52,166 (trainer:756) INFO: 30epoch:train:13601-13700batch: iter_time=8.558e-05, forward_time=0.142, loss_ctc=79.947, loss_interctc_layer6=81.448, loss_interctc_layer12=67.322, loss_interctc_layer15=62.052, loss_interctc_layer21=82.876, loss=74.729, backward_time=0.210, grad_norm=85.971, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.137, optim0_lr0=7.315e-05, train_time=1.124
+[gpua006:0/64] 2024-02-11 11:18:14,205 (multiple_iter_factory:32) INFO: Building 11th iter-factory...
+[gpua006:0/64] 2024-02-11 11:18:32,728 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua006:0/64] 2024-02-11 11:18:36,166 (abs_task:1660) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TCTCPreprocessor object at 0x7fbb12777dc0>)
+[gpua006:0/64] 2024-02-11 11:18:36,166 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, 
+[gpua006:0/64] 2024-02-11 11:18:36,170 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua006:0/64] 2024-02-11 11:24:01,485 (trainer:756) INFO: 30epoch:train:13701-13800batch: iter_time=2.838, forward_time=0.177, loss_ctc=77.348, loss_interctc_layer6=87.163, loss_interctc_layer12=72.100, loss_interctc_layer15=66.096, loss_interctc_layer21=80.289, loss=76.599, backward_time=0.212, grad_norm=100.786, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.138, optim0_lr0=7.314e-05, train_time=4.293
+[gpua006:0/64] 2024-02-11 11:27:09,523 (trainer:756) INFO: 30epoch:train:13801-13900batch: iter_time=0.004, forward_time=0.146, loss_ctc=90.472, loss_interctc_layer6=84.269, loss_interctc_layer12=69.457, loss_interctc_layer15=63.407, loss_interctc_layer21=93.624, loss=80.246, backward_time=0.210, grad_norm=80.978, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.138, optim0_lr0=7.313e-05, train_time=1.880
+[gpua006:0/64] 2024-02-11 11:29:20,593 (trainer:756) INFO: 30epoch:train:13901-14000batch: iter_time=8.397e-05, forward_time=0.143, loss_ctc=74.303, loss_interctc_layer6=81.544, loss_interctc_layer12=67.659, loss_interctc_layer15=61.987, loss_interctc_layer21=76.871, loss=72.473, backward_time=0.208, grad_norm=60.703, clip=100.000, loss_scale=1.765e+31, optim_step_time=0.138, optim0_lr0=7.312e-05, train_time=1.310
+[gpua006:0/64] 2024-02-11 11:31:50,124 (trainer:756) INFO: 30epoch:train:14001-14100batch: iter_time=8.787e-05, forward_time=0.143, loss_ctc=77.760, loss_interctc_layer6=86.493, loss_interctc_layer12=71.492, loss_interctc_layer15=65.342, loss_interctc_layer21=80.254, loss=76.268, backward_time=0.208, grad_norm=115.488, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.138, optim0_lr0=7.311e-05, train_time=1.495
+[gpua006:0/64] 2024-02-11 11:34:30,593 (trainer:756) INFO: 30epoch:train:14101-14200batch: iter_time=9.368e-05, forward_time=0.142, loss_ctc=78.376, loss_interctc_layer6=83.377, loss_interctc_layer12=69.292, loss_interctc_layer15=63.528, loss_interctc_layer21=81.055, loss=75.126, backward_time=0.207, grad_norm=86.322, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.137, optim0_lr0=7.311e-05, train_time=1.605
+[gpua006:0/64] 2024-02-11 11:36:34,703 (trainer:756) INFO: 30epoch:train:14201-14300batch: iter_time=9.071e-05, forward_time=0.166, loss_ctc=83.720, loss_interctc_layer6=83.067, loss_interctc_layer12=68.704, loss_interctc_layer15=62.851, loss_interctc_layer21=86.593, loss=76.987, backward_time=0.238, grad_norm=204.325, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.140, optim0_lr0=7.310e-05, train_time=1.241
+[gpua006:0/64] 2024-02-11 11:38:43,770 (trainer:756) INFO: 30epoch:train:14301-14400batch: iter_time=9.333e-05, forward_time=0.143, loss_ctc=64.077, loss_interctc_layer6=77.564, loss_interctc_layer12=64.195, loss_interctc_layer15=58.701, loss_interctc_layer21=66.128, loss=66.133, backward_time=0.210, grad_norm=68.315, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.139, optim0_lr0=7.309e-05, train_time=1.290
+[gpua006:0/64] 2024-02-11 11:40:58,131 (trainer:756) INFO: 30epoch:train:14401-14500batch: iter_time=8.736e-05, forward_time=0.144, loss_ctc=81.061, loss_interctc_layer6=87.995, loss_interctc_layer12=72.678, loss_interctc_layer15=66.615, loss_interctc_layer21=84.028, loss=78.476, backward_time=0.209, grad_norm=64.717, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.139, optim0_lr0=7.308e-05, train_time=1.343
+[gpua006:0/64] 2024-02-11 11:42:56,062 (trainer:756) INFO: 30epoch:train:14501-14600batch: iter_time=1.018e-04, forward_time=0.143, loss_ctc=67.444, loss_interctc_layer6=71.430, loss_interctc_layer12=58.759, loss_interctc_layer15=53.549, loss_interctc_layer21=70.054, loss=64.247, backward_time=0.208, grad_norm=59.612, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.138, optim0_lr0=7.307e-05, train_time=1.179
+[gpua006:0/64] 2024-02-11 11:45:25,513 (trainer:756) INFO: 30epoch:train:14601-14700batch: iter_time=9.690e-05, forward_time=0.170, loss_ctc=96.281, loss_interctc_layer6=88.334, loss_interctc_layer12=72.937, loss_interctc_layer15=66.750, loss_interctc_layer21=100.001, loss=84.861, backward_time=0.210, grad_norm=88.428, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.139, optim0_lr0=7.307e-05, train_time=1.494
+[gpua006:0/64] 2024-02-11 11:47:52,753 (trainer:756) INFO: 30epoch:train:14701-14800batch: iter_time=9.051e-05, forward_time=0.162, loss_ctc=69.799, loss_interctc_layer6=76.165, loss_interctc_layer12=62.884, loss_interctc_layer15=57.437, loss_interctc_layer21=72.130, loss=67.683, backward_time=0.221, grad_norm=65.192, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.140, optim0_lr0=7.306e-05, train_time=1.472
+[gpua006:0/64] 2024-02-11 11:50:04,136 (trainer:756) INFO: 30epoch:train:14801-14900batch: iter_time=9.409e-05, forward_time=0.143, loss_ctc=85.427, loss_interctc_layer6=81.718, loss_interctc_layer12=67.026, loss_interctc_layer15=61.153, loss_interctc_layer21=88.475, loss=76.760, backward_time=0.210, grad_norm=98.621, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.139, optim0_lr0=7.305e-05, train_time=1.314
+[gpua006:0/64] 2024-02-11 11:52:28,352 (trainer:756) INFO: 30epoch:train:14901-15000batch: iter_time=9.487e-05, forward_time=0.142, loss_ctc=80.164, loss_interctc_layer6=84.700, loss_interctc_layer12=70.348, loss_interctc_layer15=64.774, loss_interctc_layer21=83.067, loss=76.611, backward_time=0.207, grad_norm=103.115, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.138, optim0_lr0=7.304e-05, train_time=1.442
+[gpua006:0/64] 2024-02-11 12:23:44,419 (trainer:355) INFO: 30epoch results: [train] iter_time=0.222, forward_time=0.151, loss_ctc=77.722, loss_interctc_layer6=83.000, loss_interctc_layer12=68.745, loss_interctc_layer15=63.006, loss_interctc_layer21=80.443, loss=74.583, backward_time=0.212, grad_norm=79.448, clip=100.000, loss_scale=2.001e+31, optim_step_time=0.139, optim0_lr0=7.366e-05, train_time=1.497, time=6 hours, 14 minutes and 43.63 seconds, total_count=450000, gpu_max_cached_mem_GB=33.436, [valid] loss_ctc=42.929, cer_ctc=0.200, loss_interctc_layer6=48.391, cer_interctc_layer6=0.216, loss_interctc_layer12=35.800, cer_interctc_layer12=0.150, loss_interctc_layer15=31.483, cer_interctc_layer15=0.126, loss_interctc_layer21=45.509, cer_interctc_layer21=0.211, loss=40.823, time=30 minutes and 51.89 seconds, total_count=140130, gpu_max_cached_mem_GB=33.436
+[gpua006:0/64] 2024-02-11 12:24:03,451 (trainer:410) INFO: The best model has been updated: valid.cer_ctc, valid.loss_ctc, valid.total_count
+[gpua006:0/64] 2024-02-11 12:24:03,471 (trainer:464) INFO: The model files were removed: exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/23epoch.pth, exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/24epoch.pth, exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/25epoch.pth
+[gpua006:0/64] 2024-02-11 12:24:03,472 (trainer:289) INFO: 31/45epoch started. Estimated time to finish: 4 days, 14 hours and 30 minutes
+[gpua006:0/64] 2024-02-11 12:24:03,487 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpua006:0/64] 2024-02-11 12:24:21,399 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua006:0/64] 2024-02-11 12:24:24,699 (abs_task:1660) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TCTCPreprocessor object at 0x7fbb11b63cd0>)
+[gpua006:0/64] 2024-02-11 12:24:24,699 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, 
+[gpua006:0/64] 2024-02-11 12:24:24,704 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua006:0/64] 2024-02-11 12:30:28,751 (trainer:756) INFO: 31epoch:train:1-100batch: iter_time=2.540, forward_time=0.171, loss_ctc=79.478, loss_interctc_layer6=91.249, loss_interctc_layer12=75.974, loss_interctc_layer15=69.762, loss_interctc_layer21=82.229, loss=79.738, backward_time=0.219, grad_norm=95.870, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.142, optim0_lr0=7.303e-05, train_time=3.852
+[gpua006:0/64] 2024-02-11 12:32:03,638 (trainer:756) INFO: 31epoch:train:101-200batch: iter_time=9.754e-05, forward_time=0.143, loss_ctc=90.742, loss_interctc_layer6=100.843, loss_interctc_layer12=84.404, loss_interctc_layer15=77.779, loss_interctc_layer21=93.712, loss=89.496, backward_time=0.211, grad_norm=93.727, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.141, optim0_lr0=7.303e-05, train_time=0.949
+[gpua006:0/64] 2024-02-11 12:33:49,036 (trainer:756) INFO: 31epoch:train:201-300batch: iter_time=1.070e-04, forward_time=0.144, loss_ctc=80.151, loss_interctc_layer6=87.363, loss_interctc_layer12=72.736, loss_interctc_layer15=66.888, loss_interctc_layer21=82.880, loss=78.004, backward_time=0.209, grad_norm=81.766, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.140, optim0_lr0=7.302e-05, train_time=1.054
+[gpua006:0/64] 2024-02-11 12:35:30,389 (trainer:756) INFO: 31epoch:train:301-400batch: iter_time=1.019e-04, forward_time=0.141, loss_ctc=80.738, loss_interctc_layer6=85.019, loss_interctc_layer12=70.916, loss_interctc_layer15=65.227, loss_interctc_layer21=83.820, loss=77.144, backward_time=0.208, grad_norm=81.993, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.138, optim0_lr0=7.301e-05, train_time=1.013
+[gpua006:0/64] 2024-02-11 12:37:11,043 (trainer:756) INFO: 31epoch:train:401-500batch: iter_time=9.972e-05, forward_time=0.141, loss_ctc=74.403, loss_interctc_layer6=90.799, loss_interctc_layer12=76.881, loss_interctc_layer15=71.370, loss_interctc_layer21=77.006, loss=78.092, backward_time=0.209, grad_norm=85.803, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.139, optim0_lr0=7.300e-05, train_time=1.006
+[gpua006:0/64] 2024-02-11 12:39:04,327 (trainer:756) INFO: 31epoch:train:501-600batch: iter_time=9.129e-05, forward_time=0.142, loss_ctc=61.394, loss_interctc_layer6=69.887, loss_interctc_layer12=57.788, loss_interctc_layer15=52.903, loss_interctc_layer21=63.427, loss=61.080, backward_time=0.209, grad_norm=92.592, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.139, optim0_lr0=7.299e-05, train_time=1.133
+[gpua006:0/64] 2024-02-11 12:40:03,436 (trainer:687) WARNING: The grad norm is nan. Skipping updating the model.
+[gpua006:0/64] 2024-02-11 12:40:43,039 (trainer:756) INFO: 31epoch:train:601-700batch: iter_time=8.953e-05, forward_time=0.143, loss_ctc=83.905, loss_interctc_layer6=91.537, loss_interctc_layer12=76.493, loss_interctc_layer15=70.470, loss_interctc_layer21=86.578, loss=81.797, backward_time=0.210, grad_norm=106.925, clip=100.000, loss_scale=1.598e+31, optim_step_time=0.138, optim0_lr0=7.298e-05, train_time=0.987
+srun: Job step aborted: Waiting up to 32 seconds for job step to finish.