diff --git "a/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/train.13.log" "b/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/train.13.log"
new file mode 100644--- /dev/null
+++ "b/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/train.13.log"
@@ -0,0 +1,4396 @@
+# Running on gpua014.delta.ncsa.illinois.edu
+# Started at Fri Dec 1 13:56:51 CST 2023
+# SLURMD_NODENAME=gpua014
+# SLURM_CLUSTER_NAME=delta
+# SLURM_CONF=/var/spool/slurmd/conf-cache/slurm.conf
+# SLURM_CPUS_ON_NODE=64
+# SLURM_CPUS_PER_TASK=64
+# SLURM_EXPORT_ENV=PATH
+# SLURM_GET_USER_ENV=1
+# SLURM_GPUS_ON_NODE=4
+# SLURM_GTIDS=0
+# SLURM_JOBID=2718083
+# SLURM_JOB_ACCOUNT=bbjs-delta-gpu
+# SLURM_JOB_CPUS_PER_NODE='64(x16)'
+# SLURM_JOB_END_TIME=1701633393
+# SLURM_JOB_GID=202
+# SLURM_JOB_GPUS=0,1,2,3
+# SLURM_JOB_ID=2718083
+# SLURM_JOB_NAME=exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/train.log
+# SLURM_JOB_NODELIST='gpua[014,016,030,032-033,035,037,039-040,042-043,069-070,072,097,099]'
+# SLURM_JOB_NUM_NODES=16
+# SLURM_JOB_PARTITION=gpuA100x4
+# SLURM_JOB_QOS=bbjs-delta-gpu
+# SLURM_JOB_START_TIME=1701460593
+# SLURM_JOB_UID=68077
+# SLURM_JOB_USER=peng6
+# SLURM_LOCALID=0
+# SLURM_MEM_PER_NODE=240000
+# SLURM_NNODES=16
+# SLURM_NODEID=0
+# SLURM_NODELIST='gpua[014,016,030,032-033,035,037,039-040,042-043,069-070,072,097,099]'
+# SLURM_NODE_ALIASES='(null)'
+# SLURM_OPEN_MODE=a
+# SLURM_PRIO_PROCESS=0
+# SLURM_PROCID=0
+# SLURM_SUBMIT_DIR=/scratch/bbjs/peng6/espnet-whisper-public/egs2/owsm_v3.1/s2t1
+# SLURM_SUBMIT_HOST=dt-login01.delta.ncsa.illinois.edu
+# SLURM_TASKS_PER_NODE='1(x16)'
+# SLURM_TASK_PID=120451
+# SLURM_TOPOLOGY_ADDR=ss00.ss05.gpua014
+# SLURM_TOPOLOGY_ADDR_PATTERN=switch.switch.node
+# SLURM_WORKING_CLUSTER=delta:dt-sched:6817:9984:109
+# srun --export=ALL python3 -m espnet2.bin.s2t_train --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000 --config conf/train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_abbe9c57-62e2-49a8-911b-29b039d30841 
+/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000 --config conf/train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape /scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000 --config conf/train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape /scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000 --config conf/train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape /scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000 --config conf/train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape /scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000 --config conf/train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape /scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000 --config conf/train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape /scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000 --config conf/train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape /scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000 --config conf/train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape /scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000 --config conf/train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape /scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000 --config conf/train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape /scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000 --config conf/train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape /scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000 --config conf/train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape /scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000 --config conf/train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape /scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000 --config conf/train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape /scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000 --config conf/train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape /scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000 --config conf/train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multipr--fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_abbe9c57-62e2-49a8-911b-29b039d30841
+ocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_abbe9c57-62e2-49a8-911b-29b039d30841
+--fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multipr--fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multipr--fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_abbe9c57-62e2-49a8-911b-29b039d30841
+ocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_abbe9c57-62e2-49a8-911b-29b039d30841
+ocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_abbe9c57-62e2-49a8-911b-29b039d30841
+--fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multipr--fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_abbe9c57-62e2-49a8-911b-29b039d30841
+--fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multipr--fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multipr--fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_abbe9c57-62e2-49a8-911b-29b039d30841
+--fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_abbe9c57-62e2-49a8-911b-29b039d30841
+ocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_abbe9c57-62e2-49a8-911b-29b039d30841
+ocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_abbe9c57-62e2-49a8-911b-29b039d30841
+--fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_abbe9c57-62e2-49a8-911b-29b039d30841
+ocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_abbe9c57-62e2-49a8-911b-29b039d30841
+--fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multipr--fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_abbe9c57-62e2-49a8-911b-29b039d30841
+ocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_abbe9c57-62e2-49a8-911b-29b039d30841
+--fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multipr--fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_abbe9c57-62e2-49a8-911b-29b039d30841
+ocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/owsm_v3.1/s2t1/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/.dist_init_abbe9c57-62e2-49a8-911b-29b039d30841
+[gpua014:0/64] 2023-12-01 14:00:04,567 (distributed_c10d:319) INFO: Added key: store_based_barrier_key:1 to store for rank: 0
+[gpua014:0/64] 2023-12-01 14:00:05,399 (distributed_c10d:353) INFO: Rank 0: Completed store-based barrier for key:store_based_barrier_key:1 with 64 nodes.
+[gpua014:0/64] 2023-12-01 14:00:05,429 (s2t:464) INFO: Vocabulary size: 50002
+[gpua014:0/64] 2023-12-01 14:00:18,688 (abs_task:1231) INFO: pytorch.version=1.13.1, cuda.available=True, cudnn.version=8500, cudnn.benchmark=False, cudnn.deterministic=True
+[gpua014:0/64] 2023-12-01 14:00:18,699 (abs_task:1232) INFO: Model structure:
+ESPnetS2TModel(
+  (frontend): DefaultFrontend(
+    (stft): Stft(n_fft=512, win_length=400, hop_length=160, center=True, normalized=False, onesided=True)
+    (frontend): Frontend()
+    (logmel): LogMel(sr=16000, n_fft=512, n_mels=80, fmin=0, fmax=8000.0, htk=False)
+  )
+  (specaug): SpecAug(
+    (freq_mask): MaskAlongAxis(mask_width_range=[0, 27], num_mask=2, axis=freq)
+    (time_mask): MaskAlongAxisVariableMaxWidth(mask_width_ratio_range=[0.0, 0.05], num_mask=10, axis=time)
+  )
+  (normalize): GlobalMVN(stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz, norm_means=True, norm_vars=True)
+  (encoder): EBranchformerEncoder(
+    (embed): Conv2dSubsampling(
+      (conv): Sequential(
+        (0): Conv2d(1, 1024, kernel_size=(3, 3), stride=(2, 2))
+        (1): ReLU()
+        (2): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(2, 2))
+        (3): ReLU()
+      )
+      (out): Sequential(
+        (0): Linear(in_features=19456, out_features=1024, bias=True)
+        (1): PositionalEncoding(
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+      )
+    )
+    (encoders): MultiSequential(
+      (0): EBranchformerEncoderLayer(
+        (attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+          (q_norm): Identity()
+          (k_norm): Identity()
+        )
+        (cgmlp): ConvolutionalGatingMLP(
+          (channel_proj1): Sequential(
+            (0): Linear(in_features=1024, out_features=4096, bias=True)
+            (1): GELU(approximate='none')
+          )
+          (csgu): ConvolutionalSpatialGatingUnit(
+            (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True)
+            (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+            (act): Identity()
+            (dropout): Dropout(p=0.1, inplace=False)
+          )
+          (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (feed_forward_macaron): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+        (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+        (merge_proj): Linear(in_features=2048, out_features=1024, bias=True)
+      )
+      (1): EBranchformerEncoderLayer(
+        (attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+          (q_norm): Identity()
+          (k_norm): Identity()
+        )
+        (cgmlp): ConvolutionalGatingMLP(
+          (channel_proj1): Sequential(
+            (0): Linear(in_features=1024, out_features=4096, bias=True)
+            (1): GELU(approximate='none')
+          )
+          (csgu): ConvolutionalSpatialGatingUnit(
+            (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True)
+            (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+            (act): Identity()
+            (dropout): Dropout(p=0.1, inplace=False)
+          )
+          (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (feed_forward_macaron): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+        (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+        (merge_proj): Linear(in_features=2048, out_features=1024, bias=True)
+      )
+      (2): EBranchformerEncoderLayer(
+        (attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+          (q_norm): Identity()
+          (k_norm): Identity()
+        )
+        (cgmlp): ConvolutionalGatingMLP(
+          (channel_proj1): Sequential(
+            (0): Linear(in_features=1024, out_features=4096, bias=True)
+            (1): GELU(approximate='none')
+          )
+          (csgu): ConvolutionalSpatialGatingUnit(
+            (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True)
+            (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+            (act): Identity()
+            (dropout): Dropout(p=0.1, inplace=False)
+          )
+          (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (feed_forward_macaron): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+        (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+        (merge_proj): Linear(in_features=2048, out_features=1024, bias=True)
+      )
+      (3): EBranchformerEncoderLayer(
+        (attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+          (q_norm): Identity()
+          (k_norm): Identity()
+        )
+        (cgmlp): ConvolutionalGatingMLP(
+          (channel_proj1): Sequential(
+            (0): Linear(in_features=1024, out_features=4096, bias=True)
+            (1): GELU(approximate='none')
+          )
+          (csgu): ConvolutionalSpatialGatingUnit(
+            (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True)
+            (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+            (act): Identity()
+            (dropout): Dropout(p=0.1, inplace=False)
+          )
+          (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (feed_forward_macaron): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+        (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+        (merge_proj): Linear(in_features=2048, out_features=1024, bias=True)
+      )
+      (4): EBranchformerEncoderLayer(
+        (attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+          (q_norm): Identity()
+          (k_norm): Identity()
+        )
+        (cgmlp): ConvolutionalGatingMLP(
+          (channel_proj1): Sequential(
+            (0): Linear(in_features=1024, out_features=4096, bias=True)
+            (1): GELU(approximate='none')
+          )
+          (csgu): ConvolutionalSpatialGatingUnit(
+            (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True)
+            (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+            (act): Identity()
+            (dropout): Dropout(p=0.1, inplace=False)
+          )
+          (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (feed_forward_macaron): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+        (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+        (merge_proj): Linear(in_features=2048, out_features=1024, bias=True)
+      )
+      (5): EBranchformerEncoderLayer(
+        (attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+          (q_norm): Identity()
+          (k_norm): Identity()
+        )
+        (cgmlp): ConvolutionalGatingMLP(
+          (channel_proj1): Sequential(
+            (0): Linear(in_features=1024, out_features=4096, bias=True)
+            (1): GELU(approximate='none')
+          )
+          (csgu): ConvolutionalSpatialGatingUnit(
+            (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True)
+            (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+            (act): Identity()
+            (dropout): Dropout(p=0.1, inplace=False)
+          )
+          (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (feed_forward_macaron): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+        (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+        (merge_proj): Linear(in_features=2048, out_features=1024, bias=True)
+      )
+      (6): EBranchformerEncoderLayer(
+        (attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+          (q_norm): Identity()
+          (k_norm): Identity()
+        )
+        (cgmlp): ConvolutionalGatingMLP(
+          (channel_proj1): Sequential(
+            (0): Linear(in_features=1024, out_features=4096, bias=True)
+            (1): GELU(approximate='none')
+          )
+          (csgu): ConvolutionalSpatialGatingUnit(
+            (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True)
+            (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+            (act): Identity()
+            (dropout): Dropout(p=0.1, inplace=False)
+          )
+          (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (feed_forward_macaron): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+        (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+        (merge_proj): Linear(in_features=2048, out_features=1024, bias=True)
+      )
+      (7): EBranchformerEncoderLayer(
+        (attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+          (q_norm): Identity()
+          (k_norm): Identity()
+        )
+        (cgmlp): ConvolutionalGatingMLP(
+          (channel_proj1): Sequential(
+            (0): Linear(in_features=1024, out_features=4096, bias=True)
+            (1): GELU(approximate='none')
+          )
+          (csgu): ConvolutionalSpatialGatingUnit(
+            (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True)
+            (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+            (act): Identity()
+            (dropout): Dropout(p=0.1, inplace=False)
+          )
+          (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (feed_forward_macaron): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+        (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+        (merge_proj): Linear(in_features=2048, out_features=1024, bias=True)
+      )
+      (8): EBranchformerEncoderLayer(
+        (attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+          (q_norm): Identity()
+          (k_norm): Identity()
+        )
+        (cgmlp): ConvolutionalGatingMLP(
+          (channel_proj1): Sequential(
+            (0): Linear(in_features=1024, out_features=4096, bias=True)
+            (1): GELU(approximate='none')
+          )
+          (csgu): ConvolutionalSpatialGatingUnit(
+            (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True)
+            (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+            (act): Identity()
+            (dropout): Dropout(p=0.1, inplace=False)
+          )
+          (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (feed_forward_macaron): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+        (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+        (merge_proj): Linear(in_features=2048, out_features=1024, bias=True)
+      )
+      (9): EBranchformerEncoderLayer(
+        (attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+          (q_norm): Identity()
+          (k_norm): Identity()
+        )
+        (cgmlp): ConvolutionalGatingMLP(
+          (channel_proj1): Sequential(
+            (0): Linear(in_features=1024, out_features=4096, bias=True)
+            (1): GELU(approximate='none')
+          )
+          (csgu): ConvolutionalSpatialGatingUnit(
+            (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True)
+            (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+            (act): Identity()
+            (dropout): Dropout(p=0.1, inplace=False)
+          )
+          (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (feed_forward_macaron): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+        (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+        (merge_proj): Linear(in_features=2048, out_features=1024, bias=True)
+      )
+      (10): EBranchformerEncoderLayer(
+        (attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+          (q_norm): Identity()
+          (k_norm): Identity()
+        )
+        (cgmlp): ConvolutionalGatingMLP(
+          (channel_proj1): Sequential(
+            (0): Linear(in_features=1024, out_features=4096, bias=True)
+            (1): GELU(approximate='none')
+          )
+          (csgu): ConvolutionalSpatialGatingUnit(
+            (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True)
+            (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+            (act): Identity()
+            (dropout): Dropout(p=0.1, inplace=False)
+          )
+          (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (feed_forward_macaron): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+        (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+        (merge_proj): Linear(in_features=2048, out_features=1024, bias=True)
+      )
+      (11): EBranchformerEncoderLayer(
+        (attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+          (q_norm): Identity()
+          (k_norm): Identity()
+        )
+        (cgmlp): ConvolutionalGatingMLP(
+          (channel_proj1): Sequential(
+            (0): Linear(in_features=1024, out_features=4096, bias=True)
+            (1): GELU(approximate='none')
+          )
+          (csgu): ConvolutionalSpatialGatingUnit(
+            (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True)
+            (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+            (act): Identity()
+            (dropout): Dropout(p=0.1, inplace=False)
+          )
+          (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (feed_forward_macaron): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+        (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+        (merge_proj): Linear(in_features=2048, out_features=1024, bias=True)
+      )
+      (12): EBranchformerEncoderLayer(
+        (attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+          (q_norm): Identity()
+          (k_norm): Identity()
+        )
+        (cgmlp): ConvolutionalGatingMLP(
+          (channel_proj1): Sequential(
+            (0): Linear(in_features=1024, out_features=4096, bias=True)
+            (1): GELU(approximate='none')
+          )
+          (csgu): ConvolutionalSpatialGatingUnit(
+            (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True)
+            (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+            (act): Identity()
+            (dropout): Dropout(p=0.1, inplace=False)
+          )
+          (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (feed_forward_macaron): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+        (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+        (merge_proj): Linear(in_features=2048, out_features=1024, bias=True)
+      )
+      (13): EBranchformerEncoderLayer(
+        (attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+          (q_norm): Identity()
+          (k_norm): Identity()
+        )
+        (cgmlp): ConvolutionalGatingMLP(
+          (channel_proj1): Sequential(
+            (0): Linear(in_features=1024, out_features=4096, bias=True)
+            (1): GELU(approximate='none')
+          )
+          (csgu): ConvolutionalSpatialGatingUnit(
+            (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True)
+            (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+            (act): Identity()
+            (dropout): Dropout(p=0.1, inplace=False)
+          )
+          (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (feed_forward_macaron): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+        (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+        (merge_proj): Linear(in_features=2048, out_features=1024, bias=True)
+      )
+      (14): EBranchformerEncoderLayer(
+        (attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+          (q_norm): Identity()
+          (k_norm): Identity()
+        )
+        (cgmlp): ConvolutionalGatingMLP(
+          (channel_proj1): Sequential(
+            (0): Linear(in_features=1024, out_features=4096, bias=True)
+            (1): GELU(approximate='none')
+          )
+          (csgu): ConvolutionalSpatialGatingUnit(
+            (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True)
+            (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+            (act): Identity()
+            (dropout): Dropout(p=0.1, inplace=False)
+          )
+          (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (feed_forward_macaron): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+        (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+        (merge_proj): Linear(in_features=2048, out_features=1024, bias=True)
+      )
+      (15): EBranchformerEncoderLayer(
+        (attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+          (q_norm): Identity()
+          (k_norm): Identity()
+        )
+        (cgmlp): ConvolutionalGatingMLP(
+          (channel_proj1): Sequential(
+            (0): Linear(in_features=1024, out_features=4096, bias=True)
+            (1): GELU(approximate='none')
+          )
+          (csgu): ConvolutionalSpatialGatingUnit(
+            (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True)
+            (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+            (act): Identity()
+            (dropout): Dropout(p=0.1, inplace=False)
+          )
+          (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (feed_forward_macaron): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+        (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+        (merge_proj): Linear(in_features=2048, out_features=1024, bias=True)
+      )
+      (16): EBranchformerEncoderLayer(
+        (attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+          (q_norm): Identity()
+          (k_norm): Identity()
+        )
+        (cgmlp): ConvolutionalGatingMLP(
+          (channel_proj1): Sequential(
+            (0): Linear(in_features=1024, out_features=4096, bias=True)
+            (1): GELU(approximate='none')
+          )
+          (csgu): ConvolutionalSpatialGatingUnit(
+            (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True)
+            (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+            (act): Identity()
+            (dropout): Dropout(p=0.1, inplace=False)
+          )
+          (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (feed_forward_macaron): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+        (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+        (merge_proj): Linear(in_features=2048, out_features=1024, bias=True)
+      )
+      (17): EBranchformerEncoderLayer(
+        (attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+          (q_norm): Identity()
+          (k_norm): Identity()
+        )
+        (cgmlp): ConvolutionalGatingMLP(
+          (channel_proj1): Sequential(
+            (0): Linear(in_features=1024, out_features=4096, bias=True)
+            (1): GELU(approximate='none')
+          )
+          (csgu): ConvolutionalSpatialGatingUnit(
+            (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True)
+            (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+            (act): Identity()
+            (dropout): Dropout(p=0.1, inplace=False)
+          )
+          (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (feed_forward_macaron): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): Swish()
+        )
+        (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+        (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048)
+        (merge_proj): Linear(in_features=2048, out_features=1024, bias=True)
+      )
+    )
+    (after_norm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+  )
+  (decoder): TransformerDecoder(
+    (embed): Sequential(
+      (0): Embedding(50002, 1024)
+      (1): PositionalEncoding(
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+    )
+    (after_norm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+    (output_layer): Linear(in_features=1024, out_features=50002, bias=True)
+    (decoders): MultiSequential(
+      (0): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+          (q_norm): Identity()
+          (k_norm): Identity()
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+          (q_norm): Identity()
+          (k_norm): Identity()
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (1): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+          (q_norm): Identity()
+          (k_norm): Identity()
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+          (q_norm): Identity()
+          (k_norm): Identity()
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (2): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+          (q_norm): Identity()
+          (k_norm): Identity()
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+          (q_norm): Identity()
+          (k_norm): Identity()
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (3): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+          (q_norm): Identity()
+          (k_norm): Identity()
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+          (q_norm): Identity()
+          (k_norm): Identity()
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (4): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+          (q_norm): Identity()
+          (k_norm): Identity()
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+          (q_norm): Identity()
+          (k_norm): Identity()
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (5): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+          (q_norm): Identity()
+          (k_norm): Identity()
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+          (q_norm): Identity()
+          (k_norm): Identity()
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (6): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+          (q_norm): Identity()
+          (k_norm): Identity()
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+          (q_norm): Identity()
+          (k_norm): Identity()
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (7): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+          (q_norm): Identity()
+          (k_norm): Identity()
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+          (q_norm): Identity()
+          (k_norm): Identity()
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (8): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+          (q_norm): Identity()
+          (k_norm): Identity()
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+          (q_norm): Identity()
+          (k_norm): Identity()
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (9): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+          (q_norm): Identity()
+          (k_norm): Identity()
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+          (q_norm): Identity()
+          (k_norm): Identity()
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (10): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+          (q_norm): Identity()
+          (k_norm): Identity()
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+          (q_norm): Identity()
+          (k_norm): Identity()
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (11): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+          (q_norm): Identity()
+          (k_norm): Identity()
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+          (q_norm): Identity()
+          (k_norm): Identity()
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (12): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+          (q_norm): Identity()
+          (k_norm): Identity()
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+          (q_norm): Identity()
+          (k_norm): Identity()
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (13): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+          (q_norm): Identity()
+          (k_norm): Identity()
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+          (q_norm): Identity()
+          (k_norm): Identity()
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (14): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+          (q_norm): Identity()
+          (k_norm): Identity()
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+          (q_norm): Identity()
+          (k_norm): Identity()
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (15): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+          (q_norm): Identity()
+          (k_norm): Identity()
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+          (q_norm): Identity()
+          (k_norm): Identity()
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (16): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+          (q_norm): Identity()
+          (k_norm): Identity()
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+          (q_norm): Identity()
+          (k_norm): Identity()
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (17): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+          (q_norm): Identity()
+          (k_norm): Identity()
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Identity()
+          (q_norm): Identity()
+          (k_norm): Identity()
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+    )
+  )
+  (criterion_att): LabelSmoothingLoss(
+    (criterion): KLDivLoss()
+  )
+  (ctc): CTC(
+    (ctc_lo): Linear(in_features=1024, out_features=50002, bias=True)
+    (ctc_loss): CTCLoss()
+  )
+)
+
+Model summary:
+    Class Name: ESPnetS2TModel
+    Total Number of model parameters: 1.02 B
+    Number of trainable parameters: 1.02 B (100.0%)
+    Size: 4.07 GB
+    Type: torch.float32
+[gpua014:0/64] 2023-12-01 14:00:18,699 (abs_task:1235) INFO: Optimizer:
+AdamW (
+Parameter Group 0
+    amsgrad: False
+    betas: [0.9, 0.98]
+    capturable: False
+    eps: 1e-06
+    foreach: None
+    initial_lr: 0.0002
+    lr: 1.6666666666666667e-09
+    maximize: False
+    weight_decay: 0.0
+)
+[gpua014:0/64] 2023-12-01 14:00:18,699 (abs_task:1236) INFO: Scheduler: PiecewiseLinearWarmupLR(warmup_steps_list=[0, 30000, 60000], warmup_lr_list=[0.0, 5e-05, 0.0002])
+[gpua014:0/64] 2023-12-01 14:00:18,701 (abs_task:1245) INFO: Saving the configuration in exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/config.yaml
+[gpua014:0/64] 2023-12-01 14:00:24,005 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua014:0/64] 2023-12-01 14:00:24,901 (abs_task:1616) INFO: [valid] dataset:
+ESPnetDataset(
+  speech: {"path": "dump/raw/dev_v3/wav.scp", "type": "kaldi_ark"}
+  text_prev: {"path": "dump/raw/dev_v3/text.prev", "type": "text"}
+  text_ctc: {"path": "dump/raw/dev_v3/text.ctc", "type": "text"}
+  text: {"path": "dump/raw/dev_v3/text", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TPreprocessor object at 0x7fafc06ff9a0>)
+[gpua014:0/64] 2023-12-01 14:00:24,901 (abs_task:1617) INFO: [valid] Batch sampler: UnsortedBatchSampler(N-batch=4671, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/valid/speech_shape, 
+[gpua014:0/64] 2023-12-01 14:00:24,902 (abs_task:1618) INFO: [valid] mini-batch sizes summary: N-batch=4671, mean=256.0, min=256, max=257
+gpua014:120543:120543 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.14<0>
+gpua014:120543:120543 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua014:120543:120543 [0] NCCL INFO cudaDriverVersion 12020
+NCCL version 2.14.3+cuda11.7
+[gpua014:0/64] 2023-12-01 14:00:34,840 (trainer:284) INFO: 1/40epoch started
+[gpua014:0/64] 2023-12-01 14:00:35,004 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpua014:0/64] 2023-12-01 14:00:52,537 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua014:0/64] 2023-12-01 14:00:55,830 (abs_task:1616) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TPreprocessor object at 0x7faf21f6c8b0>)
+[gpua014:0/64] 2023-12-01 14:00:55,830 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, 
+[gpua014:0/64] 2023-12-01 14:00:55,834 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+gpua070:113435:113435 [1] NCCL INFO cudaDriverVersion 12020
+gpua070:113435:113435 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.70<0>
+gpua070:113435:113435 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua070:113435:113543 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.70<0>
+gpua070:113435:113543 [1] NCCL INFO Using network IB
+gpua070:113435:113543 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpua070:113435:113543 [1] NCCL INFO Trees [0] 50/40/-1->49->48 [1] 50/-1/-1->49->48
+gpua070:113435:113543 [1] NCCL INFO Channel 00/0 : 49[46000] -> 50[85000] via P2P/IPC/read
+gpua070:113435:113543 [1] NCCL INFO Channel 01/0 : 49[46000] -> 50[85000] via P2P/IPC/read
+gpua070:113435:113543 [1] NCCL INFO Connected all rings
+gpua070:113435:113543 [1] NCCL INFO Channel 00/0 : 40[7000] -> 49[46000] [receive] via NET/IB/0
+gpua070:113435:113543 [1] NCCL INFO Channel 00/0 : 49[46000] -> 40[7000] [send] via NET/IB/0
+gpua070:113435:113543 [1] NCCL INFO Channel 00/0 : 49[46000] -> 48[7000] via P2P/IPC/read
+gpua070:113435:113543 [1] NCCL INFO Channel 01/0 : 49[46000] -> 48[7000] via P2P/IPC/read
+gpua070:113435:113543 [1] NCCL INFO Connected all trees
+gpua070:113435:113543 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua070:113435:113543 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua070:113435:113543 [1] NCCL INFO comm 0x9603cb0 rank 49 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpua014:120544:120544 [1] NCCL INFO cudaDriverVersion 12020
+gpua014:120544:120544 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.14<0>
+gpua014:120544:120544 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua014:120544:120651 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.14<0>
+gpua014:120544:120651 [1] NCCL INFO Using network IB
+gpua014:120544:120651 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpua014:120544:120651 [1] NCCL INFO Trees [0] 2/-1/-1->1->0 [1] 2/-1/-1->1->0
+gpua014:120544:120651 [1] NCCL INFO Channel 00/0 : 1[46000] -> 2[85000] via P2P/IPC/read
+gpua014:120544:120651 [1] NCCL INFO Channel 01/0 : 1[46000] -> 2[85000] via P2P/IPC/read
+gpua014:120544:120651 [1] NCCL INFO Connected all rings
+gpua014:120544:120651 [1] NCCL INFO Channel 00/0 : 1[46000] -> 0[7000] via P2P/IPC/read
+gpua014:120544:120651 [1] NCCL INFO Channel 01/0 : 1[46000] -> 0[7000] via P2P/IPC/read
+gpua014:120544:120651 [1] NCCL INFO Connected all trees
+gpua014:120544:120651 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua014:120544:120651 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua014:120544:120651 [1] NCCL INFO comm 0xdaf0980 rank 1 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpua070:113434:113434 [0] NCCL INFO cudaDriverVersion 12020
+gpua070:113434:113434 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.70<0>
+gpua070:113434:113434 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua070:113434:113545 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.70<0>
+gpua070:113434:113545 [0] NCCL INFO Using network IB
+gpua070:113434:113545 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpua070:113434:113545 [0] NCCL INFO Trees [0] 49/56/-1->48->32 [1] 49/-1/-1->48->52
+gpua070:113434:113545 [0] NCCL INFO Channel 00/0 : 47[c7000] -> 48[7000] [receive] via NET/IB/0
+gpua070:113434:113545 [0] NCCL INFO Channel 01/0 : 47[c7000] -> 48[7000] [receive] via NET/IB/0
+gpua070:113434:113545 [0] NCCL INFO Channel 00/0 : 48[7000] -> 49[46000] via P2P/IPC/read
+gpua070:113434:113545 [0] NCCL INFO Channel 01/0 : 48[7000] -> 49[46000] via P2P/IPC/read
+gpua070:113434:113545 [0] NCCL INFO Connected all rings
+gpua070:113434:113545 [0] NCCL INFO Channel 01/0 : 48[7000] -> 52[7000] [send] via NET/IB/0
+gpua070:113434:113545 [0] NCCL INFO Channel 00/0 : 48[7000] -> 56[7000] [send] via NET/IB/0
+gpua070:113434:113545 [0] NCCL INFO Channel 00/0 : 32[7000] -> 48[7000] [receive] via NET/IB/0
+gpua070:113434:113545 [0] NCCL INFO Channel 00/0 : 48[7000] -> 32[7000] [send] via NET/IB/0
+gpua070:113434:113545 [0] NCCL INFO Channel 00/0 : 56[7000] -> 48[7000] [receive] via NET/IB/0
+gpua070:113434:113545 [0] NCCL INFO Channel 01/0 : 52[7000] -> 48[7000] [receive] via NET/IB/0
+gpua070:113434:113545 [0] NCCL INFO Connected all trees
+gpua070:113434:113545 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua070:113434:113545 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua070:113434:113545 [0] NCCL INFO comm 0xc080c20 rank 48 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpua014:120543:120650 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.14<0>
+gpua014:120543:120650 [0] NCCL INFO Using network IB
+gpua014:120543:120650 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpua014:120543:120650 [0] NCCL INFO Channel 00/02 :    0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18  19
+gpua014:120543:120650 [0] NCCL INFO Channel 01/02 :    0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18  19
+gpua014:120543:120650 [0] NCCL INFO Trees [0] 1/32/-1->0->-1 [1] 1/-1/-1->0->4
+gpua014:120543:120650 [0] NCCL INFO Channel 00/0 : 63[c7000] -> 0[7000] [receive] via NET/IB/0
+gpua014:120543:120650 [0] NCCL INFO Channel 01/0 : 63[c7000] -> 0[7000] [receive] via NET/IB/0
+gpua014:120543:120650 [0] NCCL INFO Channel 00/0 : 0[7000] -> 1[46000] via P2P/IPC/read
+gpua014:120543:120650 [0] NCCL INFO Channel 01/0 : 0[7000] -> 1[46000] via P2P/IPC/read
+gpua014:120543:120650 [0] NCCL INFO Connected all rings
+gpua014:120543:120650 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [send] via NET/IB/0
+gpua014:120543:120650 [0] NCCL INFO Channel 00/0 : 32[7000] -> 0[7000] [receive] via NET/IB/0
+gpua014:120543:120650 [0] NCCL INFO Channel 00/0 : 0[7000] -> 32[7000] [send] via NET/IB/0
+gpua014:120543:120650 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [receive] via NET/IB/0
+gpua014:120543:120650 [0] NCCL INFO Connected all trees
+gpua014:120543:120650 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua014:120543:120650 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua014:120543:120650 [0] NCCL INFO comm 0x17605a00 rank 0 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpua014:120545:120545 [2] NCCL INFO cudaDriverVersion 12020
+gpua014:120545:120545 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.14<0>
+gpua014:120545:120545 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua014:120545:120648 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.14<0>
+gpua014:120545:120648 [2] NCCL INFO Using network IB
+gpua014:120545:120648 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpua014:120545:120648 [2] NCCL INFO Trees [0] 3/-1/-1->2->1 [1] 3/-1/-1->2->1
+gpua014:120545:120648 [2] NCCL INFO Channel 00/0 : 2[85000] -> 3[c7000] via P2P/IPC/read
+gpua014:120545:120648 [2] NCCL INFO Channel 01/0 : 2[85000] -> 3[c7000] via P2P/IPC/read
+gpua014:120545:120648 [2] NCCL INFO Connected all rings
+gpua014:120545:120648 [2] NCCL INFO Channel 00/0 : 2[85000] -> 1[46000] via P2P/IPC/read
+gpua014:120545:120648 [2] NCCL INFO Channel 01/0 : 2[85000] -> 1[46000] via P2P/IPC/read
+gpua014:120545:120648 [2] NCCL INFO Connected all trees
+gpua014:120545:120648 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua014:120545:120648 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua014:120545:120648 [2] NCCL INFO comm 0x12653bb0 rank 2 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpua014:120546:120546 [3] NCCL INFO cudaDriverVersion 12020
+gpua014:120546:120546 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.14<0>
+gpua014:120546:120546 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua014:120546:120649 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.14<0>
+gpua014:120546:120649 [3] NCCL INFO Using network IB
+gpua014:120546:120649 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpua014:120546:120649 [3] NCCL INFO Trees [0] -1/-1/-1->3->2 [1] -1/-1/-1->3->2
+gpua014:120546:120649 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [send] via NET/IB/0
+gpua014:120546:120649 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [send] via NET/IB/0
+gpua014:120546:120649 [3] NCCL INFO Connected all rings
+gpua014:120546:120649 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 2[85000] via P2P/IPC/read
+gpua014:120546:120649 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 2[85000] via P2P/IPC/read
+gpua014:120546:120649 [3] NCCL INFO Connected all trees
+gpua014:120546:120649 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua014:120546:120649 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua014:120546:120649 [3] NCCL INFO comm 0xb46d220 rank 3 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpua097:565110:565110 [2] NCCL INFO cudaDriverVersion 12020
+gpua097:565110:565110 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.97<0>
+gpua097:565110:565110 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua097:565110:565218 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.97<0>
+gpua097:565110:565218 [2] NCCL INFO Using network IB
+gpua097:565110:565218 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpua097:565110:565218 [2] NCCL INFO Trees [0] 59/-1/-1->58->57 [1] 59/-1/-1->58->57
+gpua097:565110:565218 [2] NCCL INFO Channel 00/0 : 58[85000] -> 59[c7000] via P2P/IPC/read
+gpua097:565110:565218 [2] NCCL INFO Channel 01/0 : 58[85000] -> 59[c7000] via P2P/IPC/read
+gpua097:565110:565218 [2] NCCL INFO Connected all rings
+gpua097:565110:565218 [2] NCCL INFO Channel 00/0 : 58[85000] -> 57[46000] via P2P/IPC/read
+gpua097:565110:565218 [2] NCCL INFO Channel 01/0 : 58[85000] -> 57[46000] via P2P/IPC/read
+gpua097:565110:565218 [2] NCCL INFO Connected all trees
+gpua097:565110:565218 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua097:565110:565218 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua097:565110:565218 [2] NCCL INFO comm 0xe179630 rank 58 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpua097:565111:565111 [3] NCCL INFO cudaDriverVersion 12020
+gpua097:565111:565111 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.97<0>
+gpua097:565111:565111 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua097:565111:565216 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.97<0>
+gpua097:565111:565216 [3] NCCL INFO Using network IB
+gpua097:565111:565216 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpua097:565111:565216 [3] NCCL INFO Trees [0] -1/-1/-1->59->58 [1] -1/-1/-1->59->58
+gpua097:565111:565216 [3] NCCL INFO Channel 00/0 : 59[c7000] -> 60[7000] [send] via NET/IB/0
+gpua097:565111:565216 [3] NCCL INFO Channel 01/0 : 59[c7000] -> 60[7000] [send] via NET/IB/0
+gpua097:565111:565216 [3] NCCL INFO Connected all rings
+gpua097:565111:565216 [3] NCCL INFO Channel 00/0 : 59[c7000] -> 58[85000] via P2P/IPC/read
+gpua097:565111:565216 [3] NCCL INFO Channel 01/0 : 59[c7000] -> 58[85000] via P2P/IPC/read
+gpua097:565111:565216 [3] NCCL INFO Connected all trees
+gpua097:565111:565216 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua097:565111:565216 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua097:565111:565216 [3] NCCL INFO comm 0x1384a030 rank 59 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpua072:246796:246796 [2] NCCL INFO cudaDriverVersion 12020
+gpua072:246796:246796 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.72<0>
+gpua072:246796:246796 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua072:246796:246902 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.72<0>
+gpua072:246796:246902 [2] NCCL INFO Using network IB
+gpua072:246796:246902 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpua072:246796:246902 [2] NCCL INFO Trees [0] 55/-1/-1->54->53 [1] 55/-1/-1->54->53
+gpua072:246796:246902 [2] NCCL INFO Channel 00/0 : 54[85000] -> 55[c7000] via P2P/IPC/read
+gpua072:246796:246902 [2] NCCL INFO Channel 01/0 : 54[85000] -> 55[c7000] via P2P/IPC/read
+gpua072:246796:246902 [2] NCCL INFO Connected all rings
+gpua072:246796:246902 [2] NCCL INFO Channel 00/0 : 54[85000] -> 53[46000] via P2P/IPC/read
+gpua072:246796:246902 [2] NCCL INFO Channel 01/0 : 54[85000] -> 53[46000] via P2P/IPC/read
+gpua072:246796:246902 [2] NCCL INFO Connected all trees
+gpua072:246796:246902 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua072:246796:246902 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua072:246796:246902 [2] NCCL INFO comm 0xde16580 rank 54 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpua072:246797:246797 [3] NCCL INFO cudaDriverVersion 12020
+gpua072:246797:246797 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.72<0>
+gpua072:246797:246797 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua072:246797:246905 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.72<0>
+gpua072:246797:246905 [3] NCCL INFO Using network IB
+gpua072:246797:246905 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpua072:246797:246905 [3] NCCL INFO Trees [0] -1/-1/-1->55->54 [1] -1/-1/-1->55->54
+gpua072:246797:246905 [3] NCCL INFO Channel 00/0 : 55[c7000] -> 56[7000] [send] via NET/IB/0
+gpua072:246797:246905 [3] NCCL INFO Channel 01/0 : 55[c7000] -> 56[7000] [send] via NET/IB/0
+gpua072:246797:246905 [3] NCCL INFO Connected all rings
+gpua072:246797:246905 [3] NCCL INFO Channel 00/0 : 55[c7000] -> 54[85000] via P2P/IPC/read
+gpua072:246797:246905 [3] NCCL INFO Channel 01/0 : 55[c7000] -> 54[85000] via P2P/IPC/read
+gpua035:397820:397820 [1] NCCL INFO cudaDriverVersion 12020
+gpua035:397820:397820 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.35<0>
+gpua035:397820:397820 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua035:397820:397928 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.35<0>
+gpua035:397820:397928 [1] NCCL INFO Using network IB
+gpua035:397820:397928 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpua035:397820:397928 [1] NCCL INFO Trees [0] 22/-1/-1->21->20 [1] 22/24/-1->21->20
+gpua035:397820:397928 [1] NCCL INFO Channel 00/0 : 21[46000] -> 22[85000] via P2P/IPC/read
+gpua035:397820:397928 [1] NCCL INFO Channel 01/0 : 21[46000] -> 22[85000] via P2P/IPC/read
+gpua035:397820:397928 [1] NCCL INFO Connected all rings
+gpua035:397820:397928 [1] NCCL INFO Channel 01/0 : 21[46000] -> 24[7000] [send] via NET/IB/0
+gpua035:397820:397928 [1] NCCL INFO Channel 01/0 : 24[7000] -> 21[46000] [receive] via NET/IB/0
+gpua072:246797:246905 [3] NCCL INFO Connected all trees
+gpua072:246797:246905 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua072:246797:246905 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua072:246797:246905 [3] NCCL INFO comm 0x11360c50 rank 55 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpua035:397820:397928 [1] NCCL INFO Channel 00/0 : 21[46000] -> 20[7000] via P2P/IPC/read
+gpua035:397820:397928 [1] NCCL INFO Channel 01/0 : 21[46000] -> 20[7000] via P2P/IPC/read
+gpua035:397820:397928 [1] NCCL INFO Connected all trees
+gpua035:397820:397928 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua035:397820:397928 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua035:397820:397928 [1] NCCL INFO comm 0x11fd8eb0 rank 21 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpua072:246794:246794 [0] NCCL INFO cudaDriverVersion 12020
+gpua072:246794:246794 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.72<0>
+gpua072:246794:246794 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua072:246794:246904 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.72<0>
+gpua072:246794:246904 [0] NCCL INFO Using network IB
+gpua072:246794:246904 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpua072:246794:246904 [0] NCCL INFO Trees [0] 53/-1/-1->52->57 [1] 53/48/-1->52->45
+gpua072:246794:246904 [0] NCCL INFO Channel 00/0 : 51[c7000] -> 52[7000] [receive] via NET/IB/0
+gpua072:246794:246904 [0] NCCL INFO Channel 01/0 : 51[c7000] -> 52[7000] [receive] via NET/IB/0
+gpua072:246794:246904 [0] NCCL INFO Channel 00/0 : 52[7000] -> 53[46000] via P2P/IPC/read
+gpua072:246794:246904 [0] NCCL INFO Channel 01/0 : 52[7000] -> 53[46000] via P2P/IPC/read
+gpua072:246794:246904 [0] NCCL INFO Connected all rings
+gpua035:397819:397819 [0] NCCL INFO cudaDriverVersion 12020
+gpua035:397819:397819 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.35<0>
+gpua035:397819:397819 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua035:397819:397927 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.35<0>
+gpua035:397819:397927 [0] NCCL INFO Using network IB
+gpua035:397819:397927 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpua035:397819:397927 [0] NCCL INFO Trees [0] 21/-1/-1->20->25 [1] 21/16/-1->20->13
+gpua035:397819:397927 [0] NCCL INFO Channel 00/0 : 19[c7000] -> 20[7000] [receive] via NET/IB/0
+gpua035:397819:397927 [0] NCCL INFO Channel 01/0 : 19[c7000] -> 20[7000] [receive] via NET/IB/0
+gpua035:397819:397927 [0] NCCL INFO Channel 00/0 : 20[7000] -> 21[46000] via P2P/IPC/read
+gpua035:397819:397927 [0] NCCL INFO Channel 01/0 : 20[7000] -> 21[46000] via P2P/IPC/read
+gpua035:397819:397927 [0] NCCL INFO Connected all rings
+gpua072:246794:246904 [0] NCCL INFO Channel 01/0 : 48[7000] -> 52[7000] [receive] via NET/IB/0
+gpua072:246794:246904 [0] NCCL INFO Channel 00/0 : 52[7000] -> 57[46000] [send] via NET/IB/0
+gpua072:246794:246904 [0] NCCL INFO Channel 01/0 : 45[46000] -> 52[7000] [receive] via NET/IB/0
+gpua072:246794:246904 [0] NCCL INFO Channel 01/0 : 52[7000] -> 45[46000] [send] via NET/IB/0
+gpua072:246794:246904 [0] NCCL INFO Channel 00/0 : 57[46000] -> 52[7000] [receive] via NET/IB/0
+gpua072:246794:246904 [0] NCCL INFO Channel 01/0 : 52[7000] -> 48[7000] [send] via NET/IB/0
+gpua072:246794:246904 [0] NCCL INFO Connected all trees
+gpua072:246794:246904 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua072:246794:246904 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua072:246794:246904 [0] NCCL INFO comm 0x1288dae0 rank 52 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpua035:397819:397927 [0] NCCL INFO Channel 01/0 : 16[7000] -> 20[7000] [receive] via NET/IB/0
+gpua035:397819:397927 [0] NCCL INFO Channel 00/0 : 20[7000] -> 25[46000] [send] via NET/IB/0
+gpua035:397819:397927 [0] NCCL INFO Channel 01/0 : 13[46000] -> 20[7000] [receive] via NET/IB/0
+gpua035:397819:397927 [0] NCCL INFO Channel 01/0 : 20[7000] -> 13[46000] [send] via NET/IB/0
+gpua035:397819:397927 [0] NCCL INFO Channel 00/0 : 25[46000] -> 20[7000] [receive] via NET/IB/0
+gpua035:397819:397927 [0] NCCL INFO Channel 01/0 : 20[7000] -> 16[7000] [send] via NET/IB/0
+gpua035:397819:397927 [0] NCCL INFO Connected all trees
+gpua035:397819:397927 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua035:397819:397927 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua035:397819:397927 [0] NCCL INFO comm 0x14def160 rank 20 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpua035:397821:397821 [2] NCCL INFO cudaDriverVersion 12020
+gpua035:397821:397821 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.35<0>
+gpua035:397821:397821 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua035:397821:397926 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.35<0>
+gpua035:397821:397926 [2] NCCL INFO Using network IB
+gpua035:397821:397926 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpua035:397821:397926 [2] NCCL INFO Trees [0] 23/-1/-1->22->21 [1] 23/-1/-1->22->21
+gpua035:397821:397926 [2] NCCL INFO Channel 00/0 : 22[85000] -> 23[c7000] via P2P/IPC/read
+gpua035:397821:397926 [2] NCCL INFO Channel 01/0 : 22[85000] -> 23[c7000] via P2P/IPC/read
+gpua035:397821:397926 [2] NCCL INFO Connected all rings
+gpua035:397821:397926 [2] NCCL INFO Channel 00/0 : 22[85000] -> 21[46000] via P2P/IPC/read
+gpua035:397821:397926 [2] NCCL INFO Channel 01/0 : 22[85000] -> 21[46000] via P2P/IPC/read
+gpua035:397821:397926 [2] NCCL INFO Connected all trees
+gpua035:397821:397926 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua035:397821:397926 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua035:397821:397926 [2] NCCL INFO comm 0x123d8f10 rank 22 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpua035:397822:397822 [3] NCCL INFO cudaDriverVersion 12020
+gpua035:397822:397822 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.35<0>
+gpua035:397822:397822 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua035:397822:397925 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.35<0>
+gpua035:397822:397925 [3] NCCL INFO Using network IB
+gpua035:397822:397925 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpua035:397822:397925 [3] NCCL INFO Trees [0] -1/-1/-1->23->22 [1] -1/-1/-1->23->22
+gpua035:397822:397925 [3] NCCL INFO Channel 00/0 : 23[c7000] -> 24[7000] [send] via NET/IB/0
+gpua035:397822:397925 [3] NCCL INFO Channel 01/0 : 23[c7000] -> 24[7000] [send] via NET/IB/0
+gpua035:397822:397925 [3] NCCL INFO Connected all rings
+gpua035:397822:397925 [3] NCCL INFO Channel 00/0 : 23[c7000] -> 22[85000] via P2P/IPC/read
+gpua035:397822:397925 [3] NCCL INFO Channel 01/0 : 23[c7000] -> 22[85000] via P2P/IPC/read
+gpua035:397822:397925 [3] NCCL INFO Connected all trees
+gpua035:397822:397925 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua035:397822:397925 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua035:397822:397925 [3] NCCL INFO comm 0x10030950 rank 23 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpua072:246795:246795 [1] NCCL INFO cudaDriverVersion 12020
+gpua072:246795:246795 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.72<0>
+gpua072:246795:246795 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua072:246795:246903 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.72<0>
+gpua072:246795:246903 [1] NCCL INFO Using network IB
+gpua072:246795:246903 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpua072:246795:246903 [1] NCCL INFO Trees [0] 54/-1/-1->53->52 [1] 54/56/-1->53->52
+gpua072:246795:246903 [1] NCCL INFO Channel 00/0 : 53[46000] -> 54[85000] via P2P/IPC/read
+gpua072:246795:246903 [1] NCCL INFO Channel 01/0 : 53[46000] -> 54[85000] via P2P/IPC/read
+gpua072:246795:246903 [1] NCCL INFO Connected all rings
+gpua072:246795:246903 [1] NCCL INFO Channel 01/0 : 53[46000] -> 56[7000] [send] via NET/IB/0
+gpua072:246795:246903 [1] NCCL INFO Channel 01/0 : 56[7000] -> 53[46000] [receive] via NET/IB/0
+gpua072:246795:246903 [1] NCCL INFO Channel 00/0 : 53[46000] -> 52[7000] via P2P/IPC/read
+gpua072:246795:246903 [1] NCCL INFO Channel 01/0 : 53[46000] -> 52[7000] via P2P/IPC/read
+gpua072:246795:246903 [1] NCCL INFO Connected all trees
+gpua072:246795:246903 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua072:246795:246903 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua072:246795:246903 [1] NCCL INFO comm 0x1219ebd0 rank 53 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpua070:113436:113436 [2] NCCL INFO cudaDriverVersion 12020
+gpua070:113436:113436 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.70<0>
+gpua070:113436:113436 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua070:113436:113544 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.70<0>
+gpua070:113436:113544 [2] NCCL INFO Using network IB
+gpua070:113436:113544 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpua070:113436:113544 [2] NCCL INFO Trees [0] 51/-1/-1->50->49 [1] 51/-1/-1->50->49
+gpua070:113436:113544 [2] NCCL INFO Channel 00/0 : 50[85000] -> 51[c7000] via P2P/IPC/read
+gpua070:113436:113544 [2] NCCL INFO Channel 01/0 : 50[85000] -> 51[c7000] via P2P/IPC/read
+gpua070:113436:113544 [2] NCCL INFO Connected all rings
+gpua070:113436:113544 [2] NCCL INFO Channel 00/0 : 50[85000] -> 49[46000] via P2P/IPC/read
+gpua070:113436:113544 [2] NCCL INFO Channel 01/0 : 50[85000] -> 49[46000] via P2P/IPC/read
+gpua070:113436:113544 [2] NCCL INFO Connected all trees
+gpua070:113436:113544 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua070:113436:113544 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua070:113436:113544 [2] NCCL INFO comm 0xa0172b0 rank 50 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpua070:113437:113437 [3] NCCL INFO cudaDriverVersion 12020
+gpua070:113437:113437 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.70<0>
+gpua070:113437:113437 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua070:113437:113546 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.70<0>
+gpua070:113437:113546 [3] NCCL INFO Using network IB
+gpua070:113437:113546 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpua070:113437:113546 [3] NCCL INFO Trees [0] -1/-1/-1->51->50 [1] -1/-1/-1->51->50
+gpua070:113437:113546 [3] NCCL INFO Channel 00/0 : 51[c7000] -> 52[7000] [send] via NET/IB/0
+gpua070:113437:113546 [3] NCCL INFO Channel 01/0 : 51[c7000] -> 52[7000] [send] via NET/IB/0
+gpua070:113437:113546 [3] NCCL INFO Connected all rings
+gpua070:113437:113546 [3] NCCL INFO Channel 00/0 : 51[c7000] -> 50[85000] via P2P/IPC/read
+gpua070:113437:113546 [3] NCCL INFO Channel 01/0 : 51[c7000] -> 50[85000] via P2P/IPC/read
+gpua070:113437:113546 [3] NCCL INFO Connected all trees
+gpua070:113437:113546 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua070:113437:113546 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua070:113437:113546 [3] NCCL INFO comm 0xbabb990 rank 51 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpua097:565108:565108 [0] NCCL INFO cudaDriverVersion 12020
+gpua097:565108:565108 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.97<0>
+gpua097:565108:565108 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua097:565108:565215 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.97<0>
+gpua097:565108:565215 [0] NCCL INFO Using network IB
+gpua097:565108:565215 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpua097:565108:565215 [0] NCCL INFO Trees [0] 57/60/-1->56->48 [1] 57/-1/-1->56->53
+gpua097:565108:565215 [0] NCCL INFO Channel 00/0 : 55[c7000] -> 56[7000] [receive] via NET/IB/0
+gpua097:565108:565215 [0] NCCL INFO Channel 01/0 : 55[c7000] -> 56[7000] [receive] via NET/IB/0
+gpua097:565108:565215 [0] NCCL INFO Channel 00/0 : 56[7000] -> 57[46000] via P2P/IPC/read
+gpua097:565108:565215 [0] NCCL INFO Channel 01/0 : 56[7000] -> 57[46000] via P2P/IPC/read
+gpua097:565108:565215 [0] NCCL INFO Connected all rings
+gpua097:565108:565215 [0] NCCL INFO Channel 01/0 : 53[46000] -> 56[7000] [receive] via NET/IB/0
+gpua097:565108:565215 [0] NCCL INFO Channel 00/0 : 56[7000] -> 60[7000] [send] via NET/IB/0
+gpua097:565108:565215 [0] NCCL INFO Channel 00/0 : 48[7000] -> 56[7000] [receive] via NET/IB/0
+gpua097:565108:565215 [0] NCCL INFO Channel 00/0 : 56[7000] -> 48[7000] [send] via NET/IB/0
+gpua097:565108:565215 [0] NCCL INFO Channel 00/0 : 60[7000] -> 56[7000] [receive] via NET/IB/0
+gpua097:565108:565215 [0] NCCL INFO Channel 01/0 : 56[7000] -> 53[46000] [send] via NET/IB/0
+gpua097:565108:565215 [0] NCCL INFO Connected all trees
+gpua097:565108:565215 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua097:565108:565215 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua097:565108:565215 [0] NCCL INFO comm 0xbd45ac0 rank 56 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpua097:565109:565109 [1] NCCL INFO cudaDriverVersion 12020
+gpua097:565109:565109 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.97<0>
+gpua097:565109:565109 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua097:565109:565217 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.97<0>
+gpua097:565109:565217 [1] NCCL INFO Using network IB
+gpua097:565109:565217 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpua097:565109:565217 [1] NCCL INFO Trees [0] 58/52/-1->57->56 [1] 58/-1/-1->57->56
+gpua097:565109:565217 [1] NCCL INFO Channel 00/0 : 57[46000] -> 58[85000] via P2P/IPC/read
+gpua097:565109:565217 [1] NCCL INFO Channel 01/0 : 57[46000] -> 58[85000] via P2P/IPC/read
+gpua097:565109:565217 [1] NCCL INFO Connected all rings
+gpua097:565109:565217 [1] NCCL INFO Channel 00/0 : 52[7000] -> 57[46000] [receive] via NET/IB/0
+gpua097:565109:565217 [1] NCCL INFO Channel 00/0 : 57[46000] -> 52[7000] [send] via NET/IB/0
+gpua097:565109:565217 [1] NCCL INFO Channel 00/0 : 57[46000] -> 56[7000] via P2P/IPC/read
+gpua097:565109:565217 [1] NCCL INFO Channel 01/0 : 57[46000] -> 56[7000] via P2P/IPC/read
+gpua097:565109:565217 [1] NCCL INFO Connected all trees
+gpua097:565109:565217 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua097:565109:565217 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua097:565109:565217 [1] NCCL INFO comm 0xb8dc4d0 rank 57 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpua042:320057:320057 [3] NCCL INFO cudaDriverVersion 12020
+gpua042:320057:320057 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.42<0>
+gpua042:320057:320057 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua042:320057:320142 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.42<0>
+gpua042:320057:320142 [3] NCCL INFO Using network IB
+gpua042:320057:320142 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpua042:320057:320142 [3] NCCL INFO Trees [0] -1/-1/-1->39->38 [1] -1/-1/-1->39->38
+gpua042:320057:320142 [3] NCCL INFO Channel 00/0 : 39[c7000] -> 40[7000] [send] via NET/IB/0
+gpua042:320057:320142 [3] NCCL INFO Channel 01/0 : 39[c7000] -> 40[7000] [send] via NET/IB/0
+gpua042:320057:320142 [3] NCCL INFO Connected all rings
+gpua042:320057:320142 [3] NCCL INFO Channel 00/0 : 39[c7000] -> 38[85000] via P2P/IPC/read
+gpua042:320057:320142 [3] NCCL INFO Channel 01/0 : 39[c7000] -> 38[85000] via P2P/IPC/read
+gpua042:320057:320142 [3] NCCL INFO Connected all trees
+gpua042:320057:320142 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua042:320057:320142 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua042:320057:320142 [3] NCCL INFO comm 0x1b7421a0 rank 39 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpua033:812549:812549 [0] NCCL INFO cudaDriverVersion 12020
+gpua033:812549:812549 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.33<0>
+gpua033:812549:812549 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua033:812549:812653 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.33<0>
+gpua033:812549:812653 [0] NCCL INFO Using network IB
+gpua033:812549:812653 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpua033:812549:812653 [0] NCCL INFO Trees [0] 17/24/-1->16->33 [1] 17/-1/-1->16->20
+gpua033:812549:812653 [0] NCCL INFO Channel 00/0 : 15[c7000] -> 16[7000] [receive] via NET/IB/0
+gpua033:812549:812653 [0] NCCL INFO Channel 01/0 : 15[c7000] -> 16[7000] [receive] via NET/IB/0
+gpua033:812549:812653 [0] NCCL INFO Channel 00/0 : 16[7000] -> 17[46000] via P2P/IPC/read
+gpua033:812549:812653 [0] NCCL INFO Channel 01/0 : 16[7000] -> 17[46000] via P2P/IPC/read
+gpua033:812549:812653 [0] NCCL INFO Connected all rings
+gpua033:812549:812653 [0] NCCL INFO Channel 01/0 : 16[7000] -> 20[7000] [send] via NET/IB/0
+gpua033:812549:812653 [0] NCCL INFO Channel 00/0 : 16[7000] -> 24[7000] [send] via NET/IB/0
+gpua033:812549:812653 [0] NCCL INFO Channel 00/0 : 16[7000] -> 33[46000] [send] via NET/IB/0
+gpua033:812549:812653 [0] NCCL INFO Channel 00/0 : 33[46000] -> 16[7000] [receive] via NET/IB/0
+gpua033:812549:812653 [0] NCCL INFO Channel 00/0 : 24[7000] -> 16[7000] [receive] via NET/IB/0
+gpua033:812549:812653 [0] NCCL INFO Channel 01/0 : 20[7000] -> 16[7000] [receive] via NET/IB/0
+gpua033:812549:812653 [0] NCCL INFO Connected all trees
+gpua033:812549:812653 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua033:812549:812653 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua033:812549:812653 [0] NCCL INFO comm 0x1740f8e0 rank 16 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpua043:1858025:1858025 [0] NCCL INFO cudaDriverVersion 12020
+gpua043:1858025:1858025 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.43<0>
+gpua043:1858025:1858025 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua043:1858025:1858126 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.43<0>
+gpua043:1858025:1858126 [0] NCCL INFO Using network IB
+gpua043:1858025:1858126 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpua043:1858025:1858126 [0] NCCL INFO Trees [0] 41/44/-1->40->49 [1] 41/-1/-1->40->37
+gpua043:1858025:1858126 [0] NCCL INFO Channel 00/0 : 39[c7000] -> 40[7000] [receive] via NET/IB/0
+gpua043:1858025:1858126 [0] NCCL INFO Channel 01/0 : 39[c7000] -> 40[7000] [receive] via NET/IB/0
+gpua043:1858025:1858126 [0] NCCL INFO Channel 00/0 : 40[7000] -> 41[46000] via P2P/IPC/read
+gpua043:1858025:1858126 [0] NCCL INFO Channel 01/0 : 40[7000] -> 41[46000] via P2P/IPC/read
+gpua043:1858025:1858126 [0] NCCL INFO Connected all rings
+gpua043:1858025:1858126 [0] NCCL INFO Channel 01/0 : 37[46000] -> 40[7000] [receive] via NET/IB/0
+gpua043:1858025:1858126 [0] NCCL INFO Channel 00/0 : 40[7000] -> 44[7000] [send] via NET/IB/0
+gpua043:1858025:1858126 [0] NCCL INFO Channel 00/0 : 40[7000] -> 49[46000] [send] via NET/IB/0
+gpua043:1858025:1858126 [0] NCCL INFO Channel 00/0 : 49[46000] -> 40[7000] [receive] via NET/IB/0
+gpua043:1858025:1858126 [0] NCCL INFO Channel 00/0 : 44[7000] -> 40[7000] [receive] via NET/IB/0
+gpua043:1858025:1858126 [0] NCCL INFO Channel 01/0 : 40[7000] -> 37[46000] [send] via NET/IB/0
+gpua043:1858025:1858126 [0] NCCL INFO Connected all trees
+gpua043:1858025:1858126 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua043:1858025:1858126 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua043:1858025:1858126 [0] NCCL INFO comm 0x1e934f50 rank 40 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpua099:116369:116369 [3] NCCL INFO cudaDriverVersion 12020
+gpua099:116369:116369 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.99<0>
+gpua099:116369:116369 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua099:116369:116476 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.99<0>
+gpua099:116369:116476 [3] NCCL INFO Using network IB
+gpua099:116369:116476 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpua099:116369:116476 [3] NCCL INFO Trees [0] -1/-1/-1->63->62 [1] -1/-1/-1->63->62
+gpua099:116369:116476 [3] NCCL INFO Channel 00/0 : 63[c7000] -> 0[7000] [send] via NET/IB/0
+gpua099:116369:116476 [3] NCCL INFO Channel 01/0 : 63[c7000] -> 0[7000] [send] via NET/IB/0
+gpua099:116369:116476 [3] NCCL INFO Connected all rings
+gpua099:116369:116476 [3] NCCL INFO Channel 00/0 : 63[c7000] -> 62[85000] via P2P/IPC/read
+gpua099:116369:116476 [3] NCCL INFO Channel 01/0 : 63[c7000] -> 62[85000] via P2P/IPC/read
+gpua099:116369:116476 [3] NCCL INFO Connected all trees
+gpua099:116369:116476 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua099:116369:116476 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua099:116369:116476 [3] NCCL INFO comm 0x1a616df0 rank 63 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpua037:388777:388777 [3] NCCL INFO cudaDriverVersion 12020
+gpua037:388777:388777 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.37<0>
+gpua037:388777:388777 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua037:388777:388880 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.37<0>
+gpua037:388777:388880 [3] NCCL INFO Using network IB
+gpua037:388777:388880 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpua037:388777:388880 [3] NCCL INFO Trees [0] -1/-1/-1->27->26 [1] -1/-1/-1->27->26
+gpua037:388777:388880 [3] NCCL INFO Channel 00/0 : 27[c7000] -> 28[7000] [send] via NET/IB/0
+gpua037:388777:388880 [3] NCCL INFO Channel 01/0 : 27[c7000] -> 28[7000] [send] via NET/IB/0
+gpua037:388777:388880 [3] NCCL INFO Connected all rings
+gpua037:388777:388880 [3] NCCL INFO Channel 00/0 : 27[c7000] -> 26[85000] via P2P/IPC/read
+gpua037:388777:388880 [3] NCCL INFO Channel 01/0 : 27[c7000] -> 26[85000] via P2P/IPC/read
+gpua037:388777:388880 [3] NCCL INFO Connected all trees
+gpua037:388777:388880 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua037:388777:388880 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua037:388777:388880 [3] NCCL INFO comm 0x173dc770 rank 27 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpua069:1351486:1351486 [2] NCCL INFO cudaDriverVersion 12020
+gpua069:1351486:1351486 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.69<0>
+gpua069:1351486:1351486 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua069:1351486:1351582 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.69<0>
+gpua069:1351486:1351582 [2] NCCL INFO Using network IB
+gpua069:1351486:1351582 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpua069:1351486:1351582 [2] NCCL INFO Trees [0] 47/-1/-1->46->45 [1] 47/-1/-1->46->45
+gpua069:1351486:1351582 [2] NCCL INFO Channel 00/0 : 46[85000] -> 47[c7000] via P2P/IPC/read
+gpua069:1351486:1351582 [2] NCCL INFO Channel 01/0 : 46[85000] -> 47[c7000] via P2P/IPC/read
+gpua069:1351486:1351582 [2] NCCL INFO Connected all rings
+gpua069:1351486:1351582 [2] NCCL INFO Channel 00/0 : 46[85000] -> 45[46000] via P2P/IPC/read
+gpua069:1351486:1351582 [2] NCCL INFO Channel 01/0 : 46[85000] -> 45[46000] via P2P/IPC/read
+gpua069:1351486:1351582 [2] NCCL INFO Connected all trees
+gpua069:1351486:1351582 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua069:1351486:1351582 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua069:1351486:1351582 [2] NCCL INFO comm 0x1a5de820 rank 46 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpua033:812552:812552 [3] NCCL INFO cudaDriverVersion 12020
+gpua033:812552:812552 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.33<0>
+gpua033:812552:812552 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua033:812552:812658 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.33<0>
+gpua033:812552:812658 [3] NCCL INFO Using network IB
+gpua033:812552:812658 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpua033:812552:812658 [3] NCCL INFO Trees [0] -1/-1/-1->19->18 [1] -1/-1/-1->19->18
+gpua033:812552:812658 [3] NCCL INFO Channel 00/0 : 19[c7000] -> 20[7000] [send] via NET/IB/0
+gpua033:812552:812658 [3] NCCL INFO Channel 01/0 : 19[c7000] -> 20[7000] [send] via NET/IB/0
+gpua033:812552:812658 [3] NCCL INFO Connected all rings
+gpua033:812552:812658 [3] NCCL INFO Channel 00/0 : 19[c7000] -> 18[85000] via P2P/IPC/read
+gpua033:812552:812658 [3] NCCL INFO Channel 01/0 : 19[c7000] -> 18[85000] via P2P/IPC/read
+gpua033:812552:812658 [3] NCCL INFO Connected all trees
+gpua033:812552:812658 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua033:812552:812658 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua033:812552:812658 [3] NCCL INFO comm 0x17dae330 rank 19 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpua039:405902:405902 [3] NCCL INFO cudaDriverVersion 12020
+gpua039:405902:405902 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.39<0>
+gpua039:405902:405902 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua039:405902:406005 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.39<0>
+gpua039:405902:406005 [3] NCCL INFO Using network IB
+gpua039:405902:406005 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpua039:405902:406005 [3] NCCL INFO Trees [0] -1/-1/-1->31->30 [1] -1/-1/-1->31->30
+gpua039:405902:406005 [3] NCCL INFO Channel 00/0 : 31[c7000] -> 32[7000] [send] via NET/IB/0
+gpua039:405902:406005 [3] NCCL INFO Channel 01/0 : 31[c7000] -> 32[7000] [send] via NET/IB/0
+gpua039:405902:406005 [3] NCCL INFO Connected all rings
+gpua039:405902:406005 [3] NCCL INFO Channel 00/0 : 31[c7000] -> 30[85000] via P2P/IPC/read
+gpua039:405902:406005 [3] NCCL INFO Channel 01/0 : 31[c7000] -> 30[85000] via P2P/IPC/read
+gpua039:405902:406005 [3] NCCL INFO Connected all trees
+gpua039:405902:406005 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua039:405902:406005 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua039:405902:406005 [3] NCCL INFO comm 0x13b6d040 rank 31 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpua037:388775:388775 [1] NCCL INFO cudaDriverVersion 12020
+gpua037:388775:388775 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.37<0>
+gpua037:388775:388775 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua037:388775:388877 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.37<0>
+gpua037:388775:388877 [1] NCCL INFO Using network IB
+gpua037:388775:388877 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpua037:388775:388877 [1] NCCL INFO Trees [0] 26/20/-1->25->24 [1] 26/-1/-1->25->24
+gpua037:388775:388877 [1] NCCL INFO Channel 00/0 : 25[46000] -> 26[85000] via P2P/IPC/read
+gpua037:388775:388877 [1] NCCL INFO Channel 01/0 : 25[46000] -> 26[85000] via P2P/IPC/read
+gpua037:388775:388877 [1] NCCL INFO Connected all rings
+gpua037:388775:388877 [1] NCCL INFO Channel 00/0 : 20[7000] -> 25[46000] [receive] via NET/IB/0
+gpua037:388775:388877 [1] NCCL INFO Channel 00/0 : 25[46000] -> 20[7000] [send] via NET/IB/0
+gpua037:388775:388877 [1] NCCL INFO Channel 00/0 : 25[46000] -> 24[7000] via P2P/IPC/read
+gpua037:388775:388877 [1] NCCL INFO Channel 01/0 : 25[46000] -> 24[7000] via P2P/IPC/read
+gpua037:388775:388877 [1] NCCL INFO Connected all trees
+gpua037:388775:388877 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua037:388775:388877 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua037:388775:388877 [1] NCCL INFO comm 0x4b939930 rank 25 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpua030:633769:633769 [2] NCCL INFO cudaDriverVersion 12020
+gpua030:633769:633769 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.30<0>
+gpua030:633769:633769 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua030:633769:633871 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.30<0>
+gpua030:633769:633871 [2] NCCL INFO Using network IB
+gpua030:633769:633871 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpua030:633769:633871 [2] NCCL INFO Trees [0] 11/-1/-1->10->9 [1] 11/-1/-1->10->9
+gpua030:633769:633871 [2] NCCL INFO Channel 00/0 : 10[85000] -> 11[c7000] via P2P/IPC/read
+gpua030:633769:633871 [2] NCCL INFO Channel 01/0 : 10[85000] -> 11[c7000] via P2P/IPC/read
+gpua030:633769:633871 [2] NCCL INFO Connected all rings
+gpua030:633769:633871 [2] NCCL INFO Channel 00/0 : 10[85000] -> 9[46000] via P2P/IPC/read
+gpua030:633769:633871 [2] NCCL INFO Channel 01/0 : 10[85000] -> 9[46000] via P2P/IPC/read
+gpua030:633769:633871 [2] NCCL INFO Connected all trees
+gpua030:633769:633871 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua030:633769:633871 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua030:633769:633871 [2] NCCL INFO comm 0x2115cf90 rank 10 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpua030:633767:633767 [0] NCCL INFO cudaDriverVersion 12020
+gpua030:633767:633767 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.30<0>
+gpua030:633767:633767 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua030:633767:633869 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.30<0>
+gpua030:633767:633869 [0] NCCL INFO Using network IB
+gpua030:633767:633869 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpua030:633767:633869 [0] NCCL INFO Trees [0] 9/12/-1->8->17 [1] 9/-1/-1->8->5
+gpua030:633767:633869 [0] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [receive] via NET/IB/0
+gpua030:633767:633869 [0] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [receive] via NET/IB/0
+gpua030:633767:633869 [0] NCCL INFO Channel 00/0 : 8[7000] -> 9[46000] via P2P/IPC/read
+gpua030:633767:633869 [0] NCCL INFO Channel 01/0 : 8[7000] -> 9[46000] via P2P/IPC/read
+gpua030:633767:633869 [0] NCCL INFO Connected all rings
+gpua030:633767:633869 [0] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [receive] via NET/IB/0
+gpua030:633767:633869 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [send] via NET/IB/0
+gpua030:633767:633869 [0] NCCL INFO Channel 00/0 : 8[7000] -> 17[46000] [send] via NET/IB/0
+gpua030:633767:633869 [0] NCCL INFO Channel 00/0 : 17[46000] -> 8[7000] [receive] via NET/IB/0
+gpua030:633767:633869 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [receive] via NET/IB/0
+gpua030:633767:633869 [0] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [send] via NET/IB/0
+gpua030:633767:633869 [0] NCCL INFO Connected all trees
+gpua030:633767:633869 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua030:633767:633869 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua030:633767:633869 [0] NCCL INFO comm 0x3c66f940 rank 8 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpua030:633770:633770 [3] NCCL INFO cudaDriverVersion 12020
+gpua030:633770:633770 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.30<0>
+gpua030:633770:633770 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua030:633770:633870 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.30<0>
+gpua030:633770:633870 [3] NCCL INFO Using network IB
+gpua030:633770:633870 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpua030:633770:633870 [3] NCCL INFO Trees [0] -1/-1/-1->11->10 [1] -1/-1/-1->11->10
+gpua030:633770:633870 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [send] via NET/IB/0
+gpua030:633770:633870 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [send] via NET/IB/0
+gpua030:633770:633870 [3] NCCL INFO Connected all rings
+gpua030:633770:633870 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 10[85000] via P2P/IPC/read
+gpua030:633770:633870 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 10[85000] via P2P/IPC/read
+gpua030:633770:633870 [3] NCCL INFO Connected all trees
+gpua030:633770:633870 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua030:633770:633870 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua030:633770:633870 [3] NCCL INFO comm 0xb026570 rank 11 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpua033:812550:812550 [1] NCCL INFO cudaDriverVersion 12020
+gpua033:812550:812550 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.33<0>
+gpua033:812550:812550 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua033:812550:812652 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.33<0>
+gpua033:812550:812652 [1] NCCL INFO Using network IB
+gpua033:812550:812652 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpua033:812550:812652 [1] NCCL INFO Trees [0] 18/8/-1->17->16 [1] 18/-1/-1->17->16
+gpua033:812550:812652 [1] NCCL INFO Channel 00/0 : 17[46000] -> 18[85000] via P2P/IPC/read
+gpua033:812550:812652 [1] NCCL INFO Channel 01/0 : 17[46000] -> 18[85000] via P2P/IPC/read
+gpua033:812550:812652 [1] NCCL INFO Connected all rings
+gpua033:812550:812652 [1] NCCL INFO Channel 00/0 : 8[7000] -> 17[46000] [receive] via NET/IB/0
+gpua033:812550:812652 [1] NCCL INFO Channel 00/0 : 17[46000] -> 8[7000] [send] via NET/IB/0
+gpua033:812550:812652 [1] NCCL INFO Channel 00/0 : 17[46000] -> 16[7000] via P2P/IPC/read
+gpua033:812550:812652 [1] NCCL INFO Channel 01/0 : 17[46000] -> 16[7000] via P2P/IPC/read
+gpua033:812550:812652 [1] NCCL INFO Connected all trees
+gpua033:812550:812652 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua033:812550:812652 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua033:812550:812652 [1] NCCL INFO comm 0x9e273b0 rank 17 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpua040:528570:528570 [2] NCCL INFO cudaDriverVersion 12020
+gpua040:528570:528570 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.40<0>
+gpua040:528570:528570 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua040:528570:528676 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.40<0>
+gpua040:528570:528676 [2] NCCL INFO Using network IB
+gpua040:528570:528676 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpua040:528570:528676 [2] NCCL INFO Trees [0] 35/-1/-1->34->33 [1] 35/-1/-1->34->33
+gpua040:528570:528676 [2] NCCL INFO Channel 00/0 : 34[85000] -> 35[c7000] via P2P/IPC/read
+gpua040:528570:528676 [2] NCCL INFO Channel 01/0 : 34[85000] -> 35[c7000] via P2P/IPC/read
+gpua040:528570:528676 [2] NCCL INFO Connected all rings
+gpua040:528570:528676 [2] NCCL INFO Channel 00/0 : 34[85000] -> 33[46000] via P2P/IPC/read
+gpua040:528570:528676 [2] NCCL INFO Channel 01/0 : 34[85000] -> 33[46000] via P2P/IPC/read
+gpua040:528570:528676 [2] NCCL INFO Connected all trees
+gpua040:528570:528676 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua040:528570:528676 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua040:528570:528676 [2] NCCL INFO comm 0x18d41010 rank 34 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpua040:528569:528569 [1] NCCL INFO cudaDriverVersion 12020
+gpua040:528569:528569 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.40<0>
+gpua040:528569:528569 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua040:528569:528677 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.40<0>
+gpua040:528569:528677 [1] NCCL INFO Using network IB
+gpua040:528569:528677 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpua040:528569:528677 [1] NCCL INFO Trees [0] 34/16/-1->33->32 [1] 34/-1/-1->33->32
+gpua040:528569:528677 [1] NCCL INFO Channel 00/0 : 33[46000] -> 34[85000] via P2P/IPC/read
+gpua040:528569:528677 [1] NCCL INFO Channel 01/0 : 33[46000] -> 34[85000] via P2P/IPC/read
+gpua040:528569:528677 [1] NCCL INFO Connected all rings
+gpua040:528569:528677 [1] NCCL INFO Channel 00/0 : 16[7000] -> 33[46000] [receive] via NET/IB/0
+gpua040:528569:528677 [1] NCCL INFO Channel 00/0 : 33[46000] -> 16[7000] [send] via NET/IB/0
+gpua040:528569:528677 [1] NCCL INFO Channel 00/0 : 33[46000] -> 32[7000] via P2P/IPC/read
+gpua040:528569:528677 [1] NCCL INFO Channel 01/0 : 33[46000] -> 32[7000] via P2P/IPC/read
+gpua040:528569:528677 [1] NCCL INFO Connected all trees
+gpua040:528569:528677 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua040:528569:528677 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua040:528569:528677 [1] NCCL INFO comm 0x4a1aa6f0 rank 33 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpua030:633768:633768 [1] NCCL INFO cudaDriverVersion 12020
+gpua030:633768:633768 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.30<0>
+gpua030:633768:633768 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua030:633768:633872 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.30<0>
+gpua030:633768:633872 [1] NCCL INFO Using network IB
+gpua030:633768:633872 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpua030:633768:633872 [1] NCCL INFO Trees [0] 10/4/-1->9->8 [1] 10/-1/-1->9->8
+gpua030:633768:633872 [1] NCCL INFO Channel 00/0 : 9[46000] -> 10[85000] via P2P/IPC/read
+gpua030:633768:633872 [1] NCCL INFO Channel 01/0 : 9[46000] -> 10[85000] via P2P/IPC/read
+gpua030:633768:633872 [1] NCCL INFO Connected all rings
+gpua030:633768:633872 [1] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [receive] via NET/IB/0
+gpua030:633768:633872 [1] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [send] via NET/IB/0
+gpua030:633768:633872 [1] NCCL INFO Channel 00/0 : 9[46000] -> 8[7000] via P2P/IPC/read
+gpua030:633768:633872 [1] NCCL INFO Channel 01/0 : 9[46000] -> 8[7000] via P2P/IPC/read
+gpua030:633768:633872 [1] NCCL INFO Connected all trees
+gpua030:633768:633872 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua030:633768:633872 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua030:633768:633872 [1] NCCL INFO comm 0x33d14120 rank 9 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpua040:528568:528568 [0] NCCL INFO cudaDriverVersion 12020
+gpua040:528568:528568 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.40<0>
+gpua040:528568:528568 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua040:528568:528678 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.40<0>
+gpua040:528568:528678 [0] NCCL INFO Using network IB
+gpua040:528568:528678 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpua040:528568:528678 [0] NCCL INFO Trees [0] 33/48/-1->32->0 [1] 33/-1/-1->32->36
+gpua040:528568:528678 [0] NCCL INFO Channel 00/0 : 31[c7000] -> 32[7000] [receive] via NET/IB/0
+gpua040:528568:528678 [0] NCCL INFO Channel 01/0 : 31[c7000] -> 32[7000] [receive] via NET/IB/0
+gpua040:528568:528678 [0] NCCL INFO Channel 00/0 : 32[7000] -> 33[46000] via P2P/IPC/read
+gpua040:528568:528678 [0] NCCL INFO Channel 01/0 : 32[7000] -> 33[46000] via P2P/IPC/read
+gpua040:528568:528678 [0] NCCL INFO Connected all rings
+gpua040:528568:528678 [0] NCCL INFO Channel 01/0 : 32[7000] -> 36[7000] [send] via NET/IB/0
+gpua040:528568:528678 [0] NCCL INFO Channel 00/0 : 32[7000] -> 48[7000] [send] via NET/IB/0
+gpua040:528568:528678 [0] NCCL INFO Channel 00/0 : 0[7000] -> 32[7000] [receive] via NET/IB/0
+gpua040:528568:528678 [0] NCCL INFO Channel 00/0 : 32[7000] -> 0[7000] [send] via NET/IB/0
+gpua040:528568:528678 [0] NCCL INFO Channel 00/0 : 48[7000] -> 32[7000] [receive] via NET/IB/0
+gpua040:528568:528678 [0] NCCL INFO Channel 01/0 : 36[7000] -> 32[7000] [receive] via NET/IB/0
+gpua040:528568:528678 [0] NCCL INFO Connected all trees
+gpua040:528568:528678 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua040:528568:528678 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua040:528568:528678 [0] NCCL INFO comm 0x2f7872a0 rank 32 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpua069:1351484:1351484 [0] NCCL INFO cudaDriverVersion 12020
+gpua069:1351484:1351484 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.69<0>
+gpua069:1351484:1351484 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua069:1351484:1351579 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.69<0>
+gpua069:1351484:1351579 [0] NCCL INFO Using network IB
+gpua069:1351484:1351579 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpua069:1351484:1351579 [0] NCCL INFO Trees [0] 45/-1/-1->44->40 [1] 45/36/-1->44->29
+gpua069:1351484:1351579 [0] NCCL INFO Channel 00/0 : 43[c7000] -> 44[7000] [receive] via NET/IB/0
+gpua069:1351484:1351579 [0] NCCL INFO Channel 01/0 : 43[c7000] -> 44[7000] [receive] via NET/IB/0
+gpua069:1351484:1351579 [0] NCCL INFO Channel 00/0 : 44[7000] -> 45[46000] via P2P/IPC/read
+gpua069:1351484:1351579 [0] NCCL INFO Channel 01/0 : 44[7000] -> 45[46000] via P2P/IPC/read
+gpua069:1351484:1351579 [0] NCCL INFO Connected all rings
+gpua069:1351484:1351579 [0] NCCL INFO Channel 00/0 : 40[7000] -> 44[7000] [receive] via NET/IB/0
+gpua069:1351484:1351579 [0] NCCL INFO Channel 01/0 : 36[7000] -> 44[7000] [receive] via NET/IB/0
+gpua069:1351484:1351579 [0] NCCL INFO Channel 01/0 : 29[46000] -> 44[7000] [receive] via NET/IB/0
+gpua069:1351484:1351579 [0] NCCL INFO Channel 01/0 : 44[7000] -> 29[46000] [send] via NET/IB/0
+gpua069:1351484:1351579 [0] NCCL INFO Channel 01/0 : 44[7000] -> 36[7000] [send] via NET/IB/0
+gpua069:1351484:1351579 [0] NCCL INFO Channel 00/0 : 44[7000] -> 40[7000] [send] via NET/IB/0
+gpua069:1351484:1351579 [0] NCCL INFO Connected all trees
+gpua069:1351484:1351579 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua069:1351484:1351579 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua069:1351484:1351579 [0] NCCL INFO comm 0x1236b690 rank 44 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpua037:388774:388774 [0] NCCL INFO cudaDriverVersion 12020
+gpua037:388774:388774 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.37<0>
+gpua037:388774:388774 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua037:388774:388879 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.37<0>
+gpua037:388774:388879 [0] NCCL INFO Using network IB
+gpua037:388774:388879 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpua037:388774:388879 [0] NCCL INFO Trees [0] 25/28/-1->24->16 [1] 25/-1/-1->24->21
+gpua037:388774:388879 [0] NCCL INFO Channel 00/0 : 23[c7000] -> 24[7000] [receive] via NET/IB/0
+gpua037:388774:388879 [0] NCCL INFO Channel 01/0 : 23[c7000] -> 24[7000] [receive] via NET/IB/0
+gpua037:388774:388879 [0] NCCL INFO Channel 00/0 : 24[7000] -> 25[46000] via P2P/IPC/read
+gpua037:388774:388879 [0] NCCL INFO Channel 01/0 : 24[7000] -> 25[46000] via P2P/IPC/read
+gpua037:388774:388879 [0] NCCL INFO Connected all rings
+gpua037:388774:388879 [0] NCCL INFO Channel 01/0 : 21[46000] -> 24[7000] [receive] via NET/IB/0
+gpua037:388774:388879 [0] NCCL INFO Channel 00/0 : 24[7000] -> 28[7000] [send] via NET/IB/0
+gpua037:388774:388879 [0] NCCL INFO Channel 00/0 : 16[7000] -> 24[7000] [receive] via NET/IB/0
+gpua037:388774:388879 [0] NCCL INFO Channel 00/0 : 24[7000] -> 16[7000] [send] via NET/IB/0
+gpua037:388774:388879 [0] NCCL INFO Channel 00/0 : 28[7000] -> 24[7000] [receive] via NET/IB/0
+gpua037:388774:388879 [0] NCCL INFO Channel 01/0 : 24[7000] -> 21[46000] [send] via NET/IB/0
+gpua037:388774:388879 [0] NCCL INFO Connected all trees
+gpua037:388774:388879 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua037:388774:388879 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua037:388774:388879 [0] NCCL INFO comm 0x4fff9fb0 rank 24 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpua032:426715:426715 [1] NCCL INFO cudaDriverVersion 12020
+gpua032:426715:426715 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.32<0>
+gpua032:426715:426715 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua032:426715:426824 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.32<0>
+gpua032:426715:426824 [1] NCCL INFO Using network IB
+gpua032:426715:426824 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpua032:426715:426824 [1] NCCL INFO Trees [0] 14/-1/-1->13->12 [1] 14/20/-1->13->12
+gpua032:426715:426824 [1] NCCL INFO Channel 00/0 : 13[46000] -> 14[85000] via P2P/IPC/read
+gpua032:426715:426824 [1] NCCL INFO Channel 01/0 : 13[46000] -> 14[85000] via P2P/IPC/read
+gpua032:426715:426824 [1] NCCL INFO Connected all rings
+gpua032:426715:426824 [1] NCCL INFO Channel 01/0 : 13[46000] -> 20[7000] [send] via NET/IB/0
+gpua032:426715:426824 [1] NCCL INFO Channel 01/0 : 20[7000] -> 13[46000] [receive] via NET/IB/0
+gpua032:426715:426824 [1] NCCL INFO Channel 00/0 : 13[46000] -> 12[7000] via P2P/IPC/read
+gpua032:426715:426824 [1] NCCL INFO Channel 01/0 : 13[46000] -> 12[7000] via P2P/IPC/read
+gpua032:426715:426824 [1] NCCL INFO Connected all trees
+gpua032:426715:426824 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua032:426715:426824 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua032:426715:426824 [1] NCCL INFO comm 0x11cbe830 rank 13 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpua032:426716:426716 [2] NCCL INFO cudaDriverVersion 12020
+gpua032:426716:426716 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.32<0>
+gpua032:426716:426716 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua032:426716:426822 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.32<0>
+gpua032:426716:426822 [2] NCCL INFO Using network IB
+gpua032:426716:426822 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpua032:426716:426822 [2] NCCL INFO Trees [0] 15/-1/-1->14->13 [1] 15/-1/-1->14->13
+gpua032:426716:426822 [2] NCCL INFO Channel 00/0 : 14[85000] -> 15[c7000] via P2P/IPC/read
+gpua032:426716:426822 [2] NCCL INFO Channel 01/0 : 14[85000] -> 15[c7000] via P2P/IPC/read
+gpua032:426716:426822 [2] NCCL INFO Connected all rings
+gpua032:426716:426822 [2] NCCL INFO Channel 00/0 : 14[85000] -> 13[46000] via P2P/IPC/read
+gpua032:426716:426822 [2] NCCL INFO Channel 01/0 : 14[85000] -> 13[46000] via P2P/IPC/read
+gpua032:426716:426822 [2] NCCL INFO Connected all trees
+gpua032:426716:426822 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua032:426716:426822 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua032:426716:426822 [2] NCCL INFO comm 0x5227a200 rank 14 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpua032:426714:426714 [0] NCCL INFO cudaDriverVersion 12020
+gpua032:426714:426714 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.32<0>
+gpua032:426714:426714 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua032:426714:426825 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.32<0>
+gpua032:426714:426825 [0] NCCL INFO Using network IB
+gpua032:426714:426825 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpua032:426714:426825 [0] NCCL INFO Trees [0] 13/-1/-1->12->8 [1] 13/4/-1->12->28
+gpua032:426714:426825 [0] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [receive] via NET/IB/0
+gpua032:426714:426825 [0] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [receive] via NET/IB/0
+gpua032:426714:426825 [0] NCCL INFO Channel 00/0 : 12[7000] -> 13[46000] via P2P/IPC/read
+gpua032:426714:426825 [0] NCCL INFO Channel 01/0 : 12[7000] -> 13[46000] via P2P/IPC/read
+gpua032:426714:426825 [0] NCCL INFO Connected all rings
+gpua032:426714:426825 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [receive] via NET/IB/0
+gpua032:426714:426825 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [receive] via NET/IB/0
+gpua032:426714:426825 [0] NCCL INFO Channel 01/0 : 12[7000] -> 28[7000] [send] via NET/IB/0
+gpua032:426714:426825 [0] NCCL INFO Channel 01/0 : 28[7000] -> 12[7000] [receive] via NET/IB/0
+gpua032:426714:426825 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [send] via NET/IB/0
+gpua032:426714:426825 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [send] via NET/IB/0
+gpua032:426714:426825 [0] NCCL INFO Connected all trees
+gpua032:426714:426825 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua032:426714:426825 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua032:426714:426825 [0] NCCL INFO comm 0x24271710 rank 12 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpua040:528571:528571 [3] NCCL INFO cudaDriverVersion 12020
+gpua040:528571:528571 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.40<0>
+gpua040:528571:528571 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua040:528571:528675 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.40<0>
+gpua040:528571:528675 [3] NCCL INFO Using network IB
+gpua040:528571:528675 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpua040:528571:528675 [3] NCCL INFO Trees [0] -1/-1/-1->35->34 [1] -1/-1/-1->35->34
+gpua040:528571:528675 [3] NCCL INFO Channel 00/0 : 35[c7000] -> 36[7000] [send] via NET/IB/0
+gpua040:528571:528675 [3] NCCL INFO Channel 01/0 : 35[c7000] -> 36[7000] [send] via NET/IB/0
+gpua040:528571:528675 [3] NCCL INFO Connected all rings
+gpua040:528571:528675 [3] NCCL INFO Channel 00/0 : 35[c7000] -> 34[85000] via P2P/IPC/read
+gpua040:528571:528675 [3] NCCL INFO Channel 01/0 : 35[c7000] -> 34[85000] via P2P/IPC/read
+gpua040:528571:528675 [3] NCCL INFO Connected all trees
+gpua040:528571:528675 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua040:528571:528675 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua040:528571:528675 [3] NCCL INFO comm 0x181a10e0 rank 35 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpua032:426717:426717 [3] NCCL INFO cudaDriverVersion 12020
+gpua032:426717:426717 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.32<0>
+gpua032:426717:426717 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua032:426717:426823 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.32<0>
+gpua032:426717:426823 [3] NCCL INFO Using network IB
+gpua032:426717:426823 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpua032:426717:426823 [3] NCCL INFO Trees [0] -1/-1/-1->15->14 [1] -1/-1/-1->15->14
+gpua032:426717:426823 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 16[7000] [send] via NET/IB/0
+gpua032:426717:426823 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 16[7000] [send] via NET/IB/0
+gpua032:426717:426823 [3] NCCL INFO Connected all rings
+gpua032:426717:426823 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 14[85000] via P2P/IPC/read
+gpua032:426717:426823 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 14[85000] via P2P/IPC/read
+gpua037:388776:388776 [2] NCCL INFO cudaDriverVersion 12020
+gpua037:388776:388776 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.37<0>
+gpua037:388776:388776 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua037:388776:388878 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.37<0>
+gpua037:388776:388878 [2] NCCL INFO Using network IB
+gpua037:388776:388878 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpua037:388776:388878 [2] NCCL INFO Trees [0] 27/-1/-1->26->25 [1] 27/-1/-1->26->25
+gpua037:388776:388878 [2] NCCL INFO Channel 00/0 : 26[85000] -> 27[c7000] via P2P/IPC/read
+gpua037:388776:388878 [2] NCCL INFO Channel 01/0 : 26[85000] -> 27[c7000] via P2P/IPC/read
+gpua037:388776:388878 [2] NCCL INFO Connected all rings
+gpua037:388776:388878 [2] NCCL INFO Channel 00/0 : 26[85000] -> 25[46000] via P2P/IPC/read
+gpua037:388776:388878 [2] NCCL INFO Channel 01/0 : 26[85000] -> 25[46000] via P2P/IPC/read
+gpua043:1858028:1858028 [3] NCCL INFO cudaDriverVersion 12020
+gpua043:1858028:1858028 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.43<0>
+gpua043:1858028:1858028 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua043:1858028:1858129 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.43<0>
+gpua043:1858028:1858129 [3] NCCL INFO Using network IB
+gpua043:1858028:1858129 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpua043:1858028:1858129 [3] NCCL INFO Trees [0] -1/-1/-1->43->42 [1] -1/-1/-1->43->42
+gpua043:1858028:1858129 [3] NCCL INFO Channel 00/0 : 43[c7000] -> 44[7000] [send] via NET/IB/0
+gpua043:1858028:1858129 [3] NCCL INFO Channel 01/0 : 43[c7000] -> 44[7000] [send] via NET/IB/0
+gpua043:1858028:1858129 [3] NCCL INFO Connected all rings
+gpua043:1858028:1858129 [3] NCCL INFO Channel 00/0 : 43[c7000] -> 42[85000] via P2P/IPC/read
+gpua043:1858028:1858129 [3] NCCL INFO Channel 01/0 : 43[c7000] -> 42[85000] via P2P/IPC/read
+gpua032:426717:426823 [3] NCCL INFO Connected all trees
+gpua032:426717:426823 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua032:426717:426823 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua032:426717:426823 [3] NCCL INFO comm 0x1a9ca810 rank 15 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpua037:388776:388878 [2] NCCL INFO Connected all trees
+gpua037:388776:388878 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua037:388776:388878 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua037:388776:388878 [2] NCCL INFO comm 0x226bb050 rank 26 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpua043:1858028:1858129 [3] NCCL INFO Connected all trees
+gpua043:1858028:1858129 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua043:1858028:1858129 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua043:1858028:1858129 [3] NCCL INFO comm 0x12a5de00 rank 43 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpua043:1858027:1858027 [2] NCCL INFO cudaDriverVersion 12020
+gpua043:1858027:1858027 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.43<0>
+gpua043:1858027:1858027 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua043:1858027:1858128 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.43<0>
+gpua043:1858027:1858128 [2] NCCL INFO Using network IB
+gpua043:1858027:1858128 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpua043:1858027:1858128 [2] NCCL INFO Trees [0] 43/-1/-1->42->41 [1] 43/-1/-1->42->41
+gpua043:1858027:1858128 [2] NCCL INFO Channel 00/0 : 42[85000] -> 43[c7000] via P2P/IPC/read
+gpua043:1858027:1858128 [2] NCCL INFO Channel 01/0 : 42[85000] -> 43[c7000] via P2P/IPC/read
+gpua043:1858027:1858128 [2] NCCL INFO Connected all rings
+gpua043:1858027:1858128 [2] NCCL INFO Channel 00/0 : 42[85000] -> 41[46000] via P2P/IPC/read
+gpua043:1858027:1858128 [2] NCCL INFO Channel 01/0 : 42[85000] -> 41[46000] via P2P/IPC/read
+gpua043:1858027:1858128 [2] NCCL INFO Connected all trees
+gpua043:1858027:1858128 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua043:1858027:1858128 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua043:1858027:1858128 [2] NCCL INFO comm 0x9631ae0 rank 42 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpua042:320056:320056 [2] NCCL INFO cudaDriverVersion 12020
+gpua042:320056:320056 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.42<0>
+gpua042:320056:320056 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua042:320056:320141 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.42<0>
+gpua042:320056:320141 [2] NCCL INFO Using network IB
+gpua042:320056:320141 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpua042:320056:320141 [2] NCCL INFO Trees [0] 39/-1/-1->38->37 [1] 39/-1/-1->38->37
+gpua042:320056:320141 [2] NCCL INFO Channel 00/0 : 38[85000] -> 39[c7000] via P2P/IPC/read
+gpua042:320056:320141 [2] NCCL INFO Channel 01/0 : 38[85000] -> 39[c7000] via P2P/IPC/read
+gpua042:320056:320141 [2] NCCL INFO Connected all rings
+gpua042:320056:320141 [2] NCCL INFO Channel 00/0 : 38[85000] -> 37[46000] via P2P/IPC/read
+gpua042:320056:320141 [2] NCCL INFO Channel 01/0 : 38[85000] -> 37[46000] via P2P/IPC/read
+gpua042:320056:320141 [2] NCCL INFO Connected all trees
+gpua042:320056:320141 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua042:320056:320141 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua042:320056:320141 [2] NCCL INFO comm 0x1241d0f0 rank 38 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpua039:405900:405900 [1] NCCL INFO cudaDriverVersion 12020
+gpua039:405900:405900 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.39<0>
+gpua039:405900:405900 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua039:405900:406003 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.39<0>
+gpua039:405900:406003 [1] NCCL INFO Using network IB
+gpua039:405900:406003 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpua039:405900:406003 [1] NCCL INFO Trees [0] 30/-1/-1->29->28 [1] 30/44/-1->29->28
+gpua039:405900:406003 [1] NCCL INFO Channel 00/0 : 29[46000] -> 30[85000] via P2P/IPC/read
+gpua039:405900:406003 [1] NCCL INFO Channel 01/0 : 29[46000] -> 30[85000] via P2P/IPC/read
+gpua039:405900:406003 [1] NCCL INFO Connected all rings
+gpua039:405900:406003 [1] NCCL INFO Channel 01/0 : 29[46000] -> 44[7000] [send] via NET/IB/0
+gpua039:405900:406003 [1] NCCL INFO Channel 01/0 : 44[7000] -> 29[46000] [receive] via NET/IB/0
+gpua039:405900:406003 [1] NCCL INFO Channel 00/0 : 29[46000] -> 28[7000] via P2P/IPC/read
+gpua039:405900:406003 [1] NCCL INFO Channel 01/0 : 29[46000] -> 28[7000] via P2P/IPC/read
+gpua039:405900:406003 [1] NCCL INFO Connected all trees
+gpua039:405900:406003 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua039:405900:406003 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua039:405900:406003 [1] NCCL INFO comm 0xe6d0e50 rank 29 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpua039:405901:405901 [2] NCCL INFO cudaDriverVersion 12020
+gpua039:405901:405901 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.39<0>
+gpua039:405901:405901 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua039:405901:406004 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.39<0>
+gpua039:405901:406004 [2] NCCL INFO Using network IB
+gpua039:405901:406004 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpua039:405901:406004 [2] NCCL INFO Trees [0] 31/-1/-1->30->29 [1] 31/-1/-1->30->29
+gpua039:405901:406004 [2] NCCL INFO Channel 00/0 : 30[85000] -> 31[c7000] via P2P/IPC/read
+gpua039:405901:406004 [2] NCCL INFO Channel 01/0 : 30[85000] -> 31[c7000] via P2P/IPC/read
+gpua039:405901:406004 [2] NCCL INFO Connected all rings
+gpua039:405901:406004 [2] NCCL INFO Channel 00/0 : 30[85000] -> 29[46000] via P2P/IPC/read
+gpua039:405901:406004 [2] NCCL INFO Channel 01/0 : 30[85000] -> 29[46000] via P2P/IPC/read
+gpua039:405901:406004 [2] NCCL INFO Connected all trees
+gpua039:405901:406004 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua039:405901:406004 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua039:405901:406004 [2] NCCL INFO comm 0x13738500 rank 30 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpua043:1858026:1858026 [1] NCCL INFO cudaDriverVersion 12020
+gpua043:1858026:1858026 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.43<0>
+gpua043:1858026:1858026 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua043:1858026:1858127 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.43<0>
+gpua043:1858026:1858127 [1] NCCL INFO Using network IB
+gpua043:1858026:1858127 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpua043:1858026:1858127 [1] NCCL INFO Trees [0] 42/36/-1->41->40 [1] 42/-1/-1->41->40
+gpua043:1858026:1858127 [1] NCCL INFO Channel 00/0 : 41[46000] -> 42[85000] via P2P/IPC/read
+gpua043:1858026:1858127 [1] NCCL INFO Channel 01/0 : 41[46000] -> 42[85000] via P2P/IPC/read
+gpua043:1858026:1858127 [1] NCCL INFO Connected all rings
+gpua043:1858026:1858127 [1] NCCL INFO Channel 00/0 : 36[7000] -> 41[46000] [receive] via NET/IB/0
+gpua043:1858026:1858127 [1] NCCL INFO Channel 00/0 : 41[46000] -> 36[7000] [send] via NET/IB/0
+gpua043:1858026:1858127 [1] NCCL INFO Channel 00/0 : 41[46000] -> 40[7000] via P2P/IPC/read
+gpua043:1858026:1858127 [1] NCCL INFO Channel 01/0 : 41[46000] -> 40[7000] via P2P/IPC/read
+gpua043:1858026:1858127 [1] NCCL INFO Connected all trees
+gpua043:1858026:1858127 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua043:1858026:1858127 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua043:1858026:1858127 [1] NCCL INFO comm 0x15c58d00 rank 41 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpua099:116368:116368 [2] NCCL INFO cudaDriverVersion 12020
+gpua099:116368:116368 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.99<0>
+gpua099:116368:116368 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua099:116368:116473 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.99<0>
+gpua099:116368:116473 [2] NCCL INFO Using network IB
+gpua099:116368:116473 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpua099:116368:116473 [2] NCCL INFO Trees [0] 63/-1/-1->62->61 [1] 63/-1/-1->62->61
+gpua099:116368:116473 [2] NCCL INFO Channel 00/0 : 62[85000] -> 63[c7000] via P2P/IPC/read
+gpua099:116368:116473 [2] NCCL INFO Channel 01/0 : 62[85000] -> 63[c7000] via P2P/IPC/read
+gpua099:116368:116473 [2] NCCL INFO Connected all rings
+gpua099:116368:116473 [2] NCCL INFO Channel 00/0 : 62[85000] -> 61[46000] via P2P/IPC/read
+gpua099:116368:116473 [2] NCCL INFO Channel 01/0 : 62[85000] -> 61[46000] via P2P/IPC/read
+gpua099:116368:116473 [2] NCCL INFO Connected all trees
+gpua099:116368:116473 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua099:116368:116473 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua099:116368:116473 [2] NCCL INFO comm 0x14b27160 rank 62 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpua069:1351485:1351485 [1] NCCL INFO cudaDriverVersion 12020
+gpua069:1351485:1351485 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.69<0>
+gpua069:1351485:1351485 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua069:1351485:1351580 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.69<0>
+gpua069:1351485:1351580 [1] NCCL INFO Using network IB
+gpua069:1351485:1351580 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpua069:1351485:1351580 [1] NCCL INFO Trees [0] 46/-1/-1->45->44 [1] 46/52/-1->45->44
+gpua069:1351485:1351580 [1] NCCL INFO Channel 00/0 : 45[46000] -> 46[85000] via P2P/IPC/read
+gpua069:1351485:1351580 [1] NCCL INFO Channel 01/0 : 45[46000] -> 46[85000] via P2P/IPC/read
+gpua069:1351485:1351580 [1] NCCL INFO Connected all rings
+gpua069:1351485:1351580 [1] NCCL INFO Channel 01/0 : 45[46000] -> 52[7000] [send] via NET/IB/0
+gpua069:1351485:1351580 [1] NCCL INFO Channel 01/0 : 52[7000] -> 45[46000] [receive] via NET/IB/0
+gpua069:1351485:1351580 [1] NCCL INFO Channel 00/0 : 45[46000] -> 44[7000] via P2P/IPC/read
+gpua069:1351485:1351580 [1] NCCL INFO Channel 01/0 : 45[46000] -> 44[7000] via P2P/IPC/read
+gpua069:1351485:1351580 [1] NCCL INFO Connected all trees
+gpua069:1351485:1351580 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua069:1351485:1351580 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua069:1351485:1351580 [1] NCCL INFO comm 0xb32c220 rank 45 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpua099:116367:116367 [1] NCCL INFO cudaDriverVersion 12020
+gpua099:116367:116367 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.99<0>
+gpua099:116367:116367 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua099:116367:116475 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.99<0>
+gpua099:116367:116475 [1] NCCL INFO Using network IB
+gpua099:116367:116475 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpua099:116367:116475 [1] NCCL INFO Trees [0] 62/-1/-1->61->60 [1] 62/-1/-1->61->60
+gpua099:116367:116475 [1] NCCL INFO Channel 00/0 : 61[46000] -> 62[85000] via P2P/IPC/read
+gpua099:116367:116475 [1] NCCL INFO Channel 01/0 : 61[46000] -> 62[85000] via P2P/IPC/read
+gpua099:116367:116475 [1] NCCL INFO Connected all rings
+gpua099:116367:116475 [1] NCCL INFO Channel 00/0 : 61[46000] -> 60[7000] via P2P/IPC/read
+gpua099:116367:116475 [1] NCCL INFO Channel 01/0 : 61[46000] -> 60[7000] via P2P/IPC/read
+gpua099:116367:116475 [1] NCCL INFO Connected all trees
+gpua099:116367:116475 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua099:116367:116475 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua099:116367:116475 [1] NCCL INFO comm 0xd1aea70 rank 61 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpua069:1351487:1351487 [3] NCCL INFO cudaDriverVersion 12020
+gpua069:1351487:1351487 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.69<0>
+gpua069:1351487:1351487 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua069:1351487:1351581 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.69<0>
+gpua069:1351487:1351581 [3] NCCL INFO Using network IB
+gpua069:1351487:1351581 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpua069:1351487:1351581 [3] NCCL INFO Trees [0] -1/-1/-1->47->46 [1] -1/-1/-1->47->46
+gpua069:1351487:1351581 [3] NCCL INFO Channel 00/0 : 47[c7000] -> 48[7000] [send] via NET/IB/0
+gpua069:1351487:1351581 [3] NCCL INFO Channel 01/0 : 47[c7000] -> 48[7000] [send] via NET/IB/0
+gpua069:1351487:1351581 [3] NCCL INFO Connected all rings
+gpua069:1351487:1351581 [3] NCCL INFO Channel 00/0 : 47[c7000] -> 46[85000] via P2P/IPC/read
+gpua069:1351487:1351581 [3] NCCL INFO Channel 01/0 : 47[c7000] -> 46[85000] via P2P/IPC/read
+gpua069:1351487:1351581 [3] NCCL INFO Connected all trees
+gpua069:1351487:1351581 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua069:1351487:1351581 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua069:1351487:1351581 [3] NCCL INFO comm 0xf2fa520 rank 47 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpua042:320055:320055 [1] NCCL INFO cudaDriverVersion 12020
+gpua042:320055:320055 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.42<0>
+gpua042:320055:320055 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua042:320055:320143 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.42<0>
+gpua042:320055:320143 [1] NCCL INFO Using network IB
+gpua042:320055:320143 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpua042:320055:320143 [1] NCCL INFO Trees [0] 38/-1/-1->37->36 [1] 38/40/-1->37->36
+gpua042:320055:320143 [1] NCCL INFO Channel 00/0 : 37[46000] -> 38[85000] via P2P/IPC/read
+gpua042:320055:320143 [1] NCCL INFO Channel 01/0 : 37[46000] -> 38[85000] via P2P/IPC/read
+gpua042:320055:320143 [1] NCCL INFO Connected all rings
+gpua042:320055:320143 [1] NCCL INFO Channel 01/0 : 37[46000] -> 40[7000] [send] via NET/IB/0
+gpua042:320055:320143 [1] NCCL INFO Channel 01/0 : 40[7000] -> 37[46000] [receive] via NET/IB/0
+gpua042:320055:320143 [1] NCCL INFO Channel 00/0 : 37[46000] -> 36[7000] via P2P/IPC/read
+gpua042:320055:320143 [1] NCCL INFO Channel 01/0 : 37[46000] -> 36[7000] via P2P/IPC/read
+gpua042:320055:320143 [1] NCCL INFO Connected all trees
+gpua042:320055:320143 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua042:320055:320143 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua042:320055:320143 [1] NCCL INFO comm 0x1fa55e40 rank 37 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpua033:812551:812551 [2] NCCL INFO cudaDriverVersion 12020
+gpua033:812551:812551 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.33<0>
+gpua033:812551:812551 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua033:812551:812654 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.33<0>
+gpua033:812551:812654 [2] NCCL INFO Using network IB
+gpua033:812551:812654 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpua033:812551:812654 [2] NCCL INFO Trees [0] 19/-1/-1->18->17 [1] 19/-1/-1->18->17
+gpua033:812551:812654 [2] NCCL INFO Channel 00/0 : 18[85000] -> 19[c7000] via P2P/IPC/read
+gpua033:812551:812654 [2] NCCL INFO Channel 01/0 : 18[85000] -> 19[c7000] via P2P/IPC/read
+gpua033:812551:812654 [2] NCCL INFO Connected all rings
+gpua033:812551:812654 [2] NCCL INFO Channel 00/0 : 18[85000] -> 17[46000] via P2P/IPC/read
+gpua033:812551:812654 [2] NCCL INFO Channel 01/0 : 18[85000] -> 17[46000] via P2P/IPC/read
+gpua033:812551:812654 [2] NCCL INFO Connected all trees
+gpua033:812551:812654 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua033:812551:812654 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua033:812551:812654 [2] NCCL INFO comm 0x115cdb20 rank 18 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpua039:405899:405899 [0] NCCL INFO cudaDriverVersion 12020
+gpua039:405899:405899 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.39<0>
+gpua039:405899:405899 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua039:405899:406002 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.39<0>
+gpua039:405899:406002 [0] NCCL INFO Using network IB
+gpua039:405899:406002 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpua039:405899:406002 [0] NCCL INFO Trees [0] 29/-1/-1->28->24 [1] 29/12/-1->28->60
+gpua039:405899:406002 [0] NCCL INFO Channel 00/0 : 27[c7000] -> 28[7000] [receive] via NET/IB/0
+gpua039:405899:406002 [0] NCCL INFO Channel 01/0 : 27[c7000] -> 28[7000] [receive] via NET/IB/0
+gpua039:405899:406002 [0] NCCL INFO Channel 00/0 : 28[7000] -> 29[46000] via P2P/IPC/read
+gpua039:405899:406002 [0] NCCL INFO Channel 01/0 : 28[7000] -> 29[46000] via P2P/IPC/read
+gpua039:405899:406002 [0] NCCL INFO Connected all rings
+gpua039:405899:406002 [0] NCCL INFO Channel 00/0 : 24[7000] -> 28[7000] [receive] via NET/IB/0
+gpua039:405899:406002 [0] NCCL INFO Channel 01/0 : 12[7000] -> 28[7000] [receive] via NET/IB/0
+gpua039:405899:406002 [0] NCCL INFO Channel 01/0 : 60[7000] -> 28[7000] [receive] via NET/IB/0
+gpua039:405899:406002 [0] NCCL INFO Channel 01/0 : 28[7000] -> 60[7000] [send] via NET/IB/0
+gpua039:405899:406002 [0] NCCL INFO Channel 01/0 : 28[7000] -> 12[7000] [send] via NET/IB/0
+gpua039:405899:406002 [0] NCCL INFO Channel 00/0 : 28[7000] -> 24[7000] [send] via NET/IB/0
+gpua039:405899:406002 [0] NCCL INFO Connected all trees
+gpua039:405899:406002 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua039:405899:406002 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua039:405899:406002 [0] NCCL INFO comm 0x194a7bc0 rank 28 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpua099:116366:116366 [0] NCCL INFO cudaDriverVersion 12020
+gpua099:116366:116366 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.99<0>
+gpua099:116366:116366 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua099:116366:116474 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.99<0>
+gpua099:116366:116474 [0] NCCL INFO Using network IB
+gpua099:116366:116474 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpua099:116366:116474 [0] NCCL INFO Trees [0] 61/-1/-1->60->56 [1] 61/28/-1->60->-1
+gpua099:116366:116474 [0] NCCL INFO Channel 00/0 : 59[c7000] -> 60[7000] [receive] via NET/IB/0
+gpua099:116366:116474 [0] NCCL INFO Channel 01/0 : 59[c7000] -> 60[7000] [receive] via NET/IB/0
+gpua099:116366:116474 [0] NCCL INFO Channel 00/0 : 60[7000] -> 61[46000] via P2P/IPC/read
+gpua099:116366:116474 [0] NCCL INFO Channel 01/0 : 60[7000] -> 61[46000] via P2P/IPC/read
+gpua099:116366:116474 [0] NCCL INFO Connected all rings
+gpua099:116366:116474 [0] NCCL INFO Channel 00/0 : 56[7000] -> 60[7000] [receive] via NET/IB/0
+gpua099:116366:116474 [0] NCCL INFO Channel 01/0 : 28[7000] -> 60[7000] [receive] via NET/IB/0
+gpua099:116366:116474 [0] NCCL INFO Channel 01/0 : 60[7000] -> 28[7000] [send] via NET/IB/0
+gpua099:116366:116474 [0] NCCL INFO Channel 00/0 : 60[7000] -> 56[7000] [send] via NET/IB/0
+gpua099:116366:116474 [0] NCCL INFO Connected all trees
+gpua099:116366:116474 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua099:116366:116474 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua099:116366:116474 [0] NCCL INFO comm 0xe772b30 rank 60 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpua042:320054:320054 [0] NCCL INFO cudaDriverVersion 12020
+gpua042:320054:320054 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.42<0>
+gpua042:320054:320054 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua042:320054:320144 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.42<0>
+gpua042:320054:320144 [0] NCCL INFO Using network IB
+gpua042:320054:320144 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpua042:320054:320144 [0] NCCL INFO Trees [0] 37/-1/-1->36->41 [1] 37/32/-1->36->44
+gpua042:320054:320144 [0] NCCL INFO Channel 00/0 : 35[c7000] -> 36[7000] [receive] via NET/IB/0
+gpua042:320054:320144 [0] NCCL INFO Channel 01/0 : 35[c7000] -> 36[7000] [receive] via NET/IB/0
+gpua042:320054:320144 [0] NCCL INFO Channel 00/0 : 36[7000] -> 37[46000] via P2P/IPC/read
+gpua042:320054:320144 [0] NCCL INFO Channel 01/0 : 36[7000] -> 37[46000] via P2P/IPC/read
+gpua042:320054:320144 [0] NCCL INFO Connected all rings
+gpua042:320054:320144 [0] NCCL INFO Channel 01/0 : 32[7000] -> 36[7000] [receive] via NET/IB/0
+gpua042:320054:320144 [0] NCCL INFO Channel 00/0 : 36[7000] -> 41[46000] [send] via NET/IB/0
+gpua042:320054:320144 [0] NCCL INFO Channel 01/0 : 36[7000] -> 44[7000] [send] via NET/IB/0
+gpua042:320054:320144 [0] NCCL INFO Channel 01/0 : 44[7000] -> 36[7000] [receive] via NET/IB/0
+gpua042:320054:320144 [0] NCCL INFO Channel 00/0 : 41[46000] -> 36[7000] [receive] via NET/IB/0
+gpua042:320054:320144 [0] NCCL INFO Channel 01/0 : 36[7000] -> 32[7000] [send] via NET/IB/0
+gpua042:320054:320144 [0] NCCL INFO Connected all trees
+gpua042:320054:320144 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua042:320054:320144 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua042:320054:320144 [0] NCCL INFO comm 0x19445940 rank 36 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpua016:548241:548241 [2] NCCL INFO cudaDriverVersion 12020
+gpua016:548241:548241 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.16<0>
+gpua016:548241:548241 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua016:548241:548344 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.16<0>
+gpua016:548241:548344 [2] NCCL INFO Using network IB
+gpua016:548241:548344 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpua016:548241:548344 [2] NCCL INFO Trees [0] 7/-1/-1->6->5 [1] 7/-1/-1->6->5
+gpua016:548241:548344 [2] NCCL INFO Channel 00/0 : 6[85000] -> 7[c7000] via P2P/IPC/read
+gpua016:548241:548344 [2] NCCL INFO Channel 01/0 : 6[85000] -> 7[c7000] via P2P/IPC/read
+gpua016:548241:548344 [2] NCCL INFO Connected all rings
+gpua016:548241:548344 [2] NCCL INFO Channel 00/0 : 6[85000] -> 5[46000] via P2P/IPC/read
+gpua016:548241:548344 [2] NCCL INFO Channel 01/0 : 6[85000] -> 5[46000] via P2P/IPC/read
+gpua016:548241:548344 [2] NCCL INFO Connected all trees
+gpua016:548241:548344 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua016:548241:548344 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua016:548241:548344 [2] NCCL INFO comm 0x1a706d40 rank 6 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpua016:548242:548242 [3] NCCL INFO cudaDriverVersion 12020
+gpua016:548242:548242 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.16<0>
+gpua016:548242:548242 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua016:548242:548345 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.16<0>
+gpua016:548242:548345 [3] NCCL INFO Using network IB
+gpua016:548242:548345 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpua016:548242:548345 [3] NCCL INFO Trees [0] -1/-1/-1->7->6 [1] -1/-1/-1->7->6
+gpua016:548242:548345 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [send] via NET/IB/0
+gpua016:548242:548345 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [send] via NET/IB/0
+gpua016:548242:548345 [3] NCCL INFO Connected all rings
+gpua016:548242:548345 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 6[85000] via P2P/IPC/read
+gpua016:548242:548345 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 6[85000] via P2P/IPC/read
+gpua016:548242:548345 [3] NCCL INFO Connected all trees
+gpua016:548242:548345 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua016:548242:548345 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua016:548242:548345 [3] NCCL INFO comm 0x15f2d0d0 rank 7 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpua016:548239:548239 [0] NCCL INFO cudaDriverVersion 12020
+gpua016:548239:548239 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.16<0>
+gpua016:548239:548239 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua016:548239:548346 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.16<0>
+gpua016:548239:548346 [0] NCCL INFO Using network IB
+gpua016:548239:548346 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpua016:548239:548346 [0] NCCL INFO Trees [0] 5/-1/-1->4->9 [1] 5/0/-1->4->12
+gpua016:548239:548346 [0] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [receive] via NET/IB/0
+gpua016:548239:548346 [0] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [receive] via NET/IB/0
+gpua016:548239:548346 [0] NCCL INFO Channel 00/0 : 4[7000] -> 5[46000] via P2P/IPC/read
+gpua016:548239:548346 [0] NCCL INFO Channel 01/0 : 4[7000] -> 5[46000] via P2P/IPC/read
+gpua016:548239:548346 [0] NCCL INFO Connected all rings
+gpua016:548239:548346 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [receive] via NET/IB/0
+gpua016:548239:548346 [0] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [send] via NET/IB/0
+gpua016:548239:548346 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [send] via NET/IB/0
+gpua016:548239:548346 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [receive] via NET/IB/0
+gpua016:548239:548346 [0] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [receive] via NET/IB/0
+gpua016:548239:548346 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [send] via NET/IB/0
+gpua016:548239:548346 [0] NCCL INFO Connected all trees
+gpua016:548239:548346 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua016:548239:548346 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua016:548239:548346 [0] NCCL INFO comm 0xb3f8420 rank 4 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpua016:548240:548240 [1] NCCL INFO cudaDriverVersion 12020
+gpua016:548240:548240 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.16<0>
+gpua016:548240:548240 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua016:548240:548350 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.16<0>
+gpua016:548240:548350 [1] NCCL INFO Using network IB
+gpua016:548240:548350 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpua016:548240:548350 [1] NCCL INFO Trees [0] 6/-1/-1->5->4 [1] 6/8/-1->5->4
+gpua016:548240:548350 [1] NCCL INFO Channel 00/0 : 5[46000] -> 6[85000] via P2P/IPC/read
+gpua016:548240:548350 [1] NCCL INFO Channel 01/0 : 5[46000] -> 6[85000] via P2P/IPC/read
+gpua016:548240:548350 [1] NCCL INFO Connected all rings
+gpua016:548240:548350 [1] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [send] via NET/IB/0
+gpua016:548240:548350 [1] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [receive] via NET/IB/0
+gpua016:548240:548350 [1] NCCL INFO Channel 00/0 : 5[46000] -> 4[7000] via P2P/IPC/read
+gpua016:548240:548350 [1] NCCL INFO Channel 01/0 : 5[46000] -> 4[7000] via P2P/IPC/read
+gpua016:548240:548350 [1] NCCL INFO Connected all trees
+gpua016:548240:548350 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua016:548240:548350 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua016:548240:548350 [1] NCCL INFO comm 0x1ee0fa90 rank 5 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+[gpua014:0/64] 2023-12-01 14:07:05,937 (distributed:1027) INFO: Reducer buckets have been rebuilt in this iteration.
+[gpua014:0/64] 2023-12-01 14:10:44,580 (trainer:735) INFO: 1epoch:train:1-100batch: iter_time=1.242, forward_time=0.241, loss_ctc=6.993e+03, loss_att=381.602, acc=1.508e-05, loss=2.365e+03, backward_time=0.413, grad_norm=1.546e+04, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.080, optim0_lr0=8.583e-08, train_time=6.095
+[gpua014:0/64] 2023-12-01 14:14:42,009 (trainer:735) INFO: 1epoch:train:101-200batch: iter_time=1.204e-04, forward_time=0.183, loss_ctc=1.811e+03, loss_att=370.767, acc=1.847e-05, loss=802.856, backward_time=0.406, grad_norm=1.042e+04, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.079, optim0_lr0=2.525e-07, train_time=2.374
+[gpua014:0/64] 2023-12-01 14:18:34,112 (trainer:735) INFO: 1epoch:train:201-300batch: iter_time=1.045e-04, forward_time=0.163, loss_ctc=363.168, loss_att=377.211, acc=3.199e-05, loss=372.998, backward_time=0.378, grad_norm=295.976, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.077, optim0_lr0=4.192e-07, train_time=2.321
+[gpua014:0/64] 2023-12-01 14:22:07,273 (trainer:735) INFO: 1epoch:train:301-400batch: iter_time=1.014e-04, forward_time=0.143, loss_ctc=312.724, loss_att=342.740, acc=7.256e-05, loss=333.735, backward_time=0.409, grad_norm=247.263, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.077, optim0_lr0=5.858e-07, train_time=2.131
+[gpua014:0/64] 2023-12-01 14:25:12,631 (trainer:735) INFO: 1epoch:train:401-500batch: iter_time=1.021e-04, forward_time=0.146, loss_ctc=396.834, loss_att=429.071, acc=2.900e-04, loss=419.400, backward_time=0.345, grad_norm=278.674, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.077, optim0_lr0=7.525e-07, train_time=1.853
+[gpua014:0/64] 2023-12-01 14:27:52,006 (trainer:735) INFO: 1epoch:train:501-600batch: iter_time=1.146e-04, forward_time=0.195, loss_ctc=356.648, loss_att=393.544, acc=0.002, loss=382.475, backward_time=0.317, grad_norm=299.505, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.079, optim0_lr0=9.192e-07, train_time=1.594
+[gpua014:0/64] 2023-12-01 14:30:28,857 (trainer:735) INFO: 1epoch:train:601-700batch: iter_time=1.154e-04, forward_time=0.164, loss_ctc=380.479, loss_att=405.709, acc=0.019, loss=398.140, backward_time=0.323, grad_norm=313.542, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.078, optim0_lr0=1.086e-06, train_time=1.568
+[gpua014:0/64] 2023-12-01 14:33:38,586 (trainer:735) INFO: 1epoch:train:701-800batch: iter_time=1.180e-04, forward_time=0.152, loss_ctc=340.719, loss_att=371.583, acc=0.056, loss=362.323, backward_time=0.358, grad_norm=213.248, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.078, optim0_lr0=1.252e-06, train_time=1.897
+[gpua014:0/64] 2023-12-01 14:36:33,047 (trainer:735) INFO: 1epoch:train:801-900batch: iter_time=1.176e-04, forward_time=0.166, loss_ctc=343.491, loss_att=376.449, acc=0.068, loss=366.561, backward_time=0.321, grad_norm=204.164, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.077, optim0_lr0=1.419e-06, train_time=1.744
+[gpua014:0/64] 2023-12-01 14:40:05,746 (trainer:735) INFO: 1epoch:train:901-1000batch: iter_time=1.013e-04, forward_time=0.157, loss_ctc=359.642, loss_att=387.993, acc=0.064, loss=379.488, backward_time=0.434, grad_norm=183.143, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.076, optim0_lr0=1.586e-06, train_time=2.126
+[gpua014:0/64] 2023-12-01 14:42:54,192 (trainer:735) INFO: 1epoch:train:1001-1100batch: iter_time=9.634e-05, forward_time=0.145, loss_ctc=339.557, loss_att=371.584, acc=0.070, loss=361.976, backward_time=0.310, grad_norm=173.238, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.075, optim0_lr0=1.752e-06, train_time=1.685
+[gpua014:0/64] 2023-12-01 14:45:50,006 (trainer:735) INFO: 1epoch:train:1101-1200batch: iter_time=8.967e-05, forward_time=0.145, loss_ctc=317.498, loss_att=344.379, acc=0.064, loss=336.314, backward_time=0.326, grad_norm=141.185, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.075, optim0_lr0=1.919e-06, train_time=1.758
+[gpua014:0/64] 2023-12-01 14:47:29,477 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
+[gpua014:0/64] 2023-12-01 14:47:48,076 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua014:0/64] 2023-12-01 14:47:51,628 (abs_task:1616) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TPreprocessor object at 0x7faf386ea6e0>)
+[gpua014:0/64] 2023-12-01 14:47:51,628 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, 
+[gpua014:0/64] 2023-12-01 14:47:51,632 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua014:0/64] 2023-12-01 14:57:13,932 (trainer:735) INFO: 1epoch:train:1201-1300batch: iter_time=3.315, forward_time=0.146, loss_ctc=321.550, loss_att=341.589, acc=0.071, loss=335.578, backward_time=0.345, grad_norm=149.664, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.076, optim0_lr0=2.086e-06, train_time=6.839
+[gpua014:0/64] 2023-12-01 14:59:27,200 (trainer:735) INFO: 1epoch:train:1301-1400batch: iter_time=8.772e-05, forward_time=0.147, loss_ctc=286.813, loss_att=302.172, acc=0.080, loss=297.564, backward_time=0.291, grad_norm=115.106, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.076, optim0_lr0=2.253e-06, train_time=1.332
+[gpua014:0/64] 2023-12-01 15:01:40,038 (trainer:735) INFO: 1epoch:train:1401-1500batch: iter_time=8.750e-05, forward_time=0.147, loss_ctc=304.420, loss_att=311.951, acc=0.091, loss=309.692, backward_time=0.293, grad_norm=140.440, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.076, optim0_lr0=2.419e-06, train_time=1.328
+[gpua014:0/64] 2023-12-01 15:04:21,855 (trainer:735) INFO: 1epoch:train:1501-1600batch: iter_time=8.805e-05, forward_time=0.152, loss_ctc=287.974, loss_att=299.681, acc=0.093, loss=296.169, backward_time=0.358, grad_norm=125.737, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.078, optim0_lr0=2.586e-06, train_time=1.618
+[gpua014:0/64] 2023-12-01 15:06:39,059 (trainer:735) INFO: 1epoch:train:1601-1700batch: iter_time=8.334e-05, forward_time=0.158, loss_ctc=289.758, loss_att=292.720, acc=0.100, loss=291.831, backward_time=0.293, grad_norm=116.708, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.076, optim0_lr0=2.752e-06, train_time=1.372
+[gpua014:0/64] 2023-12-01 15:09:05,945 (trainer:735) INFO: 1epoch:train:1701-1800batch: iter_time=1.043e-04, forward_time=0.167, loss_ctc=325.282, loss_att=331.335, acc=0.111, loss=329.519, backward_time=0.325, grad_norm=157.674, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.077, optim0_lr0=2.919e-06, train_time=1.469
+[gpua014:0/64] 2023-12-01 15:11:47,389 (trainer:735) INFO: 1epoch:train:1801-1900batch: iter_time=8.752e-05, forward_time=0.146, loss_ctc=345.654, loss_att=345.640, acc=0.103, loss=345.644, backward_time=0.317, grad_norm=171.828, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.076, optim0_lr0=3.086e-06, train_time=1.614
+[gpua014:0/64] 2023-12-01 15:14:00,617 (trainer:735) INFO: 1epoch:train:1901-2000batch: iter_time=9.435e-05, forward_time=0.152, loss_ctc=289.072, loss_att=289.856, acc=0.118, loss=289.621, backward_time=0.303, grad_norm=105.791, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.076, optim0_lr0=3.253e-06, train_time=1.332
+[gpua014:0/64] 2023-12-01 15:16:31,254 (trainer:735) INFO: 1epoch:train:2001-2100batch: iter_time=9.163e-05, forward_time=0.161, loss_ctc=313.227, loss_att=312.832, acc=0.112, loss=312.951, backward_time=0.314, grad_norm=123.184, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.078, optim0_lr0=3.419e-06, train_time=1.506
+[gpua014:0/64] 2023-12-01 15:18:39,037 (trainer:735) INFO: 1epoch:train:2101-2200batch: iter_time=9.009e-05, forward_time=0.151, loss_ctc=291.892, loss_att=295.907, acc=0.113, loss=294.702, backward_time=0.282, grad_norm=116.813, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.076, optim0_lr0=3.586e-06, train_time=1.277
+[gpua014:0/64] 2023-12-01 15:20:55,100 (trainer:735) INFO: 1epoch:train:2201-2300batch: iter_time=8.270e-05, forward_time=0.150, loss_ctc=280.971, loss_att=277.727, acc=0.118, loss=278.700, backward_time=0.295, grad_norm=99.741, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.076, optim0_lr0=3.753e-06, train_time=1.361
+[gpua014:0/64] 2023-12-01 15:23:14,133 (trainer:735) INFO: 1epoch:train:2301-2400batch: iter_time=8.391e-05, forward_time=0.150, loss_ctc=311.697, loss_att=321.507, acc=0.104, loss=318.564, backward_time=0.328, grad_norm=109.042, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.077, optim0_lr0=3.919e-06, train_time=1.390
+[gpua014:0/64] 2023-12-01 15:25:43,682 (trainer:735) INFO: 1epoch:train:2401-2500batch: iter_time=8.345e-05, forward_time=0.147, loss_ctc=262.194, loss_att=263.233, acc=0.120, loss=262.921, backward_time=0.283, grad_norm=112.427, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.076, optim0_lr0=4.086e-06, train_time=1.495
+[gpua014:0/64] 2023-12-01 15:26:03,710 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpua014:0/64] 2023-12-01 15:26:21,911 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua014:0/64] 2023-12-01 15:26:25,296 (abs_task:1616) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TPreprocessor object at 0x7faa694b4d30>)
+[gpua014:0/64] 2023-12-01 15:26:25,296 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, 
+[gpua014:0/64] 2023-12-01 15:26:25,301 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua014:0/64] 2023-12-01 15:36:34,037 (trainer:735) INFO: 1epoch:train:2501-2600batch: iter_time=1.496, forward_time=0.146, loss_ctc=264.592, loss_att=275.035, acc=0.117, loss=271.902, backward_time=0.281, grad_norm=95.769, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.076, optim0_lr0=4.253e-06, train_time=6.503
+[gpua014:0/64] 2023-12-01 15:38:34,624 (trainer:735) INFO: 1epoch:train:2601-2700batch: iter_time=8.774e-05, forward_time=0.146, loss_ctc=264.773, loss_att=271.710, acc=0.116, loss=269.629, backward_time=0.277, grad_norm=93.336, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.076, optim0_lr0=4.419e-06, train_time=1.206
+[gpua014:0/64] 2023-12-01 15:40:37,996 (trainer:735) INFO: 1epoch:train:2701-2800batch: iter_time=9.287e-05, forward_time=0.146, loss_ctc=267.997, loss_att=268.393, acc=0.116, loss=268.274, backward_time=0.284, grad_norm=103.328, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.076, optim0_lr0=4.586e-06, train_time=1.233
+[gpua014:0/64] 2023-12-01 15:42:59,661 (trainer:735) INFO: 1epoch:train:2801-2900batch: iter_time=9.152e-05, forward_time=0.146, loss_ctc=240.650, loss_att=247.968, acc=0.129, loss=245.772, backward_time=0.303, grad_norm=92.104, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.076, optim0_lr0=4.752e-06, train_time=1.416
+[gpua014:0/64] 2023-12-01 15:45:36,425 (trainer:735) INFO: 1epoch:train:2901-3000batch: iter_time=9.655e-05, forward_time=0.146, loss_ctc=314.426, loss_att=316.528, acc=0.111, loss=315.898, backward_time=0.346, grad_norm=117.530, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.076, optim0_lr0=4.919e-06, train_time=1.567
+[gpua014:0/64] 2023-12-01 15:47:47,829 (trainer:735) INFO: 1epoch:train:3001-3100batch: iter_time=1.011e-04, forward_time=0.148, loss_ctc=280.513, loss_att=284.128, acc=0.128, loss=283.043, backward_time=0.292, grad_norm=125.652, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.076, optim0_lr0=5.086e-06, train_time=1.314
+[gpua014:0/64] 2023-12-01 15:50:10,399 (trainer:735) INFO: 1epoch:train:3101-3200batch: iter_time=9.077e-05, forward_time=0.147, loss_ctc=301.308, loss_att=299.195, acc=0.125, loss=299.829, backward_time=0.349, grad_norm=118.846, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.076, optim0_lr0=5.252e-06, train_time=1.425
+[gpua014:0/64] 2023-12-01 15:52:27,374 (trainer:735) INFO: 1epoch:train:3201-3300batch: iter_time=8.635e-05, forward_time=0.146, loss_ctc=280.366, loss_att=280.125, acc=0.131, loss=280.197, backward_time=0.291, grad_norm=108.088, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.076, optim0_lr0=5.419e-06, train_time=1.370
+[gpua014:0/64] 2023-12-01 15:54:49,806 (trainer:735) INFO: 1epoch:train:3301-3400batch: iter_time=9.478e-05, forward_time=0.146, loss_ctc=279.437, loss_att=276.236, acc=0.128, loss=277.196, backward_time=0.307, grad_norm=102.923, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.076, optim0_lr0=5.586e-06, train_time=1.424
+[gpua014:0/64] 2023-12-01 15:56:47,942 (trainer:735) INFO: 1epoch:train:3401-3500batch: iter_time=9.538e-05, forward_time=0.147, loss_ctc=293.467, loss_att=292.539, acc=0.123, loss=292.818, backward_time=0.279, grad_norm=104.690, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.077, optim0_lr0=5.753e-06, train_time=1.181
+[gpua014:0/64] 2023-12-01 15:58:46,491 (trainer:735) INFO: 1epoch:train:3501-3600batch: iter_time=1.023e-04, forward_time=0.147, loss_ctc=278.873, loss_att=284.634, acc=0.128, loss=282.906, backward_time=0.282, grad_norm=94.844, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.076, optim0_lr0=5.919e-06, train_time=1.185
+[gpua014:0/64] 2023-12-01 16:01:11,957 (trainer:735) INFO: 1epoch:train:3601-3700batch: iter_time=9.049e-05, forward_time=0.148, loss_ctc=260.060, loss_att=262.142, acc=0.128, loss=261.517, backward_time=0.286, grad_norm=90.731, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.076, optim0_lr0=6.086e-06, train_time=1.454
+[gpua014:0/64] 2023-12-01 16:02:35,807 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpua014:0/64] 2023-12-01 16:02:53,787 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua014:0/64] 2023-12-01 16:02:57,193 (abs_task:1616) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TPreprocessor object at 0x7faaff90fd90>)
+[gpua014:0/64] 2023-12-01 16:02:57,193 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, 
+[gpua014:0/64] 2023-12-01 16:02:57,236 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua014:0/64] 2023-12-01 16:13:01,414 (trainer:735) INFO: 1epoch:train:3701-3800batch: iter_time=2.215, forward_time=0.179, loss_ctc=270.913, loss_att=278.894, acc=0.131, loss=276.500, backward_time=0.291, grad_norm=119.388, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.077, optim0_lr0=6.253e-06, train_time=7.094
+[gpua014:0/64] 2023-12-01 16:15:29,950 (trainer:735) INFO: 1epoch:train:3801-3900batch: iter_time=8.502e-05, forward_time=0.146, loss_ctc=243.045, loss_att=247.807, acc=0.138, loss=246.378, backward_time=0.320, grad_norm=87.339, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.076, optim0_lr0=6.419e-06, train_time=1.485
+[gpua014:0/64] 2023-12-01 16:17:41,277 (trainer:735) INFO: 1epoch:train:3901-4000batch: iter_time=8.927e-05, forward_time=0.145, loss_ctc=263.178, loss_att=264.345, acc=0.136, loss=263.995, backward_time=0.281, grad_norm=101.626, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.076, optim0_lr0=6.586e-06, train_time=1.313
+[gpua014:0/64] 2023-12-01 16:20:09,808 (trainer:735) INFO: 1epoch:train:4001-4100batch: iter_time=9.263e-05, forward_time=0.147, loss_ctc=248.346, loss_att=254.431, acc=0.137, loss=252.605, backward_time=0.313, grad_norm=97.068, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.075, optim0_lr0=6.752e-06, train_time=1.485
+[gpua014:0/64] 2023-12-01 16:22:36,164 (trainer:735) INFO: 1epoch:train:4101-4200batch: iter_time=9.011e-05, forward_time=0.145, loss_ctc=257.494, loss_att=252.584, acc=0.138, loss=254.057, backward_time=0.297, grad_norm=116.231, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.076, optim0_lr0=6.919e-06, train_time=1.463
+[gpua014:0/64] 2023-12-01 16:25:30,802 (trainer:735) INFO: 1epoch:train:4201-4300batch: iter_time=9.481e-05, forward_time=0.147, loss_ctc=296.357, loss_att=297.721, acc=0.135, loss=297.312, backward_time=0.315, grad_norm=143.612, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.076, optim0_lr0=7.086e-06, train_time=1.746
+[gpua014:0/64] 2023-12-01 16:28:01,737 (trainer:735) INFO: 1epoch:train:4301-4400batch: iter_time=9.558e-05, forward_time=0.145, loss_ctc=311.059, loss_att=307.100, acc=0.134, loss=308.288, backward_time=0.297, grad_norm=161.742, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.076, optim0_lr0=7.253e-06, train_time=1.509
+[gpua014:0/64] 2023-12-01 16:30:36,290 (trainer:735) INFO: 1epoch:train:4401-4500batch: iter_time=1.012e-04, forward_time=0.145, loss_ctc=268.150, loss_att=264.314, acc=0.148, loss=265.465, backward_time=0.337, grad_norm=113.005, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.076, optim0_lr0=7.419e-06, train_time=1.545
+[gpua014:0/64] 2023-12-01 16:32:54,304 (trainer:735) INFO: 1epoch:train:4501-4600batch: iter_time=9.267e-05, forward_time=0.145, loss_ctc=291.569, loss_att=285.391, acc=0.138, loss=287.244, backward_time=0.291, grad_norm=114.494, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.075, optim0_lr0=7.586e-06, train_time=1.380
+[gpua014:0/64] 2023-12-01 16:35:13,427 (trainer:735) INFO: 1epoch:train:4601-4700batch: iter_time=8.826e-05, forward_time=0.147, loss_ctc=271.996, loss_att=268.926, acc=0.142, loss=269.847, backward_time=0.302, grad_norm=121.504, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.075, optim0_lr0=7.753e-06, train_time=1.391
+[gpua014:0/64] 2023-12-01 16:37:43,081 (trainer:735) INFO: 1epoch:train:4701-4800batch: iter_time=1.002e-04, forward_time=0.145, loss_ctc=262.633, loss_att=253.957, acc=0.147, loss=256.560, backward_time=0.290, grad_norm=96.330, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.075, optim0_lr0=7.919e-06, train_time=1.496
+[gpua014:0/64] 2023-12-01 16:40:12,101 (trainer:735) INFO: 1epoch:train:4801-4900batch: iter_time=9.381e-05, forward_time=0.145, loss_ctc=293.784, loss_att=295.921, acc=0.133, loss=295.280, backward_time=0.296, grad_norm=95.278, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.076, optim0_lr0=8.086e-06, train_time=1.490
+[gpua014:0/64] 2023-12-01 16:42:27,223 (trainer:735) INFO: 1epoch:train:4901-5000batch: iter_time=8.765e-05, forward_time=0.145, loss_ctc=252.226, loss_att=245.845, acc=0.152, loss=247.759, backward_time=0.295, grad_norm=110.756, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.075, optim0_lr0=8.252e-06, train_time=1.351
+[gpua014:0/64] 2023-12-01 16:42:28,728 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpua014:0/64] 2023-12-01 16:42:47,895 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua014:0/64] 2023-12-01 16:42:51,306 (abs_task:1616) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TPreprocessor object at 0x7fab0345fca0>)
+[gpua014:0/64] 2023-12-01 16:42:51,306 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, 
+[gpua014:0/64] 2023-12-01 16:42:51,311 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua014:0/64] 2023-12-01 16:52:38,590 (trainer:735) INFO: 1epoch:train:5001-5100batch: iter_time=1.264, forward_time=0.175, loss_ctc=251.654, loss_att=258.088, acc=0.151, loss=256.158, backward_time=0.290, grad_norm=132.451, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.076, optim0_lr0=8.419e-06, train_time=6.113
+[gpua014:0/64] 2023-12-01 16:55:02,013 (trainer:735) INFO: 1epoch:train:5101-5200batch: iter_time=9.068e-05, forward_time=0.146, loss_ctc=251.067, loss_att=247.905, acc=0.154, loss=248.853, backward_time=0.286, grad_norm=97.586, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.075, optim0_lr0=8.586e-06, train_time=1.434
+[gpua014:0/64] 2023-12-01 16:57:16,146 (trainer:735) INFO: 1epoch:train:5201-5300batch: iter_time=1.102e-04, forward_time=0.146, loss_ctc=260.590, loss_att=250.910, acc=0.151, loss=253.814, backward_time=0.286, grad_norm=125.301, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.075, optim0_lr0=8.753e-06, train_time=1.341
+[gpua014:0/64] 2023-12-01 16:59:13,290 (trainer:735) INFO: 1epoch:train:5301-5400batch: iter_time=1.089e-04, forward_time=0.146, loss_ctc=231.251, loss_att=231.261, acc=0.163, loss=231.258, backward_time=0.274, grad_norm=95.088, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.075, optim0_lr0=8.919e-06, train_time=1.171
+[gpua014:0/64] 2023-12-01 17:01:08,454 (trainer:735) INFO: 1epoch:train:5401-5500batch: iter_time=1.071e-04, forward_time=0.146, loss_ctc=306.272, loss_att=296.034, acc=0.145, loss=299.105, backward_time=0.277, grad_norm=117.660, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.075, optim0_lr0=9.086e-06, train_time=1.151
+[gpua014:0/64] 2023-12-01 17:03:06,813 (trainer:735) INFO: 1epoch:train:5501-5600batch: iter_time=9.199e-05, forward_time=0.146, loss_ctc=273.957, loss_att=267.463, acc=0.156, loss=269.411, backward_time=0.278, grad_norm=130.313, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.076, optim0_lr0=9.252e-06, train_time=1.183
+[gpua014:0/64] 2023-12-01 17:05:16,260 (trainer:735) INFO: 1epoch:train:5601-5700batch: iter_time=9.331e-05, forward_time=0.146, loss_ctc=296.787, loss_att=280.520, acc=0.155, loss=285.400, backward_time=0.287, grad_norm=143.677, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.076, optim0_lr0=9.419e-06, train_time=1.294
+[gpua014:0/64] 2023-12-01 17:07:48,443 (trainer:735) INFO: 1epoch:train:5701-5800batch: iter_time=9.630e-05, forward_time=0.146, loss_ctc=275.486, loss_att=262.430, acc=0.160, loss=266.347, backward_time=0.292, grad_norm=118.419, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.076, optim0_lr0=9.586e-06, train_time=1.522
+[gpua014:0/64] 2023-12-01 17:09:58,908 (trainer:735) INFO: 1epoch:train:5801-5900batch: iter_time=1.080e-04, forward_time=0.177, loss_ctc=273.118, loss_att=256.183, acc=0.156, loss=261.263, backward_time=0.305, grad_norm=124.042, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.078, optim0_lr0=9.753e-06, train_time=1.304
+[gpua014:0/64] 2023-12-01 17:12:03,735 (trainer:735) INFO: 1epoch:train:5901-6000batch: iter_time=9.701e-05, forward_time=0.150, loss_ctc=283.582, loss_att=267.734, acc=0.150, loss=272.488, backward_time=0.292, grad_norm=111.864, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.076, optim0_lr0=9.919e-06, train_time=1.248
+[gpua014:0/64] 2023-12-01 17:14:32,817 (trainer:735) INFO: 1epoch:train:6001-6100batch: iter_time=9.672e-05, forward_time=0.147, loss_ctc=270.667, loss_att=259.890, acc=0.159, loss=263.123, backward_time=0.318, grad_norm=104.617, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.076, optim0_lr0=1.009e-05, train_time=1.491
+[gpua014:0/64] 2023-12-01 17:16:33,534 (trainer:735) INFO: 1epoch:train:6101-6200batch: iter_time=9.243e-05, forward_time=0.146, loss_ctc=253.979, loss_att=240.382, acc=0.159, loss=244.461, backward_time=0.275, grad_norm=99.669, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.076, optim0_lr0=1.025e-05, train_time=1.207
+[gpua014:0/64] 2023-12-01 17:17:58,810 (multiple_iter_factory:32) INFO: Building 5th iter-factory...
+[gpua014:0/64] 2023-12-01 17:18:16,393 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua014:0/64] 2023-12-01 17:18:19,744 (abs_task:1616) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TPreprocessor object at 0x7fadd0e0ec80>)
+[gpua014:0/64] 2023-12-01 17:18:19,744 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, 
+[gpua014:0/64] 2023-12-01 17:18:19,747 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua014:0/64] 2023-12-01 17:23:13,786 (trainer:735) INFO: 1epoch:train:6201-6300batch: iter_time=2.260, forward_time=0.167, loss_ctc=265.214, loss_att=270.523, acc=0.159, loss=268.931, backward_time=0.286, grad_norm=147.161, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.078, optim0_lr0=1.042e-05, train_time=4.002
+[gpua014:0/64] 2023-12-01 17:25:27,734 (trainer:735) INFO: 1epoch:train:6301-6400batch: iter_time=9.845e-05, forward_time=0.148, loss_ctc=237.668, loss_att=234.540, acc=0.166, loss=235.478, backward_time=0.293, grad_norm=110.388, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.077, optim0_lr0=1.059e-05, train_time=1.339
+[gpua014:0/64] 2023-12-01 17:27:29,426 (trainer:735) INFO: 1epoch:train:6401-6500batch: iter_time=1.037e-04, forward_time=0.146, loss_ctc=258.967, loss_att=257.488, acc=0.166, loss=257.932, backward_time=0.282, grad_norm=156.186, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.077, optim0_lr0=1.075e-05, train_time=1.217
+[gpua014:0/64] 2023-12-01 17:29:52,189 (trainer:735) INFO: 1epoch:train:6501-6600batch: iter_time=9.143e-05, forward_time=0.146, loss_ctc=243.588, loss_att=245.315, acc=0.161, loss=244.797, backward_time=0.298, grad_norm=149.198, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.076, optim0_lr0=1.092e-05, train_time=1.427
+[gpua014:0/64] 2023-12-01 17:32:25,109 (trainer:735) INFO: 1epoch:train:6601-6700batch: iter_time=9.171e-05, forward_time=0.148, loss_ctc=251.352, loss_att=244.576, acc=0.162, loss=246.609, backward_time=0.314, grad_norm=118.494, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.076, optim0_lr0=1.109e-05, train_time=1.529
+[gpua014:0/64] 2023-12-01 17:34:23,597 (trainer:735) INFO: 1epoch:train:6701-6800batch: iter_time=9.960e-05, forward_time=0.145, loss_ctc=290.639, loss_att=286.000, acc=0.165, loss=287.391, backward_time=0.279, grad_norm=152.296, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.076, optim0_lr0=1.125e-05, train_time=1.185
+[gpua014:0/64] 2023-12-01 17:36:30,083 (trainer:735) INFO: 1epoch:train:6801-6900batch: iter_time=9.659e-05, forward_time=0.184, loss_ctc=303.533, loss_att=296.442, acc=0.158, loss=298.570, backward_time=0.291, grad_norm=138.473, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.079, optim0_lr0=1.142e-05, train_time=1.265
+[gpua014:0/64] 2023-12-01 17:39:07,403 (trainer:735) INFO: 1epoch:train:6901-7000batch: iter_time=9.992e-05, forward_time=0.146, loss_ctc=263.260, loss_att=251.031, acc=0.171, loss=254.700, backward_time=0.303, grad_norm=114.045, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.076, optim0_lr0=1.159e-05, train_time=1.573
+[gpua014:0/64] 2023-12-01 17:41:23,492 (trainer:735) INFO: 1epoch:train:7001-7100batch: iter_time=1.056e-04, forward_time=0.146, loss_ctc=286.485, loss_att=276.186, acc=0.160, loss=279.275, backward_time=0.282, grad_norm=128.722, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.076, optim0_lr0=1.175e-05, train_time=1.361
+[gpua014:0/64] 2023-12-01 17:43:20,732 (trainer:735) INFO: 1epoch:train:7101-7200batch: iter_time=9.524e-05, forward_time=0.146, loss_ctc=266.074, loss_att=251.387, acc=0.168, loss=255.793, backward_time=0.275, grad_norm=122.291, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.077, optim0_lr0=1.192e-05, train_time=1.172
+[gpua014:0/64] 2023-12-01 17:46:03,402 (trainer:735) INFO: 1epoch:train:7201-7300batch: iter_time=9.206e-05, forward_time=0.148, loss_ctc=258.402, loss_att=243.885, acc=0.168, loss=248.240, backward_time=0.319, grad_norm=94.645, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.076, optim0_lr0=1.209e-05, train_time=1.626
+[gpua014:0/64] 2023-12-01 17:48:09,912 (trainer:735) INFO: 1epoch:train:7301-7400batch: iter_time=9.867e-05, forward_time=0.149, loss_ctc=287.191, loss_att=281.928, acc=0.159, loss=283.507, backward_time=0.287, grad_norm=122.159, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.077, optim0_lr0=1.225e-05, train_time=1.265
+[gpua014:0/64] 2023-12-01 17:50:39,227 (trainer:735) INFO: 1epoch:train:7401-7500batch: iter_time=9.611e-05, forward_time=0.157, loss_ctc=247.679, loss_att=231.054, acc=0.179, loss=236.042, backward_time=0.290, grad_norm=140.200, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.077, optim0_lr0=1.242e-05, train_time=1.493
+[gpua014:0/64] 2023-12-01 17:50:41,396 (multiple_iter_factory:32) INFO: Building 6th iter-factory...
+[gpua014:0/64] 2023-12-01 17:50:59,149 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua014:0/64] 2023-12-01 17:51:02,507 (abs_task:1616) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TPreprocessor object at 0x7fab1a49c790>)
+[gpua014:0/64] 2023-12-01 17:51:02,507 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, 
+[gpua014:0/64] 2023-12-01 17:51:02,510 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua014:0/64] 2023-12-01 17:57:43,033 (trainer:735) INFO: 1epoch:train:7501-7600batch: iter_time=1.227, forward_time=0.166, loss_ctc=246.492, loss_att=246.093, acc=0.175, loss=246.213, backward_time=0.275, grad_norm=137.566, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.078, optim0_lr0=1.259e-05, train_time=4.238
+[gpua014:0/64] 2023-12-01 17:59:38,199 (trainer:735) INFO: 1epoch:train:7601-7700batch: iter_time=8.441e-05, forward_time=0.145, loss_ctc=247.005, loss_att=230.701, acc=0.173, loss=235.592, backward_time=0.274, grad_norm=97.859, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.076, optim0_lr0=1.275e-05, train_time=1.151
+[gpua014:0/64] 2023-12-01 18:01:51,150 (trainer:735) INFO: 1epoch:train:7701-7800batch: iter_time=8.477e-05, forward_time=0.146, loss_ctc=255.268, loss_att=242.094, acc=0.172, loss=246.046, backward_time=0.293, grad_norm=132.953, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.076, optim0_lr0=1.292e-05, train_time=1.329
+[gpua014:0/64] 2023-12-01 18:03:58,255 (trainer:735) INFO: 1epoch:train:7801-7900batch: iter_time=9.725e-05, forward_time=0.146, loss_ctc=227.749, loss_att=218.347, acc=0.183, loss=221.167, backward_time=0.295, grad_norm=126.035, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.076, optim0_lr0=1.309e-05, train_time=1.271
+[gpua014:0/64] 2023-12-01 18:06:39,739 (trainer:735) INFO: 1epoch:train:7901-8000batch: iter_time=1.014e-04, forward_time=0.146, loss_ctc=299.963, loss_att=281.729, acc=0.163, loss=287.199, backward_time=0.334, grad_norm=116.960, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.076, optim0_lr0=1.325e-05, train_time=1.615
+[gpua014:0/64] 2023-12-01 18:09:01,143 (trainer:735) INFO: 1epoch:train:8001-8100batch: iter_time=8.970e-05, forward_time=0.146, loss_ctc=268.352, loss_att=257.585, acc=0.174, loss=260.815, backward_time=0.291, grad_norm=121.230, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.076, optim0_lr0=1.342e-05, train_time=1.414
+[gpua014:0/64] 2023-12-01 18:11:07,939 (trainer:735) INFO: 1epoch:train:8101-8200batch: iter_time=9.384e-05, forward_time=0.147, loss_ctc=291.624, loss_att=266.127, acc=0.175, loss=273.776, backward_time=0.282, grad_norm=121.262, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.077, optim0_lr0=1.359e-05, train_time=1.268
+[gpua014:0/64] 2023-12-01 18:13:15,295 (trainer:735) INFO: 1epoch:train:8201-8300batch: iter_time=8.868e-05, forward_time=0.146, loss_ctc=268.262, loss_att=245.852, acc=0.178, loss=252.575, backward_time=0.288, grad_norm=121.714, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.076, optim0_lr0=1.375e-05, train_time=1.273
+[gpua014:0/64] 2023-12-01 18:15:34,351 (trainer:735) INFO: 1epoch:train:8301-8400batch: iter_time=9.466e-05, forward_time=0.146, loss_ctc=269.133, loss_att=248.879, acc=0.176, loss=254.955, backward_time=0.303, grad_norm=140.618, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.077, optim0_lr0=1.392e-05, train_time=1.390
+[gpua014:0/64] 2023-12-01 18:18:05,843 (trainer:735) INFO: 1epoch:train:8401-8500batch: iter_time=9.687e-05, forward_time=0.146, loss_ctc=279.170, loss_att=255.702, acc=0.170, loss=262.743, backward_time=0.334, grad_norm=114.836, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.076, optim0_lr0=1.409e-05, train_time=1.515
+[gpua014:0/64] 2023-12-01 18:20:27,499 (trainer:735) INFO: 1epoch:train:8501-8600batch: iter_time=9.098e-05, forward_time=0.167, loss_ctc=267.981, loss_att=249.955, acc=0.177, loss=255.363, backward_time=0.290, grad_norm=121.410, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.077, optim0_lr0=1.425e-05, train_time=1.416
+[gpua014:0/64] 2023-12-01 18:22:46,843 (trainer:735) INFO: 1epoch:train:8601-8700batch: iter_time=9.569e-05, forward_time=0.154, loss_ctc=250.707, loss_att=231.833, acc=0.178, loss=237.495, backward_time=0.303, grad_norm=135.515, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.077, optim0_lr0=1.442e-05, train_time=1.393
+[gpua014:0/64] 2023-12-01 18:24:05,992 (multiple_iter_factory:32) INFO: Building 7th iter-factory...
+[gpua014:0/64] 2023-12-01 18:24:23,861 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua014:0/64] 2023-12-01 18:24:27,206 (abs_task:1616) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TPreprocessor object at 0x7fa1a0dd2620>)
+[gpua014:0/64] 2023-12-01 18:24:27,206 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, 
+[gpua014:0/64] 2023-12-01 18:24:27,209 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua014:0/64] 2023-12-01 18:30:11,357 (trainer:735) INFO: 1epoch:train:8701-8800batch: iter_time=2.067, forward_time=0.187, loss_ctc=258.394, loss_att=247.375, acc=0.183, loss=250.681, backward_time=0.281, grad_norm=136.599, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.077, optim0_lr0=1.459e-05, train_time=4.445
+[gpua014:0/64] 2023-12-01 18:32:07,162 (trainer:735) INFO: 1epoch:train:8801-8900batch: iter_time=9.005e-05, forward_time=0.147, loss_ctc=234.144, loss_att=215.308, acc=0.187, loss=220.959, backward_time=0.275, grad_norm=116.876, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.077, optim0_lr0=1.475e-05, train_time=1.158
+[gpua014:0/64] 2023-12-01 18:34:16,531 (trainer:735) INFO: 1epoch:train:8901-9000batch: iter_time=9.748e-05, forward_time=0.146, loss_ctc=253.756, loss_att=232.972, acc=0.187, loss=239.207, backward_time=0.294, grad_norm=116.690, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.077, optim0_lr0=1.492e-05, train_time=1.293
+[gpua014:0/64] 2023-12-01 18:36:51,834 (trainer:735) INFO: 1epoch:train:9001-9100batch: iter_time=9.650e-05, forward_time=0.149, loss_ctc=237.440, loss_att=223.361, acc=0.182, loss=227.585, backward_time=0.318, grad_norm=132.493, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.076, optim0_lr0=1.509e-05, train_time=1.553
+[gpua014:0/64] 2023-12-01 18:39:12,439 (trainer:735) INFO: 1epoch:train:9101-9200batch: iter_time=9.300e-05, forward_time=0.157, loss_ctc=246.241, loss_att=222.500, acc=0.185, loss=229.622, backward_time=0.315, grad_norm=99.738, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.081, optim0_lr0=1.525e-05, train_time=1.406
+[gpua014:0/64] 2023-12-01 18:41:20,531 (trainer:735) INFO: 1epoch:train:9201-9300batch: iter_time=9.251e-05, forward_time=0.146, loss_ctc=284.697, loss_att=263.306, acc=0.183, loss=269.723, backward_time=0.299, grad_norm=131.302, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.077, optim0_lr0=1.542e-05, train_time=1.281
+[gpua014:0/64] 2023-12-01 18:43:21,453 (trainer:735) INFO: 1epoch:train:9301-9400batch: iter_time=8.837e-05, forward_time=0.146, loss_ctc=300.923, loss_att=273.795, acc=0.176, loss=281.933, backward_time=0.288, grad_norm=128.529, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.077, optim0_lr0=1.559e-05, train_time=1.209
+[gpua014:0/64] 2023-12-01 18:45:40,977 (trainer:735) INFO: 1epoch:train:9401-9500batch: iter_time=9.160e-05, forward_time=0.148, loss_ctc=254.756, loss_att=229.158, acc=0.193, loss=236.838, backward_time=0.337, grad_norm=115.328, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.076, optim0_lr0=1.575e-05, train_time=1.395
+[gpua014:0/64] 2023-12-01 18:47:45,117 (trainer:735) INFO: 1epoch:train:9501-9600batch: iter_time=9.770e-05, forward_time=0.146, loss_ctc=281.014, loss_att=254.660, acc=0.178, loss=262.566, backward_time=0.280, grad_norm=126.059, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.077, optim0_lr0=1.592e-05, train_time=1.241
+[gpua014:0/64] 2023-12-01 18:50:30,713 (trainer:735) INFO: 1epoch:train:9601-9700batch: iter_time=9.660e-05, forward_time=0.146, loss_ctc=261.619, loss_att=235.483, acc=0.188, loss=243.324, backward_time=0.311, grad_norm=110.424, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.078, optim0_lr0=1.609e-05, train_time=1.656
+[gpua014:0/64] 2023-12-01 18:52:43,958 (trainer:735) INFO: 1epoch:train:9701-9800batch: iter_time=8.890e-05, forward_time=0.146, loss_ctc=252.519, loss_att=224.496, acc=0.186, loss=232.902, backward_time=0.294, grad_norm=105.396, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.077, optim0_lr0=1.625e-05, train_time=1.332
+[gpua014:0/64] 2023-12-01 18:54:50,629 (trainer:735) INFO: 1epoch:train:9801-9900batch: iter_time=1.021e-04, forward_time=0.147, loss_ctc=281.238, loss_att=263.748, acc=0.175, loss=268.995, backward_time=0.291, grad_norm=120.853, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.077, optim0_lr0=1.642e-05, train_time=1.266
+[gpua014:0/64] 2023-12-01 18:57:03,749 (trainer:735) INFO: 1epoch:train:9901-10000batch: iter_time=9.522e-05, forward_time=0.147, loss_ctc=241.093, loss_att=213.843, acc=0.198, loss=222.018, backward_time=0.292, grad_norm=120.530, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.076, optim0_lr0=1.659e-05, train_time=1.331
+[gpua014:0/64] 2023-12-01 18:57:06,044 (multiple_iter_factory:32) INFO: Building 8th iter-factory...
+[gpua014:0/64] 2023-12-01 18:57:24,012 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua014:0/64] 2023-12-01 18:57:27,677 (abs_task:1616) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TPreprocessor object at 0x7fa1a0dd1b70>)
+[gpua014:0/64] 2023-12-01 18:57:27,677 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, 
+[gpua014:0/64] 2023-12-01 18:57:27,680 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua014:0/64] 2023-12-01 19:04:28,341 (trainer:735) INFO: 1epoch:train:10001-10100batch: iter_time=3.040, forward_time=0.181, loss_ctc=243.198, loss_att=227.113, acc=0.184, loss=231.939, backward_time=0.327, grad_norm=122.261, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.079, optim0_lr0=1.675e-05, train_time=4.446
+[gpua014:0/64] 2023-12-01 19:07:14,704 (trainer:735) INFO: 1epoch:train:10101-10200batch: iter_time=8.389e-05, forward_time=0.149, loss_ctc=243.991, loss_att=216.847, acc=0.191, loss=224.990, backward_time=0.337, grad_norm=105.938, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.078, optim0_lr0=1.692e-05, train_time=1.663
+[gpua014:0/64] 2023-12-01 19:10:02,484 (trainer:735) INFO: 1epoch:train:10201-10300batch: iter_time=8.385e-05, forward_time=0.144, loss_ctc=252.366, loss_att=219.695, acc=0.186, loss=229.496, backward_time=0.336, grad_norm=128.780, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.077, optim0_lr0=1.709e-05, train_time=1.678
+[gpua014:0/64] 2023-12-01 19:12:37,605 (trainer:735) INFO: 1epoch:train:10301-10400batch: iter_time=8.707e-05, forward_time=0.144, loss_ctc=222.133, loss_att=201.248, acc=0.200, loss=207.514, backward_time=0.310, grad_norm=100.306, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.077, optim0_lr0=1.725e-05, train_time=1.551
+[gpua014:0/64] 2023-12-01 19:15:32,038 (trainer:735) INFO: 1epoch:train:10401-10500batch: iter_time=8.612e-05, forward_time=0.149, loss_ctc=294.038, loss_att=260.403, acc=0.176, loss=270.494, backward_time=0.354, grad_norm=123.488, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.078, optim0_lr0=1.742e-05, train_time=1.744
+[gpua014:0/64] 2023-12-01 19:18:31,957 (trainer:735) INFO: 1epoch:train:10501-10600batch: iter_time=8.372e-05, forward_time=0.155, loss_ctc=264.668, loss_att=235.517, acc=0.190, loss=244.262, backward_time=0.331, grad_norm=137.498, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.077, optim0_lr0=1.759e-05, train_time=1.799
+[gpua014:0/64] 2023-12-01 19:21:02,246 (trainer:735) INFO: 1epoch:train:10601-10700batch: iter_time=7.738e-05, forward_time=0.182, loss_ctc=288.489, loss_att=245.227, acc=0.191, loss=258.206, backward_time=0.328, grad_norm=139.840, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.079, optim0_lr0=1.775e-05, train_time=1.503
+[gpua014:0/64] 2023-12-01 19:23:50,799 (trainer:735) INFO: 1epoch:train:10701-10800batch: iter_time=7.935e-05, forward_time=0.159, loss_ctc=261.749, loss_att=227.897, acc=0.192, loss=238.053, backward_time=0.321, grad_norm=111.292, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.076, optim0_lr0=1.792e-05, train_time=1.685
+[gpua014:0/64] 2023-12-01 19:26:23,475 (trainer:735) INFO: 1epoch:train:10801-10900batch: iter_time=8.446e-05, forward_time=0.149, loss_ctc=263.830, loss_att=225.232, acc=0.189, loss=236.812, backward_time=0.306, grad_norm=103.300, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.077, optim0_lr0=1.809e-05, train_time=1.527
+[gpua014:0/64] 2023-12-01 19:28:59,105 (trainer:735) INFO: 1epoch:train:10901-11000batch: iter_time=8.685e-05, forward_time=0.146, loss_ctc=274.453, loss_att=237.166, acc=0.183, loss=248.352, backward_time=0.322, grad_norm=94.833, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.077, optim0_lr0=1.825e-05, train_time=1.556
+[gpua014:0/64] 2023-12-01 19:31:17,072 (trainer:735) INFO: 1epoch:train:11001-11100batch: iter_time=8.357e-05, forward_time=0.146, loss_ctc=262.077, loss_att=230.818, acc=0.188, loss=240.196, backward_time=0.296, grad_norm=109.526, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.077, optim0_lr0=1.842e-05, train_time=1.379
+[gpua014:0/64] 2023-12-01 19:33:46,226 (trainer:735) INFO: 1epoch:train:11101-11200batch: iter_time=8.363e-05, forward_time=0.146, loss_ctc=244.855, loss_att=213.987, acc=0.193, loss=223.247, backward_time=0.304, grad_norm=102.767, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.077, optim0_lr0=1.859e-05, train_time=1.491
+[gpua014:0/64] 2023-12-01 19:35:19,270 (multiple_iter_factory:32) INFO: Building 9th iter-factory...
+[gpua014:0/64] 2023-12-01 19:35:37,491 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua014:0/64] 2023-12-01 19:35:41,161 (abs_task:1616) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TPreprocessor object at 0x7fa192c871c0>)
+[gpua014:0/64] 2023-12-01 19:35:41,161 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, 
+[gpua014:0/64] 2023-12-01 19:35:41,164 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua014:0/64] 2023-12-01 19:42:43,690 (trainer:735) INFO: 1epoch:train:11201-11300batch: iter_time=2.229, forward_time=0.146, loss_ctc=253.190, loss_att=227.129, acc=0.194, loss=234.947, backward_time=0.314, grad_norm=118.394, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.077, optim0_lr0=1.875e-05, train_time=5.374
+[gpua014:0/64] 2023-12-01 19:44:57,343 (trainer:735) INFO: 1epoch:train:11301-11400batch: iter_time=9.029e-05, forward_time=0.146, loss_ctc=227.449, loss_att=200.357, acc=0.199, loss=208.485, backward_time=0.286, grad_norm=93.892, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.076, optim0_lr0=1.892e-05, train_time=1.336
+[gpua014:0/64] 2023-12-01 19:47:30,367 (trainer:735) INFO: 1epoch:train:11401-11500batch: iter_time=8.404e-05, forward_time=0.146, loss_ctc=248.403, loss_att=215.263, acc=0.200, loss=225.205, backward_time=0.307, grad_norm=109.611, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.077, optim0_lr0=1.909e-05, train_time=1.530
+[gpua014:0/64] 2023-12-01 19:50:29,642 (trainer:735) INFO: 1epoch:train:11501-11600batch: iter_time=9.730e-05, forward_time=0.163, loss_ctc=233.548, loss_att=206.293, acc=0.194, loss=214.469, backward_time=0.302, grad_norm=111.326, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.080, optim0_lr0=1.925e-05, train_time=1.793
+[gpua014:0/64] 2023-12-01 19:52:59,164 (trainer:735) INFO: 1epoch:train:11601-11700batch: iter_time=9.853e-05, forward_time=0.146, loss_ctc=240.373, loss_att=205.553, acc=0.196, loss=215.999, backward_time=0.326, grad_norm=91.335, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.076, optim0_lr0=1.942e-05, train_time=1.495
+[gpua014:0/64] 2023-12-01 19:55:32,243 (trainer:735) INFO: 1epoch:train:11701-11800batch: iter_time=9.460e-05, forward_time=0.178, loss_ctc=277.699, loss_att=242.968, acc=0.194, loss=253.387, backward_time=0.352, grad_norm=119.000, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.079, optim0_lr0=1.959e-05, train_time=1.530
+[gpua014:0/64] 2023-12-01 19:58:05,866 (trainer:735) INFO: 1epoch:train:11801-11900batch: iter_time=9.228e-05, forward_time=0.162, loss_ctc=296.193, loss_att=253.638, acc=0.190, loss=266.404, backward_time=0.321, grad_norm=111.885, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.077, optim0_lr0=1.975e-05, train_time=1.536
+[gpua014:0/64] 2023-12-01 20:01:09,296 (trainer:735) INFO: 1epoch:train:11901-12000batch: iter_time=9.786e-05, forward_time=0.150, loss_ctc=251.110, loss_att=215.937, acc=0.201, loss=226.489, backward_time=0.311, grad_norm=104.710, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.077, optim0_lr0=1.992e-05, train_time=1.834
+[gpua014:0/64] 2023-12-01 20:04:05,807 (trainer:735) INFO: 1epoch:train:12001-12100batch: iter_time=9.768e-05, forward_time=0.147, loss_ctc=277.105, loss_att=236.050, acc=0.188, loss=248.367, backward_time=0.339, grad_norm=95.426, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.077, optim0_lr0=2.009e-05, train_time=1.765
+[gpua014:0/64] 2023-12-01 20:06:49,090 (trainer:735) INFO: 1epoch:train:12101-12200batch: iter_time=1.027e-04, forward_time=0.146, loss_ctc=255.002, loss_att=220.086, acc=0.196, loss=230.561, backward_time=0.342, grad_norm=93.660, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.077, optim0_lr0=2.025e-05, train_time=1.633
+[gpua014:0/64] 2023-12-01 20:09:45,786 (trainer:735) INFO: 1epoch:train:12201-12300batch: iter_time=9.897e-05, forward_time=0.146, loss_ctc=248.810, loss_att=209.401, acc=0.195, loss=221.224, backward_time=0.326, grad_norm=99.296, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.077, optim0_lr0=2.042e-05, train_time=1.767
+[gpua014:0/64] 2023-12-01 20:12:37,436 (trainer:735) INFO: 1epoch:train:12301-12400batch: iter_time=1.074e-04, forward_time=0.148, loss_ctc=271.778, loss_att=241.634, acc=0.187, loss=250.677, backward_time=0.310, grad_norm=93.476, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.077, optim0_lr0=2.059e-05, train_time=1.716
+[gpua014:0/64] 2023-12-01 20:15:10,941 (trainer:735) INFO: 1epoch:train:12401-12500batch: iter_time=9.798e-05, forward_time=0.147, loss_ctc=236.291, loss_att=201.831, acc=0.207, loss=212.169, backward_time=0.323, grad_norm=88.501, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.077, optim0_lr0=2.075e-05, train_time=1.535
+[gpua014:0/64] 2023-12-01 20:15:30,969 (multiple_iter_factory:32) INFO: Building 10th iter-factory...
+[gpua014:0/64] 2023-12-01 20:15:49,033 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua014:0/64] 2023-12-01 20:15:52,675 (abs_task:1616) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TPreprocessor object at 0x7fa1c4f2f910>)
+[gpua014:0/64] 2023-12-01 20:15:52,675 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, 
+[gpua014:0/64] 2023-12-01 20:15:52,678 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua014:0/64] 2023-12-01 20:21:56,835 (trainer:735) INFO: 1epoch:train:12501-12600batch: iter_time=2.575, forward_time=0.180, loss_ctc=237.115, loss_att=223.677, acc=0.196, loss=227.708, backward_time=0.317, grad_norm=91.014, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.077, optim0_lr0=2.092e-05, train_time=4.059
+[gpua014:0/64] 2023-12-01 20:24:20,234 (trainer:735) INFO: 1epoch:train:12601-12700batch: iter_time=8.269e-05, forward_time=0.145, loss_ctc=238.303, loss_att=209.478, acc=0.203, loss=218.125, backward_time=0.332, grad_norm=87.628, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.075, optim0_lr0=2.109e-05, train_time=1.434
+[gpua014:0/64] 2023-12-01 20:26:51,137 (trainer:735) INFO: 1epoch:train:12701-12800batch: iter_time=8.317e-05, forward_time=0.145, loss_ctc=245.225, loss_att=218.841, acc=0.194, loss=226.756, backward_time=0.287, grad_norm=89.958, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.075, optim0_lr0=2.125e-05, train_time=1.509
+[gpua014:0/64] 2023-12-01 20:29:53,714 (trainer:735) INFO: 1epoch:train:12801-12900batch: iter_time=9.426e-05, forward_time=0.146, loss_ctc=216.298, loss_att=195.958, acc=0.210, loss=202.060, backward_time=0.347, grad_norm=82.993, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.076, optim0_lr0=2.142e-05, train_time=1.826
+[gpua014:0/64] 2023-12-01 20:32:55,062 (trainer:735) INFO: 1epoch:train:12901-13000batch: iter_time=1.017e-04, forward_time=0.147, loss_ctc=286.479, loss_att=254.755, acc=0.188, loss=264.272, backward_time=0.327, grad_norm=100.950, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.076, optim0_lr0=2.159e-05, train_time=1.813
+[gpua014:0/64] 2023-12-01 20:35:14,287 (trainer:735) INFO: 1epoch:train:13001-13100batch: iter_time=1.002e-04, forward_time=0.158, loss_ctc=257.878, loss_att=232.308, acc=0.199, loss=239.979, backward_time=0.303, grad_norm=97.473, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.075, optim0_lr0=2.175e-05, train_time=1.392
+[gpua014:0/64] 2023-12-01 20:37:43,130 (trainer:735) INFO: 1epoch:train:13101-13200batch: iter_time=9.748e-05, forward_time=0.145, loss_ctc=282.708, loss_att=242.343, acc=0.199, loss=254.453, backward_time=0.372, grad_norm=107.639, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.075, optim0_lr0=2.192e-05, train_time=1.488
+[gpua014:0/64] 2023-12-01 20:40:05,003 (trainer:735) INFO: 1epoch:train:13201-13300batch: iter_time=1.014e-04, forward_time=0.145, loss_ctc=254.669, loss_att=221.948, acc=0.199, loss=231.765, backward_time=0.283, grad_norm=105.612, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.076, optim0_lr0=2.209e-05, train_time=1.419
+[gpua014:0/64] 2023-12-01 20:42:15,158 (trainer:735) INFO: 1epoch:train:13301-13400batch: iter_time=9.791e-05, forward_time=0.145, loss_ctc=258.364, loss_att=227.424, acc=0.195, loss=236.706, backward_time=0.297, grad_norm=105.310, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.076, optim0_lr0=2.225e-05, train_time=1.301
+[gpua014:0/64] 2023-12-01 20:45:19,629 (trainer:735) INFO: 1epoch:train:13401-13500batch: iter_time=1.073e-04, forward_time=0.161, loss_ctc=266.952, loss_att=232.710, acc=0.193, loss=242.982, backward_time=0.367, grad_norm=93.457, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.081, optim0_lr0=2.242e-05, train_time=1.844
+[gpua014:0/64] 2023-12-01 20:47:51,216 (trainer:735) INFO: 1epoch:train:13501-13600batch: iter_time=1.025e-04, forward_time=0.172, loss_ctc=253.742, loss_att=226.403, acc=0.199, loss=234.605, backward_time=0.359, grad_norm=94.120, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.081, optim0_lr0=2.259e-05, train_time=1.516
+[gpua014:0/64] 2023-12-01 20:50:04,612 (trainer:735) INFO: 1epoch:train:13601-13700batch: iter_time=1.122e-04, forward_time=0.148, loss_ctc=238.514, loss_att=211.688, acc=0.202, loss=219.736, backward_time=0.289, grad_norm=82.262, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.076, optim0_lr0=2.275e-05, train_time=1.334
+[gpua014:0/64] 2023-12-01 20:51:49,714 (multiple_iter_factory:32) INFO: Building 11th iter-factory...
+[gpua014:0/64] 2023-12-01 20:52:07,806 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua014:0/64] 2023-12-01 20:52:11,487 (abs_task:1616) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TPreprocessor object at 0x7faa6abcff10>)
+[gpua014:0/64] 2023-12-01 20:52:11,487 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, 
+[gpua014:0/64] 2023-12-01 20:52:11,491 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua014:0/64] 2023-12-01 20:58:24,207 (trainer:735) INFO: 1epoch:train:13701-13800batch: iter_time=2.851, forward_time=0.197, loss_ctc=246.645, loss_att=222.845, acc=0.205, loss=229.985, backward_time=0.353, grad_norm=100.254, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.079, optim0_lr0=2.292e-05, train_time=4.996
+[gpua014:0/64] 2023-12-01 21:00:30,597 (trainer:735) INFO: 1epoch:train:13801-13900batch: iter_time=8.952e-05, forward_time=0.147, loss_ctc=223.636, loss_att=195.963, acc=0.205, loss=204.265, backward_time=0.277, grad_norm=74.092, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.077, optim0_lr0=2.309e-05, train_time=1.264
+[gpua014:0/64] 2023-12-01 21:02:53,908 (trainer:735) INFO: 1epoch:train:13901-14000batch: iter_time=1.040e-04, forward_time=0.146, loss_ctc=241.966, loss_att=213.252, acc=0.207, loss=221.866, backward_time=0.324, grad_norm=83.624, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.077, optim0_lr0=2.325e-05, train_time=1.433
+[gpua014:0/64] 2023-12-01 21:05:47,156 (trainer:735) INFO: 1epoch:train:14001-14100batch: iter_time=1.037e-04, forward_time=0.147, loss_ctc=226.181, loss_att=202.817, acc=0.204, loss=209.826, backward_time=0.319, grad_norm=83.280, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.077, optim0_lr0=2.342e-05, train_time=1.732
+[gpua014:0/64] 2023-12-01 21:07:58,884 (trainer:735) INFO: 1epoch:train:14101-14200batch: iter_time=1.052e-04, forward_time=0.146, loss_ctc=235.935, loss_att=205.310, acc=0.203, loss=214.497, backward_time=0.299, grad_norm=90.214, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.077, optim0_lr0=2.359e-05, train_time=1.317
+[gpua014:0/64] 2023-12-01 21:10:37,432 (trainer:735) INFO: 1epoch:train:14201-14300batch: iter_time=9.612e-05, forward_time=0.238, loss_ctc=272.481, loss_att=240.489, acc=0.202, loss=250.087, backward_time=0.337, grad_norm=111.087, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.081, optim0_lr0=2.375e-05, train_time=1.585
+[gpua014:0/64] 2023-12-01 21:13:33,671 (trainer:735) INFO: 1epoch:train:14301-14400batch: iter_time=9.259e-05, forward_time=0.155, loss_ctc=289.643, loss_att=252.224, acc=0.196, loss=263.449, backward_time=0.338, grad_norm=92.412, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.077, optim0_lr0=2.392e-05, train_time=1.763
+[gpua014:0/64] 2023-12-01 21:16:54,066 (trainer:735) INFO: 1epoch:train:14401-14500batch: iter_time=1.039e-04, forward_time=0.147, loss_ctc=245.397, loss_att=211.125, acc=0.209, loss=221.407, backward_time=0.339, grad_norm=99.188, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.077, optim0_lr0=2.409e-05, train_time=2.004
+[gpua014:0/64] 2023-12-01 21:19:15,519 (trainer:735) INFO: 1epoch:train:14501-14600batch: iter_time=9.136e-05, forward_time=0.147, loss_ctc=270.904, loss_att=235.653, acc=0.195, loss=246.228, backward_time=0.299, grad_norm=93.683, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.077, optim0_lr0=2.425e-05, train_time=1.414
+[gpua014:0/64] 2023-12-01 21:22:15,529 (trainer:735) INFO: 1epoch:train:14601-14700batch: iter_time=9.465e-05, forward_time=0.146, loss_ctc=249.068, loss_att=215.629, acc=0.206, loss=225.661, backward_time=0.316, grad_norm=93.945, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.077, optim0_lr0=2.442e-05, train_time=1.800
+[gpua014:0/64] 2023-12-01 21:24:53,300 (trainer:735) INFO: 1epoch:train:14701-14800batch: iter_time=9.322e-05, forward_time=0.147, loss_ctc=243.617, loss_att=207.677, acc=0.203, loss=218.459, backward_time=0.359, grad_norm=95.568, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.077, optim0_lr0=2.459e-05, train_time=1.577
+[gpua014:0/64] 2023-12-01 21:27:49,230 (trainer:735) INFO: 1epoch:train:14801-14900batch: iter_time=9.348e-05, forward_time=0.148, loss_ctc=266.488, loss_att=242.287, acc=0.193, loss=249.547, backward_time=0.308, grad_norm=79.637, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.077, optim0_lr0=2.475e-05, train_time=1.759
+[gpua014:0/64] 2023-12-01 21:30:09,111 (trainer:735) INFO: 1epoch:train:14901-15000batch: iter_time=9.008e-05, forward_time=0.245, loss_ctc=232.766, loss_att=198.490, acc=0.214, loss=208.773, backward_time=0.312, grad_norm=85.173, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.081, optim0_lr0=2.492e-05, train_time=1.399
+[gpua014:0/64] 2023-12-01 21:55:50,872 (trainer:341) INFO: 1epoch results: [train] iter_time=0.172, forward_time=0.153, loss_ctc=326.964, loss_att=260.915, acc=0.155, loss=280.730, backward_time=0.311, grad_norm=291.862, clip=100.000, loss_scale=1.669e+06, optim_step_time=0.077, optim0_lr0=1.250e-05, train_time=1.798, time=7 hours, 29 minutes and 58.73 seconds, total_count=15000, gpu_max_cached_mem_GB=35.859, [valid] loss_ctc=191.709, cer_ctc=0.978, loss_att=165.154, acc=0.172, cer=0.718, wer=1.000, loss=173.120, time=25 minutes and 17.19 seconds, total_count=4671, gpu_max_cached_mem_GB=35.859
+[gpua014:0/64] 2023-12-01 21:56:19,038 (trainer:389) INFO: The best model has been updated: valid.acc, valid.total_count
+[gpua014:0/64] 2023-12-01 21:56:19,038 (trainer:272) INFO: 2/40epoch started. Estimated time to finish: 1 week, 5 days and 21 hours
+[gpua014:0/64] 2023-12-01 21:56:19,051 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpua014:0/64] 2023-12-01 21:56:36,952 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua014:0/64] 2023-12-01 21:56:40,494 (abs_task:1616) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TPreprocessor object at 0x7fa1d5a179a0>)
+[gpua014:0/64] 2023-12-01 21:56:40,495 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, 
+[gpua014:0/64] 2023-12-01 21:56:40,498 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua014:0/64] 2023-12-01 22:03:51,400 (trainer:735) INFO: 2epoch:train:1-100batch: iter_time=2.691, forward_time=0.161, loss_ctc=228.644, loss_att=195.283, acc=0.204, loss=205.291, backward_time=0.289, grad_norm=83.175, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.082, optim0_lr0=2.509e-05, train_time=4.523
+[gpua014:0/64] 2023-12-01 22:06:24,168 (trainer:735) INFO: 2epoch:train:101-200batch: iter_time=9.058e-05, forward_time=0.149, loss_ctc=240.878, loss_att=203.028, acc=0.210, loss=214.383, backward_time=0.306, grad_norm=69.135, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.081, optim0_lr0=2.525e-05, train_time=1.527
+[gpua014:0/64] 2023-12-01 22:08:39,132 (trainer:735) INFO: 2epoch:train:201-300batch: iter_time=1.029e-04, forward_time=0.148, loss_ctc=268.580, loss_att=226.868, acc=0.205, loss=239.382, backward_time=0.303, grad_norm=85.677, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.080, optim0_lr0=2.542e-05, train_time=1.349
+[gpua014:0/64] 2023-12-01 22:11:45,692 (trainer:735) INFO: 2epoch:train:301-400batch: iter_time=1.074e-04, forward_time=0.147, loss_ctc=247.907, loss_att=204.109, acc=0.209, loss=217.248, backward_time=0.322, grad_norm=86.384, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.081, optim0_lr0=2.559e-05, train_time=1.865
+[gpua014:0/64] 2023-12-01 22:13:59,499 (trainer:735) INFO: 2epoch:train:401-500batch: iter_time=9.850e-05, forward_time=0.147, loss_ctc=239.530, loss_att=203.112, acc=0.203, loss=214.038, backward_time=0.290, grad_norm=90.194, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.080, optim0_lr0=2.575e-05, train_time=1.338
+[gpua014:0/64] 2023-12-01 22:16:21,967 (trainer:735) INFO: 2epoch:train:501-600batch: iter_time=9.841e-05, forward_time=0.147, loss_ctc=228.462, loss_att=193.843, acc=0.211, loss=204.229, backward_time=0.302, grad_norm=73.001, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.080, optim0_lr0=2.592e-05, train_time=1.424
+[gpua014:0/64] 2023-12-01 22:18:52,279 (trainer:735) INFO: 2epoch:train:601-700batch: iter_time=9.178e-05, forward_time=0.151, loss_ctc=230.352, loss_att=194.108, acc=0.212, loss=204.981, backward_time=0.314, grad_norm=79.025, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.080, optim0_lr0=2.609e-05, train_time=1.503
+[gpua014:0/64] 2023-12-01 22:21:12,734 (trainer:735) INFO: 2epoch:train:701-800batch: iter_time=9.955e-05, forward_time=0.206, loss_ctc=255.727, loss_att=225.291, acc=0.207, loss=234.422, backward_time=0.312, grad_norm=74.980, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.083, optim0_lr0=2.625e-05, train_time=1.403
+[gpua014:0/64] 2023-12-01 22:23:53,428 (trainer:735) INFO: 2epoch:train:801-900batch: iter_time=8.980e-05, forward_time=0.153, loss_ctc=235.261, loss_att=200.125, acc=0.211, loss=210.666, backward_time=0.306, grad_norm=75.219, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.080, optim0_lr0=2.642e-05, train_time=1.608
+[gpua014:0/64] 2023-12-01 22:26:14,584 (trainer:735) INFO: 2epoch:train:901-1000batch: iter_time=0.001, forward_time=0.166, loss_ctc=245.922, loss_att=212.928, acc=0.203, loss=222.826, backward_time=0.336, grad_norm=78.526, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.094, optim0_lr0=2.659e-05, train_time=1.411
+[gpua014:0/64] 2023-12-01 22:28:31,295 (trainer:735) INFO: 2epoch:train:1001-1100batch: iter_time=9.864e-05, forward_time=0.148, loss_ctc=251.165, loss_att=211.892, acc=0.196, loss=223.674, backward_time=0.292, grad_norm=80.387, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.079, optim0_lr0=2.675e-05, train_time=1.367
+[gpua014:0/64] 2023-12-01 22:31:29,071 (trainer:735) INFO: 2epoch:train:1101-1200batch: iter_time=9.653e-05, forward_time=0.148, loss_ctc=266.125, loss_att=227.850, acc=0.202, loss=239.333, backward_time=0.334, grad_norm=70.179, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.080, optim0_lr0=2.692e-05, train_time=1.778
+[gpua014:0/64] 2023-12-01 22:32:56,941 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
+[gpua014:0/64] 2023-12-01 22:33:14,694 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua014:0/64] 2023-12-01 22:33:18,324 (abs_task:1616) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TPreprocessor object at 0x7fa1bfca26e0>)
+[gpua014:0/64] 2023-12-01 22:33:18,324 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, 
+[gpua014:0/64] 2023-12-01 22:33:18,327 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+
+gpua014:120543:120653 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : port error
+
+gpua014:120546:120654 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : port error
+
+gpua014:120544:120652 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : port error
+
+gpua014:120545:120655 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : port error
+
+gpua014:120543:120653 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : port active
+
+gpua014:120546:120654 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : port active
+
+gpua014:120544:120652 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : port active
+
+gpua014:120545:120655 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : port active
+[gpua014:0/64] 2023-12-01 22:42:43,228 (trainer:735) INFO: 2epoch:train:1201-1300batch: iter_time=2.513, forward_time=0.147, loss_ctc=250.076, loss_att=205.528, acc=0.206, loss=218.892, backward_time=0.307, grad_norm=102.304, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.080, optim0_lr0=2.709e-05, train_time=6.741
+[gpua014:0/64] 2023-12-01 22:44:38,267 (trainer:735) INFO: 2epoch:train:1301-1400batch: iter_time=8.209e-05, forward_time=0.148, loss_ctc=229.910, loss_att=200.217, acc=0.209, loss=209.125, backward_time=0.279, grad_norm=71.530, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.079, optim0_lr0=2.725e-05, train_time=1.150
+[gpua014:0/64] 2023-12-01 22:46:37,970 (trainer:735) INFO: 2epoch:train:1401-1500batch: iter_time=8.550e-05, forward_time=0.147, loss_ctc=248.199, loss_att=208.432, acc=0.217, loss=220.362, backward_time=0.279, grad_norm=76.550, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.079, optim0_lr0=2.742e-05, train_time=1.197
+[gpua014:0/64] 2023-12-01 22:49:01,564 (trainer:735) INFO: 2epoch:train:1501-1600batch: iter_time=8.857e-05, forward_time=0.147, loss_ctc=234.812, loss_att=204.335, acc=0.212, loss=213.478, backward_time=0.299, grad_norm=78.784, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.079, optim0_lr0=2.759e-05, train_time=1.436
+[gpua014:0/64] 2023-12-01 22:51:28,912 (trainer:735) INFO: 2epoch:train:1601-1700batch: iter_time=9.151e-05, forward_time=0.147, loss_ctc=248.393, loss_att=209.799, acc=0.208, loss=221.377, backward_time=0.306, grad_norm=83.875, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.079, optim0_lr0=2.775e-05, train_time=1.473
+[gpua014:0/64] 2023-12-01 22:54:06,356 (trainer:735) INFO: 2epoch:train:1701-1800batch: iter_time=8.225e-05, forward_time=0.150, loss_ctc=237.501, loss_att=213.146, acc=0.206, loss=220.453, backward_time=0.344, grad_norm=72.037, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.079, optim0_lr0=2.792e-05, train_time=1.574
+[gpua014:0/64] 2023-12-01 22:56:10,676 (trainer:735) INFO: 2epoch:train:1801-1900batch: iter_time=8.748e-05, forward_time=0.148, loss_ctc=225.275, loss_att=192.357, acc=0.215, loss=202.232, backward_time=0.286, grad_norm=74.895, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.079, optim0_lr0=2.809e-05, train_time=1.243
+[gpua014:0/64] 2023-12-01 22:58:43,994 (trainer:735) INFO: 2epoch:train:1901-2000batch: iter_time=9.041e-05, forward_time=0.148, loss_ctc=247.026, loss_att=223.054, acc=0.207, loss=230.245, backward_time=0.302, grad_norm=72.679, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.080, optim0_lr0=2.825e-05, train_time=1.533
+[gpua014:0/64] 2023-12-01 23:01:26,657 (trainer:735) INFO: 2epoch:train:2001-2100batch: iter_time=8.789e-05, forward_time=0.147, loss_ctc=245.911, loss_att=211.953, acc=0.211, loss=222.140, backward_time=0.309, grad_norm=73.699, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.080, optim0_lr0=2.842e-05, train_time=1.626
+[gpua014:0/64] 2023-12-01 23:04:12,421 (trainer:735) INFO: 2epoch:train:2101-2200batch: iter_time=8.606e-05, forward_time=0.147, loss_ctc=232.767, loss_att=206.003, acc=0.213, loss=214.032, backward_time=0.297, grad_norm=70.048, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.079, optim0_lr0=2.859e-05, train_time=1.657
+[gpua014:0/64] 2023-12-01 23:06:25,180 (trainer:735) INFO: 2epoch:train:2201-2300batch: iter_time=8.863e-05, forward_time=0.147, loss_ctc=258.437, loss_att=219.707, acc=0.200, loss=231.326, backward_time=0.278, grad_norm=70.992, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.080, optim0_lr0=2.875e-05, train_time=1.327
+[gpua014:0/64] 2023-12-01 23:09:18,352 (trainer:735) INFO: 2epoch:train:2301-2400batch: iter_time=8.908e-05, forward_time=0.150, loss_ctc=245.686, loss_att=213.864, acc=0.206, loss=223.411, backward_time=0.335, grad_norm=70.380, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.080, optim0_lr0=2.892e-05, train_time=1.732
+[gpua014:0/64] 2023-12-01 23:11:39,174 (trainer:735) INFO: 2epoch:train:2401-2500batch: iter_time=8.654e-05, forward_time=0.147, loss_ctc=252.775, loss_att=210.243, acc=0.208, loss=223.002, backward_time=0.304, grad_norm=93.371, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.079, optim0_lr0=2.909e-05, train_time=1.408
+[gpua014:0/64] 2023-12-01 23:11:40,734 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpua014:0/64] 2023-12-01 23:11:58,701 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua014:0/64] 2023-12-01 23:12:02,154 (abs_task:1616) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TPreprocessor object at 0x7faa6b988130>)
+[gpua014:0/64] 2023-12-01 23:12:02,154 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, 
+[gpua014:0/64] 2023-12-01 23:12:02,158 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua014:0/64] 2023-12-01 23:21:42,061 (trainer:735) INFO: 2epoch:train:2501-2600batch: iter_time=1.364, forward_time=0.192, loss_ctc=223.606, loss_att=188.011, acc=0.213, loss=198.690, backward_time=0.286, grad_norm=80.384, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.082, optim0_lr0=2.925e-05, train_time=6.029
+[gpua014:0/64] 2023-12-01 23:23:42,343 (trainer:735) INFO: 2epoch:train:2601-2700batch: iter_time=9.384e-05, forward_time=0.148, loss_ctc=237.671, loss_att=201.886, acc=0.219, loss=212.621, backward_time=0.278, grad_norm=72.238, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.080, optim0_lr0=2.942e-05, train_time=1.203
+[gpua014:0/64] 2023-12-01 23:25:37,843 (trainer:735) INFO: 2epoch:train:2701-2800batch: iter_time=9.825e-05, forward_time=0.150, loss_ctc=264.972, loss_att=222.378, acc=0.210, loss=235.157, backward_time=0.278, grad_norm=93.355, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.080, optim0_lr0=2.959e-05, train_time=1.155
+[gpua014:0/64] 2023-12-01 23:28:01,374 (trainer:735) INFO: 2epoch:train:2801-2900batch: iter_time=9.435e-05, forward_time=0.148, loss_ctc=245.617, loss_att=206.694, acc=0.216, loss=218.371, backward_time=0.329, grad_norm=78.479, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.080, optim0_lr0=2.975e-05, train_time=1.435
+[gpua014:0/64] 2023-12-01 23:30:36,252 (trainer:735) INFO: 2epoch:train:2901-3000batch: iter_time=1.108e-04, forward_time=0.148, loss_ctc=233.442, loss_att=203.443, acc=0.208, loss=212.443, backward_time=0.312, grad_norm=76.552, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.080, optim0_lr0=2.992e-05, train_time=1.549
+[gpua014:0/64] 2023-12-01 23:33:27,184 (trainer:735) INFO: 2epoch:train:3001-3100batch: iter_time=9.405e-05, forward_time=0.147, loss_ctc=226.758, loss_att=192.107, acc=0.216, loss=202.502, backward_time=0.337, grad_norm=66.891, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.080, optim0_lr0=3.009e-05, train_time=1.709
+[gpua014:0/64] 2023-12-01 23:35:54,706 (trainer:735) INFO: 2epoch:train:3101-3200batch: iter_time=9.386e-05, forward_time=0.147, loss_ctc=226.839, loss_att=194.471, acc=0.217, loss=204.181, backward_time=0.322, grad_norm=73.900, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.080, optim0_lr0=3.025e-05, train_time=1.475
+[gpua014:0/64] 2023-12-01 23:38:04,111 (trainer:735) INFO: 2epoch:train:3201-3300batch: iter_time=1.033e-04, forward_time=0.148, loss_ctc=251.664, loss_att=223.729, acc=0.211, loss=232.109, backward_time=0.296, grad_norm=73.527, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.080, optim0_lr0=3.042e-05, train_time=1.294
+[gpua014:0/64] 2023-12-01 23:40:00,542 (trainer:735) INFO: 2epoch:train:3301-3400batch: iter_time=9.540e-05, forward_time=0.154, loss_ctc=230.236, loss_att=199.438, acc=0.216, loss=208.678, backward_time=0.280, grad_norm=71.614, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.080, optim0_lr0=3.059e-05, train_time=1.164
+[gpua014:0/64] 2023-12-01 23:42:04,006 (trainer:735) INFO: 2epoch:train:3401-3500batch: iter_time=9.647e-05, forward_time=0.148, loss_ctc=243.553, loss_att=209.801, acc=0.211, loss=219.926, backward_time=0.285, grad_norm=75.907, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.081, optim0_lr0=3.075e-05, train_time=1.234
+[gpua014:0/64] 2023-12-01 23:44:39,013 (trainer:735) INFO: 2epoch:train:3501-3600batch: iter_time=9.791e-05, forward_time=0.149, loss_ctc=247.101, loss_att=210.720, acc=0.205, loss=221.635, backward_time=0.293, grad_norm=73.155, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.081, optim0_lr0=3.092e-05, train_time=1.550
+[gpua014:0/64] 2023-12-01 23:47:10,822 (trainer:735) INFO: 2epoch:train:3601-3700batch: iter_time=9.270e-05, forward_time=0.148, loss_ctc=262.162, loss_att=225.901, acc=0.208, loss=236.779, backward_time=0.303, grad_norm=67.333, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.080, optim0_lr0=3.109e-05, train_time=1.518
+[gpua014:0/64] 2023-12-01 23:48:40,944 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpua014:0/64] 2023-12-01 23:48:59,027 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua014:0/64] 2023-12-01 23:49:02,660 (abs_task:1616) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TPreprocessor object at 0x7faad7c69600>)
+[gpua014:0/64] 2023-12-01 23:49:02,660 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, 
+[gpua014:0/64] 2023-12-01 23:49:02,663 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua014:0/64] 2023-12-01 23:55:14,226 (trainer:735) INFO: 2epoch:train:3701-3800batch: iter_time=3.257, forward_time=0.149, loss_ctc=247.356, loss_att=199.929, acc=0.214, loss=214.157, backward_time=0.289, grad_norm=89.676, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.080, optim0_lr0=3.125e-05, train_time=4.834
+[gpua014:0/64] 2023-12-01 23:57:15,504 (trainer:735) INFO: 2epoch:train:3801-3900batch: iter_time=8.677e-05, forward_time=0.148, loss_ctc=225.847, loss_att=193.260, acc=0.217, loss=203.037, backward_time=0.279, grad_norm=69.774, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.080, optim0_lr0=3.142e-05, train_time=1.213
+[gpua014:0/64] 2023-12-01 23:59:53,844 (trainer:735) INFO: 2epoch:train:3901-4000batch: iter_time=9.093e-05, forward_time=0.148, loss_ctc=245.914, loss_att=202.953, acc=0.224, loss=215.841, backward_time=0.306, grad_norm=72.863, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.080, optim0_lr0=3.159e-05, train_time=1.583
+[gpua014:0/64] 2023-12-02 00:02:22,419 (trainer:735) INFO: 2epoch:train:4001-4100batch: iter_time=9.981e-05, forward_time=0.179, loss_ctc=232.332, loss_att=197.134, acc=0.220, loss=207.693, backward_time=0.353, grad_norm=69.783, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.085, optim0_lr0=3.175e-05, train_time=1.486
+[gpua014:0/64] 2023-12-02 00:04:50,780 (trainer:735) INFO: 2epoch:train:4101-4200batch: iter_time=9.903e-05, forward_time=0.148, loss_ctc=244.448, loss_att=203.453, acc=0.215, loss=215.751, backward_time=0.343, grad_norm=92.936, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.081, optim0_lr0=3.192e-05, train_time=1.483
+[gpua014:0/64] 2023-12-02 00:06:47,970 (trainer:735) INFO: 2epoch:train:4201-4300batch: iter_time=9.718e-05, forward_time=0.148, loss_ctc=234.535, loss_att=205.321, acc=0.214, loss=214.085, backward_time=0.279, grad_norm=73.847, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.081, optim0_lr0=3.209e-05, train_time=1.172
+[gpua014:0/64] 2023-12-02 00:09:26,651 (trainer:735) INFO: 2epoch:train:4301-4400batch: iter_time=9.709e-05, forward_time=0.188, loss_ctc=221.989, loss_att=186.475, acc=0.221, loss=197.129, backward_time=0.297, grad_norm=70.250, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.083, optim0_lr0=3.225e-05, train_time=1.587
+[gpua014:0/64] 2023-12-02 00:11:28,877 (trainer:735) INFO: 2epoch:train:4401-4500batch: iter_time=9.968e-05, forward_time=0.158, loss_ctc=243.156, loss_att=215.855, acc=0.215, loss=224.046, backward_time=0.282, grad_norm=70.949, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.081, optim0_lr0=3.242e-05, train_time=1.222
+[gpua014:0/64] 2023-12-02 00:13:53,007 (trainer:735) INFO: 2epoch:train:4501-4600batch: iter_time=9.926e-05, forward_time=0.148, loss_ctc=241.251, loss_att=204.829, acc=0.220, loss=215.755, backward_time=0.328, grad_norm=72.101, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.081, optim0_lr0=3.259e-05, train_time=1.441
+[gpua014:0/64] 2023-12-02 00:16:03,216 (trainer:735) INFO: 2epoch:train:4601-4700batch: iter_time=8.818e-05, forward_time=0.147, loss_ctc=230.488, loss_att=199.625, acc=0.221, loss=208.884, backward_time=0.281, grad_norm=66.316, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.080, optim0_lr0=3.275e-05, train_time=1.302
+[gpua014:0/64] 2023-12-02 00:18:44,535 (trainer:735) INFO: 2epoch:train:4701-4800batch: iter_time=9.296e-05, forward_time=0.153, loss_ctc=256.278, loss_att=213.687, acc=0.207, loss=226.464, backward_time=0.307, grad_norm=65.439, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.081, optim0_lr0=3.292e-05, train_time=1.613
+[gpua014:0/64] 2023-12-02 00:21:01,124 (trainer:735) INFO: 2epoch:train:4801-4900batch: iter_time=9.584e-05, forward_time=0.148, loss_ctc=242.734, loss_att=207.358, acc=0.215, loss=217.971, backward_time=0.306, grad_norm=67.244, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.081, optim0_lr0=3.309e-05, train_time=1.366
+[gpua014:0/64] 2023-12-02 00:23:01,787 (trainer:735) INFO: 2epoch:train:4901-5000batch: iter_time=9.531e-05, forward_time=0.148, loss_ctc=248.684, loss_att=202.796, acc=0.217, loss=216.562, backward_time=0.279, grad_norm=87.974, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.081, optim0_lr0=3.325e-05, train_time=1.206
+[gpua014:0/64] 2023-12-02 00:23:05,471 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpua014:0/64] 2023-12-02 00:23:23,904 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua014:0/64] 2023-12-02 00:23:27,701 (abs_task:1616) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TPreprocessor object at 0x7fa1844b4e80>)
+[gpua014:0/64] 2023-12-02 00:23:27,701 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, 
+[gpua014:0/64] 2023-12-02 00:23:27,704 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua014:0/64] 2023-12-02 00:30:07,634 (trainer:735) INFO: 2epoch:train:5001-5100batch: iter_time=1.349, forward_time=0.147, loss_ctc=221.059, loss_att=182.902, acc=0.217, loss=194.349, backward_time=0.279, grad_norm=80.393, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.081, optim0_lr0=3.342e-05, train_time=4.258
+[gpua014:0/64] 2023-12-02 00:32:08,605 (trainer:735) INFO: 2epoch:train:5101-5200batch: iter_time=8.465e-05, forward_time=0.147, loss_ctc=234.121, loss_att=192.629, acc=0.223, loss=205.077, backward_time=0.282, grad_norm=61.792, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.081, optim0_lr0=3.359e-05, train_time=1.209
+[gpua014:0/64] 2023-12-02 00:34:42,981 (trainer:735) INFO: 2epoch:train:5201-5300batch: iter_time=8.962e-05, forward_time=0.147, loss_ctc=260.220, loss_att=213.440, acc=0.218, loss=227.474, backward_time=0.310, grad_norm=81.529, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.081, optim0_lr0=3.375e-05, train_time=1.544
+[gpua014:0/64] 2023-12-02 00:36:53,562 (trainer:735) INFO: 2epoch:train:5301-5400batch: iter_time=9.651e-05, forward_time=0.147, loss_ctc=241.713, loss_att=195.402, acc=0.222, loss=209.295, backward_time=0.284, grad_norm=70.639, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.081, optim0_lr0=3.392e-05, train_time=1.306
+[gpua014:0/64] 2023-12-02 00:39:13,177 (trainer:735) INFO: 2epoch:train:5401-5500batch: iter_time=8.999e-05, forward_time=0.147, loss_ctc=230.876, loss_att=192.603, acc=0.217, loss=204.085, backward_time=0.287, grad_norm=69.493, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.081, optim0_lr0=3.409e-05, train_time=1.396
+[gpua014:0/64] 2023-12-02 00:41:42,117 (trainer:735) INFO: 2epoch:train:5501-5600batch: iter_time=8.357e-05, forward_time=0.147, loss_ctc=224.522, loss_att=184.251, acc=0.225, loss=196.332, backward_time=0.337, grad_norm=69.842, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.081, optim0_lr0=3.425e-05, train_time=1.489
+[gpua014:0/64] 2023-12-02 00:44:09,123 (trainer:735) INFO: 2epoch:train:5601-5700batch: iter_time=8.358e-05, forward_time=0.147, loss_ctc=222.750, loss_att=183.919, acc=0.226, loss=195.568, backward_time=0.284, grad_norm=69.726, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.081, optim0_lr0=3.442e-05, train_time=1.470
+[gpua014:0/64] 2023-12-02 00:46:42,243 (trainer:735) INFO: 2epoch:train:5701-5800batch: iter_time=8.957e-05, forward_time=0.147, loss_ctc=247.615, loss_att=212.301, acc=0.221, loss=222.895, backward_time=0.298, grad_norm=68.786, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.081, optim0_lr0=3.459e-05, train_time=1.531
+[gpua014:0/64] 2023-12-02 00:49:01,750 (trainer:735) INFO: 2epoch:train:5801-5900batch: iter_time=9.011e-05, forward_time=0.146, loss_ctc=225.968, loss_att=189.054, acc=0.226, loss=200.128, backward_time=0.288, grad_norm=65.127, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.081, optim0_lr0=3.475e-05, train_time=1.395
+[gpua014:0/64] 2023-12-02 00:51:21,460 (trainer:735) INFO: 2epoch:train:5901-6000batch: iter_time=8.938e-05, forward_time=0.147, loss_ctc=237.548, loss_att=200.642, acc=0.218, loss=211.714, backward_time=0.310, grad_norm=69.078, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.081, optim0_lr0=3.492e-05, train_time=1.397
+[gpua014:0/64] 2023-12-02 00:53:24,295 (trainer:735) INFO: 2epoch:train:6001-6100batch: iter_time=9.136e-05, forward_time=0.161, loss_ctc=242.830, loss_att=200.738, acc=0.208, loss=213.366, backward_time=0.288, grad_norm=61.544, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.083, optim0_lr0=3.509e-05, train_time=1.228
+[gpua014:0/64] 2023-12-02 00:55:48,969 (trainer:735) INFO: 2epoch:train:6101-6200batch: iter_time=8.399e-05, forward_time=0.188, loss_ctc=259.777, loss_att=216.552, acc=0.217, loss=229.520, backward_time=0.326, grad_norm=71.651, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.082, optim0_lr0=3.525e-05, train_time=1.447
+[gpua014:0/64] 2023-12-02 00:57:19,062 (multiple_iter_factory:32) INFO: Building 5th iter-factory...
+[gpua014:0/64] 2023-12-02 00:57:37,767 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua014:0/64] 2023-12-02 00:57:41,273 (abs_task:1616) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TPreprocessor object at 0x7fa1da3c9210>)
+[gpua014:0/64] 2023-12-02 00:57:41,273 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, 
+[gpua014:0/64] 2023-12-02 00:57:41,276 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua014:0/64] 2023-12-02 01:08:10,562 (trainer:735) INFO: 2epoch:train:6201-6300batch: iter_time=2.441, forward_time=0.178, loss_ctc=241.465, loss_att=191.547, acc=0.220, loss=206.522, backward_time=0.282, grad_norm=81.766, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.081, optim0_lr0=3.542e-05, train_time=7.416
+[gpua014:0/64] 2023-12-02 01:10:42,894 (trainer:735) INFO: 2epoch:train:6301-6400batch: iter_time=9.147e-05, forward_time=0.148, loss_ctc=221.845, loss_att=183.584, acc=0.225, loss=195.062, backward_time=0.289, grad_norm=71.025, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.081, optim0_lr0=3.559e-05, train_time=1.523
+[gpua014:0/64] 2023-12-02 01:12:55,494 (trainer:735) INFO: 2epoch:train:6401-6500batch: iter_time=7.902e-05, forward_time=0.147, loss_ctc=240.874, loss_att=194.487, acc=0.230, loss=208.403, backward_time=0.307, grad_norm=65.711, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.080, optim0_lr0=3.575e-05, train_time=1.326
+[gpua014:0/64] 2023-12-02 01:15:48,127 (trainer:735) INFO: 2epoch:train:6501-6600batch: iter_time=9.108e-05, forward_time=0.146, loss_ctc=227.752, loss_att=186.977, acc=0.225, loss=199.209, backward_time=0.325, grad_norm=66.908, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.080, optim0_lr0=3.592e-05, train_time=1.726
+[gpua014:0/64] 2023-12-02 01:18:26,629 (trainer:735) INFO: 2epoch:train:6601-6700batch: iter_time=8.833e-05, forward_time=0.148, loss_ctc=238.884, loss_att=192.097, acc=0.224, loss=206.133, backward_time=0.340, grad_norm=72.355, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.080, optim0_lr0=3.609e-05, train_time=1.585
+[gpua014:0/64] 2023-12-02 01:20:56,274 (trainer:735) INFO: 2epoch:train:6701-6800batch: iter_time=8.333e-05, forward_time=0.146, loss_ctc=230.530, loss_att=195.255, acc=0.222, loss=205.837, backward_time=0.295, grad_norm=61.058, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.080, optim0_lr0=3.625e-05, train_time=1.496
+[gpua014:0/64] 2023-12-02 01:22:54,259 (trainer:735) INFO: 2epoch:train:6801-6900batch: iter_time=8.795e-05, forward_time=0.146, loss_ctc=219.372, loss_att=177.274, acc=0.231, loss=189.904, backward_time=0.277, grad_norm=61.259, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.080, optim0_lr0=3.642e-05, train_time=1.180
+[gpua014:0/64] 2023-12-02 01:25:15,693 (trainer:735) INFO: 2epoch:train:6901-7000batch: iter_time=8.409e-05, forward_time=0.154, loss_ctc=239.938, loss_att=204.390, acc=0.222, loss=215.054, backward_time=0.298, grad_norm=69.962, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.083, optim0_lr0=3.659e-05, train_time=1.414
+[gpua014:0/64] 2023-12-02 01:27:44,967 (trainer:735) INFO: 2epoch:train:7001-7100batch: iter_time=8.631e-05, forward_time=0.156, loss_ctc=237.995, loss_att=196.664, acc=0.225, loss=209.064, backward_time=0.321, grad_norm=75.602, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.082, optim0_lr0=3.675e-05, train_time=1.493
+[gpua014:0/64] 2023-12-02 01:30:32,086 (trainer:735) INFO: 2epoch:train:7101-7200batch: iter_time=8.559e-05, forward_time=0.146, loss_ctc=228.761, loss_att=190.563, acc=0.227, loss=202.023, backward_time=0.315, grad_norm=76.526, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.080, optim0_lr0=3.692e-05, train_time=1.671
+[gpua014:0/64] 2023-12-02 01:32:50,665 (trainer:735) INFO: 2epoch:train:7201-7300batch: iter_time=8.294e-05, forward_time=0.173, loss_ctc=253.301, loss_att=204.932, acc=0.214, loss=219.443, backward_time=0.294, grad_norm=71.420, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.084, optim0_lr0=3.709e-05, train_time=1.386
+[gpua014:0/64] 2023-12-02 01:35:34,542 (trainer:735) INFO: 2epoch:train:7301-7400batch: iter_time=7.885e-05, forward_time=0.157, loss_ctc=239.139, loss_att=197.836, acc=0.220, loss=210.227, backward_time=0.341, grad_norm=71.179, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.081, optim0_lr0=3.725e-05, train_time=1.639
+[gpua014:0/64] 2023-12-02 01:37:55,373 (trainer:735) INFO: 2epoch:train:7401-7500batch: iter_time=8.261e-05, forward_time=0.158, loss_ctc=245.797, loss_att=197.295, acc=0.221, loss=211.846, backward_time=0.299, grad_norm=83.041, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.081, optim0_lr0=3.742e-05, train_time=1.408
+[gpua014:0/64] 2023-12-02 01:38:15,401 (multiple_iter_factory:32) INFO: Building 6th iter-factory...
+[gpua014:0/64] 2023-12-02 01:38:34,255 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua014:0/64] 2023-12-02 01:38:37,789 (abs_task:1616) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TPreprocessor object at 0x7fa1c3871d80>)
+[gpua014:0/64] 2023-12-02 01:38:37,789 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, 
+[gpua014:0/64] 2023-12-02 01:38:37,793 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua014:0/64] 2023-12-02 01:48:49,335 (trainer:735) INFO: 2epoch:train:7501-7600batch: iter_time=2.931, forward_time=0.158, loss_ctc=218.394, loss_att=177.916, acc=0.226, loss=190.059, backward_time=0.280, grad_norm=82.764, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.081, optim0_lr0=3.759e-05, train_time=6.539
+[gpua014:0/64] 2023-12-02 01:51:20,423 (trainer:735) INFO: 2epoch:train:7601-7700batch: iter_time=7.649e-05, forward_time=0.147, loss_ctc=231.850, loss_att=187.301, acc=0.230, loss=200.666, backward_time=0.304, grad_norm=63.484, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.081, optim0_lr0=3.775e-05, train_time=1.511
+[gpua014:0/64] 2023-12-02 01:53:57,815 (trainer:735) INFO: 2epoch:train:7701-7800batch: iter_time=8.629e-05, forward_time=0.150, loss_ctc=257.279, loss_att=207.481, acc=0.226, loss=222.420, backward_time=0.309, grad_norm=74.649, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.081, optim0_lr0=3.792e-05, train_time=1.574
+[gpua014:0/64] 2023-12-02 01:55:56,250 (trainer:735) INFO: 2epoch:train:7801-7900batch: iter_time=8.550e-05, forward_time=0.147, loss_ctc=239.273, loss_att=190.039, acc=0.229, loss=204.809, backward_time=0.277, grad_norm=72.661, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.081, optim0_lr0=3.809e-05, train_time=1.184
+[gpua014:0/64] 2023-12-02 01:57:56,483 (trainer:735) INFO: 2epoch:train:7901-8000batch: iter_time=9.193e-05, forward_time=0.161, loss_ctc=228.484, loss_att=187.044, acc=0.225, loss=199.476, backward_time=0.293, grad_norm=68.615, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.082, optim0_lr0=3.825e-05, train_time=1.202
+[gpua014:0/64] 2023-12-02 02:00:57,204 (trainer:735) INFO: 2epoch:train:8001-8100batch: iter_time=9.195e-05, forward_time=0.157, loss_ctc=221.475, loss_att=179.126, acc=0.231, loss=191.831, backward_time=0.334, grad_norm=64.708, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.082, optim0_lr0=3.842e-05, train_time=1.807
+[gpua014:0/64] 2023-12-02 02:03:02,372 (trainer:735) INFO: 2epoch:train:8101-8200batch: iter_time=8.904e-05, forward_time=0.148, loss_ctc=220.783, loss_att=180.198, acc=0.231, loss=192.374, backward_time=0.293, grad_norm=66.597, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.081, optim0_lr0=3.859e-05, train_time=1.251
+[gpua014:0/64] 2023-12-02 02:05:08,600 (trainer:735) INFO: 2epoch:train:8201-8300batch: iter_time=9.210e-05, forward_time=0.153, loss_ctc=246.553, loss_att=206.866, acc=0.228, loss=218.772, backward_time=0.279, grad_norm=73.119, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.081, optim0_lr0=3.875e-05, train_time=1.262
+[gpua014:0/64] 2023-12-02 02:07:18,631 (trainer:735) INFO: 2epoch:train:8301-8400batch: iter_time=8.935e-05, forward_time=0.162, loss_ctc=225.541, loss_att=184.626, acc=0.231, loss=196.901, backward_time=0.287, grad_norm=68.424, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.081, optim0_lr0=3.892e-05, train_time=1.300
+[gpua014:0/64] 2023-12-02 02:09:42,402 (trainer:735) INFO: 2epoch:train:8401-8500batch: iter_time=8.607e-05, forward_time=0.161, loss_ctc=236.798, loss_att=196.524, acc=0.225, loss=208.606, backward_time=0.308, grad_norm=71.861, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.081, optim0_lr0=3.909e-05, train_time=1.437
+[gpua014:0/64] 2023-12-02 02:12:13,383 (trainer:735) INFO: 2epoch:train:8501-8600batch: iter_time=8.177e-05, forward_time=0.152, loss_ctc=240.447, loss_att=195.912, acc=0.215, loss=209.273, backward_time=0.310, grad_norm=67.109, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.083, optim0_lr0=3.925e-05, train_time=1.510
+[gpua014:0/64] 2023-12-02 02:14:43,881 (trainer:735) INFO: 2epoch:train:8601-8700batch: iter_time=8.509e-05, forward_time=0.169, loss_ctc=257.881, loss_att=212.598, acc=0.220, loss=226.183, backward_time=0.297, grad_norm=68.781, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.081, optim0_lr0=3.942e-05, train_time=1.505
+[gpua014:0/64] 2023-12-02 02:16:14,302 (multiple_iter_factory:32) INFO: Building 7th iter-factory...
+[gpua014:0/64] 2023-12-02 02:16:33,043 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua014:0/64] 2023-12-02 02:16:36,583 (abs_task:1616) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TPreprocessor object at 0x7faa9489f400>)
+[gpua014:0/64] 2023-12-02 02:16:36,583 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, 
+[gpua014:0/64] 2023-12-02 02:16:36,586 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua014:0/64] 2023-12-02 02:25:56,609 (trainer:735) INFO: 2epoch:train:8701-8800batch: iter_time=2.558, forward_time=0.180, loss_ctc=239.401, loss_att=187.972, acc=0.226, loss=203.401, backward_time=0.289, grad_norm=80.779, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.082, optim0_lr0=3.959e-05, train_time=6.727
+[gpua014:0/64] 2023-12-02 02:28:31,976 (trainer:735) INFO: 2epoch:train:8801-8900batch: iter_time=8.428e-05, forward_time=0.146, loss_ctc=219.763, loss_att=179.970, acc=0.232, loss=191.908, backward_time=0.294, grad_norm=60.203, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.081, optim0_lr0=3.975e-05, train_time=1.553
+[gpua014:0/64] 2023-12-02 02:31:00,501 (trainer:735) INFO: 2epoch:train:8901-9000batch: iter_time=8.274e-05, forward_time=0.146, loss_ctc=239.862, loss_att=191.197, acc=0.237, loss=205.797, backward_time=0.295, grad_norm=63.272, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.081, optim0_lr0=3.992e-05, train_time=1.485
+[gpua014:0/64] 2023-12-02 02:33:30,484 (trainer:735) INFO: 2epoch:train:9001-9100batch: iter_time=9.331e-05, forward_time=0.146, loss_ctc=224.986, loss_att=182.375, acc=0.234, loss=195.159, backward_time=0.351, grad_norm=64.582, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.081, optim0_lr0=4.009e-05, train_time=1.500
+[gpua014:0/64] 2023-12-02 02:35:30,668 (trainer:735) INFO: 2epoch:train:9101-9200batch: iter_time=1.636e-04, forward_time=0.151, loss_ctc=237.631, loss_att=187.697, acc=0.231, loss=202.677, backward_time=0.285, grad_norm=73.417, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.082, optim0_lr0=4.025e-05, train_time=1.202
+[gpua014:0/64] 2023-12-02 02:38:20,659 (trainer:735) INFO: 2epoch:train:9201-9300batch: iter_time=9.910e-05, forward_time=0.169, loss_ctc=228.789, loss_att=190.748, acc=0.227, loss=202.161, backward_time=0.345, grad_norm=68.091, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.084, optim0_lr0=4.042e-05, train_time=1.700
+[gpua014:0/64] 2023-12-02 02:40:50,045 (trainer:735) INFO: 2epoch:train:9301-9400batch: iter_time=9.506e-05, forward_time=0.147, loss_ctc=216.909, loss_att=172.652, acc=0.235, loss=185.929, backward_time=0.319, grad_norm=64.914, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.081, optim0_lr0=4.059e-05, train_time=1.494
+[gpua014:0/64] 2023-12-02 02:43:14,550 (trainer:735) INFO: 2epoch:train:9401-9500batch: iter_time=8.891e-05, forward_time=0.159, loss_ctc=238.009, loss_att=198.947, acc=0.227, loss=210.665, backward_time=0.302, grad_norm=75.124, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.085, optim0_lr0=4.075e-05, train_time=1.445
+[gpua014:0/64] 2023-12-02 02:46:10,183 (trainer:735) INFO: 2epoch:train:9501-9600batch: iter_time=9.554e-05, forward_time=0.166, loss_ctc=234.781, loss_att=192.019, acc=0.231, loss=204.848, backward_time=0.396, grad_norm=69.015, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.081, optim0_lr0=4.092e-05, train_time=1.756
+[gpua014:0/64] 2023-12-02 02:48:29,507 (trainer:735) INFO: 2epoch:train:9601-9700batch: iter_time=8.529e-05, forward_time=0.148, loss_ctc=225.321, loss_att=186.867, acc=0.231, loss=198.403, backward_time=0.308, grad_norm=67.728, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.081, optim0_lr0=4.109e-05, train_time=1.393
+[gpua014:0/64] 2023-12-02 02:51:17,556 (trainer:735) INFO: 2epoch:train:9701-9800batch: iter_time=9.664e-05, forward_time=0.162, loss_ctc=247.546, loss_att=199.617, acc=0.220, loss=213.995, backward_time=0.324, grad_norm=61.871, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.084, optim0_lr0=4.125e-05, train_time=1.680
+[gpua014:0/64] 2023-12-02 02:53:42,625 (trainer:735) INFO: 2epoch:train:9801-9900batch: iter_time=9.966e-05, forward_time=0.155, loss_ctc=236.207, loss_att=192.527, acc=0.225, loss=205.631, backward_time=0.286, grad_norm=71.278, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.081, optim0_lr0=4.142e-05, train_time=1.450
+[gpua014:0/64] 2023-12-02 02:56:05,620 (trainer:735) INFO: 2epoch:train:9901-10000batch: iter_time=9.653e-05, forward_time=0.148, loss_ctc=244.276, loss_att=194.147, acc=0.227, loss=209.186, backward_time=0.294, grad_norm=80.165, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.081, optim0_lr0=4.159e-05, train_time=1.430
+[gpua014:0/64] 2023-12-02 02:56:25,644 (multiple_iter_factory:32) INFO: Building 8th iter-factory...
+[gpua014:0/64] 2023-12-02 02:56:44,354 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua014:0/64] 2023-12-02 02:56:47,904 (abs_task:1616) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TPreprocessor object at 0x7faa940abf10>)
+[gpua014:0/64] 2023-12-02 02:56:47,904 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, 
+[gpua014:0/64] 2023-12-02 02:56:47,907 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua014:0/64] 2023-12-02 03:05:20,532 (trainer:735) INFO: 2epoch:train:10001-10100batch: iter_time=2.373, forward_time=0.173, loss_ctc=215.414, loss_att=176.840, acc=0.230, loss=188.412, backward_time=0.286, grad_norm=83.590, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.081, optim0_lr0=4.175e-05, train_time=5.549
+[gpua014:0/64] 2023-12-02 03:07:58,160 (trainer:735) INFO: 2epoch:train:10101-10200batch: iter_time=9.056e-05, forward_time=0.149, loss_ctc=229.134, loss_att=190.145, acc=0.235, loss=201.842, backward_time=0.295, grad_norm=68.041, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.080, optim0_lr0=4.192e-05, train_time=1.576
+[gpua014:0/64] 2023-12-02 03:10:00,765 (trainer:735) INFO: 2epoch:train:10201-10300batch: iter_time=8.568e-05, forward_time=0.150, loss_ctc=255.535, loss_att=209.303, acc=0.230, loss=223.172, backward_time=0.279, grad_norm=81.680, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.080, optim0_lr0=4.209e-05, train_time=1.226
+[gpua014:0/64] 2023-12-02 03:12:01,737 (trainer:735) INFO: 2epoch:train:10301-10400batch: iter_time=9.543e-05, forward_time=0.164, loss_ctc=237.059, loss_att=193.617, acc=0.235, loss=206.650, backward_time=0.279, grad_norm=84.855, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.081, optim0_lr0=4.225e-05, train_time=1.209
+[gpua014:0/64] 2023-12-02 03:14:26,629 (trainer:735) INFO: 2epoch:train:10401-10500batch: iter_time=8.130e-05, forward_time=0.162, loss_ctc=226.466, loss_att=191.518, acc=0.227, loss=202.002, backward_time=0.319, grad_norm=76.493, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.081, optim0_lr0=4.242e-05, train_time=1.448
+[gpua014:0/64] 2023-12-02 03:16:57,673 (trainer:735) INFO: 2epoch:train:10501-10600batch: iter_time=8.432e-05, forward_time=0.148, loss_ctc=219.094, loss_att=180.626, acc=0.234, loss=192.167, backward_time=0.309, grad_norm=66.937, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.080, optim0_lr0=4.259e-05, train_time=1.511
+[gpua014:0/64] 2023-12-02 03:19:42,357 (trainer:735) INFO: 2epoch:train:10601-10700batch: iter_time=8.551e-05, forward_time=0.166, loss_ctc=217.550, loss_att=183.549, acc=0.234, loss=193.749, backward_time=0.330, grad_norm=69.588, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.082, optim0_lr0=4.275e-05, train_time=1.647
+[gpua014:0/64] 2023-12-02 03:22:25,789 (trainer:735) INFO: 2epoch:train:10701-10800batch: iter_time=8.976e-05, forward_time=0.151, loss_ctc=245.019, loss_att=208.709, acc=0.231, loss=219.602, backward_time=0.319, grad_norm=67.640, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.080, optim0_lr0=4.292e-05, train_time=1.634
+[gpua014:0/64] 2023-12-02 03:24:45,187 (trainer:735) INFO: 2epoch:train:10801-10900batch: iter_time=9.381e-05, forward_time=0.148, loss_ctc=222.546, loss_att=186.369, acc=0.236, loss=197.222, backward_time=0.296, grad_norm=61.782, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.080, optim0_lr0=4.309e-05, train_time=1.394
+[gpua014:0/64] 2023-12-02 03:27:13,107 (trainer:735) INFO: 2epoch:train:10901-11000batch: iter_time=8.975e-05, forward_time=0.153, loss_ctc=233.232, loss_att=196.138, acc=0.230, loss=207.266, backward_time=0.322, grad_norm=65.380, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.080, optim0_lr0=4.325e-05, train_time=1.479
+[gpua014:0/64] 2023-12-02 03:29:14,847 (trainer:735) INFO: 2epoch:train:11001-11100batch: iter_time=8.128e-05, forward_time=0.148, loss_ctc=237.065, loss_att=197.035, acc=0.222, loss=209.044, backward_time=0.279, grad_norm=65.562, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.080, optim0_lr0=4.342e-05, train_time=1.217
+[gpua014:0/64] 2023-12-02 03:31:24,806 (trainer:735) INFO: 2epoch:train:11101-11200batch: iter_time=8.888e-05, forward_time=0.179, loss_ctc=254.756, loss_att=212.779, acc=0.225, loss=225.372, backward_time=0.287, grad_norm=66.141, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.082, optim0_lr0=4.359e-05, train_time=1.299
+[gpua014:0/64] 2023-12-02 03:33:07,175 (multiple_iter_factory:32) INFO: Building 9th iter-factory...
+[gpua014:0/64] 2023-12-02 03:33:25,693 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua014:0/64] 2023-12-02 03:33:29,351 (abs_task:1616) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TPreprocessor object at 0x7faa940fd0f0>)
+[gpua014:0/64] 2023-12-02 03:33:29,351 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, 
+[gpua014:0/64] 2023-12-02 03:33:29,355 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua014:0/64] 2023-12-02 03:38:28,541 (trainer:735) INFO: 2epoch:train:11201-11300batch: iter_time=2.701, forward_time=0.167, loss_ctc=239.228, loss_att=186.575, acc=0.231, loss=202.371, backward_time=0.289, grad_norm=78.544, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.081, optim0_lr0=4.375e-05, train_time=4.236
+[gpua014:0/64] 2023-12-02 03:40:29,668 (trainer:735) INFO: 2epoch:train:11301-11400batch: iter_time=8.144e-05, forward_time=0.148, loss_ctc=218.246, loss_att=181.599, acc=0.235, loss=192.593, backward_time=0.281, grad_norm=68.699, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.081, optim0_lr0=4.392e-05, train_time=1.212
+[gpua014:0/64] 2023-12-02 03:42:30,242 (trainer:735) INFO: 2epoch:train:11401-11500batch: iter_time=9.995e-05, forward_time=0.147, loss_ctc=238.435, loss_att=191.913, acc=0.240, loss=205.869, backward_time=0.278, grad_norm=69.425, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.080, optim0_lr0=4.409e-05, train_time=1.206
+[gpua014:0/64] 2023-12-02 03:44:41,354 (trainer:735) INFO: 2epoch:train:11501-11600batch: iter_time=8.658e-05, forward_time=0.148, loss_ctc=222.352, loss_att=184.327, acc=0.238, loss=195.734, backward_time=0.289, grad_norm=64.163, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.081, optim0_lr0=4.425e-05, train_time=1.311
+[gpua014:0/64] 2023-12-02 03:47:11,392 (trainer:735) INFO: 2epoch:train:11601-11700batch: iter_time=8.793e-05, forward_time=0.165, loss_ctc=235.277, loss_att=190.246, acc=0.233, loss=203.755, backward_time=0.299, grad_norm=69.732, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.083, optim0_lr0=4.442e-05, train_time=1.500
+[gpua014:0/64] 2023-12-02 03:49:22,609 (trainer:735) INFO: 2epoch:train:11701-11800batch: iter_time=9.096e-05, forward_time=0.152, loss_ctc=224.892, loss_att=193.075, acc=0.232, loss=202.620, backward_time=0.283, grad_norm=60.686, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.080, optim0_lr0=4.459e-05, train_time=1.312
+[gpua014:0/64] 2023-12-02 03:51:37,318 (trainer:735) INFO: 2epoch:train:11801-11900batch: iter_time=8.709e-05, forward_time=0.147, loss_ctc=213.435, loss_att=176.042, acc=0.239, loss=187.260, backward_time=0.296, grad_norm=59.199, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.080, optim0_lr0=4.475e-05, train_time=1.347
+[gpua014:0/64] 2023-12-02 03:54:00,662 (trainer:735) INFO: 2epoch:train:11901-12000batch: iter_time=9.203e-05, forward_time=0.147, loss_ctc=233.163, loss_att=201.611, acc=0.231, loss=211.076, backward_time=0.304, grad_norm=65.152, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.080, optim0_lr0=4.492e-05, train_time=1.433
+[gpua014:0/64] 2023-12-02 03:56:06,449 (trainer:735) INFO: 2epoch:train:12001-12100batch: iter_time=8.491e-05, forward_time=0.146, loss_ctc=231.013, loss_att=191.346, acc=0.235, loss=203.246, backward_time=0.283, grad_norm=68.921, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.080, optim0_lr0=4.509e-05, train_time=1.258
+[gpua014:0/64] 2023-12-02 03:58:17,666 (trainer:735) INFO: 2epoch:train:12101-12200batch: iter_time=3.398e-04, forward_time=0.159, loss_ctc=222.465, loss_att=188.126, acc=0.237, loss=198.428, backward_time=0.312, grad_norm=68.942, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.082, optim0_lr0=4.525e-05, train_time=1.312
+[gpua014:0/64] 2023-12-02 04:00:19,047 (trainer:735) INFO: 2epoch:train:12201-12300batch: iter_time=8.117e-05, forward_time=0.147, loss_ctc=242.951, loss_att=201.873, acc=0.225, loss=214.196, backward_time=0.278, grad_norm=63.618, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.080, optim0_lr0=4.542e-05, train_time=1.214
+[gpua014:0/64] 2023-12-02 04:02:49,382 (trainer:735) INFO: 2epoch:train:12301-12400batch: iter_time=8.303e-05, forward_time=0.180, loss_ctc=231.740, loss_att=194.189, acc=0.232, loss=205.454, backward_time=0.293, grad_norm=65.143, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.080, optim0_lr0=4.559e-05, train_time=1.503
+[gpua014:0/64] 2023-12-02 04:05:07,331 (trainer:735) INFO: 2epoch:train:12401-12500batch: iter_time=8.013e-05, forward_time=0.148, loss_ctc=240.400, loss_att=192.567, acc=0.234, loss=206.917, backward_time=0.281, grad_norm=73.166, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.080, optim0_lr0=4.575e-05, train_time=1.380
+[gpua014:0/64] 2023-12-02 04:05:27,357 (multiple_iter_factory:32) INFO: Building 10th iter-factory...
+[gpua014:0/64] 2023-12-02 04:05:46,130 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua014:0/64] 2023-12-02 04:05:49,689 (abs_task:1616) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TPreprocessor object at 0x7fab13c57ac0>)
+[gpua014:0/64] 2023-12-02 04:05:49,690 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, 
+[gpua014:0/64] 2023-12-02 04:05:49,693 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua014:0/64] 2023-12-02 04:14:26,754 (trainer:735) INFO: 2epoch:train:12501-12600batch: iter_time=2.727, forward_time=0.170, loss_ctc=210.012, loss_att=170.877, acc=0.237, loss=182.618, backward_time=0.280, grad_norm=69.009, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.081, optim0_lr0=4.592e-05, train_time=5.594
+[gpua014:0/64] 2023-12-02 04:16:28,964 (trainer:735) INFO: 2epoch:train:12601-12700batch: iter_time=7.963e-05, forward_time=0.148, loss_ctc=222.786, loss_att=185.667, acc=0.241, loss=196.803, backward_time=0.286, grad_norm=50.981, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.081, optim0_lr0=4.609e-05, train_time=1.222
+[gpua014:0/64] 2023-12-02 04:18:58,844 (trainer:735) INFO: 2epoch:train:12701-12800batch: iter_time=8.828e-05, forward_time=0.148, loss_ctc=247.718, loss_att=202.908, acc=0.237, loss=216.351, backward_time=0.301, grad_norm=73.443, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.081, optim0_lr0=4.625e-05, train_time=1.499
+[gpua014:0/64] 2023-12-02 04:20:57,856 (trainer:735) INFO: 2epoch:train:12801-12900batch: iter_time=8.825e-05, forward_time=0.150, loss_ctc=232.321, loss_att=188.582, acc=0.242, loss=201.704, backward_time=0.280, grad_norm=64.223, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.081, optim0_lr0=4.642e-05, train_time=1.190
+[gpua014:0/64] 2023-12-02 04:23:01,040 (trainer:735) INFO: 2epoch:train:12901-13000batch: iter_time=8.895e-05, forward_time=0.148, loss_ctc=219.923, loss_att=186.663, acc=0.234, loss=196.641, backward_time=0.289, grad_norm=64.027, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.081, optim0_lr0=4.659e-05, train_time=1.232
+[gpua014:0/64] 2023-12-02 04:25:40,120 (trainer:735) INFO: 2epoch:train:13001-13100batch: iter_time=9.421e-05, forward_time=0.163, loss_ctc=213.606, loss_att=177.116, acc=0.240, loss=188.063, backward_time=0.316, grad_norm=60.814, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.082, optim0_lr0=4.675e-05, train_time=1.591
+[gpua014:0/64] 2023-12-02 04:28:23,721 (trainer:735) INFO: 2epoch:train:13101-13200batch: iter_time=9.327e-05, forward_time=0.148, loss_ctc=208.896, loss_att=178.370, acc=0.241, loss=187.528, backward_time=0.323, grad_norm=54.400, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.081, optim0_lr0=4.692e-05, train_time=1.636
+[gpua014:0/64] 2023-12-02 04:31:05,455 (trainer:735) INFO: 2epoch:train:13201-13300batch: iter_time=8.957e-05, forward_time=0.168, loss_ctc=236.334, loss_att=203.779, acc=0.236, loss=213.546, backward_time=0.389, grad_norm=56.858, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.082, optim0_lr0=4.709e-05, train_time=1.617
+[gpua014:0/64] 2023-12-02 04:33:35,298 (trainer:735) INFO: 2epoch:train:13301-13400batch: iter_time=9.074e-05, forward_time=0.152, loss_ctc=213.057, loss_att=182.659, acc=0.239, loss=191.778, backward_time=0.335, grad_norm=55.252, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.084, optim0_lr0=4.725e-05, train_time=1.498
+[gpua014:0/64] 2023-12-02 04:35:37,634 (trainer:735) INFO: 2epoch:train:13401-13500batch: iter_time=9.692e-05, forward_time=0.148, loss_ctc=223.706, loss_att=191.792, acc=0.236, loss=201.366, backward_time=0.282, grad_norm=54.092, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.081, optim0_lr0=4.742e-05, train_time=1.224
+[gpua014:0/64] 2023-12-02 04:37:36,814 (trainer:735) INFO: 2epoch:train:13501-13600batch: iter_time=9.013e-05, forward_time=0.166, loss_ctc=225.266, loss_att=192.826, acc=0.227, loss=202.558, backward_time=0.284, grad_norm=57.886, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.081, optim0_lr0=4.759e-05, train_time=1.191
+[gpua014:0/64] 2023-12-02 04:40:09,397 (trainer:735) INFO: 2epoch:train:13601-13700batch: iter_time=8.959e-05, forward_time=0.163, loss_ctc=240.199, loss_att=208.808, acc=0.232, loss=218.225, backward_time=0.333, grad_norm=54.543, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.085, optim0_lr0=4.775e-05, train_time=1.526
+[gpua014:0/64] 2023-12-02 04:41:42,718 (multiple_iter_factory:32) INFO: Building 11th iter-factory...
+[gpua014:0/64] 2023-12-02 04:42:01,741 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua014:0/64] 2023-12-02 04:42:05,373 (abs_task:1616) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TPreprocessor object at 0x7fa1eeba1630>)
+[gpua014:0/64] 2023-12-02 04:42:05,373 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, 
+[gpua014:0/64] 2023-12-02 04:42:05,376 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua014:0/64] 2023-12-02 04:52:29,984 (trainer:735) INFO: 2epoch:train:13701-13800batch: iter_time=2.648, forward_time=0.176, loss_ctc=225.891, loss_att=181.648, acc=0.236, loss=194.921, backward_time=0.288, grad_norm=76.876, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.082, optim0_lr0=4.792e-05, train_time=7.405
+[gpua014:0/64] 2023-12-02 04:54:31,831 (trainer:735) INFO: 2epoch:train:13801-13900batch: iter_time=8.227e-05, forward_time=0.147, loss_ctc=207.796, loss_att=174.991, acc=0.240, loss=184.832, backward_time=0.279, grad_norm=55.852, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.080, optim0_lr0=4.809e-05, train_time=1.219
+[gpua014:0/64] 2023-12-02 04:57:00,978 (trainer:735) INFO: 2epoch:train:13901-14000batch: iter_time=9.534e-05, forward_time=0.147, loss_ctc=223.416, loss_att=185.815, acc=0.246, loss=197.095, backward_time=0.300, grad_norm=58.444, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.080, optim0_lr0=4.825e-05, train_time=1.491
+[gpua014:0/64] 2023-12-02 04:59:59,580 (trainer:735) INFO: 2epoch:train:14001-14100batch: iter_time=1.035e-04, forward_time=0.147, loss_ctc=210.037, loss_att=176.956, acc=0.243, loss=186.880, backward_time=0.331, grad_norm=60.233, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.081, optim0_lr0=4.842e-05, train_time=1.786
+[gpua014:0/64] 2023-12-02 05:02:16,828 (trainer:735) INFO: 2epoch:train:14101-14200batch: iter_time=9.171e-05, forward_time=0.147, loss_ctc=224.917, loss_att=182.201, acc=0.240, loss=195.015, backward_time=0.295, grad_norm=66.051, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.081, optim0_lr0=4.859e-05, train_time=1.372
+[gpua014:0/64] 2023-12-02 05:04:32,064 (trainer:735) INFO: 2epoch:train:14201-14300batch: iter_time=8.576e-05, forward_time=0.178, loss_ctc=211.782, loss_att=184.378, acc=0.237, loss=192.599, backward_time=0.302, grad_norm=57.782, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.082, optim0_lr0=4.875e-05, train_time=1.352
+[gpua014:0/64] 2023-12-02 05:07:10,007 (trainer:735) INFO: 2epoch:train:14301-14400batch: iter_time=1.037e-04, forward_time=0.147, loss_ctc=199.525, loss_att=168.454, acc=0.244, loss=177.776, backward_time=0.299, grad_norm=52.399, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.080, optim0_lr0=4.892e-05, train_time=1.579
+[gpua014:0/64] 2023-12-02 05:09:40,096 (trainer:735) INFO: 2epoch:train:14401-14500batch: iter_time=9.008e-05, forward_time=0.147, loss_ctc=216.116, loss_att=191.849, acc=0.240, loss=199.129, backward_time=0.319, grad_norm=59.402, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.081, optim0_lr0=4.909e-05, train_time=1.501
+[gpua014:0/64] 2023-12-02 05:11:50,237 (trainer:735) INFO: 2epoch:train:14501-14600batch: iter_time=9.267e-05, forward_time=0.201, loss_ctc=215.338, loss_att=185.502, acc=0.242, loss=194.453, backward_time=0.303, grad_norm=57.580, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.085, optim0_lr0=4.925e-05, train_time=1.299
+[gpua014:0/64] 2023-12-02 05:14:16,493 (trainer:735) INFO: 2epoch:train:14601-14700batch: iter_time=9.552e-05, forward_time=0.147, loss_ctc=203.337, loss_att=180.864, acc=0.244, loss=187.606, backward_time=0.324, grad_norm=52.470, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.080, optim0_lr0=4.942e-05, train_time=1.465
+[gpua014:0/64] 2023-12-02 05:16:46,020 (trainer:735) INFO: 2epoch:train:14701-14800batch: iter_time=1.002e-04, forward_time=0.147, loss_ctc=218.710, loss_att=193.163, acc=0.231, loss=200.827, backward_time=0.322, grad_norm=52.565, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.080, optim0_lr0=4.959e-05, train_time=1.495
+[gpua014:0/64] 2023-12-02 05:18:42,453 (trainer:735) INFO: 2epoch:train:14801-14900batch: iter_time=8.883e-05, forward_time=0.147, loss_ctc=211.646, loss_att=187.683, acc=0.236, loss=194.872, backward_time=0.281, grad_norm=57.378, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.080, optim0_lr0=4.975e-05, train_time=1.164
+[gpua014:0/64] 2023-12-02 05:21:20,463 (trainer:735) INFO: 2epoch:train:14901-15000batch: iter_time=8.682e-05, forward_time=0.149, loss_ctc=218.791, loss_att=186.400, acc=0.237, loss=196.117, backward_time=0.306, grad_norm=69.264, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.080, optim0_lr0=4.992e-05, train_time=1.580
+[gpua014:0/64] 2023-12-02 05:45:46,471 (trainer:341) INFO: 2epoch results: [train] iter_time=0.197, forward_time=0.154, loss_ctc=234.296, loss_att=196.121, acc=0.224, loss=207.573, backward_time=0.303, grad_norm=70.434, clip=100.000, loss_scale=2.847e+08, optim_step_time=0.081, optim0_lr0=3.750e-05, train_time=1.780, time=7 hours, 25 minutes and 25.19 seconds, total_count=30000, gpu_max_cached_mem_GB=35.859, [valid] loss_ctc=171.986, cer_ctc=0.829, loss_att=139.745, acc=0.203, cer=0.685, wer=1.000, loss=149.418, time=24 minutes and 2.18 seconds, total_count=9342, gpu_max_cached_mem_GB=35.859
+[gpua014:0/64] 2023-12-02 05:46:05,135 (trainer:389) INFO: The best model has been updated: valid.acc, valid.total_count
+[gpua014:0/64] 2023-12-02 05:46:05,178 (trainer:272) INFO: 3/40epoch started. Estimated time to finish: 1 week, 5 days and 11 hours
+[gpua014:0/64] 2023-12-02 05:46:05,242 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpua014:0/64] 2023-12-02 05:46:23,227 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua014:0/64] 2023-12-02 05:46:26,689 (abs_task:1616) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TPreprocessor object at 0x7fadd0f6fb80>)
+[gpua014:0/64] 2023-12-02 05:46:26,690 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, 
+[gpua014:0/64] 2023-12-02 05:46:26,693 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua014:0/64] 2023-12-02 05:52:17,015 (trainer:735) INFO: 3epoch:train:1-100batch: iter_time=1.669, forward_time=0.179, loss_ctc=228.002, loss_att=211.268, acc=0.232, loss=216.288, backward_time=0.284, grad_norm=70.299, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.082, optim0_lr0=5.026e-05, train_time=3.718
+[gpua014:0/64] 2023-12-02 05:54:13,793 (trainer:735) INFO: 3epoch:train:101-200batch: iter_time=8.730e-05, forward_time=0.148, loss_ctc=236.157, loss_att=212.825, acc=0.227, loss=219.825, backward_time=0.277, grad_norm=63.819, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.081, optim0_lr0=5.076e-05, train_time=1.168
+[gpua014:0/64] 2023-12-02 05:56:15,052 (trainer:735) INFO: 3epoch:train:201-300batch: iter_time=7.883e-05, forward_time=0.148, loss_ctc=202.633, loss_att=176.869, acc=0.247, loss=184.598, backward_time=0.282, grad_norm=63.862, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.080, optim0_lr0=5.126e-05, train_time=1.212
+[gpua014:0/64] 2023-12-02 05:58:26,067 (trainer:735) INFO: 3epoch:train:301-400batch: iter_time=8.863e-05, forward_time=0.149, loss_ctc=205.745, loss_att=192.161, acc=0.243, loss=196.236, backward_time=0.285, grad_norm=59.050, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.081, optim0_lr0=5.176e-05, train_time=1.310
+[gpua014:0/64] 2023-12-02 06:00:41,729 (trainer:735) INFO: 3epoch:train:401-500batch: iter_time=9.312e-05, forward_time=0.148, loss_ctc=219.160, loss_att=190.235, acc=0.242, loss=198.913, backward_time=0.293, grad_norm=65.805, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.081, optim0_lr0=5.226e-05, train_time=1.356
+[gpua014:0/64] 2023-12-02 06:02:54,578 (trainer:735) INFO: 3epoch:train:501-600batch: iter_time=8.690e-05, forward_time=0.148, loss_ctc=195.180, loss_att=179.330, acc=0.249, loss=184.085, backward_time=0.292, grad_norm=60.019, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.081, optim0_lr0=5.276e-05, train_time=1.328
+[gpua014:0/64] 2023-12-02 06:05:00,713 (trainer:735) INFO: 3epoch:train:601-700batch: iter_time=9.834e-05, forward_time=0.149, loss_ctc=210.221, loss_att=199.872, acc=0.228, loss=202.977, backward_time=0.283, grad_norm=62.172, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.081, optim0_lr0=5.326e-05, train_time=1.261
+[gpua014:0/64] 2023-12-02 06:07:23,366 (trainer:735) INFO: 3epoch:train:701-800batch: iter_time=8.971e-05, forward_time=0.149, loss_ctc=222.312, loss_att=212.251, acc=0.226, loss=215.269, backward_time=0.302, grad_norm=66.625, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.081, optim0_lr0=5.376e-05, train_time=1.426
+[gpua014:0/64] 2023-12-02 06:09:44,292 (trainer:735) INFO: 3epoch:train:801-900batch: iter_time=8.950e-05, forward_time=0.155, loss_ctc=194.497, loss_att=183.477, acc=0.247, loss=186.783, backward_time=0.299, grad_norm=58.676, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.081, optim0_lr0=5.426e-05, train_time=1.409
+[gpua014:0/64] 2023-12-02 06:12:11,370 (trainer:735) INFO: 3epoch:train:901-1000batch: iter_time=8.294e-05, forward_time=0.201, loss_ctc=227.050, loss_att=208.051, acc=0.229, loss=213.750, backward_time=0.340, grad_norm=70.260, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.087, optim0_lr0=5.476e-05, train_time=1.471
+[gpua014:0/64] 2023-12-02 06:14:37,900 (trainer:735) INFO: 3epoch:train:1001-1100batch: iter_time=8.762e-05, forward_time=0.147, loss_ctc=218.079, loss_att=197.897, acc=0.247, loss=203.951, backward_time=0.296, grad_norm=65.801, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.080, optim0_lr0=5.526e-05, train_time=1.465
+[gpua014:0/64] 2023-12-02 06:16:49,421 (trainer:735) INFO: 3epoch:train:1101-1200batch: iter_time=8.435e-05, forward_time=0.147, loss_ctc=200.804, loss_att=185.832, acc=0.237, loss=190.324, backward_time=0.287, grad_norm=54.386, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.080, optim0_lr0=5.576e-05, train_time=1.315
+[gpua014:0/64] 2023-12-02 06:18:17,145 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
+[gpua014:0/64] 2023-12-02 06:18:35,257 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua014:0/64] 2023-12-02 06:18:38,936 (abs_task:1616) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TPreprocessor object at 0x7fa1b0541630>)
+[gpua014:0/64] 2023-12-02 06:18:38,936 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, 
+[gpua014:0/64] 2023-12-02 06:18:38,939 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua014:0/64] 2023-12-02 06:23:20,999 (trainer:735) INFO: 3epoch:train:1201-1300batch: iter_time=2.122, forward_time=0.146, loss_ctc=207.175, loss_att=187.190, acc=0.245, loss=193.185, backward_time=0.279, grad_norm=62.301, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.080, optim0_lr0=5.626e-05, train_time=3.915
+[gpua014:0/64] 2023-12-02 06:25:17,342 (trainer:735) INFO: 3epoch:train:1301-1400batch: iter_time=9.363e-05, forward_time=0.146, loss_ctc=223.158, loss_att=220.472, acc=0.229, loss=221.278, backward_time=0.277, grad_norm=67.856, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.080, optim0_lr0=5.676e-05, train_time=1.163
+[gpua014:0/64] 2023-12-02 06:27:13,486 (trainer:735) INFO: 3epoch:train:1401-1500batch: iter_time=8.885e-05, forward_time=0.147, loss_ctc=218.296, loss_att=198.257, acc=0.235, loss=204.269, backward_time=0.278, grad_norm=67.634, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.080, optim0_lr0=5.726e-05, train_time=1.161
+[gpua014:0/64] 2023-12-02 06:29:31,665 (trainer:735) INFO: 3epoch:train:1501-1600batch: iter_time=8.657e-05, forward_time=0.146, loss_ctc=182.667, loss_att=162.619, acc=0.258, loss=168.633, backward_time=0.281, grad_norm=59.726, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.080, optim0_lr0=5.776e-05, train_time=1.382
+[gpua014:0/64] 2023-12-02 06:31:34,996 (trainer:735) INFO: 3epoch:train:1601-1700batch: iter_time=9.725e-05, forward_time=0.154, loss_ctc=190.371, loss_att=179.515, acc=0.245, loss=182.772, backward_time=0.281, grad_norm=58.683, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.081, optim0_lr0=5.826e-05, train_time=1.233
+[gpua014:0/64] 2023-12-02 06:33:53,420 (trainer:735) INFO: 3epoch:train:1701-1800batch: iter_time=9.940e-05, forward_time=0.149, loss_ctc=202.708, loss_att=181.056, acc=0.248, loss=187.552, backward_time=0.299, grad_norm=66.377, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.080, optim0_lr0=5.876e-05, train_time=1.384
+[gpua014:0/64] 2023-12-02 06:36:22,315 (trainer:735) INFO: 3epoch:train:1801-1900batch: iter_time=1.018e-04, forward_time=0.183, loss_ctc=198.102, loss_att=187.823, acc=0.241, loss=190.907, backward_time=0.299, grad_norm=67.424, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.082, optim0_lr0=5.926e-05, train_time=1.489
+[gpua014:0/64] 2023-12-02 06:38:34,154 (trainer:735) INFO: 3epoch:train:1901-2000batch: iter_time=9.577e-05, forward_time=0.185, loss_ctc=207.249, loss_att=196.113, acc=0.226, loss=199.454, backward_time=0.304, grad_norm=66.198, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.083, optim0_lr0=5.976e-05, train_time=1.318
+[gpua014:0/64] 2023-12-02 06:40:54,142 (trainer:735) INFO: 3epoch:train:2001-2100batch: iter_time=9.229e-05, forward_time=0.147, loss_ctc=189.511, loss_att=194.372, acc=0.238, loss=192.914, backward_time=0.288, grad_norm=54.131, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.081, optim0_lr0=6.026e-05, train_time=1.400
+[gpua014:0/64] 2023-12-02 06:43:15,442 (trainer:735) INFO: 3epoch:train:2101-2200batch: iter_time=9.456e-05, forward_time=0.147, loss_ctc=181.904, loss_att=176.572, acc=0.248, loss=178.172, backward_time=0.287, grad_norm=53.296, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.080, optim0_lr0=6.076e-05, train_time=1.413
+[gpua014:0/64] 2023-12-02 06:45:25,411 (trainer:735) INFO: 3epoch:train:2201-2300batch: iter_time=1.038e-04, forward_time=0.147, loss_ctc=224.185, loss_att=205.200, acc=0.238, loss=210.895, backward_time=0.289, grad_norm=73.181, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.080, optim0_lr0=6.126e-05, train_time=1.299
+[gpua014:0/64] 2023-12-02 06:47:45,077 (trainer:735) INFO: 3epoch:train:2301-2400batch: iter_time=9.596e-05, forward_time=0.147, loss_ctc=214.462, loss_att=202.972, acc=0.240, loss=206.419, backward_time=0.288, grad_norm=72.035, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.081, optim0_lr0=6.176e-05, train_time=1.396
+[gpua014:0/64] 2023-12-02 06:50:07,080 (trainer:735) INFO: 3epoch:train:2401-2500batch: iter_time=8.996e-05, forward_time=0.148, loss_ctc=179.389, loss_att=168.432, acc=0.249, loss=171.719, backward_time=0.294, grad_norm=58.047, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.080, optim0_lr0=6.226e-05, train_time=1.420
+[gpua014:0/64] 2023-12-02 06:50:21,605 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpua014:0/64] 2023-12-02 06:50:39,919 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua014:0/64] 2023-12-02 06:50:43,505 (abs_task:1616) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TPreprocessor object at 0x7fa1cbecba60>)
+[gpua014:0/64] 2023-12-02 06:50:43,505 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, 
+[gpua014:0/64] 2023-12-02 06:50:43,509 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua014:0/64] 2023-12-02 06:56:42,000 (trainer:735) INFO: 3epoch:train:2501-2600batch: iter_time=2.301, forward_time=0.148, loss_ctc=204.502, loss_att=199.310, acc=0.239, loss=200.868, backward_time=0.282, grad_norm=63.914, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.082, optim0_lr0=6.276e-05, train_time=3.949
+[gpua014:0/64] 2023-12-02 06:58:39,772 (trainer:735) INFO: 3epoch:train:2601-2700batch: iter_time=9.687e-05, forward_time=0.148, loss_ctc=210.794, loss_att=204.540, acc=0.232, loss=206.416, backward_time=0.279, grad_norm=63.476, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.081, optim0_lr0=6.326e-05, train_time=1.178
+[gpua014:0/64] 2023-12-02 07:00:43,395 (trainer:735) INFO: 3epoch:train:2701-2800batch: iter_time=8.916e-05, forward_time=0.149, loss_ctc=183.478, loss_att=170.153, acc=0.253, loss=174.151, backward_time=0.277, grad_norm=59.304, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.080, optim0_lr0=6.376e-05, train_time=1.236
+[gpua014:0/64] 2023-12-02 07:03:02,312 (trainer:735) INFO: 3epoch:train:2801-2900batch: iter_time=9.799e-05, forward_time=0.147, loss_ctc=181.866, loss_att=180.270, acc=0.252, loss=180.749, backward_time=0.299, grad_norm=61.074, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.080, optim0_lr0=6.426e-05, train_time=1.389
+[gpua014:0/64] 2023-12-02 07:05:33,867 (trainer:735) INFO: 3epoch:train:2901-3000batch: iter_time=9.989e-05, forward_time=0.149, loss_ctc=198.329, loss_att=181.812, acc=0.247, loss=186.767, backward_time=0.315, grad_norm=65.672, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.081, optim0_lr0=6.476e-05, train_time=1.515
+[gpua014:0/64] 2023-12-02 07:07:55,138 (trainer:735) INFO: 3epoch:train:3001-3100batch: iter_time=9.515e-05, forward_time=0.148, loss_ctc=173.038, loss_att=170.609, acc=0.254, loss=171.338, backward_time=0.316, grad_norm=56.425, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.081, optim0_lr0=6.526e-05, train_time=1.412
+[gpua014:0/64] 2023-12-02 07:09:56,226 (trainer:735) INFO: 3epoch:train:3101-3200batch: iter_time=1.050e-04, forward_time=0.147, loss_ctc=188.454, loss_att=187.081, acc=0.234, loss=187.493, backward_time=0.285, grad_norm=56.743, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.081, optim0_lr0=6.576e-05, train_time=1.211
+[gpua014:0/64] 2023-12-02 07:12:13,872 (trainer:735) INFO: 3epoch:train:3201-3300batch: iter_time=1.064e-04, forward_time=0.157, loss_ctc=195.710, loss_att=201.621, acc=0.231, loss=199.848, backward_time=0.291, grad_norm=64.573, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.080, optim0_lr0=6.626e-05, train_time=1.376
+[gpua014:0/64] 2023-12-02 07:14:19,712 (trainer:735) INFO: 3epoch:train:3301-3400batch: iter_time=5.654e-04, forward_time=0.152, loss_ctc=172.874, loss_att=176.856, acc=0.252, loss=175.662, backward_time=0.282, grad_norm=48.591, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.082, optim0_lr0=6.676e-05, train_time=1.258
+[gpua014:0/64] 2023-12-02 07:16:44,556 (trainer:735) INFO: 3epoch:train:3401-3500batch: iter_time=1.031e-04, forward_time=0.148, loss_ctc=204.747, loss_att=200.606, acc=0.234, loss=201.848, backward_time=0.296, grad_norm=71.815, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.080, optim0_lr0=6.726e-05, train_time=1.448
+[gpua014:0/64] 2023-12-02 07:19:02,005 (trainer:735) INFO: 3epoch:train:3501-3600batch: iter_time=1.014e-04, forward_time=0.148, loss_ctc=195.084, loss_att=191.722, acc=0.252, loss=192.730, backward_time=0.281, grad_norm=61.177, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.080, optim0_lr0=6.776e-05, train_time=1.374
+[gpua014:0/64] 2023-12-02 07:21:08,932 (trainer:735) INFO: 3epoch:train:3601-3700batch: iter_time=1.027e-04, forward_time=0.147, loss_ctc=181.828, loss_att=178.146, acc=0.241, loss=179.251, backward_time=0.289, grad_norm=58.999, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.080, optim0_lr0=6.826e-05, train_time=1.269
+[gpua014:0/64] 2023-12-02 07:22:10,497 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpua014:0/64] 2023-12-02 07:22:28,713 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua014:0/64] 2023-12-02 07:22:32,261 (abs_task:1616) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TPreprocessor object at 0x7fa193950430>)
+[gpua014:0/64] 2023-12-02 07:22:32,261 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, 
+[gpua014:0/64] 2023-12-02 07:22:32,264 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua014:0/64] 2023-12-02 07:28:05,398 (trainer:735) INFO: 3epoch:train:3701-3800batch: iter_time=1.532, forward_time=0.173, loss_ctc=185.226, loss_att=178.957, acc=0.253, loss=180.838, backward_time=0.284, grad_norm=60.657, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.082, optim0_lr0=6.876e-05, train_time=4.164
+[gpua014:0/64] 2023-12-02 07:30:12,898 (trainer:735) INFO: 3epoch:train:3801-3900batch: iter_time=9.228e-05, forward_time=0.148, loss_ctc=199.185, loss_att=214.605, acc=0.235, loss=209.979, backward_time=0.297, grad_norm=61.770, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.080, optim0_lr0=6.926e-05, train_time=1.275
+[gpua014:0/64] 2023-12-02 07:32:28,363 (trainer:735) INFO: 3epoch:train:3901-4000batch: iter_time=9.550e-05, forward_time=0.147, loss_ctc=196.148, loss_att=194.584, acc=0.239, loss=195.053, backward_time=0.303, grad_norm=62.766, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.081, optim0_lr0=6.976e-05, train_time=1.354
+[gpua014:0/64] 2023-12-02 07:34:42,117 (trainer:735) INFO: 3epoch:train:4001-4100batch: iter_time=1.076e-04, forward_time=0.146, loss_ctc=164.864, loss_att=159.754, acc=0.261, loss=161.287, backward_time=0.308, grad_norm=57.145, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.080, optim0_lr0=7.026e-05, train_time=1.337
+[gpua014:0/64] 2023-12-02 07:36:40,702 (trainer:735) INFO: 3epoch:train:4101-4200batch: iter_time=9.920e-05, forward_time=0.147, loss_ctc=170.748, loss_att=174.848, acc=0.253, loss=173.618, backward_time=0.277, grad_norm=60.782, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.080, optim0_lr0=7.076e-05, train_time=1.186
+[gpua014:0/64] 2023-12-02 07:38:45,124 (trainer:735) INFO: 3epoch:train:4201-4300batch: iter_time=1.040e-04, forward_time=0.147, loss_ctc=184.028, loss_att=177.190, acc=0.253, loss=179.242, backward_time=0.279, grad_norm=56.825, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.080, optim0_lr0=7.126e-05, train_time=1.244
+[gpua014:0/64] 2023-12-02 07:41:01,554 (trainer:735) INFO: 3epoch:train:4301-4400batch: iter_time=8.823e-05, forward_time=0.149, loss_ctc=178.232, loss_att=183.838, acc=0.251, loss=182.156, backward_time=0.282, grad_norm=55.913, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.080, optim0_lr0=7.176e-05, train_time=1.364
+[gpua014:0/64] 2023-12-02 07:43:16,160 (trainer:735) INFO: 3epoch:train:4401-4500batch: iter_time=8.796e-05, forward_time=0.147, loss_ctc=186.949, loss_att=191.632, acc=0.231, loss=190.227, backward_time=0.285, grad_norm=68.564, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.080, optim0_lr0=7.226e-05, train_time=1.346
+[gpua014:0/64] 2023-12-02 07:45:57,018 (trainer:735) INFO: 3epoch:train:4501-4600batch: iter_time=9.078e-05, forward_time=0.147, loss_ctc=167.858, loss_att=190.113, acc=0.243, loss=183.436, backward_time=0.309, grad_norm=49.247, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.081, optim0_lr0=7.276e-05, train_time=1.608
+[gpua014:0/64] 2023-12-02 07:48:11,353 (trainer:735) INFO: 3epoch:train:4601-4700batch: iter_time=9.417e-05, forward_time=0.147, loss_ctc=164.286, loss_att=171.875, acc=0.255, loss=169.598, backward_time=0.283, grad_norm=50.524, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.081, optim0_lr0=7.326e-05, train_time=1.343
+[gpua014:0/64] 2023-12-02 07:50:29,218 (trainer:735) INFO: 3epoch:train:4701-4800batch: iter_time=1.009e-04, forward_time=0.147, loss_ctc=204.063, loss_att=201.026, acc=0.242, loss=201.937, backward_time=0.286, grad_norm=67.134, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.081, optim0_lr0=7.376e-05, train_time=1.378
+[gpua014:0/64] 2023-12-02 07:52:40,268 (trainer:735) INFO: 3epoch:train:4801-4900batch: iter_time=9.459e-05, forward_time=0.148, loss_ctc=195.981, loss_att=199.829, acc=0.244, loss=198.675, backward_time=0.287, grad_norm=69.654, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.080, optim0_lr0=7.426e-05, train_time=1.310
+[gpua014:0/64] 2023-12-02 07:54:51,185 (trainer:735) INFO: 3epoch:train:4901-5000batch: iter_time=9.387e-05, forward_time=0.147, loss_ctc=162.098, loss_att=165.972, acc=0.254, loss=164.810, backward_time=0.289, grad_norm=50.323, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.080, optim0_lr0=7.476e-05, train_time=1.309
+[gpua014:0/64] 2023-12-02 07:55:09,452 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpua014:0/64] 2023-12-02 07:55:27,460 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua014:0/64] 2023-12-02 07:55:31,162 (abs_task:1616) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TPreprocessor object at 0x7fa18ae2da20>)
+[gpua014:0/64] 2023-12-02 07:55:31,162 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, 
+[gpua014:0/64] 2023-12-02 07:55:31,165 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+
+gpua014:120544:120652 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : port error
+
+gpua014:120543:120653 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : port error
+
+gpua014:120545:120655 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : port error
+
+gpua014:120546:120654 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : port error
+
+gpua014:120545:120655 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : port active
+
+gpua014:120543:120653 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : port active
+
+gpua014:120544:120652 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : port active
+
+gpua014:120546:120654 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : port active
+[gpua014:0/64] 2023-12-02 08:04:35,430 (trainer:735) INFO: 3epoch:train:5001-5100batch: iter_time=2.277, forward_time=0.181, loss_ctc=185.698, loss_att=204.028, acc=0.241, loss=198.529, backward_time=0.426, grad_norm=61.539, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.083, optim0_lr0=7.526e-05, train_time=5.842
+
+gpua014:120546:120654 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : port error
+
+gpua014:120545:120655 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : port error
+
+gpua014:120543:120653 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : port error
+
+gpua014:120544:120652 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : port error
+
+gpua014:120544:120652 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : port active
+
+gpua014:120546:120654 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : port active
+
+gpua014:120545:120655 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : port active
+
+gpua014:120543:120653 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : port active
+[gpua014:0/64] 2023-12-02 08:07:50,370 (trainer:735) INFO: 3epoch:train:5101-5200batch: iter_time=0.012, forward_time=0.148, loss_ctc=190.325, loss_att=208.522, acc=0.236, loss=203.063, backward_time=0.427, grad_norm=55.142, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.081, optim0_lr0=7.576e-05, train_time=1.949
+
+gpua014:120544:120652 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : port error
+
+gpua014:120546:120654 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : port error
+
+gpua014:120545:120655 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : port error
+
+gpua014:120543:120653 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : port error
+
+gpua014:120544:120652 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : port active
+
+gpua014:120546:120654 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : port active
+
+gpua014:120545:120655 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : port active
+
+gpua014:120543:120653 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : port active
+[gpua014:0/64] 2023-12-02 08:10:48,346 (trainer:735) INFO: 3epoch:train:5201-5300batch: iter_time=8.892e-05, forward_time=0.148, loss_ctc=168.177, loss_att=170.886, acc=0.257, loss=170.073, backward_time=0.319, grad_norm=60.344, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.081, optim0_lr0=7.626e-05, train_time=1.780
+[gpua014:0/64] 2023-12-02 08:14:32,474 (trainer:735) INFO: 3epoch:train:5301-5400batch: iter_time=9.124e-05, forward_time=0.149, loss_ctc=164.046, loss_att=185.511, acc=0.255, loss=179.071, backward_time=0.451, grad_norm=58.907, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.081, optim0_lr0=7.676e-05, train_time=2.241
+[gpua014:0/64] 2023-12-02 08:17:49,065 (trainer:735) INFO: 3epoch:train:5401-5500batch: iter_time=9.177e-05, forward_time=0.151, loss_ctc=181.099, loss_att=182.855, acc=0.252, loss=182.328, backward_time=0.381, grad_norm=58.986, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.081, optim0_lr0=7.726e-05, train_time=1.966
+[gpua014:0/64] 2023-12-02 08:21:14,759 (trainer:735) INFO: 3epoch:train:5501-5600batch: iter_time=9.155e-05, forward_time=0.149, loss_ctc=156.201, loss_att=173.684, acc=0.257, loss=168.439, backward_time=0.381, grad_norm=53.914, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.081, optim0_lr0=7.776e-05, train_time=2.057
+[gpua014:0/64] 2023-12-02 08:25:05,658 (trainer:735) INFO: 3epoch:train:5601-5700batch: iter_time=9.586e-05, forward_time=0.148, loss_ctc=171.206, loss_att=192.834, acc=0.238, loss=186.346, backward_time=0.427, grad_norm=54.920, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.081, optim0_lr0=7.826e-05, train_time=2.309
+[gpua014:0/64] 2023-12-02 08:28:12,077 (trainer:735) INFO: 3epoch:train:5701-5800batch: iter_time=8.964e-05, forward_time=0.148, loss_ctc=178.223, loss_att=205.903, acc=0.232, loss=197.599, backward_time=0.351, grad_norm=61.067, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.081, optim0_lr0=7.876e-05, train_time=1.864
+[gpua014:0/64] 2023-12-02 08:30:47,179 (trainer:735) INFO: 3epoch:train:5801-5900batch: iter_time=9.905e-05, forward_time=0.149, loss_ctc=156.872, loss_att=178.314, acc=0.257, loss=171.881, backward_time=0.326, grad_norm=52.399, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.081, optim0_lr0=7.926e-05, train_time=1.551
+[gpua014:0/64] 2023-12-02 08:33:33,263 (trainer:735) INFO: 3epoch:train:5901-6000batch: iter_time=1.011e-04, forward_time=0.148, loss_ctc=185.679, loss_att=200.314, acc=0.241, loss=195.923, backward_time=0.323, grad_norm=63.377, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.081, optim0_lr0=7.976e-05, train_time=1.661
+[gpua014:0/64] 2023-12-02 08:36:33,229 (trainer:735) INFO: 3epoch:train:6001-6100batch: iter_time=9.723e-05, forward_time=0.148, loss_ctc=180.087, loss_att=191.101, acc=0.255, loss=187.796, backward_time=0.392, grad_norm=65.783, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.081, optim0_lr0=8.026e-05, train_time=1.799
+[gpua014:0/64] 2023-12-02 08:38:58,005 (trainer:735) INFO: 3epoch:train:6101-6200batch: iter_time=9.402e-05, forward_time=0.150, loss_ctc=163.785, loss_att=176.946, acc=0.249, loss=172.998, backward_time=0.314, grad_norm=54.940, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.082, optim0_lr0=8.076e-05, train_time=1.448
+[gpua014:0/64] 2023-12-02 08:40:15,935 (multiple_iter_factory:32) INFO: Building 5th iter-factory...
+[gpua014:0/64] 2023-12-02 08:40:33,977 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua014:0/64] 2023-12-02 08:40:37,405 (abs_task:1616) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TPreprocessor object at 0x7fa1939aecb0>)
+[gpua014:0/64] 2023-12-02 08:40:37,406 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, 
+[gpua014:0/64] 2023-12-02 08:40:37,409 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua014:0/64] 2023-12-02 08:46:03,394 (trainer:735) INFO: 3epoch:train:6201-6300batch: iter_time=1.583, forward_time=0.187, loss_ctc=169.222, loss_att=182.573, acc=0.255, loss=178.568, backward_time=0.310, grad_norm=58.498, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.082, optim0_lr0=8.126e-05, train_time=4.253
+[gpua014:0/64] 2023-12-02 08:48:04,838 (trainer:735) INFO: 3epoch:train:6301-6400batch: iter_time=1.024e-04, forward_time=0.149, loss_ctc=181.214, loss_att=219.993, acc=0.238, loss=208.359, backward_time=0.296, grad_norm=56.289, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.081, optim0_lr0=8.176e-05, train_time=1.214
+[gpua014:0/64] 2023-12-02 08:50:01,346 (trainer:735) INFO: 3epoch:train:6401-6500batch: iter_time=9.846e-05, forward_time=0.147, loss_ctc=178.013, loss_att=194.814, acc=0.245, loss=189.774, backward_time=0.277, grad_norm=53.561, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.081, optim0_lr0=8.226e-05, train_time=1.165
+[gpua014:0/64] 2023-12-02 08:52:11,423 (trainer:735) INFO: 3epoch:train:6501-6600batch: iter_time=1.076e-04, forward_time=0.149, loss_ctc=149.002, loss_att=161.095, acc=0.265, loss=157.467, backward_time=0.281, grad_norm=54.043, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.081, optim0_lr0=8.276e-05, train_time=1.301
+[gpua014:0/64] 2023-12-02 08:54:16,465 (trainer:735) INFO: 3epoch:train:6601-6700batch: iter_time=9.897e-05, forward_time=0.147, loss_ctc=155.405, loss_att=177.837, acc=0.258, loss=171.107, backward_time=0.278, grad_norm=51.565, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.081, optim0_lr0=8.326e-05, train_time=1.250
+[gpua014:0/64] 2023-12-02 08:56:32,802 (trainer:735) INFO: 3epoch:train:6701-6800batch: iter_time=9.801e-05, forward_time=0.148, loss_ctc=167.761, loss_att=176.649, acc=0.257, loss=173.983, backward_time=0.292, grad_norm=56.596, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.081, optim0_lr0=8.376e-05, train_time=1.363
+[gpua014:0/64] 2023-12-02 08:58:54,961 (trainer:735) INFO: 3epoch:train:6801-6900batch: iter_time=9.572e-05, forward_time=0.148, loss_ctc=163.001, loss_att=187.700, acc=0.252, loss=180.291, backward_time=0.319, grad_norm=55.662, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.081, optim0_lr0=8.426e-05, train_time=1.421
+[gpua014:0/64] 2023-12-02 09:00:54,148 (trainer:735) INFO: 3epoch:train:6901-7000batch: iter_time=1.020e-04, forward_time=0.148, loss_ctc=170.009, loss_att=197.577, acc=0.237, loss=189.307, backward_time=0.281, grad_norm=59.871, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.081, optim0_lr0=8.476e-05, train_time=1.192
+[gpua014:0/64] 2023-12-02 09:02:55,910 (trainer:735) INFO: 3epoch:train:7001-7100batch: iter_time=9.341e-05, forward_time=0.148, loss_ctc=152.911, loss_att=190.702, acc=0.250, loss=179.364, backward_time=0.278, grad_norm=45.977, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.081, optim0_lr0=8.526e-05, train_time=1.217
+[gpua014:0/64] 2023-12-02 09:05:17,641 (trainer:735) INFO: 3epoch:train:7101-7200batch: iter_time=8.863e-05, forward_time=0.148, loss_ctc=149.389, loss_att=173.076, acc=0.261, loss=165.970, backward_time=0.282, grad_norm=48.395, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.081, optim0_lr0=8.576e-05, train_time=1.417
+[gpua014:0/64] 2023-12-02 09:07:33,094 (trainer:735) INFO: 3epoch:train:7201-7300batch: iter_time=8.313e-05, forward_time=0.149, loss_ctc=187.109, loss_att=199.802, acc=0.248, loss=195.994, backward_time=0.292, grad_norm=62.859, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.080, optim0_lr0=8.626e-05, train_time=1.354
+[gpua014:0/64] 2023-12-02 09:09:49,405 (trainer:735) INFO: 3epoch:train:7301-7400batch: iter_time=8.681e-05, forward_time=0.148, loss_ctc=177.453, loss_att=198.511, acc=0.251, loss=192.194, backward_time=0.290, grad_norm=65.337, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.080, optim0_lr0=8.676e-05, train_time=1.363
+[gpua014:0/64] 2023-12-02 09:11:54,833 (trainer:735) INFO: 3epoch:train:7401-7500batch: iter_time=9.076e-05, forward_time=0.148, loss_ctc=151.150, loss_att=168.808, acc=0.256, loss=163.511, backward_time=0.282, grad_norm=54.731, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.080, optim0_lr0=8.726e-05, train_time=1.254
+[gpua014:0/64] 2023-12-02 09:12:09,835 (multiple_iter_factory:32) INFO: Building 6th iter-factory...
+[gpua014:0/64] 2023-12-02 09:12:27,948 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua014:0/64] 2023-12-02 09:12:31,479 (abs_task:1616) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TPreprocessor object at 0x7fa1d25cf520>)
+[gpua014:0/64] 2023-12-02 09:12:31,479 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, 
+[gpua014:0/64] 2023-12-02 09:12:31,482 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua014:0/64] 2023-12-02 09:20:44,891 (trainer:735) INFO: 3epoch:train:7501-7600batch: iter_time=2.188, forward_time=0.177, loss_ctc=172.666, loss_att=199.284, acc=0.247, loss=191.299, backward_time=0.285, grad_norm=61.212, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.080, optim0_lr0=8.776e-05, train_time=5.300
+[gpua014:0/64] 2023-12-02 09:22:43,132 (trainer:735) INFO: 3epoch:train:7601-7700batch: iter_time=9.041e-05, forward_time=0.148, loss_ctc=172.852, loss_att=201.767, acc=0.245, loss=193.093, backward_time=0.279, grad_norm=54.186, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.080, optim0_lr0=8.826e-05, train_time=1.182
+[gpua014:0/64] 2023-12-02 09:24:37,954 (trainer:735) INFO: 3epoch:train:7701-7800batch: iter_time=8.594e-05, forward_time=0.147, loss_ctc=154.703, loss_att=168.147, acc=0.264, loss=164.114, backward_time=0.279, grad_norm=52.409, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.080, optim0_lr0=8.876e-05, train_time=1.148
+[gpua014:0/64] 2023-12-02 09:26:56,737 (trainer:735) INFO: 3epoch:train:7801-7900batch: iter_time=9.889e-05, forward_time=0.147, loss_ctc=149.686, loss_att=181.268, acc=0.260, loss=171.794, backward_time=0.312, grad_norm=56.192, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.079, optim0_lr0=8.926e-05, train_time=1.388
+[gpua014:0/64] 2023-12-02 09:29:26,773 (trainer:735) INFO: 3epoch:train:7901-8000batch: iter_time=8.607e-05, forward_time=0.148, loss_ctc=165.542, loss_att=177.885, acc=0.259, loss=174.182, backward_time=0.303, grad_norm=57.761, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.079, optim0_lr0=8.976e-05, train_time=1.500
+[gpua014:0/64] 2023-12-02 09:31:52,675 (trainer:735) INFO: 3epoch:train:8001-8100batch: iter_time=8.410e-05, forward_time=0.147, loss_ctc=143.544, loss_att=168.700, acc=0.267, loss=161.153, backward_time=0.314, grad_norm=53.308, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.079, optim0_lr0=9.026e-05, train_time=1.459
+[gpua014:0/64] 2023-12-02 09:34:12,338 (trainer:735) INFO: 3epoch:train:8101-8200batch: iter_time=9.791e-05, forward_time=0.147, loss_ctc=157.524, loss_att=187.644, acc=0.246, loss=178.608, backward_time=0.306, grad_norm=51.076, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.079, optim0_lr0=9.076e-05, train_time=1.396
+[gpua014:0/64] 2023-12-02 09:36:35,692 (trainer:735) INFO: 3epoch:train:8201-8300batch: iter_time=8.743e-05, forward_time=0.148, loss_ctc=163.031, loss_att=201.076, acc=0.242, loss=189.662, backward_time=0.309, grad_norm=54.903, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.080, optim0_lr0=9.126e-05, train_time=1.433
+[gpua014:0/64] 2023-12-02 09:39:17,669 (trainer:735) INFO: 3epoch:train:8301-8400batch: iter_time=8.631e-05, forward_time=0.148, loss_ctc=143.837, loss_att=175.360, acc=0.265, loss=165.903, backward_time=0.344, grad_norm=48.711, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.081, optim0_lr0=9.176e-05, train_time=1.620
+[gpua014:0/64] 2023-12-02 09:41:39,397 (trainer:735) INFO: 3epoch:train:8401-8500batch: iter_time=8.646e-05, forward_time=0.184, loss_ctc=174.924, loss_att=195.495, acc=0.248, loss=189.324, backward_time=0.302, grad_norm=63.089, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.083, optim0_lr0=9.226e-05, train_time=1.417
+[gpua014:0/64] 2023-12-02 09:44:21,584 (trainer:735) INFO: 3epoch:train:8501-8600batch: iter_time=7.518e-05, forward_time=0.167, loss_ctc=165.362, loss_att=185.392, acc=0.264, loss=179.383, backward_time=0.334, grad_norm=63.679, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.084, optim0_lr0=9.276e-05, train_time=1.622
+[gpua014:0/64] 2023-12-02 09:46:35,784 (trainer:735) INFO: 3epoch:train:8601-8700batch: iter_time=7.958e-05, forward_time=0.148, loss_ctc=151.720, loss_att=173.935, acc=0.256, loss=167.270, backward_time=0.315, grad_norm=53.130, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.081, optim0_lr0=9.326e-05, train_time=1.342
+[gpua014:0/64] 2023-12-02 09:48:08,689 (multiple_iter_factory:32) INFO: Building 7th iter-factory...
+[gpua014:0/64] 2023-12-02 09:48:27,249 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua014:0/64] 2023-12-02 09:48:30,873 (abs_task:1616) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TPreprocessor object at 0x7fa1c0c1ffa0>)
+[gpua014:0/64] 2023-12-02 09:48:30,873 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, 
+[gpua014:0/64] 2023-12-02 09:48:30,877 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua014:0/64] 2023-12-02 09:54:29,914 (trainer:735) INFO: 3epoch:train:8701-8800batch: iter_time=2.298, forward_time=0.148, loss_ctc=159.213, loss_att=177.872, acc=0.264, loss=172.274, backward_time=0.324, grad_norm=61.455, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.081, optim0_lr0=9.376e-05, train_time=4.741
+[gpua014:0/64] 2023-12-02 09:59:02,798 (trainer:735) INFO: 3epoch:train:8801-8900batch: iter_time=9.542e-05, forward_time=0.148, loss_ctc=168.177, loss_att=213.873, acc=0.247, loss=200.164, backward_time=0.446, grad_norm=56.474, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.081, optim0_lr0=9.426e-05, train_time=2.729
+[gpua014:0/64] 2023-12-02 10:04:03,286 (trainer:735) INFO: 3epoch:train:8901-9000batch: iter_time=9.234e-05, forward_time=0.150, loss_ctc=165.558, loss_att=191.251, acc=0.254, loss=183.543, backward_time=0.470, grad_norm=52.706, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.080, optim0_lr0=9.476e-05, train_time=3.005
+[gpua014:0/64] 2023-12-02 10:07:32,809 (trainer:735) INFO: 3epoch:train:9001-9100batch: iter_time=1.020e-04, forward_time=0.148, loss_ctc=141.179, loss_att=157.916, acc=0.274, loss=152.895, backward_time=0.357, grad_norm=56.937, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.081, optim0_lr0=9.526e-05, train_time=2.095
+[gpua014:0/64] 2023-12-02 10:11:06,095 (trainer:735) INFO: 3epoch:train:9101-9200batch: iter_time=9.680e-05, forward_time=0.148, loss_ctc=142.423, loss_att=173.041, acc=0.268, loss=163.856, backward_time=0.331, grad_norm=50.704, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.081, optim0_lr0=9.576e-05, train_time=2.133
+[gpua014:0/64] 2023-12-02 10:14:17,124 (trainer:735) INFO: 3epoch:train:9201-9300batch: iter_time=1.031e-04, forward_time=0.148, loss_ctc=156.876, loss_att=172.231, acc=0.268, loss=167.624, backward_time=0.325, grad_norm=58.780, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.080, optim0_lr0=9.626e-05, train_time=1.910
+[gpua014:0/64] 2023-12-02 10:17:21,935 (trainer:735) INFO: 3epoch:train:9301-9400batch: iter_time=1.025e-04, forward_time=0.148, loss_ctc=150.062, loss_att=182.742, acc=0.263, loss=172.938, backward_time=0.380, grad_norm=49.851, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.080, optim0_lr0=9.676e-05, train_time=1.848
+[gpua014:0/64] 2023-12-02 10:20:20,346 (trainer:735) INFO: 3epoch:train:9401-9500batch: iter_time=1.004e-04, forward_time=0.148, loss_ctc=158.929, loss_att=192.776, acc=0.247, loss=182.622, backward_time=0.332, grad_norm=58.957, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.081, optim0_lr0=9.726e-05, train_time=1.784
+[gpua014:0/64] 2023-12-02 10:23:18,610 (trainer:735) INFO: 3epoch:train:9501-9600batch: iter_time=1.037e-04, forward_time=0.148, loss_ctc=142.286, loss_att=186.684, acc=0.260, loss=173.364, backward_time=0.323, grad_norm=45.102, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.080, optim0_lr0=9.776e-05, train_time=1.782
+[gpua014:0/64] 2023-12-02 10:26:16,537 (trainer:735) INFO: 3epoch:train:9601-9700batch: iter_time=9.187e-05, forward_time=0.188, loss_ctc=138.481, loss_att=168.146, acc=0.273, loss=159.246, backward_time=0.366, grad_norm=46.020, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.083, optim0_lr0=9.826e-05, train_time=1.779
+
+gpua014:120543:120653 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : port error
+
+gpua014:120543:120653 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : port active
+
+gpua014:120544:120652 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : port error
+
+gpua014:120544:120652 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : port active
+
+gpua014:120546:120654 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : port error
+
+gpua014:120546:120654 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : port active
+
+gpua014:120545:120655 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : port error
+
+gpua014:120545:120655 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : port active
+[gpua014:0/64] 2023-12-02 10:29:29,156 (trainer:735) INFO: 3epoch:train:9701-9800batch: iter_time=9.215e-05, forward_time=0.177, loss_ctc=173.922, loss_att=195.208, acc=0.258, loss=188.822, backward_time=0.335, grad_norm=58.375, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.083, optim0_lr0=9.876e-05, train_time=1.926
+[gpua014:0/64] 2023-12-02 10:32:25,423 (trainer:735) INFO: 3epoch:train:9801-9900batch: iter_time=9.065e-05, forward_time=0.148, loss_ctc=165.394, loss_att=192.719, acc=0.263, loss=184.522, backward_time=0.326, grad_norm=63.012, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.080, optim0_lr0=9.926e-05, train_time=1.762
+[gpua014:0/64] 2023-12-02 10:35:02,478 (trainer:735) INFO: 3epoch:train:9901-10000batch: iter_time=9.229e-05, forward_time=0.148, loss_ctc=140.538, loss_att=164.243, acc=0.271, loss=157.131, backward_time=0.314, grad_norm=50.548, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.079, optim0_lr0=9.976e-05, train_time=1.570
+[gpua014:0/64] 2023-12-02 10:35:22,521 (multiple_iter_factory:32) INFO: Building 8th iter-factory...
+[gpua014:0/64] 2023-12-02 10:35:41,016 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua014:0/64] 2023-12-02 10:35:44,546 (abs_task:1616) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TPreprocessor object at 0x7fa1be4c4d30>)
+[gpua014:0/64] 2023-12-02 10:35:44,546 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, 
+[gpua014:0/64] 2023-12-02 10:35:44,549 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua014:0/64] 2023-12-02 10:42:58,252 (trainer:735) INFO: 3epoch:train:10001-10100batch: iter_time=2.524, forward_time=0.147, loss_ctc=160.571, loss_att=189.574, acc=0.260, loss=180.873, backward_time=0.342, grad_norm=61.213, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.080, optim0_lr0=1.003e-04, train_time=4.758
+[gpua014:0/64] 2023-12-02 10:46:54,314 (trainer:735) INFO: 3epoch:train:10101-10200batch: iter_time=9.102e-05, forward_time=0.149, loss_ctc=163.186, loss_att=195.177, acc=0.255, loss=185.580, backward_time=0.397, grad_norm=59.076, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.080, optim0_lr0=1.008e-04, train_time=2.360
+[gpua014:0/64] 2023-12-02 10:50:40,903 (trainer:735) INFO: 3epoch:train:10201-10300batch: iter_time=9.343e-05, forward_time=0.148, loss_ctc=145.607, loss_att=160.146, acc=0.277, loss=155.784, backward_time=0.485, grad_norm=57.050, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.080, optim0_lr0=1.013e-04, train_time=2.266
+[gpua014:0/64] 2023-12-02 10:55:34,362 (trainer:735) INFO: 3epoch:train:10301-10400batch: iter_time=9.916e-05, forward_time=0.148, loss_ctc=141.139, loss_att=170.487, acc=0.275, loss=161.682, backward_time=0.389, grad_norm=57.496, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.080, optim0_lr0=1.018e-04, train_time=2.934
+[gpua014:0/64] 2023-12-02 10:59:23,901 (trainer:735) INFO: 3epoch:train:10401-10500batch: iter_time=9.167e-05, forward_time=0.148, loss_ctc=157.825, loss_att=169.309, acc=0.274, loss=165.864, backward_time=0.463, grad_norm=56.027, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.080, optim0_lr0=1.023e-04, train_time=2.295
+[gpua014:0/64] 2023-12-02 11:03:04,500 (trainer:735) INFO: 3epoch:train:10501-10600batch: iter_time=9.324e-05, forward_time=0.147, loss_ctc=134.878, loss_att=160.241, acc=0.280, loss=152.632, backward_time=0.360, grad_norm=57.387, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.079, optim0_lr0=1.028e-04, train_time=2.206
+[gpua014:0/64] 2023-12-02 11:06:55,440 (trainer:735) INFO: 3epoch:train:10601-10700batch: iter_time=9.637e-05, forward_time=0.168, loss_ctc=149.469, loss_att=176.660, acc=0.260, loss=168.503, backward_time=0.415, grad_norm=55.830, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.083, optim0_lr0=1.033e-04, train_time=2.309
+[gpua014:0/64] 2023-12-02 11:10:34,972 (trainer:735) INFO: 3epoch:train:10701-10800batch: iter_time=1.011e-04, forward_time=0.180, loss_ctc=152.670, loss_att=190.649, acc=0.256, loss=179.255, backward_time=0.407, grad_norm=54.301, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.083, optim0_lr0=1.038e-04, train_time=2.195
+[gpua014:0/64] 2023-12-02 11:14:37,348 (trainer:735) INFO: 3epoch:train:10801-10900batch: iter_time=9.170e-05, forward_time=0.148, loss_ctc=134.124, loss_att=165.814, acc=0.283, loss=156.307, backward_time=0.492, grad_norm=49.683, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.081, optim0_lr0=1.043e-04, train_time=2.424
+[gpua014:0/64] 2023-12-02 11:17:56,897 (trainer:735) INFO: 3epoch:train:10901-11000batch: iter_time=8.734e-05, forward_time=0.148, loss_ctc=164.031, loss_att=187.434, acc=0.265, loss=180.413, backward_time=0.345, grad_norm=63.483, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.081, optim0_lr0=1.048e-04, train_time=1.995
+[gpua014:0/64] 2023-12-02 11:22:53,796 (trainer:735) INFO: 3epoch:train:11001-11100batch: iter_time=9.598e-05, forward_time=0.148, loss_ctc=155.868, loss_att=177.032, acc=0.283, loss=170.683, backward_time=0.426, grad_norm=56.724, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.081, optim0_lr0=1.053e-04, train_time=2.969
+[gpua014:0/64] 2023-12-02 11:26:49,734 (trainer:735) INFO: 3epoch:train:11101-11200batch: iter_time=9.843e-05, forward_time=0.148, loss_ctc=141.809, loss_att=164.499, acc=0.274, loss=157.692, backward_time=0.440, grad_norm=49.847, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.081, optim0_lr0=1.058e-04, train_time=2.359
+[gpua014:0/64] 2023-12-02 11:28:50,807 (multiple_iter_factory:32) INFO: Building 9th iter-factory...
+[gpua014:0/64] 2023-12-02 11:29:09,110 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua014:0/64] 2023-12-02 11:29:12,671 (abs_task:1616) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TPreprocessor object at 0x7fa1f0183df0>)
+[gpua014:0/64] 2023-12-02 11:29:12,671 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, 
+[gpua014:0/64] 2023-12-02 11:29:12,674 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua014:0/64] 2023-12-02 11:35:32,859 (trainer:735) INFO: 3epoch:train:11201-11300batch: iter_time=2.239, forward_time=0.153, loss_ctc=151.191, loss_att=171.157, acc=0.282, loss=165.167, backward_time=0.312, grad_norm=58.014, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.081, optim0_lr0=1.063e-04, train_time=5.231
+[gpua014:0/64] 2023-12-02 11:37:30,265 (trainer:735) INFO: 3epoch:train:11301-11400batch: iter_time=9.261e-05, forward_time=0.147, loss_ctc=157.180, loss_att=206.314, acc=0.267, loss=191.574, backward_time=0.279, grad_norm=56.029, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.081, optim0_lr0=1.068e-04, train_time=1.174
+[gpua014:0/64] 2023-12-02 11:39:35,292 (trainer:735) INFO: 3epoch:train:11401-11500batch: iter_time=2.424e-04, forward_time=0.203, loss_ctc=154.704, loss_att=183.221, acc=0.274, loss=174.666, backward_time=0.307, grad_norm=55.784, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.090, optim0_lr0=1.073e-04, train_time=1.250
+[gpua014:0/64] 2023-12-02 11:41:42,566 (trainer:735) INFO: 3epoch:train:11501-11600batch: iter_time=9.613e-05, forward_time=0.166, loss_ctc=132.223, loss_att=150.771, acc=0.297, loss=145.207, backward_time=0.300, grad_norm=54.751, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.084, optim0_lr0=1.078e-04, train_time=1.273
+[gpua014:0/64] 2023-12-02 11:44:01,861 (trainer:735) INFO: 3epoch:train:11601-11700batch: iter_time=9.784e-05, forward_time=0.149, loss_ctc=135.522, loss_att=166.513, acc=0.294, loss=157.216, backward_time=0.333, grad_norm=52.632, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.081, optim0_lr0=1.083e-04, train_time=1.393
+[gpua014:0/64] 2023-12-02 11:46:16,717 (trainer:735) INFO: 3epoch:train:11701-11800batch: iter_time=9.705e-05, forward_time=0.152, loss_ctc=149.139, loss_att=163.504, acc=0.293, loss=159.195, backward_time=0.296, grad_norm=54.988, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.080, optim0_lr0=1.088e-04, train_time=1.348
+[gpua014:0/64] 2023-12-02 11:48:19,226 (trainer:735) INFO: 3epoch:train:11801-11900batch: iter_time=8.814e-05, forward_time=0.148, loss_ctc=142.518, loss_att=174.564, acc=0.289, loss=164.950, backward_time=0.281, grad_norm=54.088, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.080, optim0_lr0=1.093e-04, train_time=1.225
+[gpua014:0/64] 2023-12-02 11:50:33,655 (trainer:735) INFO: 3epoch:train:11901-12000batch: iter_time=8.651e-05, forward_time=0.148, loss_ctc=149.039, loss_att=184.955, acc=0.272, loss=174.180, backward_time=0.286, grad_norm=56.596, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.081, optim0_lr0=1.098e-04, train_time=1.344
+[gpua014:0/64] 2023-12-02 11:52:54,488 (trainer:735) INFO: 3epoch:train:12001-12100batch: iter_time=8.979e-05, forward_time=0.148, loss_ctc=134.263, loss_att=175.946, acc=0.290, loss=163.441, backward_time=0.312, grad_norm=49.400, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.080, optim0_lr0=1.103e-04, train_time=1.408
+[gpua014:0/64] 2023-12-02 11:55:26,022 (trainer:735) INFO: 3epoch:train:12101-12200batch: iter_time=9.049e-05, forward_time=0.171, loss_ctc=129.888, loss_att=157.381, acc=0.306, loss=149.133, backward_time=0.300, grad_norm=47.194, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.080, optim0_lr0=1.108e-04, train_time=1.515
+[gpua014:0/64] 2023-12-02 11:58:11,500 (trainer:735) INFO: 3epoch:train:12201-12300batch: iter_time=2.307e-04, forward_time=0.251, loss_ctc=167.334, loss_att=185.967, acc=0.289, loss=180.377, backward_time=0.316, grad_norm=60.606, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.101, optim0_lr0=1.113e-04, train_time=1.655
+[gpua014:0/64] 2023-12-02 12:00:27,614 (trainer:735) INFO: 3epoch:train:12301-12400batch: iter_time=8.438e-05, forward_time=0.150, loss_ctc=155.457, loss_att=181.381, acc=0.297, loss=173.604, backward_time=0.302, grad_norm=64.995, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.080, optim0_lr0=1.118e-04, train_time=1.361
+[gpua014:0/64] 2023-12-02 12:02:56,073 (trainer:735) INFO: 3epoch:train:12401-12500batch: iter_time=8.668e-05, forward_time=0.148, loss_ctc=132.483, loss_att=155.799, acc=0.295, loss=148.804, backward_time=0.289, grad_norm=52.817, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.081, optim0_lr0=1.123e-04, train_time=1.484
+[gpua014:0/64] 2023-12-02 12:03:16,101 (multiple_iter_factory:32) INFO: Building 10th iter-factory...
+[gpua014:0/64] 2023-12-02 12:03:34,758 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua014:0/64] 2023-12-02 12:03:38,346 (abs_task:1616) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TPreprocessor object at 0x7faa727f5660>)
+[gpua014:0/64] 2023-12-02 12:03:38,346 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, 
+[gpua014:0/64] 2023-12-02 12:03:38,349 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua014:0/64] 2023-12-02 12:09:29,478 (trainer:735) INFO: 3epoch:train:12501-12600batch: iter_time=2.364, forward_time=0.148, loss_ctc=151.786, loss_att=177.526, acc=0.287, loss=169.804, backward_time=0.287, grad_norm=62.791, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.081, optim0_lr0=1.128e-04, train_time=3.934
+[gpua014:0/64] 2023-12-02 12:11:43,512 (trainer:735) INFO: 3epoch:train:12601-12700batch: iter_time=8.169e-05, forward_time=0.148, loss_ctc=152.202, loss_att=178.964, acc=0.293, loss=170.935, backward_time=0.306, grad_norm=56.108, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.081, optim0_lr0=1.133e-04, train_time=1.340
+[gpua014:0/64] 2023-12-02 12:13:47,787 (trainer:735) INFO: 3epoch:train:12701-12800batch: iter_time=8.987e-05, forward_time=0.148, loss_ctc=137.069, loss_att=147.854, acc=0.314, loss=144.618, backward_time=0.289, grad_norm=55.040, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.081, optim0_lr0=1.138e-04, train_time=1.243
+[gpua014:0/64] 2023-12-02 12:16:03,711 (trainer:735) INFO: 3epoch:train:12801-12900batch: iter_time=9.256e-05, forward_time=0.150, loss_ctc=134.717, loss_att=157.227, acc=0.316, loss=150.474, backward_time=0.297, grad_norm=62.429, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.080, optim0_lr0=1.143e-04, train_time=1.359
+[gpua014:0/64] 2023-12-02 12:18:11,170 (trainer:735) INFO: 3epoch:train:12901-13000batch: iter_time=8.646e-05, forward_time=0.147, loss_ctc=147.562, loss_att=157.952, acc=0.310, loss=154.835, backward_time=0.285, grad_norm=59.245, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.080, optim0_lr0=1.148e-04, train_time=1.274
+[gpua014:0/64] 2023-12-02 12:20:20,956 (trainer:735) INFO: 3epoch:train:13001-13100batch: iter_time=8.868e-05, forward_time=0.149, loss_ctc=127.606, loss_att=146.213, acc=0.322, loss=140.631, backward_time=0.285, grad_norm=53.187, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.081, optim0_lr0=1.153e-04, train_time=1.298
+[gpua014:0/64] 2023-12-02 12:22:40,033 (trainer:735) INFO: 3epoch:train:13101-13200batch: iter_time=9.189e-05, forward_time=0.249, loss_ctc=141.507, loss_att=165.692, acc=0.292, loss=158.437, backward_time=0.343, grad_norm=54.650, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.091, optim0_lr0=1.158e-04, train_time=1.390
+[gpua014:0/64] 2023-12-02 12:24:56,272 (trainer:735) INFO: 3epoch:train:13201-13300batch: iter_time=8.762e-05, forward_time=0.148, loss_ctc=145.749, loss_att=175.758, acc=0.298, loss=166.755, backward_time=0.286, grad_norm=55.681, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.081, optim0_lr0=1.163e-04, train_time=1.363
+[gpua014:0/64] 2023-12-02 12:27:18,952 (trainer:735) INFO: 3epoch:train:13301-13400batch: iter_time=9.064e-05, forward_time=0.147, loss_ctc=125.851, loss_att=148.720, acc=0.328, loss=141.859, backward_time=0.310, grad_norm=50.288, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.081, optim0_lr0=1.168e-04, train_time=1.427
+
+gpua014:120543:120653 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : port error
+
+gpua014:120545:120655 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : port error
+
+gpua014:120546:120654 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : port error
+
+gpua014:120544:120652 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : port error
+
+gpua014:120543:120653 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : port active
+
+gpua014:120546:120654 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : port active
+
+gpua014:120545:120655 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : port active
+
+gpua014:120544:120652 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : port active
+[gpua014:0/64] 2023-12-02 12:29:55,738 (trainer:735) INFO: 3epoch:train:13401-13500batch: iter_time=9.853e-05, forward_time=0.147, loss_ctc=155.317, loss_att=170.846, acc=0.309, loss=166.187, backward_time=0.290, grad_norm=61.573, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.081, optim0_lr0=1.173e-04, train_time=1.568
+
+gpua014:120543:120653 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : port error
+
+gpua014:120546:120654 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : port error
+
+gpua014:120545:120655 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : port error
+
+gpua014:120544:120652 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : port error
+
+gpua014:120543:120653 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : port active
+
+gpua014:120546:120654 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : port active
+
+gpua014:120545:120655 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : port active
+
+gpua014:120544:120652 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : port active
+[gpua014:0/64] 2023-12-02 12:32:52,615 (trainer:735) INFO: 3epoch:train:13501-13600batch: iter_time=9.719e-05, forward_time=0.147, loss_ctc=148.023, loss_att=161.920, acc=0.330, loss=157.751, backward_time=0.370, grad_norm=64.320, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.081, optim0_lr0=1.178e-04, train_time=1.769
+
+gpua014:120543:120653 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : port error
+
+gpua014:120546:120654 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : port error
+
+gpua014:120545:120655 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : port error
+
+gpua014:120544:120652 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : port error
+
+gpua014:120543:120653 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : GID table change
+
+gpua014:120543:120653 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : GID table change
+
+gpua014:120543:120653 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : GID table change
+
+gpua014:120543:120653 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : GID table change
+
+gpua014:120546:120654 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : GID table change
+
+gpua014:120546:120654 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : GID table change
+
+gpua014:120546:120654 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : GID table change
+
+gpua014:120546:120654 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : GID table change
+
+gpua014:120545:120655 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : GID table change
+
+gpua014:120545:120655 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : GID table change
+
+gpua014:120545:120655 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : GID table change
+
+gpua014:120545:120655 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : GID table change
+
+gpua014:120544:120652 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : GID table change
+
+gpua014:120544:120652 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : GID table change
+
+gpua014:120544:120652 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : GID table change
+
+gpua014:120544:120652 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : GID table change
+
+gpua014:120543:120653 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : port active
+
+gpua014:120546:120654 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : port active
+
+gpua014:120545:120655 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : port active
+
+gpua014:120544:120652 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : port active
+
+gpua014:120543:120653 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : GID table change
+
+gpua014:120543:120653 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : GID table change
+
+gpua014:120543:120653 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : GID table change
+
+gpua014:120543:120653 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : GID table change
+
+gpua014:120546:120654 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : GID table change
+
+gpua014:120546:120654 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : GID table change
+
+gpua014:120546:120654 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : GID table change
+
+gpua014:120546:120654 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : GID table change
+
+gpua014:120545:120655 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : GID table change
+
+gpua014:120545:120655 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : GID table change
+
+gpua014:120545:120655 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : GID table change
+
+gpua014:120545:120655 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : GID table change
+
+gpua014:120544:120652 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : GID table change
+
+gpua014:120544:120652 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : GID table change
+
+gpua014:120544:120652 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : GID table change
+
+gpua014:120544:120652 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : GID table change
+[gpua014:0/64] 2023-12-02 12:35:30,083 (trainer:735) INFO: 3epoch:train:13601-13700batch: iter_time=1.018e-04, forward_time=0.147, loss_ctc=134.349, loss_att=151.497, acc=0.315, loss=146.352, backward_time=0.311, grad_norm=56.791, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.081, optim0_lr0=1.183e-04, train_time=1.574
+[gpua014:0/64] 2023-12-02 12:37:11,406 (multiple_iter_factory:32) INFO: Building 11th iter-factory...
+[gpua014:0/64] 2023-12-02 12:37:30,018 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua014:0/64] 2023-12-02 12:37:33,579 (abs_task:1616) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TPreprocessor object at 0x7fa169b493f0>)
+[gpua014:0/64] 2023-12-02 12:37:33,579 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, 
+[gpua014:0/64] 2023-12-02 12:37:33,582 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua014:0/64] 2023-12-02 12:42:48,300 (trainer:735) INFO: 3epoch:train:13701-13800batch: iter_time=2.855, forward_time=0.186, loss_ctc=140.856, loss_att=152.160, acc=0.325, loss=148.769, backward_time=0.293, grad_norm=55.086, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.082, optim0_lr0=1.188e-04, train_time=4.382
+[gpua014:0/64] 2023-12-02 12:44:46,003 (trainer:735) INFO: 3epoch:train:13801-13900batch: iter_time=7.882e-05, forward_time=0.149, loss_ctc=149.397, loss_att=181.390, acc=0.319, loss=171.792, backward_time=0.282, grad_norm=60.156, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.081, optim0_lr0=1.193e-04, train_time=1.177
+[gpua014:0/64] 2023-12-02 12:46:46,457 (trainer:735) INFO: 3epoch:train:13901-14000batch: iter_time=7.959e-05, forward_time=0.148, loss_ctc=147.318, loss_att=165.127, acc=0.316, loss=159.784, backward_time=0.281, grad_norm=61.673, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.081, optim0_lr0=1.198e-04, train_time=1.205
+[gpua014:0/64] 2023-12-02 12:48:58,566 (trainer:735) INFO: 3epoch:train:14001-14100batch: iter_time=8.920e-05, forward_time=0.150, loss_ctc=126.089, loss_att=132.024, acc=0.345, loss=130.244, backward_time=0.290, grad_norm=55.598, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.081, optim0_lr0=1.203e-04, train_time=1.321
+[gpua014:0/64] 2023-12-02 12:51:10,112 (trainer:735) INFO: 3epoch:train:14101-14200batch: iter_time=8.560e-05, forward_time=0.147, loss_ctc=126.480, loss_att=145.040, acc=0.341, loss=139.472, backward_time=0.286, grad_norm=54.950, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.080, optim0_lr0=1.208e-04, train_time=1.315
+[gpua014:0/64] 2023-12-02 12:53:16,552 (trainer:735) INFO: 3epoch:train:14201-14300batch: iter_time=8.435e-05, forward_time=0.147, loss_ctc=138.288, loss_att=142.637, acc=0.343, loss=141.332, backward_time=0.286, grad_norm=59.312, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.080, optim0_lr0=1.213e-04, train_time=1.264
+[gpua014:0/64] 2023-12-02 12:55:41,396 (trainer:735) INFO: 3epoch:train:14301-14400batch: iter_time=8.812e-05, forward_time=0.149, loss_ctc=136.252, loss_att=154.754, acc=0.331, loss=149.204, backward_time=0.299, grad_norm=60.942, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.080, optim0_lr0=1.218e-04, train_time=1.448
+[gpua014:0/64] 2023-12-02 12:57:52,183 (trainer:735) INFO: 3epoch:train:14401-14500batch: iter_time=9.502e-05, forward_time=0.147, loss_ctc=141.488, loss_att=163.728, acc=0.313, loss=157.056, backward_time=0.286, grad_norm=55.353, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.080, optim0_lr0=1.223e-04, train_time=1.307
+[gpua014:0/64] 2023-12-02 13:00:29,349 (trainer:735) INFO: 3epoch:train:14501-14600batch: iter_time=8.521e-05, forward_time=0.147, loss_ctc=127.361, loss_att=153.774, acc=0.344, loss=145.850, backward_time=0.304, grad_norm=53.117, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.080, optim0_lr0=1.228e-04, train_time=1.572
+[gpua014:0/64] 2023-12-02 13:02:47,839 (trainer:735) INFO: 3epoch:train:14601-14700batch: iter_time=8.519e-05, forward_time=0.148, loss_ctc=123.523, loss_att=138.039, acc=0.355, loss=133.684, backward_time=0.287, grad_norm=53.743, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.081, optim0_lr0=1.233e-04, train_time=1.385
+[gpua014:0/64] 2023-12-02 13:05:16,612 (trainer:735) INFO: 3epoch:train:14701-14800batch: iter_time=8.483e-05, forward_time=0.182, loss_ctc=155.128, loss_att=165.373, acc=0.335, loss=162.300, backward_time=0.331, grad_norm=60.285, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.087, optim0_lr0=1.238e-04, train_time=1.488
+[gpua014:0/64] 2023-12-02 13:07:59,486 (trainer:735) INFO: 3epoch:train:14801-14900batch: iter_time=9.191e-05, forward_time=0.156, loss_ctc=148.089, loss_att=161.586, acc=0.346, loss=157.537, backward_time=0.317, grad_norm=69.661, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.081, optim0_lr0=1.243e-04, train_time=1.629
+[gpua014:0/64] 2023-12-02 13:10:17,987 (trainer:735) INFO: 3epoch:train:14901-15000batch: iter_time=9.055e-05, forward_time=0.147, loss_ctc=126.788, loss_att=137.912, acc=0.341, loss=134.575, backward_time=0.297, grad_norm=54.252, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.080, optim0_lr0=1.248e-04, train_time=1.384
+[gpua014:0/64] 2023-12-02 13:36:33,442 (trainer:341) INFO: 3epoch results: [train] iter_time=0.173, forward_time=0.154, loss_ctc=167.745, loss_att=180.270, acc=0.266, loss=176.512, backward_time=0.317, grad_norm=58.242, clip=100.000, loss_scale=5.469e+10, optim_step_time=0.081, optim0_lr0=8.751e-05, train_time=1.777, time=7 hours, 24 minutes and 36.95 seconds, total_count=45000, gpu_max_cached_mem_GB=37.328, [valid] loss_ctc=116.449, cer_ctc=0.563, loss_att=113.783, acc=0.271, cer=0.637, wer=1.000, loss=114.583, time=25 minutes and 51.21 seconds, total_count=14013, gpu_max_cached_mem_GB=37.328
+[gpua014:0/64] 2023-12-02 13:36:56,856 (trainer:389) INFO: The best model has been updated: valid.acc, valid.total_count
+[gpua014:0/64] 2023-12-02 13:36:57,028 (trainer:272) INFO: 4/40epoch started. Estimated time to finish: 1 week, 5 days and 3 hours
+[gpua014:0/64] 2023-12-02 13:36:58,118 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpua014:0/64] 2023-12-02 13:37:16,420 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua014:0/64] 2023-12-02 13:37:19,878 (abs_task:1616) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TPreprocessor object at 0x7fa1c4fe7be0>)
+[gpua014:0/64] 2023-12-02 13:37:19,878 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, 
+[gpua014:0/64] 2023-12-02 13:37:19,882 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua014:0/64] 2023-12-02 13:44:34,781 (trainer:735) INFO: 4epoch:train:1-100batch: iter_time=2.620, forward_time=0.180, loss_ctc=138.678, loss_att=148.016, acc=0.348, loss=145.215, backward_time=0.286, grad_norm=70.096, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.080, optim0_lr0=1.253e-04, train_time=4.566
+[gpua014:0/64] 2023-12-02 13:46:32,425 (trainer:735) INFO: 4epoch:train:101-200batch: iter_time=8.942e-05, forward_time=0.148, loss_ctc=129.528, loss_att=153.496, acc=0.344, loss=146.306, backward_time=0.279, grad_norm=61.181, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.080, optim0_lr0=1.258e-04, train_time=1.176
+[gpua014:0/64] 2023-12-02 13:48:28,375 (trainer:735) INFO: 4epoch:train:201-300batch: iter_time=8.966e-05, forward_time=0.148, loss_ctc=136.562, loss_att=150.098, acc=0.372, loss=146.037, backward_time=0.279, grad_norm=59.926, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.080, optim0_lr0=1.263e-04, train_time=1.159
+[gpua014:0/64] 2023-12-02 13:50:27,198 (trainer:735) INFO: 4epoch:train:301-400batch: iter_time=9.546e-05, forward_time=0.148, loss_ctc=141.008, loss_att=156.586, acc=0.360, loss=151.913, backward_time=0.283, grad_norm=60.723, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.080, optim0_lr0=1.268e-04, train_time=1.188
+[gpua014:0/64] 2023-12-02 13:52:39,209 (trainer:735) INFO: 4epoch:train:401-500batch: iter_time=8.476e-05, forward_time=0.147, loss_ctc=118.002, loss_att=131.845, acc=0.367, loss=127.692, backward_time=0.279, grad_norm=50.606, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.080, optim0_lr0=1.273e-04, train_time=1.320
+[gpua014:0/64] 2023-12-02 13:54:56,492 (trainer:735) INFO: 4epoch:train:501-600batch: iter_time=8.831e-05, forward_time=0.149, loss_ctc=123.047, loss_att=134.059, acc=0.365, loss=130.755, backward_time=0.298, grad_norm=56.136, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.080, optim0_lr0=1.278e-04, train_time=1.373
+[gpua014:0/64] 2023-12-02 13:57:22,805 (trainer:735) INFO: 4epoch:train:601-700batch: iter_time=9.415e-05, forward_time=0.147, loss_ctc=133.830, loss_att=150.838, acc=0.357, loss=145.735, backward_time=0.293, grad_norm=54.666, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.080, optim0_lr0=1.283e-04, train_time=1.463
+[gpua014:0/64] 2023-12-02 13:59:43,119 (trainer:735) INFO: 4epoch:train:701-800batch: iter_time=9.223e-05, forward_time=0.147, loss_ctc=120.225, loss_att=129.129, acc=0.380, loss=126.458, backward_time=0.291, grad_norm=59.766, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.080, optim0_lr0=1.288e-04, train_time=1.403
+[gpua014:0/64] 2023-12-02 14:02:00,524 (trainer:735) INFO: 4epoch:train:801-900batch: iter_time=1.032e-04, forward_time=0.150, loss_ctc=117.609, loss_att=128.477, acc=0.395, loss=125.217, backward_time=0.333, grad_norm=47.611, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.081, optim0_lr0=1.293e-04, train_time=1.374
+[gpua014:0/64] 2023-12-02 14:04:44,092 (trainer:735) INFO: 4epoch:train:901-1000batch: iter_time=9.628e-05, forward_time=0.233, loss_ctc=135.008, loss_att=142.705, acc=0.361, loss=140.396, backward_time=0.359, grad_norm=64.217, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.091, optim0_lr0=1.298e-04, train_time=1.635
+[gpua014:0/64] 2023-12-02 14:07:07,356 (trainer:735) INFO: 4epoch:train:1001-1100batch: iter_time=9.881e-05, forward_time=0.147, loss_ctc=130.155, loss_att=140.847, acc=0.362, loss=137.639, backward_time=0.305, grad_norm=53.251, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.079, optim0_lr0=1.303e-04, train_time=1.432
+[gpua014:0/64] 2023-12-02 14:09:31,221 (trainer:735) INFO: 4epoch:train:1101-1200batch: iter_time=9.789e-05, forward_time=0.149, loss_ctc=128.611, loss_att=144.921, acc=0.379, loss=140.028, backward_time=0.299, grad_norm=56.044, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.080, optim0_lr0=1.308e-04, train_time=1.438
+[gpua014:0/64] 2023-12-02 14:10:55,620 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
+[gpua014:0/64] 2023-12-02 14:11:14,313 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua014:0/64] 2023-12-02 14:11:17,895 (abs_task:1616) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TPreprocessor object at 0x7fa1d20e31c0>)
+[gpua014:0/64] 2023-12-02 14:11:17,895 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, 
+[gpua014:0/64] 2023-12-02 14:11:17,898 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua014:0/64] 2023-12-02 14:21:07,694 (trainer:735) INFO: 4epoch:train:1201-1300batch: iter_time=2.857, forward_time=0.149, loss_ctc=128.423, loss_att=136.282, acc=0.376, loss=133.924, backward_time=0.284, grad_norm=57.022, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.079, optim0_lr0=1.313e-04, train_time=6.965
+[gpua014:0/64] 2023-12-02 14:23:04,615 (trainer:735) INFO: 4epoch:train:1301-1400batch: iter_time=7.990e-05, forward_time=0.147, loss_ctc=126.913, loss_att=137.683, acc=0.374, loss=134.452, backward_time=0.278, grad_norm=56.571, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.079, optim0_lr0=1.318e-04, train_time=1.169
+[gpua014:0/64] 2023-12-02 14:25:08,104 (trainer:735) INFO: 4epoch:train:1401-1500batch: iter_time=8.614e-05, forward_time=0.146, loss_ctc=135.103, loss_att=140.843, acc=0.373, loss=139.121, backward_time=0.284, grad_norm=62.983, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.079, optim0_lr0=1.323e-04, train_time=1.235
+[gpua014:0/64] 2023-12-02 14:27:18,021 (trainer:735) INFO: 4epoch:train:1501-1600batch: iter_time=8.612e-05, forward_time=0.149, loss_ctc=118.244, loss_att=126.987, acc=0.406, loss=124.364, backward_time=0.291, grad_norm=53.009, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.080, optim0_lr0=1.328e-04, train_time=1.299
+[gpua014:0/64] 2023-12-02 14:29:20,955 (trainer:735) INFO: 4epoch:train:1601-1700batch: iter_time=9.039e-05, forward_time=0.146, loss_ctc=145.046, loss_att=150.803, acc=0.376, loss=149.076, backward_time=0.277, grad_norm=63.942, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.079, optim0_lr0=1.333e-04, train_time=1.229
+[gpua014:0/64] 2023-12-02 14:31:16,888 (trainer:735) INFO: 4epoch:train:1701-1800batch: iter_time=9.123e-05, forward_time=0.149, loss_ctc=109.009, loss_att=113.098, acc=0.394, loss=111.871, backward_time=0.278, grad_norm=45.146, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.079, optim0_lr0=1.338e-04, train_time=1.159
+[gpua014:0/64] 2023-12-02 14:33:38,302 (trainer:735) INFO: 4epoch:train:1801-1900batch: iter_time=8.453e-05, forward_time=0.148, loss_ctc=130.677, loss_att=142.843, acc=0.373, loss=139.193, backward_time=0.308, grad_norm=55.075, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.080, optim0_lr0=1.343e-04, train_time=1.414
+[gpua014:0/64] 2023-12-02 14:35:59,944 (trainer:735) INFO: 4epoch:train:1901-2000batch: iter_time=8.592e-05, forward_time=0.146, loss_ctc=118.240, loss_att=116.488, acc=0.409, loss=117.014, backward_time=0.298, grad_norm=53.935, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.079, optim0_lr0=1.348e-04, train_time=1.416
+[gpua014:0/64] 2023-12-02 14:38:06,895 (trainer:735) INFO: 4epoch:train:2001-2100batch: iter_time=8.710e-05, forward_time=0.147, loss_ctc=115.378, loss_att=124.558, acc=0.399, loss=121.804, backward_time=0.279, grad_norm=50.468, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.080, optim0_lr0=1.353e-04, train_time=1.269
+[gpua014:0/64] 2023-12-02 14:40:36,415 (trainer:735) INFO: 4epoch:train:2101-2200batch: iter_time=9.054e-05, forward_time=0.199, loss_ctc=135.370, loss_att=135.077, acc=0.408, loss=135.165, backward_time=0.344, grad_norm=67.864, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.086, optim0_lr0=1.358e-04, train_time=1.495
+[gpua014:0/64] 2023-12-02 14:42:51,147 (trainer:735) INFO: 4epoch:train:2201-2300batch: iter_time=8.779e-05, forward_time=0.166, loss_ctc=119.471, loss_att=121.296, acc=0.398, loss=120.749, backward_time=0.302, grad_norm=49.943, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.082, optim0_lr0=1.363e-04, train_time=1.347
+[gpua014:0/64] 2023-12-02 14:45:09,003 (trainer:735) INFO: 4epoch:train:2301-2400batch: iter_time=8.554e-05, forward_time=0.148, loss_ctc=128.583, loss_att=133.505, acc=0.396, loss=132.029, backward_time=0.294, grad_norm=53.823, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.080, optim0_lr0=1.368e-04, train_time=1.378
+[gpua014:0/64] 2023-12-02 14:47:13,760 (trainer:735) INFO: 4epoch:train:2401-2500batch: iter_time=8.603e-05, forward_time=0.147, loss_ctc=124.794, loss_att=132.482, acc=0.393, loss=130.176, backward_time=0.280, grad_norm=53.930, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.079, optim0_lr0=1.373e-04, train_time=1.247
+[gpua014:0/64] 2023-12-02 14:47:19,130 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpua014:0/64] 2023-12-02 14:47:37,514 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua014:0/64] 2023-12-02 14:47:41,024 (abs_task:1616) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TPreprocessor object at 0x7fa1bfc9bdc0>)
+[gpua014:0/64] 2023-12-02 14:47:41,025 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, 
+[gpua014:0/64] 2023-12-02 14:47:41,028 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua014:0/64] 2023-12-02 14:54:41,717 (trainer:735) INFO: 4epoch:train:2501-2600batch: iter_time=1.364, forward_time=0.148, loss_ctc=131.506, loss_att=131.674, acc=0.404, loss=131.624, backward_time=0.281, grad_norm=69.023, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.080, optim0_lr0=1.378e-04, train_time=4.479
+[gpua014:0/64] 2023-12-02 14:56:38,972 (trainer:735) INFO: 4epoch:train:2601-2700batch: iter_time=8.186e-05, forward_time=0.147, loss_ctc=123.433, loss_att=140.773, acc=0.389, loss=135.571, backward_time=0.280, grad_norm=62.195, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.080, optim0_lr0=1.383e-04, train_time=1.172
+[gpua014:0/64] 2023-12-02 14:58:55,184 (trainer:735) INFO: 4epoch:train:2701-2800batch: iter_time=8.758e-05, forward_time=0.148, loss_ctc=129.834, loss_att=133.576, acc=0.427, loss=132.453, backward_time=0.287, grad_norm=64.170, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.080, optim0_lr0=1.388e-04, train_time=1.362
+[gpua014:0/64] 2023-12-02 15:00:54,754 (trainer:735) INFO: 4epoch:train:2801-2900batch: iter_time=9.345e-05, forward_time=0.148, loss_ctc=135.437, loss_att=141.995, acc=0.409, loss=140.027, backward_time=0.284, grad_norm=60.393, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.080, optim0_lr0=1.393e-04, train_time=1.195
+[gpua014:0/64] 2023-12-02 15:02:59,560 (trainer:735) INFO: 4epoch:train:2901-3000batch: iter_time=9.119e-05, forward_time=0.147, loss_ctc=113.566, loss_att=121.474, acc=0.410, loss=119.102, backward_time=0.281, grad_norm=49.346, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.080, optim0_lr0=1.398e-04, train_time=1.248
+[gpua014:0/64] 2023-12-02 15:05:10,408 (trainer:735) INFO: 4epoch:train:3001-3100batch: iter_time=1.005e-04, forward_time=0.147, loss_ctc=117.130, loss_att=122.517, acc=0.407, loss=120.901, backward_time=0.278, grad_norm=57.286, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.080, optim0_lr0=1.403e-04, train_time=1.308
+[gpua014:0/64] 2023-12-02 15:07:25,745 (trainer:735) INFO: 4epoch:train:3101-3200batch: iter_time=9.788e-05, forward_time=0.148, loss_ctc=129.097, loss_att=137.807, acc=0.403, loss=135.194, backward_time=0.300, grad_norm=55.630, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.080, optim0_lr0=1.408e-04, train_time=1.353
+[gpua014:0/64] 2023-12-02 15:09:28,430 (trainer:735) INFO: 4epoch:train:3201-3300batch: iter_time=1.002e-04, forward_time=0.147, loss_ctc=113.650, loss_att=112.913, acc=0.437, loss=113.134, backward_time=0.281, grad_norm=51.309, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.080, optim0_lr0=1.413e-04, train_time=1.227
+[gpua014:0/64] 2023-12-02 15:11:33,535 (trainer:735) INFO: 4epoch:train:3301-3400batch: iter_time=1.045e-04, forward_time=0.148, loss_ctc=111.800, loss_att=115.357, acc=0.446, loss=114.290, backward_time=0.277, grad_norm=48.117, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.080, optim0_lr0=1.418e-04, train_time=1.251
+[gpua014:0/64] 2023-12-02 15:13:46,802 (trainer:735) INFO: 4epoch:train:3401-3500batch: iter_time=1.009e-04, forward_time=0.148, loss_ctc=129.051, loss_att=127.189, acc=0.409, loss=127.747, backward_time=0.292, grad_norm=65.007, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.080, optim0_lr0=1.423e-04, train_time=1.332
+[gpua014:0/64] 2023-12-02 15:16:20,239 (trainer:735) INFO: 4epoch:train:3501-3600batch: iter_time=1.064e-04, forward_time=0.150, loss_ctc=123.001, loss_att=129.283, acc=0.406, loss=127.398, backward_time=0.297, grad_norm=48.829, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.080, optim0_lr0=1.428e-04, train_time=1.534
+[gpua014:0/64] 2023-12-02 15:18:41,132 (trainer:735) INFO: 4epoch:train:3601-3700batch: iter_time=5.984e-04, forward_time=0.148, loss_ctc=121.886, loss_att=131.067, acc=0.426, loss=128.313, backward_time=0.300, grad_norm=52.915, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.082, optim0_lr0=1.433e-04, train_time=1.409
+[gpua014:0/64] 2023-12-02 15:20:06,064 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpua014:0/64] 2023-12-02 15:20:24,452 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua014:0/64] 2023-12-02 15:20:28,264 (abs_task:1616) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TPreprocessor object at 0x7fa1a0a0cbe0>)
+[gpua014:0/64] 2023-12-02 15:20:28,264 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, 
+[gpua014:0/64] 2023-12-02 15:20:28,267 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua014:0/64] 2023-12-02 15:30:50,624 (trainer:735) INFO: 4epoch:train:3701-3800batch: iter_time=3.158, forward_time=0.185, loss_ctc=124.110, loss_att=129.212, acc=0.412, loss=127.682, backward_time=0.292, grad_norm=59.247, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.083, optim0_lr0=1.438e-04, train_time=7.295
+[gpua014:0/64] 2023-12-02 15:33:11,250 (trainer:735) INFO: 4epoch:train:3801-3900batch: iter_time=7.697e-05, forward_time=0.148, loss_ctc=122.269, loss_att=130.381, acc=0.422, loss=127.948, backward_time=0.344, grad_norm=54.853, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.081, optim0_lr0=1.443e-04, train_time=1.406
+[gpua014:0/64] 2023-12-02 15:35:11,097 (trainer:735) INFO: 4epoch:train:3901-4000batch: iter_time=8.097e-05, forward_time=0.147, loss_ctc=130.463, loss_att=134.568, acc=0.414, loss=133.337, backward_time=0.279, grad_norm=58.973, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.081, optim0_lr0=1.448e-04, train_time=1.198
+[gpua014:0/64] 2023-12-02 15:37:28,762 (trainer:735) INFO: 4epoch:train:4001-4100batch: iter_time=8.370e-05, forward_time=0.147, loss_ctc=113.481, loss_att=116.255, acc=0.456, loss=115.423, backward_time=0.290, grad_norm=47.643, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.081, optim0_lr0=1.453e-04, train_time=1.376
+[gpua014:0/64] 2023-12-02 15:39:35,254 (trainer:735) INFO: 4epoch:train:4101-4200batch: iter_time=8.549e-05, forward_time=0.150, loss_ctc=138.711, loss_att=143.522, acc=0.415, loss=142.079, backward_time=0.282, grad_norm=64.768, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.081, optim0_lr0=1.458e-04, train_time=1.265
+[gpua014:0/64] 2023-12-02 15:41:56,559 (trainer:735) INFO: 4epoch:train:4201-4300batch: iter_time=8.484e-05, forward_time=0.147, loss_ctc=105.583, loss_att=106.792, acc=0.433, loss=106.429, backward_time=0.324, grad_norm=48.019, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.080, optim0_lr0=1.463e-04, train_time=1.413
+[gpua014:0/64] 2023-12-02 15:43:57,408 (trainer:735) INFO: 4epoch:train:4301-4400batch: iter_time=8.407e-05, forward_time=0.147, loss_ctc=127.064, loss_att=133.506, acc=0.416, loss=131.573, backward_time=0.283, grad_norm=55.278, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.081, optim0_lr0=1.468e-04, train_time=1.208
+[gpua014:0/64] 2023-12-02 15:46:21,984 (trainer:735) INFO: 4epoch:train:4401-4500batch: iter_time=9.300e-05, forward_time=0.147, loss_ctc=112.156, loss_att=110.872, acc=0.449, loss=111.257, backward_time=0.304, grad_norm=50.797, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.081, optim0_lr0=1.473e-04, train_time=1.446
+[gpua014:0/64] 2023-12-02 15:48:37,832 (trainer:735) INFO: 4epoch:train:4501-4600batch: iter_time=8.954e-05, forward_time=0.147, loss_ctc=109.371, loss_att=113.508, acc=0.447, loss=112.267, backward_time=0.293, grad_norm=46.146, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.081, optim0_lr0=1.478e-04, train_time=1.358
+[gpua014:0/64] 2023-12-02 15:51:09,633 (trainer:735) INFO: 4epoch:train:4601-4700batch: iter_time=8.773e-05, forward_time=0.149, loss_ctc=128.300, loss_att=122.535, acc=0.455, loss=124.265, backward_time=0.302, grad_norm=59.883, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.080, optim0_lr0=1.483e-04, train_time=1.518
+[gpua014:0/64] 2023-12-02 15:53:44,631 (trainer:735) INFO: 4epoch:train:4701-4800batch: iter_time=8.396e-05, forward_time=0.146, loss_ctc=114.585, loss_att=112.758, acc=0.438, loss=113.306, backward_time=0.300, grad_norm=48.370, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.080, optim0_lr0=1.488e-04, train_time=1.550
+[gpua014:0/64] 2023-12-02 15:55:53,461 (trainer:735) INFO: 4epoch:train:4801-4900batch: iter_time=8.028e-05, forward_time=0.150, loss_ctc=122.506, loss_att=124.177, acc=0.439, loss=123.676, backward_time=0.279, grad_norm=52.728, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.080, optim0_lr0=1.493e-04, train_time=1.288
+[gpua014:0/64] 2023-12-02 15:58:11,155 (trainer:735) INFO: 4epoch:train:4901-5000batch: iter_time=7.790e-05, forward_time=0.147, loss_ctc=119.962, loss_att=130.880, acc=0.421, loss=127.604, backward_time=0.288, grad_norm=51.414, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.080, optim0_lr0=1.498e-04, train_time=1.377
+[gpua014:0/64] 2023-12-02 15:58:15,925 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpua014:0/64] 2023-12-02 15:58:34,695 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua014:0/64] 2023-12-02 15:58:38,292 (abs_task:1616) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TPreprocessor object at 0x7fa1744bfdc0>)
+[gpua014:0/64] 2023-12-02 15:58:38,292 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, 
+[gpua014:0/64] 2023-12-02 15:58:38,295 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua014:0/64] 2023-12-02 16:05:50,591 (trainer:735) INFO: 4epoch:train:5001-5100batch: iter_time=1.421, forward_time=0.194, loss_ctc=123.697, loss_att=116.285, acc=0.444, loss=118.509, backward_time=0.289, grad_norm=59.986, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.082, optim0_lr0=1.503e-04, train_time=4.594
+[gpua014:0/64] 2023-12-02 16:07:56,339 (trainer:735) INFO: 4epoch:train:5101-5200batch: iter_time=8.299e-05, forward_time=0.147, loss_ctc=116.890, loss_att=121.017, acc=0.427, loss=119.779, backward_time=0.296, grad_norm=53.775, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.081, optim0_lr0=1.508e-04, train_time=1.257
+[gpua014:0/64] 2023-12-02 16:09:56,330 (trainer:735) INFO: 4epoch:train:5201-5300batch: iter_time=8.890e-05, forward_time=0.149, loss_ctc=125.129, loss_att=119.386, acc=0.465, loss=121.109, backward_time=0.279, grad_norm=52.282, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.080, optim0_lr0=1.513e-04, train_time=1.200
+[gpua014:0/64] 2023-12-02 16:11:57,286 (trainer:735) INFO: 4epoch:train:5301-5400batch: iter_time=8.569e-05, forward_time=0.146, loss_ctc=129.438, loss_att=128.126, acc=0.448, loss=128.519, backward_time=0.278, grad_norm=54.881, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.080, optim0_lr0=1.518e-04, train_time=1.209
+[gpua014:0/64] 2023-12-02 16:14:04,400 (trainer:735) INFO: 4epoch:train:5401-5500batch: iter_time=8.970e-05, forward_time=0.147, loss_ctc=109.440, loss_att=110.582, acc=0.438, loss=110.240, backward_time=0.297, grad_norm=53.599, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.080, optim0_lr0=1.523e-04, train_time=1.271
+[gpua014:0/64] 2023-12-02 16:16:09,967 (trainer:735) INFO: 4epoch:train:5501-5600batch: iter_time=9.340e-05, forward_time=0.147, loss_ctc=113.191, loss_att=112.258, acc=0.438, loss=112.538, backward_time=0.290, grad_norm=50.801, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.080, optim0_lr0=1.528e-04, train_time=1.255
+[gpua014:0/64] 2023-12-02 16:18:07,095 (trainer:735) INFO: 4epoch:train:5601-5700batch: iter_time=9.679e-05, forward_time=0.147, loss_ctc=122.063, loss_att=124.328, acc=0.432, loss=123.649, backward_time=0.277, grad_norm=49.664, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.080, optim0_lr0=1.533e-04, train_time=1.171
+[gpua014:0/64] 2023-12-02 16:20:37,391 (trainer:735) INFO: 4epoch:train:5701-5800batch: iter_time=8.696e-05, forward_time=0.147, loss_ctc=108.198, loss_att=99.711, acc=0.474, loss=102.257, backward_time=0.300, grad_norm=49.804, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.080, optim0_lr0=1.538e-04, train_time=1.503
+[gpua014:0/64] 2023-12-02 16:23:21,936 (trainer:735) INFO: 4epoch:train:5801-5900batch: iter_time=8.833e-05, forward_time=0.147, loss_ctc=107.963, loss_att=104.882, acc=0.478, loss=105.806, backward_time=0.314, grad_norm=46.591, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.080, optim0_lr0=1.543e-04, train_time=1.645
+[gpua014:0/64] 2023-12-02 16:25:33,553 (trainer:735) INFO: 4epoch:train:5901-6000batch: iter_time=8.629e-05, forward_time=0.150, loss_ctc=124.856, loss_att=116.713, acc=0.438, loss=119.156, backward_time=0.283, grad_norm=63.832, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.080, optim0_lr0=1.548e-04, train_time=1.316
+[gpua014:0/64] 2023-12-02 16:28:13,857 (trainer:735) INFO: 4epoch:train:6001-6100batch: iter_time=9.113e-05, forward_time=0.147, loss_ctc=117.265, loss_att=117.284, acc=0.441, loss=117.278, backward_time=0.307, grad_norm=46.746, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.080, optim0_lr0=1.553e-04, train_time=1.603
+[gpua014:0/64] 2023-12-02 16:30:37,846 (trainer:735) INFO: 4epoch:train:6101-6200batch: iter_time=1.050e-04, forward_time=0.149, loss_ctc=116.763, loss_att=115.901, acc=0.461, loss=116.160, backward_time=0.311, grad_norm=51.698, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.080, optim0_lr0=1.558e-04, train_time=1.440
+[gpua014:0/64] 2023-12-02 16:31:56,196 (multiple_iter_factory:32) INFO: Building 5th iter-factory...
+[gpua014:0/64] 2023-12-02 16:32:14,729 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua014:0/64] 2023-12-02 16:32:18,274 (abs_task:1616) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TPreprocessor object at 0x7fa17509df60>)
+[gpua014:0/64] 2023-12-02 16:32:18,274 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, 
+[gpua014:0/64] 2023-12-02 16:32:18,277 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua014:0/64] 2023-12-02 16:37:31,869 (trainer:735) INFO: 4epoch:train:6201-6300batch: iter_time=2.771, forward_time=0.147, loss_ctc=119.929, loss_att=112.894, acc=0.447, loss=115.005, backward_time=0.285, grad_norm=54.207, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.080, optim0_lr0=1.563e-04, train_time=4.140
+[gpua014:0/64] 2023-12-02 16:39:33,651 (trainer:735) INFO: 4epoch:train:6301-6400batch: iter_time=8.531e-05, forward_time=0.147, loss_ctc=117.930, loss_att=117.013, acc=0.451, loss=117.288, backward_time=0.278, grad_norm=54.872, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.080, optim0_lr0=1.568e-04, train_time=1.218
+[gpua014:0/64] 2023-12-02 16:41:41,994 (trainer:735) INFO: 4epoch:train:6401-6500batch: iter_time=8.802e-05, forward_time=0.147, loss_ctc=124.353, loss_att=116.238, acc=0.455, loss=118.672, backward_time=0.287, grad_norm=53.128, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.080, optim0_lr0=1.573e-04, train_time=1.283
+[gpua014:0/64] 2023-12-02 16:43:46,918 (trainer:735) INFO: 4epoch:train:6501-6600batch: iter_time=9.544e-05, forward_time=0.148, loss_ctc=109.626, loss_att=106.763, acc=0.483, loss=107.622, backward_time=0.294, grad_norm=47.247, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.080, optim0_lr0=1.578e-04, train_time=1.249
+[gpua014:0/64] 2023-12-02 16:45:44,353 (trainer:735) INFO: 4epoch:train:6601-6700batch: iter_time=1.004e-04, forward_time=0.148, loss_ctc=135.462, loss_att=128.681, acc=0.449, loss=130.715, backward_time=0.280, grad_norm=61.537, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.081, optim0_lr0=1.583e-04, train_time=1.174
+[gpua014:0/64] 2023-12-02 16:47:44,346 (trainer:735) INFO: 4epoch:train:6701-6800batch: iter_time=9.132e-05, forward_time=0.147, loss_ctc=101.900, loss_att=99.961, acc=0.454, loss=100.543, backward_time=0.285, grad_norm=44.839, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.080, optim0_lr0=1.588e-04, train_time=1.200
+[gpua014:0/64] 2023-12-02 16:50:04,730 (trainer:735) INFO: 4epoch:train:6801-6900batch: iter_time=9.217e-05, forward_time=0.147, loss_ctc=121.314, loss_att=122.326, acc=0.442, loss=122.022, backward_time=0.287, grad_norm=54.781, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.080, optim0_lr0=1.593e-04, train_time=1.404
+[gpua014:0/64] 2023-12-02 16:52:27,399 (trainer:735) INFO: 4epoch:train:6901-7000batch: iter_time=9.114e-05, forward_time=0.147, loss_ctc=109.197, loss_att=98.290, acc=0.477, loss=101.562, backward_time=0.304, grad_norm=51.637, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.080, optim0_lr0=1.598e-04, train_time=1.426
+[gpua014:0/64] 2023-12-02 16:54:50,741 (trainer:735) INFO: 4epoch:train:7001-7100batch: iter_time=9.321e-05, forward_time=0.236, loss_ctc=106.980, loss_att=105.158, acc=0.475, loss=105.705, backward_time=0.359, grad_norm=48.822, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.085, optim0_lr0=1.603e-04, train_time=1.433
+[gpua014:0/64] 2023-12-02 16:57:09,895 (trainer:735) INFO: 4epoch:train:7101-7200batch: iter_time=9.285e-05, forward_time=0.147, loss_ctc=123.681, loss_att=114.008, acc=0.477, loss=116.910, backward_time=0.292, grad_norm=60.814, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.079, optim0_lr0=1.608e-04, train_time=1.391
+[gpua014:0/64] 2023-12-02 16:59:22,128 (trainer:735) INFO: 4epoch:train:7201-7300batch: iter_time=9.212e-05, forward_time=0.146, loss_ctc=110.584, loss_att=104.204, acc=0.465, loss=106.118, backward_time=0.281, grad_norm=49.744, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.079, optim0_lr0=1.613e-04, train_time=1.322
+[gpua014:0/64] 2023-12-02 17:01:33,916 (trainer:735) INFO: 4epoch:train:7301-7400batch: iter_time=8.770e-05, forward_time=0.147, loss_ctc=119.263, loss_att=115.800, acc=0.462, loss=116.839, backward_time=0.287, grad_norm=52.155, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.079, optim0_lr0=1.618e-04, train_time=1.318
+[gpua014:0/64] 2023-12-02 17:03:44,565 (trainer:735) INFO: 4epoch:train:7401-7500batch: iter_time=8.884e-05, forward_time=0.147, loss_ctc=116.281, loss_att=115.212, acc=0.455, loss=115.533, backward_time=0.277, grad_norm=49.512, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.079, optim0_lr0=1.623e-04, train_time=1.306
+[gpua014:0/64] 2023-12-02 17:03:50,058 (multiple_iter_factory:32) INFO: Building 6th iter-factory...
+[gpua014:0/64] 2023-12-02 17:04:08,708 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua014:0/64] 2023-12-02 17:04:12,265 (abs_task:1616) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TPreprocessor object at 0x7fa16578de70>)
+[gpua014:0/64] 2023-12-02 17:04:12,265 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, 
+[gpua014:0/64] 2023-12-02 17:04:12,268 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua014:0/64] 2023-12-02 17:14:16,741 (trainer:735) INFO: 4epoch:train:7501-7600batch: iter_time=1.400, forward_time=0.150, loss_ctc=118.753, loss_att=106.656, acc=0.474, loss=110.285, backward_time=0.408, grad_norm=54.752, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.080, optim0_lr0=1.628e-04, train_time=6.322
+[gpua014:0/64] 2023-12-02 17:18:55,297 (trainer:735) INFO: 4epoch:train:7601-7700batch: iter_time=8.355e-05, forward_time=0.147, loss_ctc=114.616, loss_att=112.945, acc=0.453, loss=113.446, backward_time=0.469, grad_norm=51.751, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.080, optim0_lr0=1.633e-04, train_time=2.785
+[gpua014:0/64] 2023-12-02 17:22:37,827 (trainer:735) INFO: 4epoch:train:7701-7800batch: iter_time=9.257e-05, forward_time=0.147, loss_ctc=120.973, loss_att=111.815, acc=0.490, loss=114.563, backward_time=0.399, grad_norm=51.579, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.079, optim0_lr0=1.638e-04, train_time=2.225
+[gpua014:0/64] 2023-12-02 17:26:32,034 (trainer:735) INFO: 4epoch:train:7801-7900batch: iter_time=9.408e-05, forward_time=0.149, loss_ctc=126.657, loss_att=120.766, acc=0.475, loss=122.533, backward_time=0.441, grad_norm=57.301, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.080, optim0_lr0=1.643e-04, train_time=2.342
+[gpua014:0/64] 2023-12-02 17:29:51,898 (trainer:735) INFO: 4epoch:train:7901-8000batch: iter_time=9.108e-05, forward_time=0.151, loss_ctc=106.484, loss_att=102.934, acc=0.468, loss=103.999, backward_time=0.397, grad_norm=47.216, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.080, optim0_lr0=1.648e-04, train_time=1.998
+[gpua014:0/64] 2023-12-02 17:33:17,761 (trainer:735) INFO: 4epoch:train:8001-8100batch: iter_time=9.327e-05, forward_time=0.147, loss_ctc=108.255, loss_att=105.586, acc=0.464, loss=106.387, backward_time=0.411, grad_norm=50.759, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.079, optim0_lr0=1.653e-04, train_time=2.058
+[gpua014:0/64] 2023-12-02 17:37:49,424 (trainer:735) INFO: 4epoch:train:8101-8200batch: iter_time=9.500e-05, forward_time=0.147, loss_ctc=119.509, loss_att=115.912, acc=0.458, loss=116.991, backward_time=0.447, grad_norm=51.081, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.079, optim0_lr0=1.658e-04, train_time=2.716
+[gpua014:0/64] 2023-12-02 17:42:28,406 (trainer:735) INFO: 4epoch:train:8201-8300batch: iter_time=8.926e-05, forward_time=0.149, loss_ctc=106.460, loss_att=94.038, acc=0.500, loss=97.765, backward_time=0.468, grad_norm=51.602, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.079, optim0_lr0=1.663e-04, train_time=2.790
+[gpua014:0/64] 2023-12-02 17:46:38,276 (trainer:735) INFO: 4epoch:train:8301-8400batch: iter_time=9.040e-05, forward_time=0.147, loss_ctc=103.390, loss_att=97.239, acc=0.505, loss=99.084, backward_time=0.474, grad_norm=45.119, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.080, optim0_lr0=1.668e-04, train_time=2.498
+[gpua014:0/64] 2023-12-02 17:50:26,249 (trainer:735) INFO: 4epoch:train:8401-8500batch: iter_time=8.744e-05, forward_time=0.147, loss_ctc=119.541, loss_att=107.007, acc=0.470, loss=110.767, backward_time=0.377, grad_norm=59.242, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.079, optim0_lr0=1.673e-04, train_time=2.280
+[gpua014:0/64] 2023-12-02 17:54:35,080 (trainer:735) INFO: 4epoch:train:8501-8600batch: iter_time=9.023e-05, forward_time=0.147, loss_ctc=114.822, loss_att=111.835, acc=0.462, loss=112.731, backward_time=0.498, grad_norm=48.876, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.079, optim0_lr0=1.678e-04, train_time=2.488
+[gpua014:0/64] 2023-12-02 17:58:22,246 (trainer:735) INFO: 4epoch:train:8601-8700batch: iter_time=1.021e-04, forward_time=0.147, loss_ctc=113.143, loss_att=108.742, acc=0.483, loss=110.062, backward_time=0.415, grad_norm=49.032, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.080, optim0_lr0=1.683e-04, train_time=2.271
+[gpua014:0/64] 2023-12-02 18:00:24,985 (multiple_iter_factory:32) INFO: Building 7th iter-factory...
+[gpua014:0/64] 2023-12-02 18:00:43,342 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua014:0/64] 2023-12-02 18:00:46,848 (abs_task:1616) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TPreprocessor object at 0x7fa17502ff40>)
+[gpua014:0/64] 2023-12-02 18:00:46,849 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, 
+[gpua014:0/64] 2023-12-02 18:00:46,852 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua014:0/64] 2023-12-02 18:06:20,624 (trainer:735) INFO: 4epoch:train:8701-8800batch: iter_time=2.813, forward_time=0.186, loss_ctc=116.620, loss_att=108.320, acc=0.472, loss=110.810, backward_time=0.447, grad_norm=54.462, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.082, optim0_lr0=1.688e-04, train_time=4.784
+[gpua014:0/64] 2023-12-02 18:08:15,972 (trainer:735) INFO: 4epoch:train:8801-8900batch: iter_time=8.563e-05, forward_time=0.148, loss_ctc=113.889, loss_att=117.065, acc=0.476, loss=116.112, backward_time=0.278, grad_norm=54.025, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.080, optim0_lr0=1.693e-04, train_time=1.153
+[gpua014:0/64] 2023-12-02 18:10:17,597 (trainer:735) INFO: 4epoch:train:8901-9000batch: iter_time=8.042e-05, forward_time=0.147, loss_ctc=121.284, loss_att=121.350, acc=0.467, loss=121.330, backward_time=0.277, grad_norm=54.603, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.080, optim0_lr0=1.698e-04, train_time=1.216
+[gpua014:0/64] 2023-12-02 18:12:42,342 (trainer:735) INFO: 4epoch:train:9001-9100batch: iter_time=8.946e-05, forward_time=0.147, loss_ctc=107.748, loss_att=103.290, acc=0.506, loss=104.628, backward_time=0.295, grad_norm=47.943, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.080, optim0_lr0=1.703e-04, train_time=1.447
+[gpua014:0/64] 2023-12-02 18:15:04,147 (trainer:735) INFO: 4epoch:train:9101-9200batch: iter_time=9.505e-05, forward_time=0.147, loss_ctc=130.539, loss_att=126.143, acc=0.470, loss=127.462, backward_time=0.290, grad_norm=57.593, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.080, optim0_lr0=1.708e-04, train_time=1.418
+[gpua014:0/64] 2023-12-02 18:17:32,224 (trainer:735) INFO: 4epoch:train:9201-9300batch: iter_time=8.912e-05, forward_time=0.147, loss_ctc=99.151, loss_att=95.097, acc=0.480, loss=96.313, backward_time=0.303, grad_norm=46.428, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.080, optim0_lr0=1.713e-04, train_time=1.481
+[gpua014:0/64] 2023-12-02 18:19:41,861 (trainer:735) INFO: 4epoch:train:9301-9400batch: iter_time=9.240e-05, forward_time=0.147, loss_ctc=119.299, loss_att=118.665, acc=0.469, loss=118.856, backward_time=0.282, grad_norm=51.552, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.080, optim0_lr0=1.718e-04, train_time=1.296
+[gpua014:0/64] 2023-12-02 18:21:59,658 (trainer:735) INFO: 4epoch:train:9401-9500batch: iter_time=9.231e-05, forward_time=0.147, loss_ctc=104.572, loss_att=98.281, acc=0.497, loss=100.168, backward_time=0.300, grad_norm=48.716, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.080, optim0_lr0=1.723e-04, train_time=1.378
+[gpua014:0/64] 2023-12-02 18:24:08,336 (trainer:735) INFO: 4epoch:train:9501-9600batch: iter_time=9.301e-05, forward_time=0.147, loss_ctc=103.689, loss_att=99.005, acc=0.502, loss=100.410, backward_time=0.291, grad_norm=50.102, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.080, optim0_lr0=1.728e-04, train_time=1.287
+[gpua014:0/64] 2023-12-02 18:26:27,234 (trainer:735) INFO: 4epoch:train:9601-9700batch: iter_time=9.086e-05, forward_time=0.148, loss_ctc=121.116, loss_att=109.589, acc=0.501, loss=113.047, backward_time=0.289, grad_norm=58.446, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.080, optim0_lr0=1.733e-04, train_time=1.389
+[gpua014:0/64] 2023-12-02 18:28:26,708 (trainer:735) INFO: 4epoch:train:9701-9800batch: iter_time=8.542e-05, forward_time=0.148, loss_ctc=107.955, loss_att=101.281, acc=0.486, loss=103.283, backward_time=0.280, grad_norm=47.832, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.080, optim0_lr0=1.738e-04, train_time=1.195
+[gpua014:0/64] 2023-12-02 18:30:58,470 (trainer:735) INFO: 4epoch:train:9801-9900batch: iter_time=8.622e-05, forward_time=0.147, loss_ctc=114.206, loss_att=111.097, acc=0.483, loss=112.030, backward_time=0.303, grad_norm=47.750, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.080, optim0_lr0=1.743e-04, train_time=1.517
+[gpua014:0/64] 2023-12-02 18:33:20,862 (trainer:735) INFO: 4epoch:train:9901-10000batch: iter_time=8.672e-05, forward_time=0.147, loss_ctc=112.257, loss_att=117.783, acc=0.466, loss=116.125, backward_time=0.305, grad_norm=50.489, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.080, optim0_lr0=1.748e-04, train_time=1.424
+[gpua014:0/64] 2023-12-02 18:33:26,141 (multiple_iter_factory:32) INFO: Building 8th iter-factory...
+[gpua014:0/64] 2023-12-02 18:33:44,880 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua014:0/64] 2023-12-02 18:33:48,441 (abs_task:1616) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TPreprocessor object at 0x7fa1691b0a00>)
+[gpua014:0/64] 2023-12-02 18:33:48,441 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, 
+[gpua014:0/64] 2023-12-02 18:33:48,444 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua014:0/64] 2023-12-02 18:40:23,412 (trainer:735) INFO: 4epoch:train:10001-10100batch: iter_time=1.378, forward_time=0.147, loss_ctc=116.995, loss_att=102.621, acc=0.491, loss=106.933, backward_time=0.286, grad_norm=55.104, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.080, optim0_lr0=1.753e-04, train_time=4.225
+[gpua014:0/64] 2023-12-02 18:42:20,385 (trainer:735) INFO: 4epoch:train:10101-10200batch: iter_time=8.430e-05, forward_time=0.146, loss_ctc=111.902, loss_att=108.955, acc=0.472, loss=109.839, backward_time=0.277, grad_norm=51.795, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.080, optim0_lr0=1.758e-04, train_time=1.170
+[gpua014:0/64] 2023-12-02 18:44:17,903 (trainer:735) INFO: 4epoch:train:10201-10300batch: iter_time=8.349e-05, forward_time=0.147, loss_ctc=117.771, loss_att=106.423, acc=0.507, loss=109.828, backward_time=0.279, grad_norm=52.611, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.080, optim0_lr0=1.763e-04, train_time=1.175
+[gpua014:0/64] 2023-12-02 18:46:45,654 (trainer:735) INFO: 4epoch:train:10301-10400batch: iter_time=8.588e-05, forward_time=0.155, loss_ctc=122.709, loss_att=114.499, acc=0.495, loss=116.962, backward_time=0.313, grad_norm=55.999, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.082, optim0_lr0=1.768e-04, train_time=1.477
+[gpua014:0/64] 2023-12-02 18:49:24,429 (trainer:735) INFO: 4epoch:train:10401-10500batch: iter_time=8.798e-05, forward_time=0.247, loss_ctc=103.011, loss_att=97.584, acc=0.485, loss=99.212, backward_time=0.322, grad_norm=48.671, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.090, optim0_lr0=1.773e-04, train_time=1.587
+[gpua014:0/64] 2023-12-02 18:51:59,547 (trainer:735) INFO: 4epoch:train:10501-10600batch: iter_time=8.589e-05, forward_time=0.146, loss_ctc=107.399, loss_att=100.315, acc=0.479, loss=102.440, backward_time=0.314, grad_norm=50.828, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.080, optim0_lr0=1.778e-04, train_time=1.551
+[gpua014:0/64] 2023-12-02 18:54:21,263 (trainer:735) INFO: 4epoch:train:10601-10700batch: iter_time=8.335e-05, forward_time=0.146, loss_ctc=117.122, loss_att=110.922, acc=0.474, loss=112.782, backward_time=0.292, grad_norm=50.568, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.080, optim0_lr0=1.783e-04, train_time=1.417
+[gpua014:0/64] 2023-12-02 18:56:44,505 (trainer:735) INFO: 4epoch:train:10701-10800batch: iter_time=8.270e-05, forward_time=0.150, loss_ctc=103.728, loss_att=88.584, acc=0.518, loss=93.127, backward_time=0.284, grad_norm=49.668, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.080, optim0_lr0=1.788e-04, train_time=1.432
+[gpua014:0/64] 2023-12-02 18:59:06,494 (trainer:735) INFO: 4epoch:train:10801-10900batch: iter_time=8.053e-05, forward_time=0.146, loss_ctc=102.418, loss_att=93.562, acc=0.523, loss=96.219, backward_time=0.310, grad_norm=45.565, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.080, optim0_lr0=1.793e-04, train_time=1.420
+[gpua014:0/64] 2023-12-02 19:01:17,834 (trainer:735) INFO: 4epoch:train:10901-11000batch: iter_time=8.209e-05, forward_time=0.148, loss_ctc=119.151, loss_att=103.355, acc=0.485, loss=108.094, backward_time=0.293, grad_norm=62.478, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.080, optim0_lr0=1.798e-04, train_time=1.313
+[gpua014:0/64] 2023-12-02 19:03:24,634 (trainer:735) INFO: 4epoch:train:11001-11100batch: iter_time=8.398e-05, forward_time=0.147, loss_ctc=111.818, loss_att=106.497, acc=0.479, loss=108.093, backward_time=0.289, grad_norm=46.925, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.081, optim0_lr0=1.803e-04, train_time=1.268
+[gpua014:0/64] 2023-12-02 19:05:54,212 (trainer:735) INFO: 4epoch:train:11101-11200batch: iter_time=8.379e-05, forward_time=0.147, loss_ctc=110.631, loss_att=103.821, acc=0.500, loss=105.864, backward_time=0.282, grad_norm=49.942, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.080, optim0_lr0=1.808e-04, train_time=1.496
+[gpua014:0/64] 2023-12-02 19:07:35,986 (multiple_iter_factory:32) INFO: Building 9th iter-factory...
+[gpua014:0/64] 2023-12-02 19:07:54,995 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua014:0/64] 2023-12-02 19:07:58,541 (abs_task:1616) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TPreprocessor object at 0x7fa176f0f940>)
+[gpua014:0/64] 2023-12-02 19:07:58,541 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, 
+[gpua014:0/64] 2023-12-02 19:07:58,545 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua014:0/64] 2023-12-02 19:15:48,092 (trainer:735) INFO: 4epoch:train:11201-11300batch: iter_time=2.512, forward_time=0.147, loss_ctc=113.200, loss_att=99.576, acc=0.494, loss=103.664, backward_time=0.335, grad_norm=51.391, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.080, optim0_lr0=1.813e-04, train_time=5.939
+[gpua014:0/64] 2023-12-02 19:18:03,048 (trainer:735) INFO: 4epoch:train:11301-11400batch: iter_time=7.548e-05, forward_time=0.147, loss_ctc=110.500, loss_att=104.599, acc=0.492, loss=106.370, backward_time=0.293, grad_norm=52.494, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.080, optim0_lr0=1.818e-04, train_time=1.349
+[gpua014:0/64] 2023-12-02 19:20:17,807 (trainer:735) INFO: 4epoch:train:11401-11500batch: iter_time=8.214e-05, forward_time=0.146, loss_ctc=118.935, loss_att=105.294, acc=0.493, loss=109.386, backward_time=0.339, grad_norm=50.675, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.080, optim0_lr0=1.823e-04, train_time=1.347
+[gpua014:0/64] 2023-12-02 19:22:43,063 (trainer:735) INFO: 4epoch:train:11501-11600batch: iter_time=9.268e-05, forward_time=0.146, loss_ctc=104.433, loss_att=96.899, acc=0.520, loss=99.160, backward_time=0.321, grad_norm=48.715, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.080, optim0_lr0=1.828e-04, train_time=1.452
+[gpua014:0/64] 2023-12-02 19:25:03,065 (trainer:735) INFO: 4epoch:train:11601-11700batch: iter_time=9.023e-05, forward_time=0.146, loss_ctc=129.251, loss_att=116.128, acc=0.487, loss=120.065, backward_time=0.285, grad_norm=61.110, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.080, optim0_lr0=1.833e-04, train_time=1.400
+[gpua014:0/64] 2023-12-02 19:27:34,592 (trainer:735) INFO: 4epoch:train:11701-11800batch: iter_time=9.513e-05, forward_time=0.146, loss_ctc=97.063, loss_att=90.911, acc=0.491, loss=92.756, backward_time=0.312, grad_norm=46.061, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.080, optim0_lr0=1.838e-04, train_time=1.515
+[gpua014:0/64] 2023-12-02 19:29:35,880 (trainer:735) INFO: 4epoch:train:11801-11900batch: iter_time=9.686e-05, forward_time=0.146, loss_ctc=115.954, loss_att=112.584, acc=0.478, loss=113.595, backward_time=0.278, grad_norm=50.509, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.080, optim0_lr0=1.843e-04, train_time=1.213
+[gpua014:0/64] 2023-12-02 19:32:04,597 (trainer:735) INFO: 4epoch:train:11901-12000batch: iter_time=9.099e-05, forward_time=0.146, loss_ctc=104.448, loss_att=87.947, acc=0.517, loss=92.897, backward_time=0.325, grad_norm=49.418, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.080, optim0_lr0=1.848e-04, train_time=1.487
+[gpua014:0/64] 2023-12-02 19:34:15,386 (trainer:735) INFO: 4epoch:train:12001-12100batch: iter_time=8.316e-05, forward_time=0.146, loss_ctc=101.240, loss_att=93.807, acc=0.515, loss=96.037, backward_time=0.290, grad_norm=47.784, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.080, optim0_lr0=1.853e-04, train_time=1.308
+[gpua014:0/64] 2023-12-02 19:36:30,702 (trainer:735) INFO: 4epoch:train:12101-12200batch: iter_time=8.624e-05, forward_time=0.147, loss_ctc=117.844, loss_att=104.479, acc=0.512, loss=108.489, backward_time=0.289, grad_norm=57.134, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.080, optim0_lr0=1.858e-04, train_time=1.353
+[gpua014:0/64] 2023-12-02 19:38:57,636 (trainer:735) INFO: 4epoch:train:12201-12300batch: iter_time=9.031e-05, forward_time=0.166, loss_ctc=105.354, loss_att=94.435, acc=0.505, loss=97.711, backward_time=0.305, grad_norm=46.823, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.082, optim0_lr0=1.863e-04, train_time=1.469
+[gpua014:0/64] 2023-12-02 19:41:50,296 (trainer:735) INFO: 4epoch:train:12301-12400batch: iter_time=8.712e-05, forward_time=0.166, loss_ctc=112.662, loss_att=105.197, acc=0.495, loss=107.436, backward_time=0.354, grad_norm=49.673, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.084, optim0_lr0=1.868e-04, train_time=1.726
+[gpua014:0/64] 2023-12-02 19:44:34,994 (trainer:735) INFO: 4epoch:train:12401-12500batch: iter_time=8.528e-05, forward_time=0.185, loss_ctc=110.136, loss_att=103.808, acc=0.492, loss=105.706, backward_time=0.309, grad_norm=47.508, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.081, optim0_lr0=1.873e-04, train_time=1.647
+[gpua014:0/64] 2023-12-02 19:44:40,269 (multiple_iter_factory:32) INFO: Building 10th iter-factory...
+[gpua014:0/64] 2023-12-02 19:44:58,504 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua014:0/64] 2023-12-02 19:45:02,045 (abs_task:1616) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TPreprocessor object at 0x7fa12d0b2140>)
+[gpua014:0/64] 2023-12-02 19:45:02,045 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, 
+[gpua014:0/64] 2023-12-02 19:45:02,048 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua014:0/64] 2023-12-02 19:51:37,584 (trainer:735) INFO: 4epoch:train:12501-12600batch: iter_time=1.365, forward_time=0.159, loss_ctc=113.665, loss_att=101.677, acc=0.510, loss=105.273, backward_time=0.280, grad_norm=53.686, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.080, optim0_lr0=1.878e-04, train_time=4.226
+[gpua014:0/64] 2023-12-02 19:53:36,056 (trainer:735) INFO: 4epoch:train:12601-12700batch: iter_time=7.495e-05, forward_time=0.147, loss_ctc=109.263, loss_att=115.184, acc=0.484, loss=113.408, backward_time=0.284, grad_norm=52.643, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.080, optim0_lr0=1.883e-04, train_time=1.185
+[gpua014:0/64] 2023-12-02 19:55:39,693 (trainer:735) INFO: 4epoch:train:12701-12800batch: iter_time=7.979e-05, forward_time=0.149, loss_ctc=114.353, loss_att=103.743, acc=0.531, loss=106.926, backward_time=0.279, grad_norm=51.472, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.080, optim0_lr0=1.888e-04, train_time=1.236
+[gpua014:0/64] 2023-12-02 19:57:46,745 (trainer:735) INFO: 4epoch:train:12801-12900batch: iter_time=9.210e-05, forward_time=0.147, loss_ctc=119.646, loss_att=113.461, acc=0.508, loss=115.317, backward_time=0.282, grad_norm=55.010, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.080, optim0_lr0=1.893e-04, train_time=1.270
+[gpua014:0/64] 2023-12-02 20:00:02,200 (trainer:735) INFO: 4epoch:train:12901-13000batch: iter_time=8.790e-05, forward_time=0.147, loss_ctc=101.919, loss_att=98.354, acc=0.499, loss=99.423, backward_time=0.300, grad_norm=48.798, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.081, optim0_lr0=1.898e-04, train_time=1.354
+[gpua014:0/64] 2023-12-02 20:02:42,832 (trainer:735) INFO: 4epoch:train:13001-13100batch: iter_time=8.852e-05, forward_time=0.147, loss_ctc=103.696, loss_att=97.070, acc=0.502, loss=99.058, backward_time=0.312, grad_norm=49.688, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.080, optim0_lr0=1.903e-04, train_time=1.606
+[gpua014:0/64] 2023-12-02 20:05:20,109 (trainer:735) INFO: 4epoch:train:13101-13200batch: iter_time=8.665e-05, forward_time=0.147, loss_ctc=114.256, loss_att=110.700, acc=0.494, loss=111.767, backward_time=0.293, grad_norm=49.722, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.080, optim0_lr0=1.908e-04, train_time=1.573
+[gpua014:0/64] 2023-12-02 20:07:46,743 (trainer:735) INFO: 4epoch:train:13201-13300batch: iter_time=8.349e-05, forward_time=0.147, loss_ctc=100.597, loss_att=87.391, acc=0.531, loss=91.353, backward_time=0.286, grad_norm=48.320, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.080, optim0_lr0=1.913e-04, train_time=1.466
+[gpua014:0/64] 2023-12-02 20:10:02,229 (trainer:735) INFO: 4epoch:train:13301-13400batch: iter_time=8.087e-05, forward_time=0.147, loss_ctc=99.182, loss_att=91.570, acc=0.538, loss=93.854, backward_time=0.282, grad_norm=45.973, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.080, optim0_lr0=1.918e-04, train_time=1.355
+[gpua014:0/64] 2023-12-02 20:12:09,479 (trainer:735) INFO: 4epoch:train:13401-13500batch: iter_time=8.730e-05, forward_time=0.148, loss_ctc=116.088, loss_att=101.225, acc=0.503, loss=105.684, backward_time=0.286, grad_norm=61.258, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.080, optim0_lr0=1.923e-04, train_time=1.272
+[gpua014:0/64] 2023-12-02 20:14:32,372 (trainer:735) INFO: 4epoch:train:13501-13600batch: iter_time=8.944e-05, forward_time=0.149, loss_ctc=109.986, loss_att=104.582, acc=0.491, loss=106.203, backward_time=0.311, grad_norm=48.928, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.080, optim0_lr0=1.928e-04, train_time=1.429
+[gpua014:0/64] 2023-12-02 20:16:47,277 (trainer:735) INFO: 4epoch:train:13601-13700batch: iter_time=8.771e-05, forward_time=0.147, loss_ctc=108.483, loss_att=107.134, acc=0.509, loss=107.538, backward_time=0.299, grad_norm=51.752, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.080, optim0_lr0=1.933e-04, train_time=1.349
+[gpua014:0/64] 2023-12-02 20:18:27,255 (multiple_iter_factory:32) INFO: Building 11th iter-factory...
+[gpua014:0/64] 2023-12-02 20:18:46,330 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua014:0/64] 2023-12-02 20:18:49,946 (abs_task:1616) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TPreprocessor object at 0x7fa133a87c70>)
+[gpua014:0/64] 2023-12-02 20:18:49,946 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, 
+[gpua014:0/64] 2023-12-02 20:18:49,949 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua014:0/64] 2023-12-02 20:24:01,121 (trainer:735) INFO: 4epoch:train:13701-13800batch: iter_time=2.851, forward_time=0.180, loss_ctc=111.219, loss_att=101.474, acc=0.504, loss=104.397, backward_time=0.295, grad_norm=54.144, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.081, optim0_lr0=1.938e-04, train_time=4.338
+[gpua014:0/64] 2023-12-02 20:26:00,023 (trainer:735) INFO: 4epoch:train:13801-13900batch: iter_time=0.002, forward_time=0.148, loss_ctc=109.238, loss_att=104.344, acc=0.513, loss=105.812, backward_time=0.279, grad_norm=54.421, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.080, optim0_lr0=1.943e-04, train_time=1.189
+[gpua014:0/64] 2023-12-02 20:28:07,691 (trainer:735) INFO: 4epoch:train:13901-14000batch: iter_time=8.126e-05, forward_time=0.149, loss_ctc=116.187, loss_att=108.753, acc=0.504, loss=110.984, backward_time=0.284, grad_norm=54.897, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.080, optim0_lr0=1.948e-04, train_time=1.276
+[gpua014:0/64] 2023-12-02 20:30:37,527 (trainer:735) INFO: 4epoch:train:14001-14100batch: iter_time=8.912e-05, forward_time=0.147, loss_ctc=102.260, loss_att=94.218, acc=0.536, loss=96.631, backward_time=0.316, grad_norm=45.882, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.080, optim0_lr0=1.953e-04, train_time=1.498
+[gpua014:0/64] 2023-12-02 20:33:02,898 (trainer:735) INFO: 4epoch:train:14101-14200batch: iter_time=9.148e-05, forward_time=0.147, loss_ctc=125.911, loss_att=115.543, acc=0.500, loss=118.653, backward_time=0.334, grad_norm=56.970, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.080, optim0_lr0=1.958e-04, train_time=1.453
+[gpua014:0/64] 2023-12-02 20:35:15,665 (trainer:735) INFO: 4epoch:train:14201-14300batch: iter_time=9.597e-05, forward_time=0.146, loss_ctc=95.288, loss_att=86.461, acc=0.515, loss=89.109, backward_time=0.288, grad_norm=45.196, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.080, optim0_lr0=1.963e-04, train_time=1.327
+[gpua014:0/64] 2023-12-02 20:37:11,801 (trainer:735) INFO: 4epoch:train:14301-14400batch: iter_time=8.727e-05, forward_time=0.150, loss_ctc=113.768, loss_att=108.266, acc=0.503, loss=109.916, backward_time=0.278, grad_norm=50.368, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.080, optim0_lr0=1.968e-04, train_time=1.161
+[gpua014:0/64] 2023-12-02 20:39:42,318 (trainer:735) INFO: 4epoch:train:14401-14500batch: iter_time=8.353e-05, forward_time=0.263, loss_ctc=100.585, loss_att=90.152, acc=0.525, loss=93.282, backward_time=0.332, grad_norm=49.297, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.084, optim0_lr0=1.973e-04, train_time=1.505
+[gpua014:0/64] 2023-12-02 20:42:04,100 (trainer:735) INFO: 4epoch:train:14501-14600batch: iter_time=9.199e-05, forward_time=0.148, loss_ctc=98.499, loss_att=91.770, acc=0.530, loss=93.789, backward_time=0.336, grad_norm=46.575, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.080, optim0_lr0=1.978e-04, train_time=1.417
+[gpua014:0/64] 2023-12-02 20:44:33,881 (trainer:735) INFO: 4epoch:train:14601-14700batch: iter_time=9.473e-05, forward_time=0.146, loss_ctc=114.948, loss_att=100.431, acc=0.531, loss=104.786, backward_time=0.334, grad_norm=55.750, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.080, optim0_lr0=1.983e-04, train_time=1.498
+[gpua014:0/64] 2023-12-02 20:47:23,757 (trainer:735) INFO: 4epoch:train:14701-14800batch: iter_time=8.770e-05, forward_time=0.149, loss_ctc=103.978, loss_att=92.965, acc=0.517, loss=96.269, backward_time=0.374, grad_norm=47.948, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.080, optim0_lr0=1.988e-04, train_time=1.699
+[gpua014:0/64] 2023-12-02 20:49:33,493 (trainer:735) INFO: 4epoch:train:14801-14900batch: iter_time=8.120e-05, forward_time=0.150, loss_ctc=109.127, loss_att=103.414, acc=0.509, loss=105.128, backward_time=0.282, grad_norm=48.830, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.080, optim0_lr0=1.993e-04, train_time=1.297
+[gpua014:0/64] 2023-12-02 20:52:05,795 (trainer:735) INFO: 4epoch:train:14901-15000batch: iter_time=8.120e-05, forward_time=0.147, loss_ctc=108.337, loss_att=108.815, acc=0.497, loss=108.672, backward_time=0.311, grad_norm=49.029, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.080, optim0_lr0=1.998e-04, train_time=1.523
+[gpua014:0/64] 2023-12-02 21:18:10,699 (trainer:341) INFO: 4epoch results: [train] iter_time=0.177, forward_time=0.152, loss_ctc=116.786, loss_att=114.897, acc=0.457, loss=115.464, backward_time=0.308, grad_norm=53.077, clip=100.000, loss_scale=9.328e+12, optim_step_time=0.080, optim0_lr0=1.625e-04, train_time=1.740, time=7 hours, 15 minutes and 32.49 seconds, total_count=60000, gpu_max_cached_mem_GB=37.328, [valid] loss_ctc=91.652, cer_ctc=0.445, loss_att=77.951, acc=0.427, cer=0.532, wer=1.000, loss=82.061, time=25 minutes and 40.95 seconds, total_count=18684, gpu_max_cached_mem_GB=37.328
+[gpua014:0/64] 2023-12-02 21:18:31,751 (trainer:389) INFO: The best model has been updated: valid.acc, valid.total_count
+[gpua014:0/64] 2023-12-02 21:18:31,837 (trainer:272) INFO: 5/40epoch started. Estimated time to finish: 1 week, 4 days and 17 hours
+[gpua014:0/64] 2023-12-02 21:18:31,914 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpua014:0/64] 2023-12-02 21:18:49,997 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua014:0/64] 2023-12-02 21:18:53,775 (abs_task:1616) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TPreprocessor object at 0x7fa1c4fe7fd0>)
+[gpua014:0/64] 2023-12-02 21:18:53,775 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, 
+[gpua014:0/64] 2023-12-02 21:18:53,778 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua014:0/64] 2023-12-02 21:25:13,054 (trainer:735) INFO: 5epoch:train:1-100batch: iter_time=2.543, forward_time=0.170, loss_ctc=109.786, loss_att=95.078, acc=0.488, loss=99.490, backward_time=0.284, grad_norm=60.128, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.082, optim0_lr0=1.999e-04, train_time=4.011
+[gpua014:0/64] 2023-12-02 21:27:10,724 (trainer:735) INFO: 5epoch:train:101-200batch: iter_time=8.415e-05, forward_time=0.147, loss_ctc=117.489, loss_att=114.207, acc=0.485, loss=115.192, backward_time=0.277, grad_norm=56.160, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.080, optim0_lr0=1.997e-04, train_time=1.177
+[gpua014:0/64] 2023-12-02 21:29:20,423 (trainer:735) INFO: 5epoch:train:201-300batch: iter_time=8.843e-05, forward_time=0.255, loss_ctc=112.078, loss_att=103.130, acc=0.502, loss=105.814, backward_time=0.302, grad_norm=48.697, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.087, optim0_lr0=1.996e-04, train_time=1.296
+[gpua014:0/64] 2023-12-02 21:31:27,019 (trainer:735) INFO: 5epoch:train:301-400batch: iter_time=8.703e-05, forward_time=0.168, loss_ctc=130.047, loss_att=119.893, acc=0.496, loss=122.940, backward_time=0.289, grad_norm=62.472, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.082, optim0_lr0=1.994e-04, train_time=1.266
+[gpua014:0/64] 2023-12-02 21:33:44,144 (trainer:735) INFO: 5epoch:train:401-500batch: iter_time=8.175e-05, forward_time=0.146, loss_ctc=112.137, loss_att=104.134, acc=0.523, loss=106.535, backward_time=0.287, grad_norm=49.256, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.080, optim0_lr0=1.993e-04, train_time=1.371
+[gpua014:0/64] 2023-12-02 21:35:56,122 (trainer:735) INFO: 5epoch:train:501-600batch: iter_time=8.423e-05, forward_time=0.146, loss_ctc=98.811, loss_att=94.922, acc=0.502, loss=96.089, backward_time=0.294, grad_norm=44.748, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.080, optim0_lr0=1.991e-04, train_time=1.320
+[gpua014:0/64] 2023-12-02 21:38:12,878 (trainer:735) INFO: 5epoch:train:601-700batch: iter_time=8.923e-05, forward_time=0.146, loss_ctc=133.458, loss_att=107.378, acc=0.502, loss=115.202, backward_time=0.288, grad_norm=61.860, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.080, optim0_lr0=1.989e-04, train_time=1.367
+[gpua014:0/64] 2023-12-02 21:40:21,799 (trainer:735) INFO: 5epoch:train:701-800batch: iter_time=8.425e-05, forward_time=0.146, loss_ctc=114.878, loss_att=103.549, acc=0.524, loss=106.948, backward_time=0.282, grad_norm=51.135, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.080, optim0_lr0=1.988e-04, train_time=1.289
+[gpua014:0/64] 2023-12-02 21:43:06,872 (trainer:735) INFO: 5epoch:train:801-900batch: iter_time=5.886e-04, forward_time=0.218, loss_ctc=100.453, loss_att=83.157, acc=0.531, loss=88.346, backward_time=0.320, grad_norm=47.226, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.085, optim0_lr0=1.986e-04, train_time=1.651
+[gpua014:0/64] 2023-12-02 21:45:39,067 (trainer:735) INFO: 5epoch:train:901-1000batch: iter_time=8.360e-05, forward_time=0.161, loss_ctc=118.688, loss_att=108.715, acc=0.496, loss=111.707, backward_time=0.314, grad_norm=54.989, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.082, optim0_lr0=1.984e-04, train_time=1.521
+[gpua014:0/64] 2023-12-02 21:48:01,408 (trainer:735) INFO: 5epoch:train:1001-1100batch: iter_time=8.699e-05, forward_time=0.153, loss_ctc=117.521, loss_att=107.935, acc=0.507, loss=110.811, backward_time=0.297, grad_norm=50.178, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.081, optim0_lr0=1.983e-04, train_time=1.424
+[gpua014:0/64] 2023-12-02 21:50:17,105 (trainer:735) INFO: 5epoch:train:1101-1200batch: iter_time=9.116e-05, forward_time=0.148, loss_ctc=96.952, loss_att=80.224, acc=0.566, loss=85.243, backward_time=0.285, grad_norm=41.041, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.081, optim0_lr0=1.981e-04, train_time=1.357
+[gpua014:0/64] 2023-12-02 21:51:43,289 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
+[gpua014:0/64] 2023-12-02 21:52:01,348 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua014:0/64] 2023-12-02 21:52:04,793 (abs_task:1616) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TPreprocessor object at 0x7fab74f307f0>)
+[gpua014:0/64] 2023-12-02 21:52:04,793 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, 
+[gpua014:0/64] 2023-12-02 21:52:04,796 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua014:0/64] 2023-12-02 22:01:10,001 (trainer:735) INFO: 5epoch:train:1201-1300batch: iter_time=2.375, forward_time=0.148, loss_ctc=104.222, loss_att=85.740, acc=0.531, loss=91.285, backward_time=0.305, grad_norm=53.141, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.081, optim0_lr0=1.979e-04, train_time=6.529
+[gpua014:0/64] 2023-12-02 22:03:26,502 (trainer:735) INFO: 5epoch:train:1301-1400batch: iter_time=8.742e-05, forward_time=0.147, loss_ctc=107.829, loss_att=102.140, acc=0.476, loss=103.847, backward_time=0.290, grad_norm=53.323, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.081, optim0_lr0=1.978e-04, train_time=1.365
+[gpua014:0/64] 2023-12-02 22:06:12,008 (trainer:735) INFO: 5epoch:train:1401-1500batch: iter_time=9.178e-05, forward_time=0.148, loss_ctc=110.784, loss_att=102.111, acc=0.508, loss=104.713, backward_time=0.415, grad_norm=51.788, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.080, optim0_lr0=1.976e-04, train_time=1.655
+[gpua014:0/64] 2023-12-02 22:08:24,345 (trainer:735) INFO: 5epoch:train:1501-1600batch: iter_time=9.597e-05, forward_time=0.147, loss_ctc=127.398, loss_att=112.015, acc=0.495, loss=116.630, backward_time=0.292, grad_norm=66.333, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.080, optim0_lr0=1.975e-04, train_time=1.323
+[gpua014:0/64] 2023-12-02 22:10:46,150 (trainer:735) INFO: 5epoch:train:1601-1700batch: iter_time=8.880e-05, forward_time=0.147, loss_ctc=112.139, loss_att=111.340, acc=0.506, loss=111.580, backward_time=0.292, grad_norm=49.645, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.081, optim0_lr0=1.973e-04, train_time=1.418
+[gpua014:0/64] 2023-12-02 22:13:13,881 (trainer:735) INFO: 5epoch:train:1701-1800batch: iter_time=8.792e-05, forward_time=0.147, loss_ctc=99.274, loss_att=92.790, acc=0.524, loss=94.735, backward_time=0.301, grad_norm=46.354, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.080, optim0_lr0=1.971e-04, train_time=1.477
+[gpua014:0/64] 2023-12-02 22:15:38,945 (trainer:735) INFO: 5epoch:train:1801-1900batch: iter_time=9.107e-05, forward_time=0.148, loss_ctc=116.253, loss_att=94.823, acc=0.498, loss=101.252, backward_time=0.315, grad_norm=56.448, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.081, optim0_lr0=1.970e-04, train_time=1.450
+[gpua014:0/64] 2023-12-02 22:18:05,830 (trainer:735) INFO: 5epoch:train:1901-2000batch: iter_time=9.015e-05, forward_time=0.163, loss_ctc=119.214, loss_att=110.864, acc=0.496, loss=113.369, backward_time=0.294, grad_norm=56.177, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.081, optim0_lr0=1.968e-04, train_time=1.469
+[gpua014:0/64] 2023-12-02 22:21:16,149 (trainer:735) INFO: 5epoch:train:2001-2100batch: iter_time=1.009e-04, forward_time=0.226, loss_ctc=100.068, loss_att=85.879, acc=0.546, loss=90.136, backward_time=0.334, grad_norm=46.835, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.083, optim0_lr0=1.967e-04, train_time=1.903
+[gpua014:0/64] 2023-12-02 22:24:07,980 (trainer:735) INFO: 5epoch:train:2101-2200batch: iter_time=8.767e-05, forward_time=0.177, loss_ctc=109.160, loss_att=96.207, acc=0.499, loss=100.093, backward_time=0.414, grad_norm=51.968, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.083, optim0_lr0=1.965e-04, train_time=1.718
+[gpua014:0/64] 2023-12-02 22:26:36,158 (trainer:735) INFO: 5epoch:train:2201-2300batch: iter_time=9.696e-05, forward_time=0.147, loss_ctc=110.828, loss_att=94.971, acc=0.512, loss=99.728, backward_time=0.289, grad_norm=50.086, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.081, optim0_lr0=1.963e-04, train_time=1.482
+[gpua014:0/64] 2023-12-02 22:29:26,049 (trainer:735) INFO: 5epoch:train:2301-2400batch: iter_time=8.902e-05, forward_time=0.146, loss_ctc=110.614, loss_att=96.346, acc=0.542, loss=100.626, backward_time=0.341, grad_norm=49.599, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.081, optim0_lr0=1.962e-04, train_time=1.698
+[gpua014:0/64] 2023-12-02 22:32:00,900 (trainer:735) INFO: 5epoch:train:2401-2500batch: iter_time=9.152e-05, forward_time=0.147, loss_ctc=90.713, loss_att=76.251, acc=0.554, loss=80.590, backward_time=0.300, grad_norm=44.094, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.081, optim0_lr0=1.960e-04, train_time=1.549
+[gpua014:0/64] 2023-12-02 22:32:20,928 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpua014:0/64] 2023-12-02 22:32:39,533 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua014:0/64] 2023-12-02 22:32:43,071 (abs_task:1616) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TPreprocessor object at 0x7faafb391000>)
+[gpua014:0/64] 2023-12-02 22:32:43,071 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, 
+[gpua014:0/64] 2023-12-02 22:32:43,075 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua014:0/64] 2023-12-02 22:40:13,705 (trainer:735) INFO: 5epoch:train:2501-2600batch: iter_time=2.483, forward_time=0.147, loss_ctc=103.848, loss_att=87.966, acc=0.505, loss=92.731, backward_time=0.291, grad_norm=52.754, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.081, optim0_lr0=1.959e-04, train_time=4.928
+[gpua014:0/64] 2023-12-02 22:42:22,058 (trainer:735) INFO: 5epoch:train:2601-2700batch: iter_time=8.133e-05, forward_time=0.147, loss_ctc=112.698, loss_att=107.127, acc=0.502, loss=108.798, backward_time=0.286, grad_norm=55.854, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.081, optim0_lr0=1.957e-04, train_time=1.283
+[gpua014:0/64] 2023-12-02 22:44:24,166 (trainer:735) INFO: 5epoch:train:2701-2800batch: iter_time=7.829e-05, forward_time=0.146, loss_ctc=106.664, loss_att=96.902, acc=0.514, loss=99.831, backward_time=0.287, grad_norm=47.583, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.081, optim0_lr0=1.956e-04, train_time=1.221
+[gpua014:0/64] 2023-12-02 22:46:28,469 (trainer:735) INFO: 5epoch:train:2801-2900batch: iter_time=8.753e-05, forward_time=0.147, loss_ctc=122.852, loss_att=114.924, acc=0.505, loss=117.303, backward_time=0.282, grad_norm=60.588, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.082, optim0_lr0=1.954e-04, train_time=1.243
+[gpua014:0/64] 2023-12-02 22:48:41,574 (trainer:735) INFO: 5epoch:train:2901-3000batch: iter_time=8.426e-05, forward_time=0.148, loss_ctc=108.580, loss_att=99.240, acc=0.531, loss=102.042, backward_time=0.295, grad_norm=47.464, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.081, optim0_lr0=1.953e-04, train_time=1.331
+[gpua014:0/64] 2023-12-02 22:50:44,408 (trainer:735) INFO: 5epoch:train:3001-3100batch: iter_time=8.272e-05, forward_time=0.149, loss_ctc=95.845, loss_att=88.957, acc=0.516, loss=91.023, backward_time=0.277, grad_norm=43.993, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.081, optim0_lr0=1.951e-04, train_time=1.228
+[gpua014:0/64] 2023-12-02 22:52:50,551 (trainer:735) INFO: 5epoch:train:3101-3200batch: iter_time=8.174e-05, forward_time=0.147, loss_ctc=121.930, loss_att=97.876, acc=0.518, loss=105.092, backward_time=0.291, grad_norm=64.464, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.081, optim0_lr0=1.949e-04, train_time=1.261
+[gpua014:0/64] 2023-12-02 22:54:55,109 (trainer:735) INFO: 5epoch:train:3201-3300batch: iter_time=9.005e-05, forward_time=0.148, loss_ctc=107.702, loss_att=96.965, acc=0.531, loss=100.186, backward_time=0.280, grad_norm=49.099, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.081, optim0_lr0=1.948e-04, train_time=1.245
+[gpua014:0/64] 2023-12-02 22:57:09,488 (trainer:735) INFO: 5epoch:train:3301-3400batch: iter_time=8.815e-05, forward_time=0.147, loss_ctc=95.758, loss_att=78.849, acc=0.542, loss=83.922, backward_time=0.282, grad_norm=42.029, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.081, optim0_lr0=1.946e-04, train_time=1.344
+[gpua014:0/64] 2023-12-02 22:59:39,260 (trainer:735) INFO: 5epoch:train:3401-3500batch: iter_time=8.155e-05, forward_time=0.147, loss_ctc=113.475, loss_att=100.097, acc=0.507, loss=104.110, backward_time=0.312, grad_norm=54.744, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.081, optim0_lr0=1.945e-04, train_time=1.498
+[gpua014:0/64] 2023-12-02 23:01:51,046 (trainer:735) INFO: 5epoch:train:3501-3600batch: iter_time=8.410e-05, forward_time=0.154, loss_ctc=112.172, loss_att=102.030, acc=0.516, loss=105.073, backward_time=0.283, grad_norm=50.074, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.081, optim0_lr0=1.943e-04, train_time=1.318
+[gpua014:0/64] 2023-12-02 23:04:43,798 (trainer:735) INFO: 5epoch:train:3601-3700batch: iter_time=8.192e-05, forward_time=0.196, loss_ctc=93.780, loss_att=75.578, acc=0.578, loss=81.039, backward_time=0.450, grad_norm=39.885, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.090, optim0_lr0=1.942e-04, train_time=1.727
+[gpua014:0/64] 2023-12-02 23:05:50,085 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpua014:0/64] 2023-12-02 23:06:08,647 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua014:0/64] 2023-12-02 23:06:12,156 (abs_task:1616) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TPreprocessor object at 0x7faafb36bd90>)
+[gpua014:0/64] 2023-12-02 23:06:12,157 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, 
+[gpua014:0/64] 2023-12-02 23:06:12,160 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua014:0/64] 2023-12-02 23:14:01,263 (trainer:735) INFO: 5epoch:train:3701-3800batch: iter_time=1.318, forward_time=0.190, loss_ctc=99.844, loss_att=80.580, acc=0.544, loss=86.359, backward_time=0.291, grad_norm=49.397, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.082, optim0_lr0=1.940e-04, train_time=5.574
+[gpua014:0/64] 2023-12-02 23:15:58,727 (trainer:735) INFO: 5epoch:train:3801-3900batch: iter_time=7.886e-05, forward_time=0.146, loss_ctc=102.800, loss_att=95.147, acc=0.497, loss=97.442, backward_time=0.276, grad_norm=51.200, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.081, optim0_lr0=1.939e-04, train_time=1.175
+[gpua014:0/64] 2023-12-02 23:17:58,819 (trainer:735) INFO: 5epoch:train:3901-4000batch: iter_time=7.950e-05, forward_time=0.146, loss_ctc=107.442, loss_att=96.401, acc=0.526, loss=99.713, backward_time=0.277, grad_norm=54.408, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.080, optim0_lr0=1.937e-04, train_time=1.201
+[gpua014:0/64] 2023-12-02 23:20:02,565 (trainer:735) INFO: 5epoch:train:4001-4100batch: iter_time=8.264e-05, forward_time=0.148, loss_ctc=122.133, loss_att=106.846, acc=0.513, loss=111.432, backward_time=0.278, grad_norm=58.939, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.080, optim0_lr0=1.936e-04, train_time=1.237
+[gpua014:0/64] 2023-12-02 23:22:41,597 (trainer:735) INFO: 5epoch:train:4101-4200batch: iter_time=8.341e-05, forward_time=0.146, loss_ctc=108.007, loss_att=104.629, acc=0.525, loss=105.643, backward_time=0.327, grad_norm=46.565, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.080, optim0_lr0=1.934e-04, train_time=1.590
+[gpua014:0/64] 2023-12-02 23:24:51,722 (trainer:735) INFO: 5epoch:train:4201-4300batch: iter_time=8.133e-05, forward_time=0.145, loss_ctc=96.327, loss_att=87.514, acc=0.543, loss=90.158, backward_time=0.283, grad_norm=43.057, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.081, optim0_lr0=1.933e-04, train_time=1.301
+[gpua014:0/64] 2023-12-02 23:27:13,960 (trainer:735) INFO: 5epoch:train:4301-4400batch: iter_time=9.028e-05, forward_time=0.146, loss_ctc=111.265, loss_att=88.522, acc=0.519, loss=95.345, backward_time=0.287, grad_norm=50.357, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.081, optim0_lr0=1.931e-04, train_time=1.422
+[gpua014:0/64] 2023-12-02 23:29:21,180 (trainer:735) INFO: 5epoch:train:4401-4500batch: iter_time=8.659e-05, forward_time=0.146, loss_ctc=115.042, loss_att=105.613, acc=0.514, loss=108.442, backward_time=0.279, grad_norm=53.217, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.081, optim0_lr0=1.930e-04, train_time=1.272
+[gpua014:0/64] 2023-12-02 23:31:26,728 (trainer:735) INFO: 5epoch:train:4501-4600batch: iter_time=8.134e-05, forward_time=0.146, loss_ctc=95.745, loss_att=80.494, acc=0.565, loss=85.069, backward_time=0.279, grad_norm=42.330, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.081, optim0_lr0=1.928e-04, train_time=1.255
+[gpua014:0/64] 2023-12-02 23:33:50,789 (trainer:735) INFO: 5epoch:train:4601-4700batch: iter_time=7.782e-05, forward_time=0.149, loss_ctc=104.190, loss_att=91.003, acc=0.515, loss=94.959, backward_time=0.293, grad_norm=52.464, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.080, optim0_lr0=1.927e-04, train_time=1.440
+[gpua014:0/64] 2023-12-02 23:36:03,477 (trainer:735) INFO: 5epoch:train:4701-4800batch: iter_time=8.568e-05, forward_time=0.148, loss_ctc=106.443, loss_att=89.843, acc=0.531, loss=94.823, backward_time=0.301, grad_norm=48.980, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.080, optim0_lr0=1.925e-04, train_time=1.327
+[gpua014:0/64] 2023-12-02 23:38:34,688 (trainer:735) INFO: 5epoch:train:4801-4900batch: iter_time=8.245e-05, forward_time=0.146, loss_ctc=107.894, loss_att=91.112, acc=0.559, loss=96.147, backward_time=0.304, grad_norm=48.763, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.081, optim0_lr0=1.924e-04, train_time=1.512
+[gpua014:0/64] 2023-12-02 23:40:50,624 (trainer:735) INFO: 5epoch:train:4901-5000batch: iter_time=8.559e-05, forward_time=0.145, loss_ctc=87.074, loss_att=72.664, acc=0.570, loss=76.987, backward_time=0.287, grad_norm=42.296, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.080, optim0_lr0=1.922e-04, train_time=1.359
+[gpua014:0/64] 2023-12-02 23:41:10,429 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpua014:0/64] 2023-12-02 23:41:28,760 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua014:0/64] 2023-12-02 23:41:32,253 (abs_task:1616) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TPreprocessor object at 0x7fa1207e77c0>)
+[gpua014:0/64] 2023-12-02 23:41:32,253 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, 
+[gpua014:0/64] 2023-12-02 23:41:32,256 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua014:0/64] 2023-12-02 23:48:58,597 (trainer:735) INFO: 5epoch:train:5001-5100batch: iter_time=3.656, forward_time=0.185, loss_ctc=100.104, loss_att=85.929, acc=0.520, loss=90.181, backward_time=0.290, grad_norm=49.081, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.083, optim0_lr0=1.921e-04, train_time=4.879
+[gpua014:0/64] 2023-12-02 23:50:59,056 (trainer:735) INFO: 5epoch:train:5101-5200batch: iter_time=8.819e-05, forward_time=0.146, loss_ctc=108.283, loss_att=106.533, acc=0.523, loss=107.058, backward_time=0.278, grad_norm=52.062, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.080, optim0_lr0=1.919e-04, train_time=1.204
+[gpua014:0/64] 2023-12-02 23:52:57,871 (trainer:735) INFO: 5epoch:train:5201-5300batch: iter_time=9.174e-05, forward_time=0.147, loss_ctc=102.005, loss_att=95.928, acc=0.532, loss=97.751, backward_time=0.277, grad_norm=45.203, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.080, optim0_lr0=1.918e-04, train_time=1.188
+[gpua014:0/64] 2023-12-02 23:55:09,066 (trainer:735) INFO: 5epoch:train:5301-5400batch: iter_time=1.060e-04, forward_time=0.148, loss_ctc=119.577, loss_att=111.700, acc=0.526, loss=114.063, backward_time=0.282, grad_norm=56.202, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.081, optim0_lr0=1.916e-04, train_time=1.312
+[gpua014:0/64] 2023-12-02 23:57:40,537 (trainer:735) INFO: 5epoch:train:5401-5500batch: iter_time=9.868e-05, forward_time=0.148, loss_ctc=104.058, loss_att=97.256, acc=0.553, loss=99.297, backward_time=0.299, grad_norm=47.754, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.080, optim0_lr0=1.915e-04, train_time=1.514
+[gpua014:0/64] 2023-12-02 23:59:45,452 (trainer:735) INFO: 5epoch:train:5501-5600batch: iter_time=8.972e-05, forward_time=0.163, loss_ctc=91.515, loss_att=87.067, acc=0.539, loss=88.401, backward_time=0.313, grad_norm=41.983, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.085, optim0_lr0=1.913e-04, train_time=1.249
+[gpua014:0/64] 2023-12-03 00:02:07,331 (trainer:735) INFO: 5epoch:train:5601-5700batch: iter_time=9.249e-05, forward_time=0.147, loss_ctc=118.320, loss_att=96.896, acc=0.536, loss=103.323, backward_time=0.300, grad_norm=66.101, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.081, optim0_lr0=1.912e-04, train_time=1.419
+[gpua014:0/64] 2023-12-03 00:04:25,136 (trainer:735) INFO: 5epoch:train:5701-5800batch: iter_time=1.015e-04, forward_time=0.161, loss_ctc=105.254, loss_att=95.240, acc=0.555, loss=98.244, backward_time=0.309, grad_norm=48.976, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.083, optim0_lr0=1.911e-04, train_time=1.378
+[gpua014:0/64] 2023-12-03 00:06:31,867 (trainer:735) INFO: 5epoch:train:5801-5900batch: iter_time=9.639e-05, forward_time=0.168, loss_ctc=93.398, loss_att=76.373, acc=0.561, loss=81.480, backward_time=0.311, grad_norm=41.320, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.082, optim0_lr0=1.909e-04, train_time=1.267
+[gpua014:0/64] 2023-12-03 00:08:30,155 (trainer:735) INFO: 5epoch:train:5901-6000batch: iter_time=9.549e-05, forward_time=0.147, loss_ctc=108.050, loss_att=97.723, acc=0.527, loss=100.821, backward_time=0.278, grad_norm=51.406, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.080, optim0_lr0=1.908e-04, train_time=1.183
+[gpua014:0/64] 2023-12-03 00:11:14,901 (trainer:735) INFO: 5epoch:train:6001-6100batch: iter_time=9.184e-05, forward_time=0.147, loss_ctc=109.043, loss_att=99.410, acc=0.540, loss=102.300, backward_time=0.351, grad_norm=48.952, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.080, optim0_lr0=1.906e-04, train_time=1.647
+[gpua014:0/64] 2023-12-03 00:13:38,347 (trainer:735) INFO: 5epoch:train:6101-6200batch: iter_time=8.680e-05, forward_time=0.196, loss_ctc=90.610, loss_att=72.813, acc=0.600, loss=78.152, backward_time=0.322, grad_norm=39.080, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.082, optim0_lr0=1.905e-04, train_time=1.434
+[gpua014:0/64] 2023-12-03 00:15:02,840 (multiple_iter_factory:32) INFO: Building 5th iter-factory...
+[gpua014:0/64] 2023-12-03 00:15:21,031 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua014:0/64] 2023-12-03 00:15:24,571 (abs_task:1616) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TPreprocessor object at 0x7fa17a399ab0>)
+[gpua014:0/64] 2023-12-03 00:15:24,572 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, 
+[gpua014:0/64] 2023-12-03 00:15:24,575 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua014:0/64] 2023-12-03 00:20:22,824 (trainer:735) INFO: 5epoch:train:6201-6300batch: iter_time=2.607, forward_time=0.148, loss_ctc=96.392, loss_att=77.839, acc=0.561, loss=83.405, backward_time=0.279, grad_norm=50.390, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.080, optim0_lr0=1.903e-04, train_time=4.045
+[gpua014:0/64] 2023-12-03 00:22:18,390 (trainer:735) INFO: 5epoch:train:6301-6400batch: iter_time=7.433e-05, forward_time=0.146, loss_ctc=100.079, loss_att=92.963, acc=0.510, loss=95.098, backward_time=0.277, grad_norm=51.488, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.080, optim0_lr0=1.902e-04, train_time=1.155
+[gpua014:0/64] 2023-12-03 00:24:15,498 (trainer:735) INFO: 5epoch:train:6401-6500batch: iter_time=7.772e-05, forward_time=0.146, loss_ctc=102.114, loss_att=93.530, acc=0.541, loss=96.105, backward_time=0.277, grad_norm=47.072, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.080, optim0_lr0=1.900e-04, train_time=1.171
+[gpua014:0/64] 2023-12-03 00:26:13,443 (trainer:735) INFO: 5epoch:train:6501-6600batch: iter_time=8.273e-05, forward_time=0.146, loss_ctc=117.959, loss_att=103.830, acc=0.524, loss=108.069, backward_time=0.278, grad_norm=62.431, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.080, optim0_lr0=1.899e-04, train_time=1.179
+[gpua014:0/64] 2023-12-03 00:28:46,449 (trainer:735) INFO: 5epoch:train:6601-6700batch: iter_time=8.456e-05, forward_time=0.147, loss_ctc=103.505, loss_att=100.403, acc=0.539, loss=101.334, backward_time=0.291, grad_norm=45.497, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.080, optim0_lr0=1.898e-04, train_time=1.530
+[gpua014:0/64] 2023-12-03 00:30:54,765 (trainer:735) INFO: 5epoch:train:6701-6800batch: iter_time=8.493e-05, forward_time=0.147, loss_ctc=93.986, loss_att=85.633, acc=0.556, loss=88.139, backward_time=0.279, grad_norm=41.541, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.080, optim0_lr0=1.896e-04, train_time=1.283
+[gpua014:0/64] 2023-12-03 00:33:25,367 (trainer:735) INFO: 5epoch:train:6801-6900batch: iter_time=9.429e-05, forward_time=0.177, loss_ctc=106.935, loss_att=84.965, acc=0.535, loss=91.556, backward_time=0.318, grad_norm=48.987, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.082, optim0_lr0=1.895e-04, train_time=1.506
+[gpua014:0/64] 2023-12-03 00:35:37,088 (trainer:735) INFO: 5epoch:train:6901-7000batch: iter_time=8.528e-05, forward_time=0.187, loss_ctc=111.565, loss_att=100.022, acc=0.531, loss=103.485, backward_time=0.315, grad_norm=56.642, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.083, optim0_lr0=1.893e-04, train_time=1.317
+[gpua014:0/64] 2023-12-03 00:37:36,997 (trainer:735) INFO: 5epoch:train:7001-7100batch: iter_time=8.900e-05, forward_time=0.147, loss_ctc=93.324, loss_att=77.796, acc=0.577, loss=82.454, backward_time=0.280, grad_norm=42.727, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.080, optim0_lr0=1.892e-04, train_time=1.199
+[gpua014:0/64] 2023-12-03 00:39:48,643 (trainer:735) INFO: 5epoch:train:7101-7200batch: iter_time=8.385e-05, forward_time=0.147, loss_ctc=101.865, loss_att=88.511, acc=0.528, loss=92.517, backward_time=0.296, grad_norm=47.727, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.081, optim0_lr0=1.891e-04, train_time=1.316
+[gpua014:0/64] 2023-12-03 00:42:38,454 (trainer:735) INFO: 5epoch:train:7201-7300batch: iter_time=8.416e-05, forward_time=0.178, loss_ctc=103.315, loss_att=86.719, acc=0.545, loss=91.698, backward_time=0.357, grad_norm=47.072, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.084, optim0_lr0=1.889e-04, train_time=1.698
+[gpua014:0/64] 2023-12-03 00:44:45,810 (trainer:735) INFO: 5epoch:train:7301-7400batch: iter_time=8.296e-05, forward_time=0.147, loss_ctc=104.482, loss_att=88.572, acc=0.570, loss=93.345, backward_time=0.284, grad_norm=43.631, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.080, optim0_lr0=1.888e-04, train_time=1.273
+[gpua014:0/64] 2023-12-03 00:47:04,442 (trainer:735) INFO: 5epoch:train:7401-7500batch: iter_time=8.236e-05, forward_time=0.146, loss_ctc=84.201, loss_att=69.642, acc=0.583, loss=74.010, backward_time=0.289, grad_norm=41.467, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.081, optim0_lr0=1.886e-04, train_time=1.386
+[gpua014:0/64] 2023-12-03 00:47:22,067 (multiple_iter_factory:32) INFO: Building 6th iter-factory...
+[gpua014:0/64] 2023-12-03 00:47:40,711 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua014:0/64] 2023-12-03 00:47:44,335 (abs_task:1616) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TPreprocessor object at 0x7faaf837ba60>)
+[gpua014:0/64] 2023-12-03 00:47:44,335 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, 
+[gpua014:0/64] 2023-12-03 00:47:44,338 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua014:0/64] 2023-12-03 00:54:03,683 (trainer:735) INFO: 5epoch:train:7501-7600batch: iter_time=2.944, forward_time=0.169, loss_ctc=96.894, loss_att=81.138, acc=0.534, loss=85.865, backward_time=0.281, grad_norm=49.279, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.080, optim0_lr0=1.885e-04, train_time=4.192
+[gpua014:0/64] 2023-12-03 00:56:02,867 (trainer:735) INFO: 5epoch:train:7601-7700batch: iter_time=9.003e-05, forward_time=0.147, loss_ctc=105.252, loss_att=98.143, acc=0.535, loss=100.276, backward_time=0.278, grad_norm=51.734, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.080, optim0_lr0=1.884e-04, train_time=1.192
+[gpua014:0/64] 2023-12-03 00:57:58,395 (trainer:735) INFO: 5epoch:train:7701-7800batch: iter_time=8.573e-05, forward_time=0.147, loss_ctc=100.074, loss_att=88.568, acc=0.545, loss=92.020, backward_time=0.279, grad_norm=47.761, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.080, optim0_lr0=1.882e-04, train_time=1.155
+[gpua014:0/64] 2023-12-03 01:00:16,377 (trainer:735) INFO: 5epoch:train:7801-7900batch: iter_time=8.622e-05, forward_time=0.147, loss_ctc=116.888, loss_att=107.667, acc=0.531, loss=110.434, backward_time=0.287, grad_norm=55.767, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.080, optim0_lr0=1.881e-04, train_time=1.380
+[gpua014:0/64] 2023-12-03 01:02:48,955 (trainer:735) INFO: 5epoch:train:7901-8000batch: iter_time=8.966e-05, forward_time=0.147, loss_ctc=102.369, loss_att=91.704, acc=0.559, loss=94.903, backward_time=0.286, grad_norm=46.825, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.080, optim0_lr0=1.879e-04, train_time=1.526
+[gpua014:0/64] 2023-12-03 01:04:55,550 (trainer:735) INFO: 5epoch:train:8001-8100batch: iter_time=9.746e-05, forward_time=0.183, loss_ctc=89.244, loss_att=81.696, acc=0.547, loss=83.960, backward_time=0.295, grad_norm=41.196, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.083, optim0_lr0=1.878e-04, train_time=1.266
+[gpua014:0/64] 2023-12-03 01:07:25,740 (trainer:735) INFO: 5epoch:train:8101-8200batch: iter_time=9.594e-05, forward_time=0.148, loss_ctc=113.849, loss_att=90.734, acc=0.547, loss=97.669, backward_time=0.293, grad_norm=60.822, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.080, optim0_lr0=1.877e-04, train_time=1.502
+[gpua014:0/64] 2023-12-03 01:09:38,435 (trainer:735) INFO: 5epoch:train:8201-8300batch: iter_time=9.492e-05, forward_time=0.147, loss_ctc=102.706, loss_att=90.387, acc=0.559, loss=94.083, backward_time=0.288, grad_norm=50.183, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.080, optim0_lr0=1.875e-04, train_time=1.327
+[gpua014:0/64] 2023-12-03 01:11:53,487 (trainer:735) INFO: 5epoch:train:8301-8400batch: iter_time=8.637e-05, forward_time=0.147, loss_ctc=89.781, loss_att=71.770, acc=0.571, loss=77.173, backward_time=0.313, grad_norm=42.223, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.080, optim0_lr0=1.874e-04, train_time=1.350
+[gpua014:0/64] 2023-12-03 01:13:53,101 (trainer:735) INFO: 5epoch:train:8401-8500batch: iter_time=9.098e-05, forward_time=0.147, loss_ctc=106.994, loss_att=93.122, acc=0.534, loss=97.284, backward_time=0.279, grad_norm=51.829, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.080, optim0_lr0=1.872e-04, train_time=1.196
+[gpua014:0/64] 2023-12-03 01:16:19,357 (trainer:735) INFO: 5epoch:train:8501-8600batch: iter_time=9.139e-05, forward_time=0.147, loss_ctc=106.216, loss_att=94.423, acc=0.546, loss=97.961, backward_time=0.304, grad_norm=47.540, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.080, optim0_lr0=1.871e-04, train_time=1.462
+[gpua014:0/64] 2023-12-03 01:18:34,178 (trainer:735) INFO: 5epoch:train:8601-8700batch: iter_time=8.909e-05, forward_time=0.202, loss_ctc=87.658, loss_att=69.305, acc=0.609, loss=74.811, backward_time=0.322, grad_norm=39.086, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.087, optim0_lr0=1.870e-04, train_time=1.348
+[gpua014:0/64] 2023-12-03 01:19:53,195 (multiple_iter_factory:32) INFO: Building 7th iter-factory...
+[gpua014:0/64] 2023-12-03 01:20:12,003 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua014:0/64] 2023-12-03 01:20:15,670 (abs_task:1616) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TPreprocessor object at 0x7fa105643700>)
+[gpua014:0/64] 2023-12-03 01:20:15,670 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, 
+[gpua014:0/64] 2023-12-03 01:20:15,674 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua014:0/64] 2023-12-03 01:25:20,181 (trainer:735) INFO: 5epoch:train:8701-8800batch: iter_time=2.303, forward_time=0.178, loss_ctc=93.312, loss_att=74.929, acc=0.576, loss=80.444, backward_time=0.283, grad_norm=47.399, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.082, optim0_lr0=1.868e-04, train_time=4.060
+[gpua014:0/64] 2023-12-03 01:27:15,366 (trainer:735) INFO: 5epoch:train:8801-8900batch: iter_time=7.643e-05, forward_time=0.147, loss_ctc=96.233, loss_att=92.820, acc=0.520, loss=93.844, backward_time=0.277, grad_norm=47.224, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.081, optim0_lr0=1.867e-04, train_time=1.152
+[gpua014:0/64] 2023-12-03 01:29:24,360 (trainer:735) INFO: 5epoch:train:8901-9000batch: iter_time=7.996e-05, forward_time=0.147, loss_ctc=99.131, loss_att=92.896, acc=0.556, loss=94.766, backward_time=0.280, grad_norm=45.312, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.080, optim0_lr0=1.866e-04, train_time=1.290
+[gpua014:0/64] 2023-12-03 01:31:29,584 (trainer:735) INFO: 5epoch:train:9001-9100batch: iter_time=7.818e-05, forward_time=0.147, loss_ctc=116.536, loss_att=104.030, acc=0.542, loss=107.781, backward_time=0.279, grad_norm=69.595, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.080, optim0_lr0=1.864e-04, train_time=1.252
+[gpua014:0/64] 2023-12-03 01:33:33,472 (trainer:735) INFO: 5epoch:train:9101-9200batch: iter_time=8.497e-05, forward_time=0.147, loss_ctc=102.740, loss_att=100.954, acc=0.555, loss=101.490, backward_time=0.293, grad_norm=45.642, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.080, optim0_lr0=1.863e-04, train_time=1.239
+[gpua014:0/64] 2023-12-03 01:35:34,433 (trainer:735) INFO: 5epoch:train:9201-9300batch: iter_time=8.740e-05, forward_time=0.147, loss_ctc=90.758, loss_att=83.266, acc=0.575, loss=85.514, backward_time=0.285, grad_norm=40.360, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.080, optim0_lr0=1.862e-04, train_time=1.209
+[gpua014:0/64] 2023-12-03 01:37:44,488 (trainer:735) INFO: 5epoch:train:9301-9400batch: iter_time=8.180e-05, forward_time=0.192, loss_ctc=105.255, loss_att=83.833, acc=0.556, loss=90.259, backward_time=0.333, grad_norm=48.719, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.086, optim0_lr0=1.860e-04, train_time=1.300
+[gpua014:0/64] 2023-12-03 01:39:50,805 (trainer:735) INFO: 5epoch:train:9401-9500batch: iter_time=7.740e-05, forward_time=0.164, loss_ctc=109.705, loss_att=100.969, acc=0.546, loss=103.590, backward_time=0.293, grad_norm=55.096, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.080, optim0_lr0=1.859e-04, train_time=1.263
+[gpua014:0/64] 2023-12-03 01:42:15,231 (trainer:735) INFO: 5epoch:train:9501-9600batch: iter_time=8.291e-05, forward_time=0.146, loss_ctc=90.888, loss_att=76.529, acc=0.599, loss=80.836, backward_time=0.288, grad_norm=40.278, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.081, optim0_lr0=1.858e-04, train_time=1.444
+[gpua014:0/64] 2023-12-03 01:44:26,811 (trainer:735) INFO: 5epoch:train:9601-9700batch: iter_time=8.187e-05, forward_time=0.146, loss_ctc=98.432, loss_att=87.252, acc=0.542, loss=90.606, backward_time=0.284, grad_norm=52.089, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.080, optim0_lr0=1.856e-04, train_time=1.316
+[gpua014:0/64] 2023-12-03 01:46:41,795 (trainer:735) INFO: 5epoch:train:9701-9800batch: iter_time=7.987e-05, forward_time=0.146, loss_ctc=100.615, loss_att=85.238, acc=0.560, loss=89.851, backward_time=0.281, grad_norm=45.534, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.080, optim0_lr0=1.855e-04, train_time=1.350
+[gpua014:0/64] 2023-12-03 01:49:06,760 (trainer:735) INFO: 5epoch:train:9801-9900batch: iter_time=7.981e-05, forward_time=0.147, loss_ctc=101.893, loss_att=86.447, acc=0.588, loss=91.081, backward_time=0.306, grad_norm=45.398, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.080, optim0_lr0=1.854e-04, train_time=1.449
+[gpua014:0/64] 2023-12-03 01:51:23,199 (trainer:735) INFO: 5epoch:train:9901-10000batch: iter_time=8.891e-05, forward_time=0.191, loss_ctc=82.366, loss_att=68.740, acc=0.601, loss=72.828, backward_time=0.285, grad_norm=39.810, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.089, optim0_lr0=1.852e-04, train_time=1.364
+[gpua014:0/64] 2023-12-03 01:51:43,259 (multiple_iter_factory:32) INFO: Building 8th iter-factory...
+[gpua014:0/64] 2023-12-03 01:52:01,973 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua014:0/64] 2023-12-03 01:52:05,602 (abs_task:1616) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TPreprocessor object at 0x7faae08af5e0>)
+[gpua014:0/64] 2023-12-03 01:52:05,603 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, 
+[gpua014:0/64] 2023-12-03 01:52:05,606 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua014:0/64] 2023-12-03 01:58:11,854 (trainer:735) INFO: 5epoch:train:10001-10100batch: iter_time=2.745, forward_time=0.147, loss_ctc=94.105, loss_att=78.990, acc=0.550, loss=83.524, backward_time=0.279, grad_norm=49.626, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.080, optim0_lr0=1.851e-04, train_time=4.087
+[gpua014:0/64] 2023-12-03 02:00:15,541 (trainer:735) INFO: 5epoch:train:10101-10200batch: iter_time=8.204e-05, forward_time=0.147, loss_ctc=103.487, loss_att=96.929, acc=0.552, loss=98.896, backward_time=0.278, grad_norm=52.996, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.080, optim0_lr0=1.850e-04, train_time=1.237
+[gpua014:0/64] 2023-12-03 02:02:20,218 (trainer:735) INFO: 5epoch:train:10201-10300batch: iter_time=8.035e-05, forward_time=0.147, loss_ctc=97.395, loss_att=87.491, acc=0.560, loss=90.462, backward_time=0.284, grad_norm=45.554, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.080, optim0_lr0=1.848e-04, train_time=1.247
+[gpua014:0/64] 2023-12-03 02:04:39,610 (trainer:735) INFO: 5epoch:train:10301-10400batch: iter_time=8.290e-05, forward_time=0.147, loss_ctc=113.820, loss_att=105.534, acc=0.548, loss=108.020, backward_time=0.287, grad_norm=55.513, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.080, optim0_lr0=1.847e-04, train_time=1.394
+[gpua014:0/64] 2023-12-03 02:06:41,122 (trainer:735) INFO: 5epoch:train:10401-10500batch: iter_time=8.723e-05, forward_time=0.147, loss_ctc=99.636, loss_att=90.884, acc=0.575, loss=93.510, backward_time=0.287, grad_norm=43.331, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.080, optim0_lr0=1.846e-04, train_time=1.215
+[gpua014:0/64] 2023-12-03 02:08:50,879 (trainer:735) INFO: 5epoch:train:10501-10600batch: iter_time=8.097e-05, forward_time=0.192, loss_ctc=86.957, loss_att=80.673, acc=0.567, loss=82.558, backward_time=0.318, grad_norm=38.507, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.083, optim0_lr0=1.844e-04, train_time=1.297
+[gpua014:0/64] 2023-12-03 02:10:56,209 (trainer:735) INFO: 5epoch:train:10601-10700batch: iter_time=7.810e-05, forward_time=0.148, loss_ctc=112.108, loss_att=90.172, acc=0.563, loss=96.753, backward_time=0.282, grad_norm=59.905, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.080, optim0_lr0=1.843e-04, train_time=1.253
+[gpua014:0/64] 2023-12-03 02:13:06,800 (trainer:735) INFO: 5epoch:train:10701-10800batch: iter_time=8.172e-05, forward_time=0.148, loss_ctc=100.617, loss_att=89.607, acc=0.579, loss=92.910, backward_time=0.299, grad_norm=46.384, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.080, optim0_lr0=1.842e-04, train_time=1.306
+[gpua014:0/64] 2023-12-03 02:15:17,705 (trainer:735) INFO: 5epoch:train:10801-10900batch: iter_time=8.352e-05, forward_time=0.147, loss_ctc=88.190, loss_att=70.617, acc=0.586, loss=75.889, backward_time=0.288, grad_norm=39.917, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.080, optim0_lr0=1.840e-04, train_time=1.309
+[gpua014:0/64] 2023-12-03 02:17:34,612 (trainer:735) INFO: 5epoch:train:10901-11000batch: iter_time=8.618e-05, forward_time=0.148, loss_ctc=103.820, loss_att=91.822, acc=0.551, loss=95.422, backward_time=0.284, grad_norm=51.342, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.080, optim0_lr0=1.839e-04, train_time=1.369
+[gpua014:0/64] 2023-12-03 02:20:09,438 (trainer:735) INFO: 5epoch:train:11001-11100batch: iter_time=8.539e-05, forward_time=0.147, loss_ctc=103.451, loss_att=93.492, acc=0.563, loss=96.480, backward_time=0.321, grad_norm=47.185, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.080, optim0_lr0=1.838e-04, train_time=1.548
+[gpua014:0/64] 2023-12-03 02:22:30,559 (trainer:735) INFO: 5epoch:train:11101-11200batch: iter_time=8.884e-05, forward_time=0.159, loss_ctc=86.625, loss_att=68.512, acc=0.624, loss=73.946, backward_time=0.303, grad_norm=37.413, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.083, optim0_lr0=1.837e-04, train_time=1.411
+[gpua014:0/64] 2023-12-03 02:24:24,667 (multiple_iter_factory:32) INFO: Building 9th iter-factory...
+[gpua014:0/64] 2023-12-03 02:24:43,661 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua014:0/64] 2023-12-03 02:24:47,290 (abs_task:1616) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TPreprocessor object at 0x7fa17aa4dbd0>)
+[gpua014:0/64] 2023-12-03 02:24:47,290 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, 
+[gpua014:0/64] 2023-12-03 02:24:47,293 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua014:0/64] 2023-12-03 02:34:04,767 (trainer:735) INFO: 5epoch:train:11201-11300batch: iter_time=1.635, forward_time=0.202, loss_ctc=90.797, loss_att=73.580, acc=0.585, loss=78.745, backward_time=0.312, grad_norm=45.645, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.081, optim0_lr0=1.835e-04, train_time=6.942
+[gpua014:0/64] 2023-12-03 02:36:00,723 (trainer:735) INFO: 5epoch:train:11301-11400batch: iter_time=7.543e-05, forward_time=0.147, loss_ctc=94.369, loss_att=87.566, acc=0.533, loss=89.607, backward_time=0.278, grad_norm=50.053, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.080, optim0_lr0=1.834e-04, train_time=1.159
+[gpua014:0/64] 2023-12-03 02:37:55,972 (trainer:735) INFO: 5epoch:train:11401-11500batch: iter_time=8.467e-05, forward_time=0.147, loss_ctc=97.362, loss_att=88.194, acc=0.565, loss=90.945, backward_time=0.278, grad_norm=45.373, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.080, optim0_lr0=1.833e-04, train_time=1.152
+[gpua014:0/64] 2023-12-03 02:39:58,799 (trainer:735) INFO: 5epoch:train:11501-11600batch: iter_time=8.374e-05, forward_time=0.154, loss_ctc=112.328, loss_att=98.091, acc=0.548, loss=102.362, backward_time=0.299, grad_norm=60.002, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.081, optim0_lr0=1.831e-04, train_time=1.228
+[gpua014:0/64] 2023-12-03 02:42:06,613 (trainer:735) INFO: 5epoch:train:11601-11700batch: iter_time=8.452e-05, forward_time=0.147, loss_ctc=99.950, loss_att=96.572, acc=0.559, loss=97.586, backward_time=0.301, grad_norm=44.279, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.080, optim0_lr0=1.830e-04, train_time=1.278
+[gpua014:0/64] 2023-12-03 02:44:22,163 (trainer:735) INFO: 5epoch:train:11701-11800batch: iter_time=9.163e-05, forward_time=0.148, loss_ctc=89.373, loss_att=79.972, acc=0.579, loss=82.792, backward_time=0.284, grad_norm=42.702, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.080, optim0_lr0=1.829e-04, train_time=1.355
+[gpua014:0/64] 2023-12-03 02:46:32,096 (trainer:735) INFO: 5epoch:train:11801-11900batch: iter_time=9.334e-05, forward_time=0.148, loss_ctc=102.756, loss_att=79.930, acc=0.559, loss=86.777, backward_time=0.290, grad_norm=47.849, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.080, optim0_lr0=1.828e-04, train_time=1.299
+[gpua014:0/64] 2023-12-03 02:49:14,900 (trainer:735) INFO: 5epoch:train:11901-12000batch: iter_time=1.027e-04, forward_time=0.147, loss_ctc=105.631, loss_att=96.110, acc=0.550, loss=98.966, backward_time=0.325, grad_norm=54.837, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.080, optim0_lr0=1.826e-04, train_time=1.628
+[gpua014:0/64] 2023-12-03 02:51:39,015 (trainer:735) INFO: 5epoch:train:12001-12100batch: iter_time=1.081e-04, forward_time=0.150, loss_ctc=89.915, loss_att=74.404, acc=0.596, loss=79.057, backward_time=0.308, grad_norm=41.062, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.080, optim0_lr0=1.825e-04, train_time=1.441
+[gpua014:0/64] 2023-12-03 02:53:52,602 (trainer:735) INFO: 5epoch:train:12101-12200batch: iter_time=1.020e-04, forward_time=0.162, loss_ctc=96.899, loss_att=83.504, acc=0.550, loss=87.523, backward_time=0.282, grad_norm=48.257, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.081, optim0_lr0=1.824e-04, train_time=1.336
+[gpua014:0/64] 2023-12-03 02:55:59,837 (trainer:735) INFO: 5epoch:train:12201-12300batch: iter_time=1.021e-04, forward_time=0.163, loss_ctc=97.734, loss_att=81.795, acc=0.566, loss=86.577, backward_time=0.282, grad_norm=47.691, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.082, optim0_lr0=1.823e-04, train_time=1.272
+[gpua014:0/64] 2023-12-03 02:58:35,755 (trainer:735) INFO: 5epoch:train:12301-12400batch: iter_time=8.333e-05, forward_time=0.147, loss_ctc=100.118, loss_att=83.887, acc=0.592, loss=88.756, backward_time=0.319, grad_norm=47.628, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.081, optim0_lr0=1.821e-04, train_time=1.559
+[gpua014:0/64] 2023-12-03 03:00:45,747 (trainer:735) INFO: 5epoch:train:12401-12500batch: iter_time=7.893e-05, forward_time=0.191, loss_ctc=79.990, loss_att=65.802, acc=0.605, loss=70.058, backward_time=0.307, grad_norm=37.151, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.083, optim0_lr0=1.820e-04, train_time=1.300
+[gpua014:0/64] 2023-12-03 03:01:05,777 (multiple_iter_factory:32) INFO: Building 10th iter-factory...
+[gpua014:0/64] 2023-12-03 03:01:24,660 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua014:0/64] 2023-12-03 03:01:28,304 (abs_task:1616) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TPreprocessor object at 0x7fa12dd61d20>)
+[gpua014:0/64] 2023-12-03 03:01:28,304 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, 
+[gpua014:0/64] 2023-12-03 03:01:28,307 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua014:0/64] 2023-12-03 03:07:33,915 (trainer:735) INFO: 5epoch:train:12501-12600batch: iter_time=2.825, forward_time=0.162, loss_ctc=92.262, loss_att=78.244, acc=0.557, loss=82.449, backward_time=0.281, grad_norm=47.638, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.081, optim0_lr0=1.819e-04, train_time=4.082
+[gpua014:0/64] 2023-12-03 03:09:31,457 (trainer:735) INFO: 5epoch:train:12601-12700batch: iter_time=7.974e-05, forward_time=0.147, loss_ctc=100.633, loss_att=96.188, acc=0.559, loss=97.521, backward_time=0.286, grad_norm=52.079, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.081, optim0_lr0=1.818e-04, train_time=1.175
+[gpua014:0/64] 2023-12-03 03:11:39,373 (trainer:735) INFO: 5epoch:train:12701-12800batch: iter_time=8.358e-05, forward_time=0.148, loss_ctc=95.691, loss_att=87.752, acc=0.565, loss=90.134, backward_time=0.283, grad_norm=46.598, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.081, optim0_lr0=1.816e-04, train_time=1.279
+[gpua014:0/64] 2023-12-03 03:13:36,824 (trainer:735) INFO: 5epoch:train:12801-12900batch: iter_time=8.940e-05, forward_time=0.147, loss_ctc=110.113, loss_att=103.035, acc=0.557, loss=105.158, backward_time=0.283, grad_norm=55.483, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.080, optim0_lr0=1.815e-04, train_time=1.174
+[gpua014:0/64] 2023-12-03 03:15:45,664 (trainer:735) INFO: 5epoch:train:12901-13000batch: iter_time=8.969e-05, forward_time=0.156, loss_ctc=97.654, loss_att=87.920, acc=0.586, loss=90.840, backward_time=0.315, grad_norm=42.477, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.085, optim0_lr0=1.814e-04, train_time=1.288
+[gpua014:0/64] 2023-12-03 03:18:10,773 (trainer:735) INFO: 5epoch:train:13001-13100batch: iter_time=8.860e-05, forward_time=0.163, loss_ctc=85.289, loss_att=79.516, acc=0.572, loss=81.248, backward_time=0.295, grad_norm=39.716, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.081, optim0_lr0=1.813e-04, train_time=1.451
+[gpua014:0/64] 2023-12-03 03:20:37,517 (trainer:735) INFO: 5epoch:train:13101-13200batch: iter_time=8.264e-05, forward_time=0.185, loss_ctc=107.983, loss_att=87.991, acc=0.572, loss=93.988, backward_time=0.324, grad_norm=54.231, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.085, optim0_lr0=1.811e-04, train_time=1.467
+[gpua014:0/64] 2023-12-03 03:22:56,520 (trainer:735) INFO: 5epoch:train:13201-13300batch: iter_time=7.944e-05, forward_time=0.149, loss_ctc=98.543, loss_att=87.848, acc=0.589, loss=91.057, backward_time=0.286, grad_norm=45.520, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.081, optim0_lr0=1.810e-04, train_time=1.390
+[gpua014:0/64] 2023-12-03 03:25:35,409 (trainer:735) INFO: 5epoch:train:13301-13400batch: iter_time=8.606e-05, forward_time=0.159, loss_ctc=86.218, loss_att=69.137, acc=0.594, loss=74.262, backward_time=0.312, grad_norm=40.860, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.081, optim0_lr0=1.809e-04, train_time=1.589
+[gpua014:0/64] 2023-12-03 03:27:39,841 (trainer:735) INFO: 5epoch:train:13401-13500batch: iter_time=8.818e-05, forward_time=0.149, loss_ctc=101.606, loss_att=89.706, acc=0.557, loss=93.276, backward_time=0.284, grad_norm=53.566, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.081, optim0_lr0=1.808e-04, train_time=1.244
+[gpua014:0/64] 2023-12-03 03:29:56,026 (trainer:735) INFO: 5epoch:train:13501-13600batch: iter_time=8.137e-05, forward_time=0.148, loss_ctc=101.660, loss_att=91.116, acc=0.572, loss=94.279, backward_time=0.306, grad_norm=48.751, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.081, optim0_lr0=1.806e-04, train_time=1.362
+[gpua014:0/64] 2023-12-03 03:32:18,452 (trainer:735) INFO: 5epoch:train:13601-13700batch: iter_time=8.454e-05, forward_time=0.148, loss_ctc=84.220, loss_att=66.068, acc=0.633, loss=71.513, backward_time=0.295, grad_norm=39.834, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.080, optim0_lr0=1.805e-04, train_time=1.424
+[gpua014:0/64] 2023-12-03 03:33:37,187 (multiple_iter_factory:32) INFO: Building 11th iter-factory...
+[gpua014:0/64] 2023-12-03 03:33:55,894 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua014:0/64] 2023-12-03 03:33:59,565 (abs_task:1616) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TPreprocessor object at 0x7faadd3116c0>)
+[gpua014:0/64] 2023-12-03 03:33:59,565 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, 
+[gpua014:0/64] 2023-12-03 03:33:59,568 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua014:0/64] 2023-12-03 03:38:22,710 (trainer:735) INFO: 5epoch:train:13701-13800batch: iter_time=2.389, forward_time=0.174, loss_ctc=89.117, loss_att=70.284, acc=0.602, loss=75.934, backward_time=0.281, grad_norm=42.954, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.083, optim0_lr0=1.804e-04, train_time=3.642
+[gpua014:0/64] 2023-12-03 03:40:19,195 (trainer:735) INFO: 5epoch:train:13801-13900batch: iter_time=7.928e-05, forward_time=0.148, loss_ctc=92.929, loss_att=85.969, acc=0.546, loss=88.057, backward_time=0.280, grad_norm=46.583, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.082, optim0_lr0=1.803e-04, train_time=1.165
+[gpua014:0/64] 2023-12-03 03:42:15,544 (trainer:735) INFO: 5epoch:train:13901-14000batch: iter_time=8.967e-05, forward_time=0.147, loss_ctc=95.315, loss_att=87.135, acc=0.579, loss=89.589, backward_time=0.281, grad_norm=44.465, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.081, optim0_lr0=1.801e-04, train_time=1.163
+[gpua014:0/64] 2023-12-03 03:44:24,164 (trainer:735) INFO: 5epoch:train:14001-14100batch: iter_time=8.654e-05, forward_time=0.180, loss_ctc=109.162, loss_att=95.988, acc=0.563, loss=99.940, backward_time=0.311, grad_norm=57.173, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.086, optim0_lr0=1.800e-04, train_time=1.286
+[gpua014:0/64] 2023-12-03 03:46:34,212 (trainer:735) INFO: 5epoch:train:14101-14200batch: iter_time=8.525e-05, forward_time=0.148, loss_ctc=98.019, loss_att=94.552, acc=0.576, loss=95.592, backward_time=0.306, grad_norm=44.238, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.082, optim0_lr0=1.799e-04, train_time=1.300
+[gpua014:0/64] 2023-12-03 03:48:47,884 (trainer:735) INFO: 5epoch:train:14201-14300batch: iter_time=9.061e-05, forward_time=0.155, loss_ctc=87.534, loss_att=78.716, acc=0.593, loss=81.362, backward_time=0.313, grad_norm=40.796, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.081, optim0_lr0=1.798e-04, train_time=1.337
+[gpua014:0/64] 2023-12-03 03:51:17,947 (trainer:735) INFO: 5epoch:train:14301-14400batch: iter_time=1.010e-04, forward_time=0.149, loss_ctc=100.734, loss_att=78.621, acc=0.579, loss=85.255, backward_time=0.298, grad_norm=46.543, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.080, optim0_lr0=1.797e-04, train_time=1.500
+[gpua014:0/64] 2023-12-03 03:53:47,579 (trainer:735) INFO: 5epoch:train:14401-14500batch: iter_time=8.581e-05, forward_time=0.148, loss_ctc=103.762, loss_att=96.024, acc=0.567, loss=98.345, backward_time=0.319, grad_norm=57.764, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.081, optim0_lr0=1.795e-04, train_time=1.496
+[gpua014:0/64] 2023-12-03 03:56:05,521 (trainer:735) INFO: 5epoch:train:14501-14600batch: iter_time=9.278e-05, forward_time=0.148, loss_ctc=87.391, loss_att=71.730, acc=0.617, loss=76.428, backward_time=0.293, grad_norm=43.253, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.081, optim0_lr0=1.794e-04, train_time=1.379
+[gpua014:0/64] 2023-12-03 03:58:17,798 (trainer:735) INFO: 5epoch:train:14601-14700batch: iter_time=9.586e-05, forward_time=0.178, loss_ctc=94.924, loss_att=81.839, acc=0.566, loss=85.765, backward_time=0.300, grad_norm=47.647, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.085, optim0_lr0=1.793e-04, train_time=1.323
+[gpua014:0/64] 2023-12-03 04:01:08,052 (trainer:735) INFO: 5epoch:train:14701-14800batch: iter_time=8.936e-05, forward_time=0.148, loss_ctc=96.038, loss_att=79.770, acc=0.582, loss=84.650, backward_time=0.327, grad_norm=42.676, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.080, optim0_lr0=1.792e-04, train_time=1.702
+[gpua014:0/64] 2023-12-03 04:03:11,382 (trainer:735) INFO: 5epoch:train:14801-14900batch: iter_time=9.307e-05, forward_time=0.148, loss_ctc=98.470, loss_att=82.243, acc=0.608, loss=87.111, backward_time=0.303, grad_norm=42.806, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.081, optim0_lr0=1.791e-04, train_time=1.233
+[gpua014:0/64] 2023-12-03 04:05:17,246 (trainer:735) INFO: 5epoch:train:14901-15000batch: iter_time=8.643e-05, forward_time=0.148, loss_ctc=79.032, loss_att=64.330, acc=0.625, loss=68.741, backward_time=0.293, grad_norm=38.285, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.080, optim0_lr0=1.789e-04, train_time=1.258
+[gpua014:0/64] 2023-12-03 04:31:08,629 (trainer:341) INFO: 5epoch results: [train] iter_time=0.199, forward_time=0.156, loss_ctc=102.385, loss_att=90.109, acc=0.548, loss=93.792, backward_time=0.297, grad_norm=48.785, clip=100.000, loss_scale=1.792e+15, optim_step_time=0.081, optim0_lr0=1.889e-04, train_time=1.627, time=6 hours, 47 minutes and 8.81 seconds, total_count=75000, gpu_max_cached_mem_GB=37.328, [valid] loss_ctc=74.504, cer_ctc=0.370, loss_att=62.989, acc=0.510, cer=0.471, wer=1.000, loss=66.443, time=25 minutes and 27.92 seconds, total_count=23355, gpu_max_cached_mem_GB=37.328
+[gpua014:0/64] 2023-12-03 04:31:27,927 (trainer:389) INFO: The best model has been updated: valid.acc, valid.total_count
+[gpua014:0/64] 2023-12-03 04:31:28,144 (trainer:272) INFO: 6/40epoch started. Estimated time to finish: 1 week, 4 days and 5 hours
+[gpua014:0/64] 2023-12-03 04:31:28,233 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpua014:0/64] 2023-12-03 04:31:46,647 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua014:0/64] 2023-12-03 04:31:50,130 (abs_task:1616) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TPreprocessor object at 0x7fadd0f6fb80>)
+[gpua014:0/64] 2023-12-03 04:31:50,130 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, 
+[gpua014:0/64] 2023-12-03 04:31:50,133 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua014:0/64] 2023-12-03 04:37:46,148 (trainer:735) INFO: 6epoch:train:1-100batch: iter_time=2.404, forward_time=0.174, loss_ctc=115.849, loss_att=102.776, acc=0.569, loss=106.698, backward_time=0.286, grad_norm=57.860, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.081, optim0_lr0=1.788e-04, train_time=3.779
+[gpua014:0/64] 2023-12-03 04:39:43,692 (trainer:735) INFO: 6epoch:train:101-200batch: iter_time=7.958e-05, forward_time=0.148, loss_ctc=97.285, loss_att=84.853, acc=0.568, loss=88.582, backward_time=0.278, grad_norm=45.392, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.080, optim0_lr0=1.787e-04, train_time=1.175
+[gpua014:0/64] 2023-12-03 04:41:39,339 (trainer:735) INFO: 6epoch:train:201-300batch: iter_time=8.239e-05, forward_time=0.148, loss_ctc=108.478, loss_att=87.219, acc=0.565, loss=93.597, backward_time=0.280, grad_norm=53.761, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.081, optim0_lr0=1.786e-04, train_time=1.156
+[gpua014:0/64] 2023-12-03 04:43:41,538 (trainer:735) INFO: 6epoch:train:301-400batch: iter_time=9.078e-05, forward_time=0.148, loss_ctc=104.228, loss_att=89.628, acc=0.567, loss=94.008, backward_time=0.283, grad_norm=52.684, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.081, optim0_lr0=1.785e-04, train_time=1.222
+[gpua014:0/64] 2023-12-03 04:45:47,073 (trainer:735) INFO: 6epoch:train:401-500batch: iter_time=9.404e-05, forward_time=0.163, loss_ctc=97.324, loss_att=82.691, acc=0.543, loss=87.081, backward_time=0.281, grad_norm=51.570, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.081, optim0_lr0=1.783e-04, train_time=1.255
+[gpua014:0/64] 2023-12-03 04:48:05,825 (trainer:735) INFO: 6epoch:train:501-600batch: iter_time=9.766e-05, forward_time=0.149, loss_ctc=114.950, loss_att=98.720, acc=0.559, loss=103.589, backward_time=0.296, grad_norm=57.647, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.082, optim0_lr0=1.782e-04, train_time=1.387
+[gpua014:0/64] 2023-12-03 04:50:25,636 (trainer:735) INFO: 6epoch:train:601-700batch: iter_time=9.215e-05, forward_time=0.158, loss_ctc=81.577, loss_att=71.935, acc=0.566, loss=74.828, backward_time=0.299, grad_norm=43.778, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.080, optim0_lr0=1.781e-04, train_time=1.398
+[gpua014:0/64] 2023-12-03 04:52:30,607 (trainer:735) INFO: 6epoch:train:701-800batch: iter_time=8.709e-05, forward_time=0.147, loss_ctc=81.775, loss_att=70.805, acc=0.584, loss=74.096, backward_time=0.281, grad_norm=41.882, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.080, optim0_lr0=1.780e-04, train_time=1.249
+[gpua014:0/64] 2023-12-03 04:54:40,234 (trainer:735) INFO: 6epoch:train:801-900batch: iter_time=9.078e-05, forward_time=0.149, loss_ctc=122.259, loss_att=118.090, acc=0.529, loss=119.341, backward_time=0.290, grad_norm=62.638, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.080, optim0_lr0=1.779e-04, train_time=1.297
+[gpua014:0/64] 2023-12-03 04:57:07,076 (trainer:735) INFO: 6epoch:train:901-1000batch: iter_time=8.575e-05, forward_time=0.168, loss_ctc=85.877, loss_att=74.088, acc=0.572, loss=77.625, backward_time=0.343, grad_norm=40.192, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.088, optim0_lr0=1.778e-04, train_time=1.468
+[gpua014:0/64] 2023-12-03 04:59:29,968 (trainer:735) INFO: 6epoch:train:1001-1100batch: iter_time=8.864e-05, forward_time=0.169, loss_ctc=88.318, loss_att=75.104, acc=0.594, loss=79.068, backward_time=0.308, grad_norm=41.635, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.082, optim0_lr0=1.776e-04, train_time=1.429
+[gpua014:0/64] 2023-12-03 05:01:45,323 (trainer:735) INFO: 6epoch:train:1101-1200batch: iter_time=9.102e-05, forward_time=0.147, loss_ctc=98.316, loss_att=90.779, acc=0.565, loss=93.040, backward_time=0.286, grad_norm=44.835, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.080, optim0_lr0=1.775e-04, train_time=1.353
+[gpua014:0/64] 2023-12-03 05:03:09,098 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
+[gpua014:0/64] 2023-12-03 05:03:27,374 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua014:0/64] 2023-12-03 05:03:30,968 (abs_task:1616) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TPreprocessor object at 0x7faa70817bb0>)
+[gpua014:0/64] 2023-12-03 05:03:30,968 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, 
+[gpua014:0/64] 2023-12-03 05:03:30,971 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua014:0/64] 2023-12-03 05:07:42,239 (trainer:735) INFO: 6epoch:train:1201-1300batch: iter_time=2.151, forward_time=0.148, loss_ctc=111.141, loss_att=98.672, acc=0.573, loss=102.413, backward_time=0.300, grad_norm=51.513, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.081, optim0_lr0=1.774e-04, train_time=3.569
+[gpua014:0/64] 2023-12-03 05:09:43,160 (trainer:735) INFO: 6epoch:train:1301-1400batch: iter_time=8.298e-05, forward_time=0.147, loss_ctc=106.668, loss_att=95.952, acc=0.573, loss=99.167, backward_time=0.280, grad_norm=53.549, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.081, optim0_lr0=1.773e-04, train_time=1.209
+[gpua014:0/64] 2023-12-03 05:11:46,450 (trainer:735) INFO: 6epoch:train:1401-1500batch: iter_time=9.565e-05, forward_time=0.146, loss_ctc=109.127, loss_att=89.126, acc=0.588, loss=95.126, backward_time=0.280, grad_norm=51.531, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.081, optim0_lr0=1.772e-04, train_time=1.233
+[gpua014:0/64] 2023-12-03 05:13:53,427 (trainer:735) INFO: 6epoch:train:1501-1600batch: iter_time=8.765e-05, forward_time=0.146, loss_ctc=92.288, loss_att=81.321, acc=0.576, loss=84.611, backward_time=0.280, grad_norm=47.029, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.080, optim0_lr0=1.771e-04, train_time=1.270
+[gpua014:0/64] 2023-12-03 05:15:53,130 (trainer:735) INFO: 6epoch:train:1601-1700batch: iter_time=1.053e-04, forward_time=0.157, loss_ctc=103.139, loss_att=86.025, acc=0.566, loss=91.159, backward_time=0.278, grad_norm=53.946, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.081, optim0_lr0=1.769e-04, train_time=1.197
+[gpua014:0/64] 2023-12-03 05:17:55,384 (trainer:735) INFO: 6epoch:train:1701-1800batch: iter_time=9.467e-05, forward_time=0.148, loss_ctc=102.636, loss_att=92.514, acc=0.576, loss=95.550, backward_time=0.281, grad_norm=52.663, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.082, optim0_lr0=1.768e-04, train_time=1.222
+[gpua014:0/64] 2023-12-03 05:20:00,859 (trainer:735) INFO: 6epoch:train:1801-1900batch: iter_time=8.834e-05, forward_time=0.146, loss_ctc=98.808, loss_att=78.807, acc=0.584, loss=84.807, backward_time=0.286, grad_norm=49.358, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.081, optim0_lr0=1.767e-04, train_time=1.255
+[gpua014:0/64] 2023-12-03 05:22:07,915 (trainer:735) INFO: 6epoch:train:1901-2000batch: iter_time=8.179e-05, forward_time=0.146, loss_ctc=75.937, loss_att=70.313, acc=0.577, loss=72.000, backward_time=0.296, grad_norm=41.681, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.080, optim0_lr0=1.766e-04, train_time=1.270
+[gpua014:0/64] 2023-12-03 05:24:32,042 (trainer:735) INFO: 6epoch:train:2001-2100batch: iter_time=8.844e-05, forward_time=0.147, loss_ctc=86.522, loss_att=78.420, acc=0.598, loss=80.851, backward_time=0.299, grad_norm=38.680, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.081, optim0_lr0=1.765e-04, train_time=1.441
+[gpua014:0/64] 2023-12-03 05:26:51,758 (trainer:735) INFO: 6epoch:train:2101-2200batch: iter_time=8.693e-05, forward_time=0.147, loss_ctc=113.256, loss_att=102.341, acc=0.538, loss=105.616, backward_time=0.298, grad_norm=60.046, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.081, optim0_lr0=1.764e-04, train_time=1.397
+[gpua014:0/64] 2023-12-03 05:28:55,447 (trainer:735) INFO: 6epoch:train:2201-2300batch: iter_time=9.693e-05, forward_time=0.149, loss_ctc=88.509, loss_att=76.489, acc=0.601, loss=80.095, backward_time=0.287, grad_norm=39.210, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.081, optim0_lr0=1.763e-04, train_time=1.237
+[gpua014:0/64] 2023-12-03 05:31:02,693 (trainer:735) INFO: 6epoch:train:2301-2400batch: iter_time=8.774e-05, forward_time=0.149, loss_ctc=94.129, loss_att=81.415, acc=0.589, loss=85.229, backward_time=0.280, grad_norm=42.658, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.082, optim0_lr0=1.761e-04, train_time=1.272
+[gpua014:0/64] 2023-12-03 05:33:03,998 (trainer:735) INFO: 6epoch:train:2401-2500batch: iter_time=8.608e-05, forward_time=0.147, loss_ctc=94.808, loss_att=89.386, acc=0.593, loss=91.013, backward_time=0.283, grad_norm=44.111, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.081, optim0_lr0=1.760e-04, train_time=1.213
+[gpua014:0/64] 2023-12-03 05:33:05,753 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpua014:0/64] 2023-12-03 05:33:23,965 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua014:0/64] 2023-12-03 05:33:27,778 (abs_task:1616) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TPreprocessor object at 0x7fa1bfc537c0>)
+[gpua014:0/64] 2023-12-03 05:33:27,779 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, 
+[gpua014:0/64] 2023-12-03 05:33:27,782 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua014:0/64] 2023-12-03 05:40:07,958 (trainer:735) INFO: 6epoch:train:2501-2600batch: iter_time=1.325, forward_time=0.161, loss_ctc=111.287, loss_att=95.982, acc=0.591, loss=100.573, backward_time=0.297, grad_norm=49.130, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.081, optim0_lr0=1.759e-04, train_time=4.239
+[gpua014:0/64] 2023-12-03 05:42:04,413 (trainer:735) INFO: 6epoch:train:2601-2700batch: iter_time=8.549e-05, forward_time=0.147, loss_ctc=95.997, loss_att=81.786, acc=0.597, loss=86.049, backward_time=0.277, grad_norm=45.181, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.081, optim0_lr0=1.758e-04, train_time=1.164
+[gpua014:0/64] 2023-12-03 05:44:04,016 (trainer:735) INFO: 6epoch:train:2701-2800batch: iter_time=9.357e-05, forward_time=0.148, loss_ctc=104.678, loss_att=85.035, acc=0.581, loss=90.928, backward_time=0.278, grad_norm=50.336, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.081, optim0_lr0=1.757e-04, train_time=1.196
+[gpua014:0/64] 2023-12-03 05:46:11,798 (trainer:735) INFO: 6epoch:train:2801-2900batch: iter_time=1.020e-04, forward_time=0.147, loss_ctc=97.640, loss_att=84.010, acc=0.590, loss=88.099, backward_time=0.278, grad_norm=48.658, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.081, optim0_lr0=1.756e-04, train_time=1.278
+[gpua014:0/64] 2023-12-03 05:48:33,278 (trainer:735) INFO: 6epoch:train:2901-3000batch: iter_time=8.205e-05, forward_time=0.149, loss_ctc=93.012, loss_att=78.715, acc=0.565, loss=83.004, backward_time=0.292, grad_norm=51.222, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.081, optim0_lr0=1.755e-04, train_time=1.415
+[gpua014:0/64] 2023-12-03 05:50:31,589 (trainer:735) INFO: 6epoch:train:3001-3100batch: iter_time=8.899e-05, forward_time=0.159, loss_ctc=110.878, loss_att=92.058, acc=0.586, loss=97.704, backward_time=0.284, grad_norm=55.244, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.083, optim0_lr0=1.754e-04, train_time=1.183
+[gpua014:0/64] 2023-12-03 05:52:50,811 (trainer:735) INFO: 6epoch:train:3101-3200batch: iter_time=8.861e-05, forward_time=0.157, loss_ctc=77.976, loss_att=69.441, acc=0.585, loss=72.002, backward_time=0.299, grad_norm=39.579, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.082, optim0_lr0=1.752e-04, train_time=1.392
+[gpua014:0/64] 2023-12-03 05:55:11,925 (trainer:735) INFO: 6epoch:train:3201-3300batch: iter_time=8.851e-05, forward_time=0.171, loss_ctc=79.217, loss_att=68.599, acc=0.601, loss=71.784, backward_time=0.301, grad_norm=38.176, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.083, optim0_lr0=1.751e-04, train_time=1.411
+[gpua014:0/64] 2023-12-03 05:57:17,693 (trainer:735) INFO: 6epoch:train:3301-3400batch: iter_time=9.215e-05, forward_time=0.152, loss_ctc=116.400, loss_att=111.125, acc=0.552, loss=112.707, backward_time=0.293, grad_norm=59.848, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.080, optim0_lr0=1.750e-04, train_time=1.257
+[gpua014:0/64] 2023-12-03 05:59:21,510 (trainer:735) INFO: 6epoch:train:3401-3500batch: iter_time=9.673e-05, forward_time=0.148, loss_ctc=83.889, loss_att=71.930, acc=0.589, loss=75.518, backward_time=0.280, grad_norm=38.883, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.081, optim0_lr0=1.749e-04, train_time=1.238
+[gpua014:0/64] 2023-12-03 06:01:37,415 (trainer:735) INFO: 6epoch:train:3501-3600batch: iter_time=8.394e-05, forward_time=0.150, loss_ctc=85.132, loss_att=71.626, acc=0.618, loss=75.678, backward_time=0.291, grad_norm=36.649, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.081, optim0_lr0=1.748e-04, train_time=1.359
+[gpua014:0/64] 2023-12-03 06:04:11,886 (trainer:735) INFO: 6epoch:train:3601-3700batch: iter_time=9.007e-05, forward_time=0.148, loss_ctc=96.330, loss_att=87.371, acc=0.586, loss=90.059, backward_time=0.279, grad_norm=43.275, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.081, optim0_lr0=1.747e-04, train_time=1.544
+[gpua014:0/64] 2023-12-03 06:05:24,956 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpua014:0/64] 2023-12-03 06:05:43,702 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua014:0/64] 2023-12-03 06:05:47,242 (abs_task:1616) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TPreprocessor object at 0x7fa17a31a3e0>)
+[gpua014:0/64] 2023-12-03 06:05:47,242 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, 
+[gpua014:0/64] 2023-12-03 06:05:47,245 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua014:0/64] 2023-12-03 06:10:43,980 (trainer:735) INFO: 6epoch:train:3701-3800batch: iter_time=2.641, forward_time=0.164, loss_ctc=106.313, loss_att=95.175, acc=0.590, loss=98.517, backward_time=0.280, grad_norm=47.895, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.081, optim0_lr0=1.746e-04, train_time=3.921
+[gpua014:0/64] 2023-12-03 06:12:41,710 (trainer:735) INFO: 6epoch:train:3801-3900batch: iter_time=8.200e-05, forward_time=0.148, loss_ctc=103.879, loss_att=92.226, acc=0.574, loss=95.722, backward_time=0.280, grad_norm=52.985, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.081, optim0_lr0=1.745e-04, train_time=1.177
+[gpua014:0/64] 2023-12-03 06:14:56,556 (trainer:735) INFO: 6epoch:train:3901-4000batch: iter_time=7.878e-05, forward_time=0.148, loss_ctc=106.338, loss_att=85.258, acc=0.590, loss=91.582, backward_time=0.334, grad_norm=49.635, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.082, optim0_lr0=1.744e-04, train_time=1.348
+[gpua014:0/64] 2023-12-03 06:17:11,222 (trainer:735) INFO: 6epoch:train:4001-4100batch: iter_time=8.222e-05, forward_time=0.148, loss_ctc=88.674, loss_att=77.651, acc=0.582, loss=80.958, backward_time=0.331, grad_norm=46.122, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.082, optim0_lr0=1.742e-04, train_time=1.346
+[gpua014:0/64] 2023-12-03 06:19:20,450 (trainer:735) INFO: 6epoch:train:4101-4200batch: iter_time=9.147e-05, forward_time=0.148, loss_ctc=100.219, loss_att=84.247, acc=0.574, loss=89.039, backward_time=0.307, grad_norm=50.220, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.082, optim0_lr0=1.741e-04, train_time=1.292
+[gpua014:0/64] 2023-12-03 06:21:28,911 (trainer:735) INFO: 6epoch:train:4201-4300batch: iter_time=9.528e-05, forward_time=0.149, loss_ctc=99.121, loss_att=90.607, acc=0.571, loss=93.161, backward_time=0.291, grad_norm=52.381, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.082, optim0_lr0=1.740e-04, train_time=1.284
+[gpua014:0/64] 2023-12-03 06:23:37,132 (trainer:735) INFO: 6epoch:train:4301-4400batch: iter_time=9.177e-05, forward_time=0.157, loss_ctc=96.643, loss_att=77.485, acc=0.585, loss=83.233, backward_time=0.280, grad_norm=48.643, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.081, optim0_lr0=1.739e-04, train_time=1.282
+[gpua014:0/64] 2023-12-03 06:25:53,210 (trainer:735) INFO: 6epoch:train:4401-4500batch: iter_time=1.005e-04, forward_time=0.146, loss_ctc=74.208, loss_att=66.161, acc=0.585, loss=68.575, backward_time=0.277, grad_norm=41.056, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.080, optim0_lr0=1.738e-04, train_time=1.361
+[gpua014:0/64] 2023-12-03 06:28:22,710 (trainer:735) INFO: 6epoch:train:4501-4600batch: iter_time=9.810e-05, forward_time=0.147, loss_ctc=84.738, loss_att=76.887, acc=0.601, loss=79.242, backward_time=0.316, grad_norm=40.046, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.080, optim0_lr0=1.737e-04, train_time=1.495
+[gpua014:0/64] 2023-12-03 06:30:39,453 (trainer:735) INFO: 6epoch:train:4601-4700batch: iter_time=8.809e-05, forward_time=0.147, loss_ctc=109.603, loss_att=98.923, acc=0.540, loss=102.127, backward_time=0.281, grad_norm=59.150, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.080, optim0_lr0=1.736e-04, train_time=1.367
+[gpua014:0/64] 2023-12-03 06:32:46,468 (trainer:735) INFO: 6epoch:train:4701-4800batch: iter_time=8.587e-05, forward_time=0.146, loss_ctc=86.504, loss_att=73.440, acc=0.606, loss=77.359, backward_time=0.285, grad_norm=38.910, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.080, optim0_lr0=1.735e-04, train_time=1.270
+[gpua014:0/64] 2023-12-03 06:34:58,900 (trainer:735) INFO: 6epoch:train:4801-4900batch: iter_time=8.829e-05, forward_time=0.153, loss_ctc=91.959, loss_att=79.304, acc=0.587, loss=83.100, backward_time=0.300, grad_norm=42.484, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.080, optim0_lr0=1.734e-04, train_time=1.324
+[gpua014:0/64] 2023-12-03 06:37:18,135 (trainer:735) INFO: 6epoch:train:4901-5000batch: iter_time=8.585e-05, forward_time=0.159, loss_ctc=93.969, loss_att=87.698, acc=0.591, loss=89.579, backward_time=0.299, grad_norm=46.199, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.082, optim0_lr0=1.733e-04, train_time=1.392
+[gpua014:0/64] 2023-12-03 06:37:19,890 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpua014:0/64] 2023-12-03 06:37:38,477 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua014:0/64] 2023-12-03 06:37:41,979 (abs_task:1616) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TPreprocessor object at 0x7fa17e220100>)
+[gpua014:0/64] 2023-12-03 06:37:41,979 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, 
+[gpua014:0/64] 2023-12-03 06:37:41,982 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua014:0/64] 2023-12-03 06:44:25,941 (trainer:735) INFO: 6epoch:train:5001-5100batch: iter_time=1.327, forward_time=0.172, loss_ctc=109.331, loss_att=95.504, acc=0.598, loss=99.652, backward_time=0.284, grad_norm=54.432, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.081, optim0_lr0=1.731e-04, train_time=4.278
+[gpua014:0/64] 2023-12-03 06:46:23,752 (trainer:735) INFO: 6epoch:train:5101-5200batch: iter_time=8.970e-05, forward_time=0.148, loss_ctc=94.939, loss_att=81.181, acc=0.603, loss=85.308, backward_time=0.278, grad_norm=44.514, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.080, optim0_lr0=1.730e-04, train_time=1.178
+[gpua014:0/64] 2023-12-03 06:48:38,069 (trainer:735) INFO: 6epoch:train:5201-5300batch: iter_time=8.394e-05, forward_time=0.148, loss_ctc=100.552, loss_att=83.300, acc=0.590, loss=88.476, backward_time=0.292, grad_norm=49.994, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.080, optim0_lr0=1.729e-04, train_time=1.343
+[gpua014:0/64] 2023-12-03 06:50:52,726 (trainer:735) INFO: 6epoch:train:5301-5400batch: iter_time=8.745e-05, forward_time=0.147, loss_ctc=96.763, loss_att=83.207, acc=0.596, loss=87.274, backward_time=0.301, grad_norm=53.773, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.080, optim0_lr0=1.728e-04, train_time=1.346
+[gpua014:0/64] 2023-12-03 06:52:56,429 (trainer:735) INFO: 6epoch:train:5401-5500batch: iter_time=8.784e-05, forward_time=0.147, loss_ctc=89.702, loss_att=76.889, acc=0.575, loss=80.733, backward_time=0.286, grad_norm=50.829, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.080, optim0_lr0=1.727e-04, train_time=1.237
+[gpua014:0/64] 2023-12-03 06:55:16,465 (trainer:735) INFO: 6epoch:train:5501-5600batch: iter_time=9.313e-05, forward_time=0.148, loss_ctc=108.033, loss_att=90.450, acc=0.593, loss=95.724, backward_time=0.331, grad_norm=54.985, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.080, optim0_lr0=1.726e-04, train_time=1.400
+[gpua014:0/64] 2023-12-03 06:57:15,434 (trainer:735) INFO: 6epoch:train:5601-5700batch: iter_time=8.888e-05, forward_time=0.147, loss_ctc=76.524, loss_att=69.037, acc=0.590, loss=71.283, backward_time=0.278, grad_norm=39.031, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.080, optim0_lr0=1.725e-04, train_time=1.189
+[gpua014:0/64] 2023-12-03 06:59:37,056 (trainer:735) INFO: 6epoch:train:5701-5800batch: iter_time=7.820e-05, forward_time=0.147, loss_ctc=78.568, loss_att=67.758, acc=0.608, loss=71.001, backward_time=0.296, grad_norm=39.127, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.080, optim0_lr0=1.724e-04, train_time=1.416
+[gpua014:0/64] 2023-12-03 07:01:59,084 (trainer:735) INFO: 6epoch:train:5801-5900batch: iter_time=8.870e-05, forward_time=0.148, loss_ctc=116.272, loss_att=108.902, acc=0.563, loss=111.113, backward_time=0.306, grad_norm=56.561, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.080, optim0_lr0=1.723e-04, train_time=1.420
+[gpua014:0/64] 2023-12-03 07:04:07,316 (trainer:735) INFO: 6epoch:train:5901-6000batch: iter_time=9.515e-05, forward_time=0.148, loss_ctc=82.036, loss_att=70.187, acc=0.597, loss=73.742, backward_time=0.302, grad_norm=39.246, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.081, optim0_lr0=1.722e-04, train_time=1.282
+[gpua014:0/64] 2023-12-03 07:06:14,145 (trainer:735) INFO: 6epoch:train:6001-6100batch: iter_time=8.966e-05, forward_time=0.165, loss_ctc=83.828, loss_att=70.107, acc=0.623, loss=74.223, backward_time=0.293, grad_norm=38.532, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.084, optim0_lr0=1.721e-04, train_time=1.268
+[gpua014:0/64] 2023-12-03 07:08:37,612 (trainer:735) INFO: 6epoch:train:6101-6200batch: iter_time=9.435e-05, forward_time=0.148, loss_ctc=93.974, loss_att=85.556, acc=0.596, loss=88.082, backward_time=0.301, grad_norm=43.224, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.081, optim0_lr0=1.720e-04, train_time=1.434
+[gpua014:0/64] 2023-12-03 07:09:58,054 (multiple_iter_factory:32) INFO: Building 5th iter-factory...
+[gpua014:0/64] 2023-12-03 07:10:16,148 (s2t:445) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua014:0/64] 2023-12-03 07:10:19,736 (abs_task:1616) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"}
+  preprocess: <espnet2.train.preprocessor.S2TPreprocessor object at 0x7faad94e3f40>)
+[gpua014:0/64] 2023-12-03 07:10:19,736 (abs_task:1617) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, 
+[gpua014:0/64] 2023-12-03 07:10:19,739 (abs_task:1618) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257
+[gpua014:0/64] 2023-12-03 07:15:53,332 (trainer:735) INFO: 6epoch:train:6201-6300batch: iter_time=2.114, forward_time=0.172, loss_ctc=104.536, loss_att=93.071, acc=0.600, loss=96.511, backward_time=0.285, grad_norm=50.403, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.082, optim0_lr0=1.719e-04, train_time=4.357
+[gpua014:0/64] 2023-12-03 07:17:49,674 (trainer:735) INFO: 6epoch:train:6301-6400batch: iter_time=9.459e-05, forward_time=0.147, loss_ctc=103.191, loss_att=88.870, acc=0.594, loss=93.167, backward_time=0.278, grad_norm=49.930, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.080, optim0_lr0=1.718e-04, train_time=1.163
+[gpua014:0/64] 2023-12-03 07:20:05,952 (trainer:735) INFO: 6epoch:train:6401-6500batch: iter_time=8.970e-05, forward_time=0.147, loss_ctc=104.693, loss_att=83.583, acc=0.607, loss=89.916, backward_time=0.288, grad_norm=50.439, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.080, optim0_lr0=1.717e-04, train_time=1.363
+[gpua014:0/64] 2023-12-03 07:22:09,746 (trainer:735) INFO: 6epoch:train:6501-6600batch: iter_time=9.357e-05, forward_time=0.147, loss_ctc=87.000, loss_att=75.555, acc=0.594, loss=78.989, backward_time=0.280, grad_norm=46.451, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.081, optim0_lr0=1.715e-04, train_time=1.238
+[gpua014:0/64] 2023-12-03 07:24:18,665 (trainer:735) INFO: 6epoch:train:6601-6700batch: iter_time=9.390e-05, forward_time=0.147, loss_ctc=97.938, loss_att=81.148, acc=0.589, loss=86.185, backward_time=0.305, grad_norm=50.959, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.080, optim0_lr0=1.714e-04, train_time=1.289
+[gpua014:0/64] 2023-12-03 07:26:33,140 (trainer:735) INFO: 6epoch:train:6701-6800batch: iter_time=8.798e-05, forward_time=0.147, loss_ctc=96.721, loss_att=86.772, acc=0.596, loss=89.757, backward_time=0.324, grad_norm=47.676, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.080, optim0_lr0=1.713e-04, train_time=1.345
+[gpua014:0/64] 2023-12-03 07:28:43,800 (trainer:735) INFO: 6epoch:train:6801-6900batch: iter_time=8.177e-05, forward_time=0.160, loss_ctc=94.243, loss_att=73.716, acc=0.606, loss=79.874, backward_time=0.318, grad_norm=47.492, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.081, optim0_lr0=1.712e-04, train_time=1.306
+
+gpua014:120545:120655 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : port error
+
+gpua014:120546:120654 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : port error
+
+gpua014:120543:120653 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : port error
+
+gpua014:120544:120652 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : port error
+
+gpua014:120543:120653 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : port active
+
+gpua014:120544:120652 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : port active
+
+gpua014:120545:120655 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : port active
+
+gpua014:120546:120654 [0] transport/net_ib.cc:93 NCCL WARN NET/IB : Got async event : port active
+srun: Job step aborted: Waiting up to 32 seconds for job step to finish.