diff --git "a/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/train.30.log" "b/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/train.30.log" new file mode 100644--- /dev/null +++ "b/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/train.30.log" @@ -0,0 +1,3731 @@ +# Running on gpua002.delta.ncsa.illinois.edu +# Started at Tue Jan 16 16:51:59 CST 2024 +# SLURMD_NODENAME=gpua002 +# SLURM_CLUSTER_NAME=delta +# SLURM_CONF=/var/spool/slurmd/conf-cache/slurm.conf +# SLURM_CPUS_ON_NODE=64 +# SLURM_CPUS_PER_TASK=64 +# SLURM_EXPORT_ENV=PATH +# SLURM_GET_USER_ENV=1 +# SLURM_GPUS_ON_NODE=4 +# SLURM_GTIDS=0 +# SLURM_JOBID=2863115 +# SLURM_JOB_ACCOUNT=bbjs-delta-gpu +# SLURM_JOB_CPUS_PER_NODE='64(x16)' +# SLURM_JOB_END_TIME=1705618303 +# SLURM_JOB_GID=202 +# SLURM_JOB_GPUS=0,1,2,3 +# SLURM_JOB_ID=2863115 +# SLURM_JOB_NAME=exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/train.log +# SLURM_JOB_NODELIST='gpua[002,013,016,024,034-035,039,044,051,053,058-060,063,095,099]' +# SLURM_JOB_NUM_NODES=16 +# SLURM_JOB_PARTITION=gpuA100x4 +# SLURM_JOB_QOS=bbjs-delta-gpu +# SLURM_JOB_START_TIME=1705445503 +# SLURM_JOB_UID=68077 +# SLURM_JOB_USER=peng6 +# SLURM_LOCALID=0 +# SLURM_MEM_PER_NODE=240000 +# SLURM_NNODES=16 +# SLURM_NODEID=0 +# SLURM_NODELIST='gpua[002,013,016,024,034-035,039,044,051,053,058-060,063,095,099]' +# SLURM_NODE_ALIASES='(null)' +# SLURM_OPEN_MODE=a +# SLURM_PRIO_PROCESS=0 +# SLURM_PROCID=0 +# SLURM_SUBMIT_DIR=/scratch/bbjs/peng6/espnet-owsm-ctc/egs2/owsm_v3.1_ctc/s2t1 +# SLURM_SUBMIT_HOST=dt-login01.delta.ncsa.illinois.edu +# SLURM_TASKS_PER_NODE='1(x16)' +# SLURM_TASK_PID=4152509 +# SLURM_TOPOLOGY_ADDR=ss00.ss05.gpua002 +# SLURM_TOPOLOGY_ADDR_PATTERN=switch.switch.node +# SLURM_WORKING_CLUSTER=delta:dt-sched:6817:9984:109 +# srun --export=ALL python3 -m espnet2.bin.s2t_train --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-owsm-ctc/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_411bba74-6e18-4e8f-8d97-412e795e9ba2 +/scratch/bbjs/peng6/espnet-owsm-ctc/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-ctc/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_/scratch/bbjs/peng6/espnet-owsm-ctc/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-ctc/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_/scratch/bbjs/peng6/espnet-owsm-ctc/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-ctc/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_/scratch/bbjs/peng6/espnet-owsm-ctc/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-ctc/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_/scratch/bbjs/peng6/espnet-owsm-ctc/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-ctc/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_/scratch/bbjs/peng6/espnet-owsm-ctc/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-ctc/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_/scratch/bbjs/peng6/espnet-owsm-ctc/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-ctc/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_/scratch/bbjs/peng6/espnet-owsm-ctc/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-ctc/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_/scratch/bbjs/peng6/espnet-owsm-ctc/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-ctc/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_/scratch/bbjs/peng6/espnet-owsm-ctc/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-ctc/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_/scratch/bbjs/peng6/espnet-owsm-ctc/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-ctc/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_/scratch/bbjs/peng6/espnet-owsm-ctc/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-ctc/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_/scratch/bbjs/peng6/espnet-owsm-ctc/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-ctc/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_/scratch/bbjs/peng6/espnet-owsm-ctc/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-ctc/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_/scratch/bbjs/peng6/espnet-owsm-ctc/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-ctc/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_/scratch/bbjs/peng6/espnet-owsm-ctc/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-ctc/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-owsm-ctc/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_411bba74-6e18-4e8f-8d97-412e795e9ba2 +raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method fraw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method fraw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method fraw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-owsm-ctc/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_411bba74-6e18-4e8f-8d97-412e795e9ba2 +ile:///scratch/bbjs/peng6/espnet-owsm-ctc/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_411bba74-6e18-4e8f-8d97-412e795e9ba2 +ile:///scratch/bbjs/peng6/espnet-owsm-ctc/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_411bba74-6e18-4e8f-8d97-412e795e9ba2 +ile:///scratch/bbjs/peng6/espnet-owsm-ctc/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_411bba74-6e18-4e8f-8d97-412e795e9ba2 +raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-owsm-ctc/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_411bba74-6e18-4e8f-8d97-412e795e9ba2 +raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-owsm-ctc/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_411bba74-6e18-4e8f-8d97-412e795e9ba2 +raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method fraw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-owsm-ctc/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_411bba74-6e18-4e8f-8d97-412e795e9ba2 +raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-owsm-ctc/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_411bba74-6e18-4e8f-8d97-412e795e9ba2 +ile:///scratch/bbjs/peng6/espnet-owsm-ctc/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_411bba74-6e18-4e8f-8d97-412e795e9ba2 +raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method fraw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method fraw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-owsm-ctc/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_411bba74-6e18-4e8f-8d97-412e795e9ba2 +ile:///scratch/bbjs/peng6/espnet-owsm-ctc/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_411bba74-6e18-4e8f-8d97-412e795e9ba2 +ile:///scratch/bbjs/peng6/espnet-owsm-ctc/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_411bba74-6e18-4e8f-8d97-412e795e9ba2 +raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method fraw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method fraw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-owsm-ctc/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_411bba74-6e18-4e8f-8d97-412e795e9ba2 +ile:///scratch/bbjs/peng6/espnet-owsm-ctc/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_411bba74-6e18-4e8f-8d97-412e795e9ba2 +ile:///scratch/bbjs/peng6/espnet-owsm-ctc/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_411bba74-6e18-4e8f-8d97-412e795e9ba2 +[gpua002:0/64] 2024-01-16 16:56:14,006 (distributed_c10d:319) INFO: Added key: store_based_barrier_key:1 to store for rank: 0 +[gpua002:0/64] 2024-01-16 16:56:15,952 (distributed_c10d:353) INFO: Rank 0: Completed store-based barrier for key:store_based_barrier_key:1 with 64 nodes. +[gpua002:0/64] 2024-01-16 16:56:15,991 (s2t:420) INFO: Vocabulary size: 50002 +[gpua002:0/64] 2024-01-16 16:56:29,463 (abs_task:1270) INFO: pytorch.version=1.13.1, cuda.available=True, cudnn.version=8500, cudnn.benchmark=False, cudnn.deterministic=True +[gpua002:0/64] 2024-01-16 16:56:29,475 (abs_task:1271) INFO: Model structure: +ESPnetS2TCTCModel( + (frontend): DefaultFrontend( + (stft): Stft(n_fft=512, win_length=400, hop_length=160, center=True, normalized=False, onesided=True) + (frontend): Frontend() + (logmel): LogMel(sr=16000, n_fft=512, n_mels=80, fmin=0, fmax=8000.0, htk=False) + ) + (specaug): SpecAug( + (freq_mask): MaskAlongAxis(mask_width_range=[0, 27], num_mask=2, axis=freq) + (time_mask): MaskAlongAxisVariableMaxWidth(mask_width_ratio_range=[0.0, 0.05], num_mask=10, axis=time) + ) + (normalize): GlobalMVN(stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz, norm_means=True, norm_vars=True) + (encoder): EBranchformerCTCEncoder( + (embed): Conv2dSubsampling8( + (conv): Sequential( + (0): Conv2d(1, 1024, kernel_size=(3, 3), stride=(2, 2)) + (1): ReLU() + (2): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(2, 2)) + (3): ReLU() + (4): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(2, 2)) + (5): ReLU() + ) + (out): Linear(in_features=9216, out_features=1024, bias=True) + (pos_enc): PositionalEncoding( + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + (encoders): MultiSequential( + (0): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (1): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (2): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (cross_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (norm_cross_attn): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (3): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (4): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (5): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (cross_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (norm_cross_attn): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (6): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (7): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (8): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (cross_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (norm_cross_attn): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (9): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (10): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (11): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (cross_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (norm_cross_attn): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (12): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (13): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (14): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (cross_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (norm_cross_attn): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (15): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (16): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (17): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (cross_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (norm_cross_attn): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (18): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (19): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (20): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (cross_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (norm_cross_attn): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (21): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (22): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (23): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (cross_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (norm_cross_attn): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (24): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (25): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (26): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (cross_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (norm_cross_attn): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + ) + (after_norm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (conditioning_layer): Linear(in_features=50002, out_features=1024, bias=True) + ) + (prompt_encoder): TransformerEncoder( + (encoders): MultiSequential( + (0): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=512, out_features=512, bias=True) + (linear_k): Linear(in_features=512, out_features=512, bias=True) + (linear_v): Linear(in_features=512, out_features=512, bias=True) + (linear_out): Linear(in_features=512, out_features=512, bias=True) + (dropout): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=512, out_features=2048, bias=True) + (w_2): Linear(in_features=2048, out_features=512, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((512,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((512,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (1): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=512, out_features=512, bias=True) + (linear_k): Linear(in_features=512, out_features=512, bias=True) + (linear_v): Linear(in_features=512, out_features=512, bias=True) + (linear_out): Linear(in_features=512, out_features=512, bias=True) + (dropout): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=512, out_features=2048, bias=True) + (w_2): Linear(in_features=2048, out_features=512, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((512,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((512,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (2): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=512, out_features=512, bias=True) + (linear_k): Linear(in_features=512, out_features=512, bias=True) + (linear_v): Linear(in_features=512, out_features=512, bias=True) + (linear_out): Linear(in_features=512, out_features=512, bias=True) + (dropout): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=512, out_features=2048, bias=True) + (w_2): Linear(in_features=2048, out_features=512, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((512,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((512,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (3): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=512, out_features=512, bias=True) + (linear_k): Linear(in_features=512, out_features=512, bias=True) + (linear_v): Linear(in_features=512, out_features=512, bias=True) + (linear_out): Linear(in_features=512, out_features=512, bias=True) + (dropout): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=512, out_features=2048, bias=True) + (w_2): Linear(in_features=2048, out_features=512, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((512,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((512,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + (after_norm): LayerNorm((512,), eps=1e-12, elementwise_affine=True) + ) + (embed): Embedding(50002, 512) + (pos_enc): PositionalEncoding( + (dropout): Dropout(p=0.0, inplace=False) + ) + (embed_proj): Linear(in_features=512, out_features=1024, bias=True) + (prompt_proj): Linear(in_features=512, out_features=1024, bias=True) + (ctc): CTC( + (ctc_lo): Linear(in_features=1024, out_features=50002, bias=True) + (ctc_loss): CTCLoss() + ) +) + +Model summary: + Class Name: ESPnetS2TCTCModel + Total Number of model parameters: 1.01 B + Number of trainable parameters: 1.01 B (100.0%) + Size: 4.02 GB + Type: torch.float32 +[gpua002:0/64] 2024-01-16 16:56:29,475 (abs_task:1274) INFO: Optimizer: +AdamW ( +Parameter Group 0 + amsgrad: False + betas: [0.9, 0.98] + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 0.0002 + lr: 1.6666666666666667e-09 + maximize: False + weight_decay: 0.0 +) +[gpua002:0/64] 2024-01-16 16:56:29,475 (abs_task:1275) INFO: Scheduler: PiecewiseLinearWarmupLR(warmup_steps_list=[0, 30000, 60000], warmup_lr_list=[0.0, 5e-05, 0.0002]) +[gpua002:0/64] 2024-01-16 16:56:29,489 (abs_task:1284) INFO: Saving the configuration in exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/config.yaml +[gpua002:0/64] 2024-01-16 16:56:35,343 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua002:0/64] 2024-01-16 16:56:36,319 (abs_task:1660) INFO: [valid] dataset: +ESPnetDataset( + speech: {"path": "dump/raw/dev_v3/wav.scp", "type": "kaldi_ark"} + text_prev: {"path": "dump/raw/dev_v3/text.prev", "type": "text"} + text_ctc: {"path": "dump/raw/dev_v3/text.ctc", "type": "text"} + text: {"path": "dump/raw/dev_v3/text", "type": "text"} + preprocess: ) +[gpua002:0/64] 2024-01-16 16:56:36,319 (abs_task:1661) INFO: [valid] Batch sampler: UnsortedBatchSampler(N-batch=4671, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/valid/speech_shape, +[gpua002:0/64] 2024-01-16 16:56:36,321 (abs_task:1662) INFO: [valid] mini-batch sizes summary: N-batch=4671, mean=256.0, min=256, max=257 +[gpua002:0/64] 2024-01-16 16:57:10,263 (trainer:167) INFO: The training was resumed using exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/checkpoint.pth +gpua002:4152608:4152608 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.2<0> +gpua002:4152608:4152608 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua002:4152608:4152608 [0] NCCL INFO cudaDriverVersion 12020 +NCCL version 2.14.3+cuda11.7 +[gpua002:0/64] 2024-01-16 16:57:15,990 (trainer:298) INFO: 6/45epoch started +[gpua002:0/64] 2024-01-16 16:57:16,030 (multiple_iter_factory:32) INFO: Building 0th iter-factory... +[gpua002:0/64] 2024-01-16 16:57:34,644 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua002:0/64] 2024-01-16 16:57:38,029 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpua002:0/64] 2024-01-16 16:57:38,029 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpua002:0/64] 2024-01-16 16:57:38,032 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +gpua016:219652:219652 [1] NCCL INFO cudaDriverVersion 12020 +gpua016:219652:219652 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.16<0> +gpua016:219652:219652 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua016:219652:219722 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.16<0> +gpua016:219652:219722 [1] NCCL INFO Using network IB +gpua016:219652:219722 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpua016:219652:219722 [1] NCCL INFO Trees [0] 10/4/-1->9->8 [1] 10/-1/-1->9->8 +gpua016:219652:219722 [1] NCCL INFO Channel 00/0 : 9[46000] -> 10[85000] via P2P/IPC/read +gpua016:219652:219722 [1] NCCL INFO Channel 01/0 : 9[46000] -> 10[85000] via P2P/IPC/read +gpua016:219652:219722 [1] NCCL INFO Connected all rings +gpua016:219652:219722 [1] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [receive] via NET/IB/0 +gpua016:219652:219722 [1] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [send] via NET/IB/0 +gpua016:219652:219722 [1] NCCL INFO Channel 00/0 : 9[46000] -> 8[7000] via P2P/IPC/read +gpua016:219652:219722 [1] NCCL INFO Channel 01/0 : 9[46000] -> 8[7000] via P2P/IPC/read +gpua016:219652:219722 [1] NCCL INFO Connected all trees +gpua016:219652:219722 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua016:219652:219722 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua016:219652:219722 [1] NCCL INFO comm 0x55c37ea1f220 rank 9 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpua016:219651:219651 [0] NCCL INFO cudaDriverVersion 12020 +gpua016:219651:219651 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.16<0> +gpua016:219651:219651 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua016:219651:219723 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.16<0> +gpua016:219651:219723 [0] NCCL INFO Using network IB +gpua016:219651:219723 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpua016:219651:219723 [0] NCCL INFO Trees [0] 9/12/-1->8->17 [1] 9/-1/-1->8->5 +gpua016:219651:219723 [0] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [receive] via NET/IB/0 +gpua016:219651:219723 [0] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [receive] via NET/IB/0 +gpua016:219651:219723 [0] NCCL INFO Channel 00/0 : 8[7000] -> 9[46000] via P2P/IPC/read +gpua016:219651:219723 [0] NCCL INFO Channel 01/0 : 8[7000] -> 9[46000] via P2P/IPC/read +gpua016:219651:219723 [0] NCCL INFO Connected all rings +gpua013:301085:301085 [2] NCCL INFO cudaDriverVersion 12020 +gpua013:301085:301085 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.13<0> +gpua013:301085:301085 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua013:301085:301154 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.13<0> +gpua013:301085:301154 [2] NCCL INFO Using network IB +gpua013:301085:301154 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpua013:301085:301154 [2] NCCL INFO Trees [0] 7/-1/-1->6->5 [1] 7/-1/-1->6->5 +gpua013:301085:301154 [2] NCCL INFO Channel 00/0 : 6[85000] -> 7[c7000] via P2P/IPC/read +gpua013:301085:301154 [2] NCCL INFO Channel 01/0 : 6[85000] -> 7[c7000] via P2P/IPC/read +gpua013:301085:301154 [2] NCCL INFO Connected all rings +gpua013:301085:301154 [2] NCCL INFO Channel 00/0 : 6[85000] -> 5[46000] via P2P/IPC/read +gpua013:301085:301154 [2] NCCL INFO Channel 01/0 : 6[85000] -> 5[46000] via P2P/IPC/read +gpua013:301085:301154 [2] NCCL INFO Connected all trees +gpua051:296045:296045 [2] NCCL INFO cudaDriverVersion 12020 +gpua051:296045:296045 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.51<0> +gpua051:296045:296045 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua051:296045:296110 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.51<0> +gpua051:296045:296110 [2] NCCL INFO Using network IB +gpua051:296045:296110 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpua051:296045:296110 [2] NCCL INFO Trees [0] 35/-1/-1->34->33 [1] 35/-1/-1->34->33 +gpua051:296045:296110 [2] NCCL INFO Channel 00/0 : 34[85000] -> 35[c7000] via P2P/IPC/read +gpua051:296045:296110 [2] NCCL INFO Channel 01/0 : 34[85000] -> 35[c7000] via P2P/IPC/read +gpua051:296045:296110 [2] NCCL INFO Connected all rings +gpua051:296045:296110 [2] NCCL INFO Channel 00/0 : 34[85000] -> 33[46000] via P2P/IPC/read +gpua051:296045:296110 [2] NCCL INFO Channel 01/0 : 34[85000] -> 33[46000] via P2P/IPC/read +gpua035:169209:169209 [1] NCCL INFO cudaDriverVersion 12020 +gpua035:169209:169209 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.35<0> +gpua035:169209:169209 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua035:169209:169293 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.35<0> +gpua035:169209:169293 [1] NCCL INFO Using network IB +gpua035:169209:169293 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpua035:169209:169293 [1] NCCL INFO Trees [0] 22/-1/-1->21->20 [1] 22/24/-1->21->20 +gpua035:169209:169293 [1] NCCL INFO Channel 00/0 : 21[46000] -> 22[85000] via P2P/IPC/read +gpua035:169209:169293 [1] NCCL INFO Channel 01/0 : 21[46000] -> 22[85000] via P2P/IPC/read +gpua035:169209:169293 [1] NCCL INFO Connected all rings +gpua035:169209:169293 [1] NCCL INFO Channel 01/0 : 21[46000] -> 24[7000] [send] via NET/IB/0 +gpua035:169209:169293 [1] NCCL INFO Channel 01/0 : 24[7000] -> 21[46000] [receive] via NET/IB/0 +gpua039:229347:229347 [1] NCCL INFO cudaDriverVersion 12020 +gpua039:229347:229347 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.39<0> +gpua039:229347:229347 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua039:229347:229417 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.39<0> +gpua039:229347:229417 [1] NCCL INFO Using network IB +gpua039:229347:229417 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpua039:229347:229417 [1] NCCL INFO Trees [0] 26/20/-1->25->24 [1] 26/-1/-1->25->24 +gpua039:229347:229417 [1] NCCL INFO Channel 00/0 : 25[46000] -> 26[85000] via P2P/IPC/read +gpua039:229347:229417 [1] NCCL INFO Channel 01/0 : 25[46000] -> 26[85000] via P2P/IPC/read +gpua039:229347:229417 [1] NCCL INFO Connected all rings +gpua039:229347:229417 [1] NCCL INFO Channel 00/0 : 20[7000] -> 25[46000] [receive] via NET/IB/0 +gpua039:229347:229417 [1] NCCL INFO Channel 00/0 : 25[46000] -> 20[7000] [send] via NET/IB/0 +gpua053:87449:87449 [0] NCCL INFO cudaDriverVersion 12020 +gpua053:87449:87449 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.53<0> +gpua053:87449:87449 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua053:87449:87517 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.53<0> +gpua053:87449:87517 [0] NCCL INFO Using network IB +gpua053:87449:87517 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpua053:87449:87517 [0] NCCL INFO Trees [0] 37/-1/-1->36->41 [1] 37/32/-1->36->44 +gpua053:87449:87517 [0] NCCL INFO Channel 00/0 : 35[c7000] -> 36[7000] [receive] via NET/IB/0 +gpua053:87449:87517 [0] NCCL INFO Channel 01/0 : 35[c7000] -> 36[7000] [receive] via NET/IB/0 +gpua053:87449:87517 [0] NCCL INFO Channel 00/0 : 36[7000] -> 37[46000] via P2P/IPC/read +gpua053:87449:87517 [0] NCCL INFO Channel 01/0 : 36[7000] -> 37[46000] via P2P/IPC/read +gpua053:87449:87517 [0] NCCL INFO Connected all rings +gpua058:7362:7362 [1] NCCL INFO cudaDriverVersion 12020 +gpua058:7362:7362 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.58<0> +gpua058:7362:7362 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua058:7362:7435 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.58<0> +gpua058:7362:7435 [1] NCCL INFO Using network IB +gpua058:7362:7435 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpua058:7362:7435 [1] NCCL INFO Trees [0] 42/36/-1->41->40 [1] 42/-1/-1->41->40 +gpua058:7362:7435 [1] NCCL INFO Channel 00/0 : 41[46000] -> 42[85000] via P2P/IPC/read +gpua058:7362:7435 [1] NCCL INFO Channel 01/0 : 41[46000] -> 42[85000] via P2P/IPC/read +gpua058:7362:7435 [1] NCCL INFO Connected all rings +gpua058:7362:7435 [1] NCCL INFO Channel 00/0 : 36[7000] -> 41[46000] [receive] via NET/IB/0 +gpua058:7362:7435 [1] NCCL INFO Channel 00/0 : 41[46000] -> 36[7000] [send] via NET/IB/0 +gpua059:311037:311037 [1] NCCL INFO cudaDriverVersion 12020 +gpua059:311037:311037 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.59<0> +gpua059:311037:311037 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua059:311037:311107 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.59<0> +gpua059:311037:311107 [1] NCCL INFO Using network IB +gpua059:311037:311107 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpua059:311037:311107 [1] NCCL INFO Trees [0] 46/-1/-1->45->44 [1] 46/52/-1->45->44 +gpua059:311037:311107 [1] NCCL INFO Channel 00/0 : 45[46000] -> 46[85000] via P2P/IPC/read +gpua059:311037:311107 [1] NCCL INFO Channel 01/0 : 45[46000] -> 46[85000] via P2P/IPC/read +gpua059:311037:311107 [1] NCCL INFO Connected all rings +gpua059:311037:311107 [1] NCCL INFO Channel 01/0 : 45[46000] -> 52[7000] [send] via NET/IB/0 +gpua059:311037:311107 [1] NCCL INFO Channel 01/0 : 52[7000] -> 45[46000] [receive] via NET/IB/0 +gpua095:565525:565525 [0] NCCL INFO cudaDriverVersion 12020 +gpua095:565525:565525 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.95<0> +gpua095:565525:565525 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua095:565525:565594 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.95<0> +gpua095:565525:565594 [0] NCCL INFO Using network IB +gpua095:565525:565594 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpua095:565525:565594 [0] NCCL INFO Trees [0] 57/60/-1->56->48 [1] 57/-1/-1->56->53 +gpua095:565525:565594 [0] NCCL INFO Channel 00/0 : 55[c7000] -> 56[7000] [receive] via NET/IB/0 +gpua095:565525:565594 [0] NCCL INFO Channel 01/0 : 55[c7000] -> 56[7000] [receive] via NET/IB/0 +gpua095:565525:565594 [0] NCCL INFO Channel 00/0 : 56[7000] -> 57[46000] via P2P/IPC/read +gpua095:565525:565594 [0] NCCL INFO Channel 01/0 : 56[7000] -> 57[46000] via P2P/IPC/read +gpua095:565525:565594 [0] NCCL INFO Connected all rings +gpua063:527682:527682 [1] NCCL INFO cudaDriverVersion 12020 +gpua063:527682:527682 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.63<0> +gpua063:527682:527682 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua063:527682:527753 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.63<0> +gpua063:527682:527753 [1] NCCL INFO Using network IB +gpua063:527682:527753 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpua063:527682:527753 [1] NCCL INFO Trees [0] 54/-1/-1->53->52 [1] 54/56/-1->53->52 +gpua063:527682:527753 [1] NCCL INFO Channel 00/0 : 53[46000] -> 54[85000] via P2P/IPC/read +gpua063:527682:527753 [1] NCCL INFO Channel 01/0 : 53[46000] -> 54[85000] via P2P/IPC/read +gpua063:527682:527753 [1] NCCL INFO Connected all rings +gpua063:527682:527753 [1] NCCL INFO Channel 01/0 : 53[46000] -> 56[7000] [send] via NET/IB/0 +gpua063:527682:527753 [1] NCCL INFO Channel 01/0 : 56[7000] -> 53[46000] [receive] via NET/IB/0 +gpua016:219651:219723 [0] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [receive] via NET/IB/0 +gpua016:219651:219723 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [send] via NET/IB/0 +gpua016:219651:219723 [0] NCCL INFO Channel 00/0 : 8[7000] -> 17[46000] [send] via NET/IB/0 +gpua016:219651:219723 [0] NCCL INFO Channel 00/0 : 17[46000] -> 8[7000] [receive] via NET/IB/0 +gpua016:219651:219723 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [receive] via NET/IB/0 +gpua016:219651:219723 [0] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [send] via NET/IB/0 +gpua016:219651:219723 [0] NCCL INFO Connected all trees +gpua016:219651:219723 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua016:219651:219723 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua016:219651:219723 [0] NCCL INFO comm 0x5633e5562a10 rank 8 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpua016:219654:219654 [3] NCCL INFO cudaDriverVersion 12020 +gpua013:301085:301154 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua013:301085:301154 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua013:301085:301154 [2] NCCL INFO comm 0x55feb7dd22e0 rank 6 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpua034:95153:95153 [2] NCCL INFO cudaDriverVersion 12020 +gpua034:95153:95153 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.34<0> +gpua034:95153:95153 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua034:95153:95217 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.34<0> +gpua034:95153:95217 [2] NCCL INFO Using network IB +gpua034:95153:95217 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpua034:95153:95217 [2] NCCL INFO Trees [0] 19/-1/-1->18->17 [1] 19/-1/-1->18->17 +gpua034:95153:95217 [2] NCCL INFO Channel 00/0 : 18[85000] -> 19[c7000] via P2P/IPC/read +gpua034:95153:95217 [2] NCCL INFO Channel 01/0 : 18[85000] -> 19[c7000] via P2P/IPC/read +gpua034:95153:95217 [2] NCCL INFO Connected all rings +gpua034:95153:95217 [2] NCCL INFO Channel 00/0 : 18[85000] -> 17[46000] via P2P/IPC/read +gpua034:95153:95217 [2] NCCL INFO Channel 01/0 : 18[85000] -> 17[46000] via P2P/IPC/read +gpua034:95153:95217 [2] NCCL INFO Connected all trees +gpua051:296045:296110 [2] NCCL INFO Connected all trees +gpua051:296045:296110 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua051:296045:296110 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua051:296045:296110 [2] NCCL INFO comm 0x564c6a5f2780 rank 34 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpua035:169209:169293 [1] NCCL INFO Channel 00/0 : 21[46000] -> 20[7000] via P2P/IPC/read +gpua035:169209:169293 [1] NCCL INFO Channel 01/0 : 21[46000] -> 20[7000] via P2P/IPC/read +gpua035:169209:169293 [1] NCCL INFO Connected all trees +gpua035:169209:169293 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua035:169209:169293 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua035:169209:169293 [1] NCCL INFO comm 0x561a37875bd0 rank 21 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpua039:229347:229417 [1] NCCL INFO Channel 00/0 : 25[46000] -> 24[7000] via P2P/IPC/read +gpua039:229347:229417 [1] NCCL INFO Channel 01/0 : 25[46000] -> 24[7000] via P2P/IPC/read +gpua039:229347:229417 [1] NCCL INFO Connected all trees +gpua039:229347:229417 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua039:229347:229417 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua039:229347:229417 [1] NCCL INFO comm 0x5560e3e102e0 rank 25 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpua039:229349:229349 [3] NCCL INFO cudaDriverVersion 12020 +gpua039:229349:229349 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.39<0> +gpua039:229349:229349 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua039:229349:229419 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.39<0> +gpua039:229349:229419 [3] NCCL INFO Using network IB +gpua039:229349:229419 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpua053:87449:87517 [0] NCCL INFO Channel 01/0 : 32[7000] -> 36[7000] [receive] via NET/IB/0 +gpua053:87449:87517 [0] NCCL INFO Channel 00/0 : 36[7000] -> 41[46000] [send] via NET/IB/0 +gpua053:87449:87517 [0] NCCL INFO Channel 01/0 : 36[7000] -> 44[7000] [send] via NET/IB/0 +gpua053:87449:87517 [0] NCCL INFO Channel 01/0 : 44[7000] -> 36[7000] [receive] via NET/IB/0 +gpua053:87449:87517 [0] NCCL INFO Channel 00/0 : 41[46000] -> 36[7000] [receive] via NET/IB/0 +gpua053:87449:87517 [0] NCCL INFO Channel 01/0 : 36[7000] -> 32[7000] [send] via NET/IB/0 +gpua053:87449:87517 [0] NCCL INFO Connected all trees +gpua053:87449:87517 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua053:87449:87517 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua053:87449:87517 [0] NCCL INFO comm 0x562749f31aa0 rank 36 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpua053:87452:87452 [3] NCCL INFO cudaDriverVersion 12020 +gpua053:87452:87452 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.53<0> +gpua058:7362:7435 [1] NCCL INFO Channel 00/0 : 41[46000] -> 40[7000] via P2P/IPC/read +gpua058:7362:7435 [1] NCCL INFO Channel 01/0 : 41[46000] -> 40[7000] via P2P/IPC/read +gpua058:7362:7435 [1] NCCL INFO Connected all trees +gpua058:7362:7435 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua058:7362:7435 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua058:7362:7435 [1] NCCL INFO comm 0x56471d551b50 rank 41 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpua059:311037:311107 [1] NCCL INFO Channel 00/0 : 45[46000] -> 44[7000] via P2P/IPC/read +gpua059:311037:311107 [1] NCCL INFO Channel 01/0 : 45[46000] -> 44[7000] via P2P/IPC/read +gpua059:311037:311107 [1] NCCL INFO Connected all trees +gpua059:311037:311107 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua059:311037:311107 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua059:311037:311107 [1] NCCL INFO comm 0x55847e503d60 rank 45 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpua095:565525:565594 [0] NCCL INFO Channel 01/0 : 53[46000] -> 56[7000] [receive] via NET/IB/0 +gpua095:565525:565594 [0] NCCL INFO Channel 00/0 : 56[7000] -> 60[7000] [send] via NET/IB/0 +gpua095:565525:565594 [0] NCCL INFO Channel 00/0 : 48[7000] -> 56[7000] [receive] via NET/IB/0 +gpua095:565525:565594 [0] NCCL INFO Channel 00/0 : 56[7000] -> 48[7000] [send] via NET/IB/0 +gpua095:565525:565594 [0] NCCL INFO Channel 00/0 : 60[7000] -> 56[7000] [receive] via NET/IB/0 +gpua095:565525:565594 [0] NCCL INFO Channel 01/0 : 56[7000] -> 53[46000] [send] via NET/IB/0 +gpua095:565525:565594 [0] NCCL INFO Connected all trees +gpua095:565525:565594 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua095:565525:565594 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua095:565525:565594 [0] NCCL INFO comm 0x5621861bb140 rank 56 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpua063:527682:527753 [1] NCCL INFO Channel 00/0 : 53[46000] -> 52[7000] via P2P/IPC/read +gpua063:527682:527753 [1] NCCL INFO Channel 01/0 : 53[46000] -> 52[7000] via P2P/IPC/read +gpua063:527682:527753 [1] NCCL INFO Connected all trees +gpua063:527682:527753 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua063:527682:527753 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua063:527682:527753 [1] NCCL INFO comm 0x562459029510 rank 53 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpua016:219654:219654 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.16<0> +gpua016:219654:219654 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua016:219654:219724 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.16<0> +gpua016:219654:219724 [3] NCCL INFO Using network IB +gpua016:219654:219724 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpua016:219654:219724 [3] NCCL INFO Trees [0] -1/-1/-1->11->10 [1] -1/-1/-1->11->10 +gpua016:219654:219724 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [send] via NET/IB/0 +gpua016:219654:219724 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [send] via NET/IB/0 +gpua016:219654:219724 [3] NCCL INFO Connected all rings +gpua016:219654:219724 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 10[85000] via P2P/IPC/read +gpua016:219654:219724 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 10[85000] via P2P/IPC/read +gpua016:219654:219724 [3] NCCL INFO Connected all trees +gpua013:301083:301083 [0] NCCL INFO cudaDriverVersion 12020 +gpua013:301083:301083 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.13<0> +gpua013:301083:301083 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua013:301083:301156 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.13<0> +gpua013:301083:301156 [0] NCCL INFO Using network IB +gpua013:301083:301156 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpua013:301083:301156 [0] NCCL INFO Trees [0] 5/-1/-1->4->9 [1] 5/0/-1->4->12 +gpua013:301083:301156 [0] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [receive] via NET/IB/0 +gpua013:301083:301156 [0] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [receive] via NET/IB/0 +gpua013:301083:301156 [0] NCCL INFO Channel 00/0 : 4[7000] -> 5[46000] via P2P/IPC/read +gpua013:301083:301156 [0] NCCL INFO Channel 01/0 : 4[7000] -> 5[46000] via P2P/IPC/read +gpua013:301083:301156 [0] NCCL INFO Connected all rings +gpua034:95153:95217 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua034:95153:95217 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua034:95153:95217 [2] NCCL INFO comm 0x556fc4b3f0f0 rank 18 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpua035:169210:169210 [2] NCCL INFO cudaDriverVersion 12020 +gpua035:169210:169210 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.35<0> +gpua035:169210:169210 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua035:169210:169292 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.35<0> +gpua035:169210:169292 [2] NCCL INFO Using network IB +gpua035:169210:169292 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpua035:169210:169292 [2] NCCL INFO Trees [0] 23/-1/-1->22->21 [1] 23/-1/-1->22->21 +gpua035:169210:169292 [2] NCCL INFO Channel 00/0 : 22[85000] -> 23[c7000] via P2P/IPC/read +gpua035:169210:169292 [2] NCCL INFO Channel 01/0 : 22[85000] -> 23[c7000] via P2P/IPC/read +gpua035:169210:169292 [2] NCCL INFO Connected all rings +gpua035:169210:169292 [2] NCCL INFO Channel 00/0 : 22[85000] -> 21[46000] via P2P/IPC/read +gpua035:169210:169292 [2] NCCL INFO Channel 01/0 : 22[85000] -> 21[46000] via P2P/IPC/read +gpua039:229349:229419 [3] NCCL INFO Trees [0] -1/-1/-1->27->26 [1] -1/-1/-1->27->26 +gpua039:229349:229419 [3] NCCL INFO Channel 00/0 : 27[c7000] -> 28[7000] [send] via NET/IB/0 +gpua039:229349:229419 [3] NCCL INFO Channel 01/0 : 27[c7000] -> 28[7000] [send] via NET/IB/0 +gpua039:229349:229419 [3] NCCL INFO Connected all rings +gpua039:229349:229419 [3] NCCL INFO Channel 00/0 : 27[c7000] -> 26[85000] via P2P/IPC/read +gpua039:229349:229419 [3] NCCL INFO Channel 01/0 : 27[c7000] -> 26[85000] via P2P/IPC/read +gpua039:229349:229419 [3] NCCL INFO Connected all trees +gpua039:229349:229419 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua039:229349:229419 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua039:229349:229419 [3] NCCL INFO comm 0x55d692cf4b50 rank 27 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpua053:87452:87452 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua053:87452:87519 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.53<0> +gpua053:87452:87519 [3] NCCL INFO Using network IB +gpua053:87452:87519 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpua053:87452:87519 [3] NCCL INFO Trees [0] -1/-1/-1->39->38 [1] -1/-1/-1->39->38 +gpua053:87452:87519 [3] NCCL INFO Channel 00/0 : 39[c7000] -> 40[7000] [send] via NET/IB/0 +gpua053:87452:87519 [3] NCCL INFO Channel 01/0 : 39[c7000] -> 40[7000] [send] via NET/IB/0 +gpua053:87452:87519 [3] NCCL INFO Connected all rings +gpua053:87452:87519 [3] NCCL INFO Channel 00/0 : 39[c7000] -> 38[85000] via P2P/IPC/read +gpua053:87452:87519 [3] NCCL INFO Channel 01/0 : 39[c7000] -> 38[85000] via P2P/IPC/read +gpua053:87452:87519 [3] NCCL INFO Connected all trees +gpua053:87452:87519 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua058:7361:7361 [0] NCCL INFO cudaDriverVersion 12020 +gpua058:7361:7361 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.58<0> +gpua058:7361:7361 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua058:7361:7436 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.58<0> +gpua058:7361:7436 [0] NCCL INFO Using network IB +gpua058:7361:7436 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpua058:7361:7436 [0] NCCL INFO Trees [0] 41/44/-1->40->49 [1] 41/-1/-1->40->37 +gpua058:7361:7436 [0] NCCL INFO Channel 00/0 : 39[c7000] -> 40[7000] [receive] via NET/IB/0 +gpua058:7361:7436 [0] NCCL INFO Channel 01/0 : 39[c7000] -> 40[7000] [receive] via NET/IB/0 +gpua058:7361:7436 [0] NCCL INFO Channel 00/0 : 40[7000] -> 41[46000] via P2P/IPC/read +gpua058:7361:7436 [0] NCCL INFO Channel 01/0 : 40[7000] -> 41[46000] via P2P/IPC/read +gpua058:7361:7436 [0] NCCL INFO Connected all rings +gpua059:311036:311036 [0] NCCL INFO cudaDriverVersion 12020 +gpua059:311036:311036 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.59<0> +gpua059:311036:311036 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua059:311036:311110 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.59<0> +gpua059:311036:311110 [0] NCCL INFO Using network IB +gpua059:311036:311110 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpua059:311036:311110 [0] NCCL INFO Trees [0] 45/-1/-1->44->40 [1] 45/36/-1->44->29 +gpua059:311036:311110 [0] NCCL INFO Channel 00/0 : 43[c7000] -> 44[7000] [receive] via NET/IB/0 +gpua059:311036:311110 [0] NCCL INFO Channel 01/0 : 43[c7000] -> 44[7000] [receive] via NET/IB/0 +gpua059:311036:311110 [0] NCCL INFO Channel 00/0 : 44[7000] -> 45[46000] via P2P/IPC/read +gpua059:311036:311110 [0] NCCL INFO Channel 01/0 : 44[7000] -> 45[46000] via P2P/IPC/read +gpua059:311036:311110 [0] NCCL INFO Connected all rings +gpua095:565526:565526 [1] NCCL INFO cudaDriverVersion 12020 +gpua095:565526:565526 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.95<0> +gpua095:565526:565526 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua095:565526:565600 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.95<0> +gpua095:565526:565600 [1] NCCL INFO Using network IB +gpua095:565526:565600 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpua095:565526:565600 [1] NCCL INFO Trees [0] 58/52/-1->57->56 [1] 58/-1/-1->57->56 +gpua095:565526:565600 [1] NCCL INFO Channel 00/0 : 57[46000] -> 58[85000] via P2P/IPC/read +gpua095:565526:565600 [1] NCCL INFO Channel 01/0 : 57[46000] -> 58[85000] via P2P/IPC/read +gpua095:565526:565600 [1] NCCL INFO Connected all rings +gpua095:565526:565600 [1] NCCL INFO Channel 00/0 : 52[7000] -> 57[46000] [receive] via NET/IB/0 +gpua095:565526:565600 [1] NCCL INFO Channel 00/0 : 57[46000] -> 52[7000] [send] via NET/IB/0 +gpua063:527683:527683 [2] NCCL INFO cudaDriverVersion 12020 +gpua063:527683:527683 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.63<0> +gpua063:527683:527683 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua063:527683:527754 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.63<0> +gpua063:527683:527754 [2] NCCL INFO Using network IB +gpua063:527683:527754 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpua063:527683:527754 [2] NCCL INFO Trees [0] 55/-1/-1->54->53 [1] 55/-1/-1->54->53 +gpua063:527683:527754 [2] NCCL INFO Channel 00/0 : 54[85000] -> 55[c7000] via P2P/IPC/read +gpua063:527683:527754 [2] NCCL INFO Channel 01/0 : 54[85000] -> 55[c7000] via P2P/IPC/read +gpua063:527683:527754 [2] NCCL INFO Connected all rings +gpua063:527683:527754 [2] NCCL INFO Channel 00/0 : 54[85000] -> 53[46000] via P2P/IPC/read +gpua063:527683:527754 [2] NCCL INFO Channel 01/0 : 54[85000] -> 53[46000] via P2P/IPC/read +gpua016:219654:219724 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua016:219654:219724 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua016:219654:219724 [3] NCCL INFO comm 0x55d40fa5b190 rank 11 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpua013:301083:301156 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [receive] via NET/IB/0 +gpua013:301083:301156 [0] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [send] via NET/IB/0 +gpua013:301083:301156 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [send] via NET/IB/0 +gpua013:301083:301156 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [receive] via NET/IB/0 +gpua013:301083:301156 [0] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [receive] via NET/IB/0 +gpua013:301083:301156 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [send] via NET/IB/0 +gpua013:301083:301156 [0] NCCL INFO Connected all trees +gpua013:301083:301156 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua013:301083:301156 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua013:301083:301156 [0] NCCL INFO comm 0x55ab912a7a50 rank 4 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpua035:169210:169292 [2] NCCL INFO Connected all trees +gpua035:169210:169292 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua035:169210:169292 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua035:169210:169292 [2] NCCL INFO comm 0x55cef968ece0 rank 22 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpua039:229348:229348 [2] NCCL INFO cudaDriverVersion 12020 +gpua039:229348:229348 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.39<0> +gpua039:229348:229348 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua039:229348:229420 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.39<0> +gpua039:229348:229420 [2] NCCL INFO Using network IB +gpua039:229348:229420 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpua039:229348:229420 [2] NCCL INFO Trees [0] 27/-1/-1->26->25 [1] 27/-1/-1->26->25 +gpua039:229348:229420 [2] NCCL INFO Channel 00/0 : 26[85000] -> 27[c7000] via P2P/IPC/read +gpua039:229348:229420 [2] NCCL INFO Channel 01/0 : 26[85000] -> 27[c7000] via P2P/IPC/read +gpua039:229348:229420 [2] NCCL INFO Connected all rings +gpua039:229348:229420 [2] NCCL INFO Channel 00/0 : 26[85000] -> 25[46000] via P2P/IPC/read +gpua039:229348:229420 [2] NCCL INFO Channel 01/0 : 26[85000] -> 25[46000] via P2P/IPC/read +gpua053:87452:87519 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua053:87452:87519 [3] NCCL INFO comm 0x558f18c9ced0 rank 39 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpua058:7361:7436 [0] NCCL INFO Channel 01/0 : 37[46000] -> 40[7000] [receive] via NET/IB/0 +gpua058:7361:7436 [0] NCCL INFO Channel 00/0 : 40[7000] -> 44[7000] [send] via NET/IB/0 +gpua058:7361:7436 [0] NCCL INFO Channel 00/0 : 40[7000] -> 49[46000] [send] via NET/IB/0 +gpua058:7361:7436 [0] NCCL INFO Channel 00/0 : 49[46000] -> 40[7000] [receive] via NET/IB/0 +gpua058:7361:7436 [0] NCCL INFO Channel 00/0 : 44[7000] -> 40[7000] [receive] via NET/IB/0 +gpua058:7361:7436 [0] NCCL INFO Channel 01/0 : 40[7000] -> 37[46000] [send] via NET/IB/0 +gpua058:7361:7436 [0] NCCL INFO Connected all trees +gpua058:7361:7436 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua058:7361:7436 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua058:7361:7436 [0] NCCL INFO comm 0x55995e82ada0 rank 40 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpua059:311036:311110 [0] NCCL INFO Channel 00/0 : 40[7000] -> 44[7000] [receive] via NET/IB/0 +gpua059:311036:311110 [0] NCCL INFO Channel 01/0 : 36[7000] -> 44[7000] [receive] via NET/IB/0 +gpua059:311036:311110 [0] NCCL INFO Channel 01/0 : 29[46000] -> 44[7000] [receive] via NET/IB/0 +gpua059:311036:311110 [0] NCCL INFO Channel 01/0 : 44[7000] -> 29[46000] [send] via NET/IB/0 +gpua059:311036:311110 [0] NCCL INFO Channel 01/0 : 44[7000] -> 36[7000] [send] via NET/IB/0 +gpua059:311036:311110 [0] NCCL INFO Channel 00/0 : 44[7000] -> 40[7000] [send] via NET/IB/0 +gpua059:311036:311110 [0] NCCL INFO Connected all trees +gpua059:311036:311110 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua059:311036:311110 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua059:311036:311110 [0] NCCL INFO comm 0x559c44d6d290 rank 44 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpua095:565526:565600 [1] NCCL INFO Channel 00/0 : 57[46000] -> 56[7000] via P2P/IPC/read +gpua095:565526:565600 [1] NCCL INFO Channel 01/0 : 57[46000] -> 56[7000] via P2P/IPC/read +gpua095:565526:565600 [1] NCCL INFO Connected all trees +gpua095:565526:565600 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua095:565526:565600 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua095:565526:565600 [1] NCCL INFO comm 0x55eb0829bae0 rank 57 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpua063:527683:527754 [2] NCCL INFO Connected all trees +gpua063:527683:527754 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua063:527683:527754 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua063:527683:527754 [2] NCCL INFO comm 0x562eea7ad720 rank 54 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpua016:219653:219653 [2] NCCL INFO cudaDriverVersion 12020 +gpua016:219653:219653 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.16<0> +gpua016:219653:219653 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua016:219653:219726 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.16<0> +gpua016:219653:219726 [2] NCCL INFO Using network IB +gpua016:219653:219726 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpua016:219653:219726 [2] NCCL INFO Trees [0] 11/-1/-1->10->9 [1] 11/-1/-1->10->9 +gpua016:219653:219726 [2] NCCL INFO Channel 00/0 : 10[85000] -> 11[c7000] via P2P/IPC/read +gpua016:219653:219726 [2] NCCL INFO Channel 01/0 : 10[85000] -> 11[c7000] via P2P/IPC/read +gpua016:219653:219726 [2] NCCL INFO Connected all rings +gpua016:219653:219726 [2] NCCL INFO Channel 00/0 : 10[85000] -> 9[46000] via P2P/IPC/read +gpua016:219653:219726 [2] NCCL INFO Channel 01/0 : 10[85000] -> 9[46000] via P2P/IPC/read +gpua013:301086:301086 [3] NCCL INFO cudaDriverVersion 12020 +gpua013:301086:301086 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.13<0> +gpua013:301086:301086 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua013:301086:301155 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.13<0> +gpua013:301086:301155 [3] NCCL INFO Using network IB +gpua013:301086:301155 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpua013:301086:301155 [3] NCCL INFO Trees [0] -1/-1/-1->7->6 [1] -1/-1/-1->7->6 +gpua013:301086:301155 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [send] via NET/IB/0 +gpua013:301086:301155 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [send] via NET/IB/0 +gpua013:301086:301155 [3] NCCL INFO Connected all rings +gpua013:301086:301155 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 6[85000] via P2P/IPC/read +gpua013:301086:301155 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 6[85000] via P2P/IPC/read +gpua013:301086:301155 [3] NCCL INFO Connected all trees +gpua034:95152:95152 [1] NCCL INFO cudaDriverVersion 12020 +gpua034:95152:95152 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.34<0> +gpua034:95152:95152 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua034:95152:95218 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.34<0> +gpua034:95152:95218 [1] NCCL INFO Using network IB +gpua034:95152:95218 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpua034:95152:95218 [1] NCCL INFO Trees [0] 18/8/-1->17->16 [1] 18/-1/-1->17->16 +gpua034:95152:95218 [1] NCCL INFO Channel 00/0 : 17[46000] -> 18[85000] via P2P/IPC/read +gpua034:95152:95218 [1] NCCL INFO Channel 01/0 : 17[46000] -> 18[85000] via P2P/IPC/read +gpua034:95152:95218 [1] NCCL INFO Connected all rings +gpua034:95152:95218 [1] NCCL INFO Channel 00/0 : 8[7000] -> 17[46000] [receive] via NET/IB/0 +gpua034:95152:95218 [1] NCCL INFO Channel 00/0 : 17[46000] -> 8[7000] [send] via NET/IB/0 +gpua051:296044:296044 [1] NCCL INFO cudaDriverVersion 12020 +gpua051:296044:296044 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.51<0> +gpua051:296044:296044 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua051:296044:296107 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.51<0> +gpua051:296044:296107 [1] NCCL INFO Using network IB +gpua051:296044:296107 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpua051:296044:296107 [1] NCCL INFO Trees [0] 34/16/-1->33->32 [1] 34/-1/-1->33->32 +gpua051:296044:296107 [1] NCCL INFO Channel 00/0 : 33[46000] -> 34[85000] via P2P/IPC/read +gpua051:296044:296107 [1] NCCL INFO Channel 01/0 : 33[46000] -> 34[85000] via P2P/IPC/read +gpua051:296044:296107 [1] NCCL INFO Connected all rings +gpua051:296044:296107 [1] NCCL INFO Channel 00/0 : 16[7000] -> 33[46000] [receive] via NET/IB/0 +gpua051:296044:296107 [1] NCCL INFO Channel 00/0 : 33[46000] -> 16[7000] [send] via NET/IB/0 +gpua035:169211:169211 [3] NCCL INFO cudaDriverVersion 12020 +gpua035:169211:169211 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.35<0> +gpua035:169211:169211 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua035:169211:169290 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.35<0> +gpua035:169211:169290 [3] NCCL INFO Using network IB +gpua035:169211:169290 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpua035:169211:169290 [3] NCCL INFO Trees [0] -1/-1/-1->23->22 [1] -1/-1/-1->23->22 +gpua035:169211:169290 [3] NCCL INFO Channel 00/0 : 23[c7000] -> 24[7000] [send] via NET/IB/0 +gpua035:169211:169290 [3] NCCL INFO Channel 01/0 : 23[c7000] -> 24[7000] [send] via NET/IB/0 +gpua035:169211:169290 [3] NCCL INFO Connected all rings +gpua035:169211:169290 [3] NCCL INFO Channel 00/0 : 23[c7000] -> 22[85000] via P2P/IPC/read +gpua035:169211:169290 [3] NCCL INFO Channel 01/0 : 23[c7000] -> 22[85000] via P2P/IPC/read +gpua039:229348:229420 [2] NCCL INFO Connected all trees +gpua039:229348:229420 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua039:229348:229420 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua039:229348:229420 [2] NCCL INFO comm 0x56327e2eb000 rank 26 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpua058:7364:7364 [3] NCCL INFO cudaDriverVersion 12020 +gpua058:7364:7364 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.58<0> +gpua058:7364:7364 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua058:7364:7433 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.58<0> +gpua058:7364:7433 [3] NCCL INFO Using network IB +gpua058:7364:7433 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpua058:7364:7433 [3] NCCL INFO Trees [0] -1/-1/-1->43->42 [1] -1/-1/-1->43->42 +gpua058:7364:7433 [3] NCCL INFO Channel 00/0 : 43[c7000] -> 44[7000] [send] via NET/IB/0 +gpua058:7364:7433 [3] NCCL INFO Channel 01/0 : 43[c7000] -> 44[7000] [send] via NET/IB/0 +gpua058:7364:7433 [3] NCCL INFO Connected all rings +gpua058:7364:7433 [3] NCCL INFO Channel 00/0 : 43[c7000] -> 42[85000] via P2P/IPC/read +gpua058:7364:7433 [3] NCCL INFO Channel 01/0 : 43[c7000] -> 42[85000] via P2P/IPC/read +gpua058:7364:7433 [3] NCCL INFO Connected all trees +gpua059:311038:311038 [2] NCCL INFO cudaDriverVersion 12020 +gpua059:311038:311038 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.59<0> +gpua059:311038:311038 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua059:311038:311108 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.59<0> +gpua059:311038:311108 [2] NCCL INFO Using network IB +gpua059:311038:311108 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpua059:311038:311108 [2] NCCL INFO Trees [0] 47/-1/-1->46->45 [1] 47/-1/-1->46->45 +gpua059:311038:311108 [2] NCCL INFO Channel 00/0 : 46[85000] -> 47[c7000] via P2P/IPC/read +gpua059:311038:311108 [2] NCCL INFO Channel 01/0 : 46[85000] -> 47[c7000] via P2P/IPC/read +gpua059:311038:311108 [2] NCCL INFO Connected all rings +gpua059:311038:311108 [2] NCCL INFO Channel 00/0 : 46[85000] -> 45[46000] via P2P/IPC/read +gpua059:311038:311108 [2] NCCL INFO Channel 01/0 : 46[85000] -> 45[46000] via P2P/IPC/read +gpua095:565528:565528 [3] NCCL INFO cudaDriverVersion 12020 +gpua095:565528:565528 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.95<0> +gpua095:565528:565528 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua095:565528:565595 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.95<0> +gpua095:565528:565595 [3] NCCL INFO Using network IB +gpua095:565528:565595 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpua095:565528:565595 [3] NCCL INFO Trees [0] -1/-1/-1->59->58 [1] -1/-1/-1->59->58 +gpua095:565528:565595 [3] NCCL INFO Channel 00/0 : 59[c7000] -> 60[7000] [send] via NET/IB/0 +gpua095:565528:565595 [3] NCCL INFO Channel 01/0 : 59[c7000] -> 60[7000] [send] via NET/IB/0 +gpua095:565528:565595 [3] NCCL INFO Connected all rings +gpua095:565528:565595 [3] NCCL INFO Channel 00/0 : 59[c7000] -> 58[85000] via P2P/IPC/read +gpua095:565528:565595 [3] NCCL INFO Channel 01/0 : 59[c7000] -> 58[85000] via P2P/IPC/read +gpua063:527681:527681 [0] NCCL INFO cudaDriverVersion 12020 +gpua063:527681:527681 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.63<0> +gpua063:527681:527681 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua063:527681:527751 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.63<0> +gpua063:527681:527751 [0] NCCL INFO Using network IB +gpua063:527681:527751 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpua063:527681:527751 [0] NCCL INFO Trees [0] 53/-1/-1->52->57 [1] 53/48/-1->52->45 +gpua063:527681:527751 [0] NCCL INFO Channel 00/0 : 51[c7000] -> 52[7000] [receive] via NET/IB/0 +gpua063:527681:527751 [0] NCCL INFO Channel 01/0 : 51[c7000] -> 52[7000] [receive] via NET/IB/0 +gpua063:527681:527751 [0] NCCL INFO Channel 00/0 : 52[7000] -> 53[46000] via P2P/IPC/read +gpua063:527681:527751 [0] NCCL INFO Channel 01/0 : 52[7000] -> 53[46000] via P2P/IPC/read +gpua063:527681:527751 [0] NCCL INFO Connected all rings +gpua016:219653:219726 [2] NCCL INFO Connected all trees +gpua016:219653:219726 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua016:219653:219726 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua016:219653:219726 [2] NCCL INFO comm 0x55651eeaa520 rank 10 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpua013:301086:301155 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua013:301086:301155 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua013:301086:301155 [3] NCCL INFO comm 0x559e6b2f34e0 rank 7 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpua034:95152:95218 [1] NCCL INFO Channel 00/0 : 17[46000] -> 16[7000] via P2P/IPC/read +gpua034:95152:95218 [1] NCCL INFO Channel 01/0 : 17[46000] -> 16[7000] via P2P/IPC/read +gpua034:95152:95218 [1] NCCL INFO Connected all trees +gpua034:95152:95218 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua034:95152:95218 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua034:95152:95218 [1] NCCL INFO comm 0x56264682a870 rank 17 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpua034:95154:95154 [3] NCCL INFO cudaDriverVersion 12020 +gpua034:95154:95154 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.34<0> +gpua034:95154:95154 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua034:95154:95219 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.34<0> +gpua034:95154:95219 [3] NCCL INFO Using network IB +gpua034:95154:95219 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpua051:296044:296107 [1] NCCL INFO Channel 00/0 : 33[46000] -> 32[7000] via P2P/IPC/read +gpua051:296044:296107 [1] NCCL INFO Channel 01/0 : 33[46000] -> 32[7000] via P2P/IPC/read +gpua051:296044:296107 [1] NCCL INFO Connected all trees +gpua051:296044:296107 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua051:296044:296107 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua051:296044:296107 [1] NCCL INFO comm 0x563bd43b9960 rank 33 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpua035:169211:169290 [3] NCCL INFO Connected all trees +gpua035:169211:169290 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua035:169211:169290 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua035:169211:169290 [3] NCCL INFO comm 0x563ee4f21380 rank 23 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpua039:229346:229346 [0] NCCL INFO cudaDriverVersion 12020 +gpua039:229346:229346 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.39<0> +gpua039:229346:229346 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua039:229346:229418 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.39<0> +gpua039:229346:229418 [0] NCCL INFO Using network IB +gpua039:229346:229418 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpua039:229346:229418 [0] NCCL INFO Trees [0] 25/28/-1->24->16 [1] 25/-1/-1->24->21 +gpua039:229346:229418 [0] NCCL INFO Channel 00/0 : 23[c7000] -> 24[7000] [receive] via NET/IB/0 +gpua039:229346:229418 [0] NCCL INFO Channel 01/0 : 23[c7000] -> 24[7000] [receive] via NET/IB/0 +gpua039:229346:229418 [0] NCCL INFO Channel 00/0 : 24[7000] -> 25[46000] via P2P/IPC/read +gpua039:229346:229418 [0] NCCL INFO Channel 01/0 : 24[7000] -> 25[46000] via P2P/IPC/read +gpua039:229346:229418 [0] NCCL INFO Connected all rings +gpua053:87450:87450 [1] NCCL INFO cudaDriverVersion 12020 +gpua053:87450:87450 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.53<0> +gpua053:87450:87450 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua053:87450:87522 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.53<0> +gpua053:87450:87522 [1] NCCL INFO Using network IB +gpua053:87450:87522 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpua053:87450:87522 [1] NCCL INFO Trees [0] 38/-1/-1->37->36 [1] 38/40/-1->37->36 +gpua053:87450:87522 [1] NCCL INFO Channel 00/0 : 37[46000] -> 38[85000] via P2P/IPC/read +gpua053:87450:87522 [1] NCCL INFO Channel 01/0 : 37[46000] -> 38[85000] via P2P/IPC/read +gpua053:87450:87522 [1] NCCL INFO Connected all rings +gpua053:87450:87522 [1] NCCL INFO Channel 01/0 : 37[46000] -> 40[7000] [send] via NET/IB/0 +gpua053:87450:87522 [1] NCCL INFO Channel 01/0 : 40[7000] -> 37[46000] [receive] via NET/IB/0 +gpua058:7364:7433 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua058:7364:7433 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua058:7364:7433 [3] NCCL INFO comm 0x55da550f6d60 rank 43 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpua059:311038:311108 [2] NCCL INFO Connected all trees +gpua059:311038:311108 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua059:311038:311108 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua059:311038:311108 [2] NCCL INFO comm 0x563cb7fbea50 rank 46 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpua095:565528:565595 [3] NCCL INFO Connected all trees +gpua095:565528:565595 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua095:565528:565595 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua095:565528:565595 [3] NCCL INFO comm 0x55d42e78ee10 rank 59 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpua063:527681:527751 [0] NCCL INFO Channel 01/0 : 48[7000] -> 52[7000] [receive] via NET/IB/0 +gpua063:527681:527751 [0] NCCL INFO Channel 00/0 : 52[7000] -> 57[46000] [send] via NET/IB/0 +gpua063:527681:527751 [0] NCCL INFO Channel 01/0 : 45[46000] -> 52[7000] [receive] via NET/IB/0 +gpua063:527681:527751 [0] NCCL INFO Channel 01/0 : 52[7000] -> 45[46000] [send] via NET/IB/0 +gpua063:527681:527751 [0] NCCL INFO Channel 00/0 : 57[46000] -> 52[7000] [receive] via NET/IB/0 +gpua063:527681:527751 [0] NCCL INFO Channel 01/0 : 52[7000] -> 48[7000] [send] via NET/IB/0 +gpua063:527681:527751 [0] NCCL INFO Connected all trees +gpua063:527681:527751 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua063:527681:527751 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua063:527681:527751 [0] NCCL INFO comm 0x564e56d798e0 rank 52 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpua013:301084:301084 [1] NCCL INFO cudaDriverVersion 12020 +gpua013:301084:301084 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.13<0> +gpua013:301084:301084 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua013:301084:301157 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.13<0> +gpua013:301084:301157 [1] NCCL INFO Using network IB +gpua013:301084:301157 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpua013:301084:301157 [1] NCCL INFO Trees [0] 6/-1/-1->5->4 [1] 6/8/-1->5->4 +gpua013:301084:301157 [1] NCCL INFO Channel 00/0 : 5[46000] -> 6[85000] via P2P/IPC/read +gpua013:301084:301157 [1] NCCL INFO Channel 01/0 : 5[46000] -> 6[85000] via P2P/IPC/read +gpua013:301084:301157 [1] NCCL INFO Connected all rings +gpua013:301084:301157 [1] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [send] via NET/IB/0 +gpua013:301084:301157 [1] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [receive] via NET/IB/0 +gpua034:95154:95219 [3] NCCL INFO Trees [0] -1/-1/-1->19->18 [1] -1/-1/-1->19->18 +gpua034:95154:95219 [3] NCCL INFO Channel 00/0 : 19[c7000] -> 20[7000] [send] via NET/IB/0 +gpua034:95154:95219 [3] NCCL INFO Channel 01/0 : 19[c7000] -> 20[7000] [send] via NET/IB/0 +gpua034:95154:95219 [3] NCCL INFO Connected all rings +gpua034:95154:95219 [3] NCCL INFO Channel 00/0 : 19[c7000] -> 18[85000] via P2P/IPC/read +gpua034:95154:95219 [3] NCCL INFO Channel 01/0 : 19[c7000] -> 18[85000] via P2P/IPC/read +gpua034:95154:95219 [3] NCCL INFO Connected all trees +gpua034:95154:95219 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua034:95154:95219 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua034:95154:95219 [3] NCCL INFO comm 0x5582164dad10 rank 19 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpua051:296043:296043 [0] NCCL INFO cudaDriverVersion 12020 +gpua051:296043:296043 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.51<0> +gpua051:296043:296043 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua051:296043:296109 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.51<0> +gpua051:296043:296109 [0] NCCL INFO Using network IB +gpua051:296043:296109 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpua051:296043:296109 [0] NCCL INFO Trees [0] 33/48/-1->32->0 [1] 33/-1/-1->32->36 +gpua051:296043:296109 [0] NCCL INFO Channel 00/0 : 31[c7000] -> 32[7000] [receive] via NET/IB/0 +gpua051:296043:296109 [0] NCCL INFO Channel 01/0 : 31[c7000] -> 32[7000] [receive] via NET/IB/0 +gpua051:296043:296109 [0] NCCL INFO Channel 00/0 : 32[7000] -> 33[46000] via P2P/IPC/read +gpua051:296043:296109 [0] NCCL INFO Channel 01/0 : 32[7000] -> 33[46000] via P2P/IPC/read +gpua051:296043:296109 [0] NCCL INFO Connected all rings +gpua035:169208:169208 [0] NCCL INFO cudaDriverVersion 12020 +gpua035:169208:169208 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.35<0> +gpua035:169208:169208 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua035:169208:169291 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.35<0> +gpua035:169208:169291 [0] NCCL INFO Using network IB +gpua035:169208:169291 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpua035:169208:169291 [0] NCCL INFO Trees [0] 21/-1/-1->20->25 [1] 21/16/-1->20->13 +gpua035:169208:169291 [0] NCCL INFO Channel 00/0 : 19[c7000] -> 20[7000] [receive] via NET/IB/0 +gpua035:169208:169291 [0] NCCL INFO Channel 01/0 : 19[c7000] -> 20[7000] [receive] via NET/IB/0 +gpua035:169208:169291 [0] NCCL INFO Channel 00/0 : 20[7000] -> 21[46000] via P2P/IPC/read +gpua035:169208:169291 [0] NCCL INFO Channel 01/0 : 20[7000] -> 21[46000] via P2P/IPC/read +gpua035:169208:169291 [0] NCCL INFO Connected all rings +gpua039:229346:229418 [0] NCCL INFO Channel 01/0 : 21[46000] -> 24[7000] [receive] via NET/IB/0 +gpua039:229346:229418 [0] NCCL INFO Channel 00/0 : 24[7000] -> 28[7000] [send] via NET/IB/0 +gpua039:229346:229418 [0] NCCL INFO Channel 00/0 : 16[7000] -> 24[7000] [receive] via NET/IB/0 +gpua039:229346:229418 [0] NCCL INFO Channel 00/0 : 24[7000] -> 16[7000] [send] via NET/IB/0 +gpua039:229346:229418 [0] NCCL INFO Channel 00/0 : 28[7000] -> 24[7000] [receive] via NET/IB/0 +gpua039:229346:229418 [0] NCCL INFO Channel 01/0 : 24[7000] -> 21[46000] [send] via NET/IB/0 +gpua039:229346:229418 [0] NCCL INFO Connected all trees +gpua039:229346:229418 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua039:229346:229418 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua039:229346:229418 [0] NCCL INFO comm 0x557324ca6ed0 rank 24 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpua053:87450:87522 [1] NCCL INFO Channel 00/0 : 37[46000] -> 36[7000] via P2P/IPC/read +gpua053:87450:87522 [1] NCCL INFO Channel 01/0 : 37[46000] -> 36[7000] via P2P/IPC/read +gpua053:87450:87522 [1] NCCL INFO Connected all trees +gpua053:87450:87522 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua053:87450:87522 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua053:87450:87522 [1] NCCL INFO comm 0x55bcf45b5730 rank 37 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpua058:7363:7363 [2] NCCL INFO cudaDriverVersion 12020 +gpua058:7363:7363 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.58<0> +gpua058:7363:7363 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua058:7363:7434 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.58<0> +gpua058:7363:7434 [2] NCCL INFO Using network IB +gpua058:7363:7434 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpua058:7363:7434 [2] NCCL INFO Trees [0] 43/-1/-1->42->41 [1] 43/-1/-1->42->41 +gpua058:7363:7434 [2] NCCL INFO Channel 00/0 : 42[85000] -> 43[c7000] via P2P/IPC/read +gpua058:7363:7434 [2] NCCL INFO Channel 01/0 : 42[85000] -> 43[c7000] via P2P/IPC/read +gpua058:7363:7434 [2] NCCL INFO Connected all rings +gpua058:7363:7434 [2] NCCL INFO Channel 00/0 : 42[85000] -> 41[46000] via P2P/IPC/read +gpua058:7363:7434 [2] NCCL INFO Channel 01/0 : 42[85000] -> 41[46000] via P2P/IPC/read +gpua058:7363:7434 [2] NCCL INFO Connected all trees +gpua013:301084:301157 [1] NCCL INFO Channel 00/0 : 5[46000] -> 4[7000] via P2P/IPC/read +gpua013:301084:301157 [1] NCCL INFO Channel 01/0 : 5[46000] -> 4[7000] via P2P/IPC/read +gpua013:301084:301157 [1] NCCL INFO Connected all trees +gpua013:301084:301157 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua013:301084:301157 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua013:301084:301157 [1] NCCL INFO comm 0x55a5fdcab6e0 rank 5 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpua051:296043:296109 [0] NCCL INFO Channel 01/0 : 32[7000] -> 36[7000] [send] via NET/IB/0 +gpua051:296043:296109 [0] NCCL INFO Channel 00/0 : 32[7000] -> 48[7000] [send] via NET/IB/0 +gpua051:296043:296109 [0] NCCL INFO Channel 00/0 : 0[7000] -> 32[7000] [receive] via NET/IB/0 +gpua051:296043:296109 [0] NCCL INFO Channel 00/0 : 32[7000] -> 0[7000] [send] via NET/IB/0 +gpua051:296043:296109 [0] NCCL INFO Channel 00/0 : 48[7000] -> 32[7000] [receive] via NET/IB/0 +gpua051:296043:296109 [0] NCCL INFO Channel 01/0 : 36[7000] -> 32[7000] [receive] via NET/IB/0 +gpua051:296043:296109 [0] NCCL INFO Connected all trees +gpua051:296043:296109 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua051:296043:296109 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua051:296043:296109 [0] NCCL INFO comm 0x55d537beaad0 rank 32 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpua035:169208:169291 [0] NCCL INFO Channel 01/0 : 16[7000] -> 20[7000] [receive] via NET/IB/0 +gpua035:169208:169291 [0] NCCL INFO Channel 00/0 : 20[7000] -> 25[46000] [send] via NET/IB/0 +gpua035:169208:169291 [0] NCCL INFO Channel 01/0 : 13[46000] -> 20[7000] [receive] via NET/IB/0 +gpua035:169208:169291 [0] NCCL INFO Channel 01/0 : 20[7000] -> 13[46000] [send] via NET/IB/0 +gpua035:169208:169291 [0] NCCL INFO Channel 00/0 : 25[46000] -> 20[7000] [receive] via NET/IB/0 +gpua035:169208:169291 [0] NCCL INFO Channel 01/0 : 20[7000] -> 16[7000] [send] via NET/IB/0 +gpua035:169208:169291 [0] NCCL INFO Connected all trees +gpua035:169208:169291 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua035:169208:169291 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua035:169208:169291 [0] NCCL INFO comm 0x564399368ec0 rank 20 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpua053:87451:87451 [2] NCCL INFO cudaDriverVersion 12020 +gpua053:87451:87451 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.53<0> +gpua053:87451:87451 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua053:87451:87518 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.53<0> +gpua053:87451:87518 [2] NCCL INFO Using network IB +gpua053:87451:87518 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpua053:87451:87518 [2] NCCL INFO Trees [0] 39/-1/-1->38->37 [1] 39/-1/-1->38->37 +gpua053:87451:87518 [2] NCCL INFO Channel 00/0 : 38[85000] -> 39[c7000] via P2P/IPC/read +gpua053:87451:87518 [2] NCCL INFO Channel 01/0 : 38[85000] -> 39[c7000] via P2P/IPC/read +gpua053:87451:87518 [2] NCCL INFO Connected all rings +gpua053:87451:87518 [2] NCCL INFO Channel 00/0 : 38[85000] -> 37[46000] via P2P/IPC/read +gpua053:87451:87518 [2] NCCL INFO Channel 01/0 : 38[85000] -> 37[46000] via P2P/IPC/read +gpua053:87451:87518 [2] NCCL INFO Connected all trees +gpua058:7363:7434 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua058:7363:7434 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua058:7363:7434 [2] NCCL INFO comm 0x55e1e24d2920 rank 42 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpua053:87451:87518 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua053:87451:87518 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua053:87451:87518 [2] NCCL INFO comm 0x55b5ca0dadf0 rank 38 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpua060:467070:467070 [1] NCCL INFO cudaDriverVersion 12020 +gpua060:467070:467070 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.60<0> +gpua060:467070:467070 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua060:467070:467150 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.60<0> +gpua060:467070:467150 [1] NCCL INFO Using network IB +gpua060:467070:467150 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpua060:467070:467150 [1] NCCL INFO Trees [0] 50/40/-1->49->48 [1] 50/-1/-1->49->48 +gpua060:467070:467150 [1] NCCL INFO Channel 00/0 : 49[46000] -> 50[85000] via P2P/IPC/read +gpua060:467070:467150 [1] NCCL INFO Channel 01/0 : 49[46000] -> 50[85000] via P2P/IPC/read +gpua060:467070:467150 [1] NCCL INFO Connected all rings +gpua060:467070:467150 [1] NCCL INFO Channel 00/0 : 40[7000] -> 49[46000] [receive] via NET/IB/0 +gpua060:467070:467150 [1] NCCL INFO Channel 00/0 : 49[46000] -> 40[7000] [send] via NET/IB/0 +gpua060:467070:467150 [1] NCCL INFO Channel 00/0 : 49[46000] -> 48[7000] via P2P/IPC/read +gpua060:467070:467150 [1] NCCL INFO Channel 01/0 : 49[46000] -> 48[7000] via P2P/IPC/read +gpua060:467070:467150 [1] NCCL INFO Connected all trees +gpua060:467070:467150 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua060:467070:467150 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua060:467070:467150 [1] NCCL INFO comm 0x5611770f9360 rank 49 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpua024:258338:258338 [3] NCCL INFO cudaDriverVersion 12020 +gpua024:258338:258338 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.24<0> +gpua024:258338:258338 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua024:258338:258402 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.24<0> +gpua024:258338:258402 [3] NCCL INFO Using network IB +gpua024:258338:258402 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpua024:258338:258402 [3] NCCL INFO Trees [0] -1/-1/-1->15->14 [1] -1/-1/-1->15->14 +gpua024:258338:258402 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 16[7000] [send] via NET/IB/0 +gpua024:258338:258402 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 16[7000] [send] via NET/IB/0 +gpua024:258338:258402 [3] NCCL INFO Connected all rings +gpua024:258338:258402 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 14[85000] via P2P/IPC/read +gpua024:258338:258402 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 14[85000] via P2P/IPC/read +gpua024:258338:258402 [3] NCCL INFO Connected all trees +gpua024:258338:258402 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua024:258338:258402 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua024:258338:258402 [3] NCCL INFO comm 0x55fe460c7b60 rank 15 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpua024:258335:258335 [0] NCCL INFO cudaDriverVersion 12020 +gpua024:258335:258335 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.24<0> +gpua024:258335:258335 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua024:258335:258399 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.24<0> +gpua024:258335:258399 [0] NCCL INFO Using network IB +gpua024:258335:258399 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpua024:258335:258399 [0] NCCL INFO Trees [0] 13/-1/-1->12->8 [1] 13/4/-1->12->28 +gpua024:258335:258399 [0] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [receive] via NET/IB/0 +gpua024:258335:258399 [0] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [receive] via NET/IB/0 +gpua024:258335:258399 [0] NCCL INFO Channel 00/0 : 12[7000] -> 13[46000] via P2P/IPC/read +gpua024:258335:258399 [0] NCCL INFO Channel 01/0 : 12[7000] -> 13[46000] via P2P/IPC/read +gpua024:258335:258399 [0] NCCL INFO Connected all rings +gpua024:258335:258399 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [receive] via NET/IB/0 +gpua024:258335:258399 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [receive] via NET/IB/0 +gpua024:258335:258399 [0] NCCL INFO Channel 01/0 : 12[7000] -> 28[7000] [send] via NET/IB/0 +gpua024:258335:258399 [0] NCCL INFO Channel 01/0 : 28[7000] -> 12[7000] [receive] via NET/IB/0 +gpua024:258335:258399 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [send] via NET/IB/0 +gpua024:258335:258399 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [send] via NET/IB/0 +gpua024:258335:258399 [0] NCCL INFO Connected all trees +gpua051:296046:296046 [3] NCCL INFO cudaDriverVersion 12020 +gpua051:296046:296046 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.51<0> +gpua051:296046:296046 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua051:296046:296108 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.51<0> +gpua051:296046:296108 [3] NCCL INFO Using network IB +gpua051:296046:296108 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpua051:296046:296108 [3] NCCL INFO Trees [0] -1/-1/-1->35->34 [1] -1/-1/-1->35->34 +gpua051:296046:296108 [3] NCCL INFO Channel 00/0 : 35[c7000] -> 36[7000] [send] via NET/IB/0 +gpua051:296046:296108 [3] NCCL INFO Channel 01/0 : 35[c7000] -> 36[7000] [send] via NET/IB/0 +gpua051:296046:296108 [3] NCCL INFO Connected all rings +gpua051:296046:296108 [3] NCCL INFO Channel 00/0 : 35[c7000] -> 34[85000] via P2P/IPC/read +gpua051:296046:296108 [3] NCCL INFO Channel 01/0 : 35[c7000] -> 34[85000] via P2P/IPC/read +gpua024:258335:258399 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua024:258335:258399 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua024:258335:258399 [0] NCCL INFO comm 0x56346b70f190 rank 12 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpua024:258337:258337 [2] NCCL INFO cudaDriverVersion 12020 +gpua024:258337:258337 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.24<0> +gpua024:258337:258337 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua024:258337:258401 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.24<0> +gpua024:258337:258401 [2] NCCL INFO Using network IB +gpua024:258337:258401 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpua024:258337:258401 [2] NCCL INFO Trees [0] 15/-1/-1->14->13 [1] 15/-1/-1->14->13 +gpua024:258337:258401 [2] NCCL INFO Channel 00/0 : 14[85000] -> 15[c7000] via P2P/IPC/read +gpua051:296046:296108 [3] NCCL INFO Connected all trees +gpua051:296046:296108 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua051:296046:296108 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua051:296046:296108 [3] NCCL INFO comm 0x55ab8dd744e0 rank 35 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpua024:258337:258401 [2] NCCL INFO Channel 01/0 : 14[85000] -> 15[c7000] via P2P/IPC/read +gpua024:258337:258401 [2] NCCL INFO Connected all rings +gpua095:565527:565527 [2] NCCL INFO cudaDriverVersion 12020 +gpua095:565527:565527 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.95<0> +gpua095:565527:565527 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua095:565527:565596 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.95<0> +gpua095:565527:565596 [2] NCCL INFO Using network IB +gpua095:565527:565596 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpua095:565527:565596 [2] NCCL INFO Trees [0] 59/-1/-1->58->57 [1] 59/-1/-1->58->57 +gpua095:565527:565596 [2] NCCL INFO Channel 00/0 : 58[85000] -> 59[c7000] via P2P/IPC/read +gpua095:565527:565596 [2] NCCL INFO Channel 01/0 : 58[85000] -> 59[c7000] via P2P/IPC/read +gpua095:565527:565596 [2] NCCL INFO Connected all rings +gpua095:565527:565596 [2] NCCL INFO Channel 00/0 : 58[85000] -> 57[46000] via P2P/IPC/read +gpua095:565527:565596 [2] NCCL INFO Channel 01/0 : 58[85000] -> 57[46000] via P2P/IPC/read +gpua024:258337:258401 [2] NCCL INFO Channel 00/0 : 14[85000] -> 13[46000] via P2P/IPC/read +gpua024:258337:258401 [2] NCCL INFO Channel 01/0 : 14[85000] -> 13[46000] via P2P/IPC/read +gpua024:258337:258401 [2] NCCL INFO Connected all trees +gpua024:258337:258401 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua024:258337:258401 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua024:258337:258401 [2] NCCL INFO comm 0x55c90b00cbb0 rank 14 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpua095:565527:565596 [2] NCCL INFO Connected all trees +gpua095:565527:565596 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua095:565527:565596 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua095:565527:565596 [2] NCCL INFO comm 0x556b584b6370 rank 58 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpua024:258336:258336 [1] NCCL INFO cudaDriverVersion 12020 +gpua024:258336:258336 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.24<0> +gpua024:258336:258336 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua024:258336:258405 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.24<0> +gpua024:258336:258405 [1] NCCL INFO Using network IB +gpua024:258336:258405 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpua024:258336:258405 [1] NCCL INFO Trees [0] 14/-1/-1->13->12 [1] 14/20/-1->13->12 +gpua024:258336:258405 [1] NCCL INFO Channel 00/0 : 13[46000] -> 14[85000] via P2P/IPC/read +gpua024:258336:258405 [1] NCCL INFO Channel 01/0 : 13[46000] -> 14[85000] via P2P/IPC/read +gpua024:258336:258405 [1] NCCL INFO Connected all rings +gpua024:258336:258405 [1] NCCL INFO Channel 01/0 : 13[46000] -> 20[7000] [send] via NET/IB/0 +gpua024:258336:258405 [1] NCCL INFO Channel 01/0 : 20[7000] -> 13[46000] [receive] via NET/IB/0 +gpua063:527684:527684 [3] NCCL INFO cudaDriverVersion 12020 +gpua063:527684:527684 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.63<0> +gpua063:527684:527684 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua063:527684:527752 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.63<0> +gpua063:527684:527752 [3] NCCL INFO Using network IB +gpua063:527684:527752 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpua063:527684:527752 [3] NCCL INFO Trees [0] -1/-1/-1->55->54 [1] -1/-1/-1->55->54 +gpua063:527684:527752 [3] NCCL INFO Channel 00/0 : 55[c7000] -> 56[7000] [send] via NET/IB/0 +gpua063:527684:527752 [3] NCCL INFO Channel 01/0 : 55[c7000] -> 56[7000] [send] via NET/IB/0 +gpua063:527684:527752 [3] NCCL INFO Connected all rings +gpua063:527684:527752 [3] NCCL INFO Channel 00/0 : 55[c7000] -> 54[85000] via P2P/IPC/read +gpua063:527684:527752 [3] NCCL INFO Channel 01/0 : 55[c7000] -> 54[85000] via P2P/IPC/read +gpua024:258336:258405 [1] NCCL INFO Channel 00/0 : 13[46000] -> 12[7000] via P2P/IPC/read +gpua024:258336:258405 [1] NCCL INFO Channel 01/0 : 13[46000] -> 12[7000] via P2P/IPC/read +gpua024:258336:258405 [1] NCCL INFO Connected all trees +gpua024:258336:258405 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua024:258336:258405 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua024:258336:258405 [1] NCCL INFO comm 0x564cded887d0 rank 13 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpua059:311039:311039 [3] NCCL INFO cudaDriverVersion 12020 +gpua059:311039:311039 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.59<0> +gpua059:311039:311039 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua059:311039:311109 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.59<0> +gpua059:311039:311109 [3] NCCL INFO Using network IB +gpua059:311039:311109 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpua059:311039:311109 [3] NCCL INFO Trees [0] -1/-1/-1->47->46 [1] -1/-1/-1->47->46 +gpua059:311039:311109 [3] NCCL INFO Channel 00/0 : 47[c7000] -> 48[7000] [send] via NET/IB/0 +gpua059:311039:311109 [3] NCCL INFO Channel 01/0 : 47[c7000] -> 48[7000] [send] via NET/IB/0 +gpua059:311039:311109 [3] NCCL INFO Connected all rings +gpua059:311039:311109 [3] NCCL INFO Channel 00/0 : 47[c7000] -> 46[85000] via P2P/IPC/read +gpua059:311039:311109 [3] NCCL INFO Channel 01/0 : 47[c7000] -> 46[85000] via P2P/IPC/read +gpua063:527684:527752 [3] NCCL INFO Connected all trees +gpua063:527684:527752 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua063:527684:527752 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua063:527684:527752 [3] NCCL INFO comm 0x559c5f90a980 rank 55 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpua034:95151:95151 [0] NCCL INFO cudaDriverVersion 12020 +gpua034:95151:95151 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.34<0> +gpua034:95151:95151 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua034:95151:95220 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.34<0> +gpua034:95151:95220 [0] NCCL INFO Using network IB +gpua034:95151:95220 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpua034:95151:95220 [0] NCCL INFO Trees [0] 17/24/-1->16->33 [1] 17/-1/-1->16->20 +gpua034:95151:95220 [0] NCCL INFO Channel 00/0 : 15[c7000] -> 16[7000] [receive] via NET/IB/0 +gpua034:95151:95220 [0] NCCL INFO Channel 01/0 : 15[c7000] -> 16[7000] [receive] via NET/IB/0 +gpua034:95151:95220 [0] NCCL INFO Channel 00/0 : 16[7000] -> 17[46000] via P2P/IPC/read +gpua034:95151:95220 [0] NCCL INFO Channel 01/0 : 16[7000] -> 17[46000] via P2P/IPC/read +gpua034:95151:95220 [0] NCCL INFO Connected all rings +gpua059:311039:311109 [3] NCCL INFO Connected all trees +gpua059:311039:311109 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua059:311039:311109 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua059:311039:311109 [3] NCCL INFO comm 0x55910f959b60 rank 47 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpua034:95151:95220 [0] NCCL INFO Channel 01/0 : 16[7000] -> 20[7000] [send] via NET/IB/0 +gpua034:95151:95220 [0] NCCL INFO Channel 00/0 : 16[7000] -> 24[7000] [send] via NET/IB/0 +gpua034:95151:95220 [0] NCCL INFO Channel 00/0 : 16[7000] -> 33[46000] [send] via NET/IB/0 +gpua034:95151:95220 [0] NCCL INFO Channel 00/0 : 33[46000] -> 16[7000] [receive] via NET/IB/0 +gpua034:95151:95220 [0] NCCL INFO Channel 00/0 : 24[7000] -> 16[7000] [receive] via NET/IB/0 +gpua034:95151:95220 [0] NCCL INFO Channel 01/0 : 20[7000] -> 16[7000] [receive] via NET/IB/0 +gpua034:95151:95220 [0] NCCL INFO Connected all trees +gpua034:95151:95220 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua034:95151:95220 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua034:95151:95220 [0] NCCL INFO comm 0x55eae7976550 rank 16 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpua099:2915464:2915464 [1] NCCL INFO cudaDriverVersion 12020 +gpua099:2915464:2915464 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.99<0> +gpua099:2915464:2915464 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua099:2915464:2915532 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.99<0> +gpua099:2915464:2915532 [1] NCCL INFO Using network IB +gpua099:2915464:2915532 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpua099:2915464:2915532 [1] NCCL INFO Trees [0] 62/-1/-1->61->60 [1] 62/-1/-1->61->60 +gpua099:2915464:2915532 [1] NCCL INFO Channel 00/0 : 61[46000] -> 62[85000] via P2P/IPC/read +gpua099:2915464:2915532 [1] NCCL INFO Channel 01/0 : 61[46000] -> 62[85000] via P2P/IPC/read +gpua099:2915464:2915532 [1] NCCL INFO Connected all rings +gpua099:2915464:2915532 [1] NCCL INFO Channel 00/0 : 61[46000] -> 60[7000] via P2P/IPC/read +gpua099:2915464:2915532 [1] NCCL INFO Channel 01/0 : 61[46000] -> 60[7000] via P2P/IPC/read +gpua099:2915464:2915532 [1] NCCL INFO Connected all trees +gpua099:2915464:2915532 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua099:2915464:2915532 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua099:2915464:2915532 [1] NCCL INFO comm 0x555b2e125850 rank 61 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpua060:467071:467071 [2] NCCL INFO cudaDriverVersion 12020 +gpua060:467071:467071 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.60<0> +gpua060:467071:467071 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua060:467071:467149 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.60<0> +gpua060:467071:467149 [2] NCCL INFO Using network IB +gpua060:467071:467149 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpua060:467071:467149 [2] NCCL INFO Trees [0] 51/-1/-1->50->49 [1] 51/-1/-1->50->49 +gpua060:467071:467149 [2] NCCL INFO Channel 00/0 : 50[85000] -> 51[c7000] via P2P/IPC/read +gpua060:467071:467149 [2] NCCL INFO Channel 01/0 : 50[85000] -> 51[c7000] via P2P/IPC/read +gpua060:467071:467149 [2] NCCL INFO Connected all rings +gpua060:467071:467149 [2] NCCL INFO Channel 00/0 : 50[85000] -> 49[46000] via P2P/IPC/read +gpua060:467071:467149 [2] NCCL INFO Channel 01/0 : 50[85000] -> 49[46000] via P2P/IPC/read +gpua060:467071:467149 [2] NCCL INFO Connected all trees +gpua060:467071:467149 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua060:467071:467149 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua060:467071:467149 [2] NCCL INFO comm 0x55cd8c80f8e0 rank 50 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpua060:467069:467069 [0] NCCL INFO cudaDriverVersion 12020 +gpua060:467069:467069 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.60<0> +gpua060:467069:467069 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua060:467069:467155 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.60<0> +gpua060:467069:467155 [0] NCCL INFO Using network IB +gpua060:467069:467155 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpua060:467069:467155 [0] NCCL INFO Trees [0] 49/56/-1->48->32 [1] 49/-1/-1->48->52 +gpua060:467069:467155 [0] NCCL INFO Channel 00/0 : 47[c7000] -> 48[7000] [receive] via NET/IB/0 +gpua002:4152610:4152610 [2] NCCL INFO cudaDriverVersion 12020 +gpua002:4152610:4152610 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.2<0> +gpua002:4152610:4152610 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua002:4152610:4152675 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.2<0> +gpua002:4152610:4152675 [2] NCCL INFO Using network IB +gpua002:4152610:4152675 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpua002:4152610:4152675 [2] NCCL INFO Trees [0] 3/-1/-1->2->1 [1] 3/-1/-1->2->1 +gpua002:4152610:4152675 [2] NCCL INFO Channel 00/0 : 2[85000] -> 3[c7000] via P2P/IPC/read +gpua002:4152610:4152675 [2] NCCL INFO Channel 01/0 : 2[85000] -> 3[c7000] via P2P/IPC/read +gpua002:4152610:4152675 [2] NCCL INFO Connected all rings +gpua002:4152610:4152675 [2] NCCL INFO Channel 00/0 : 2[85000] -> 1[46000] via P2P/IPC/read +gpua002:4152610:4152675 [2] NCCL INFO Channel 01/0 : 2[85000] -> 1[46000] via P2P/IPC/read +gpua060:467069:467155 [0] NCCL INFO Channel 01/0 : 47[c7000] -> 48[7000] [receive] via NET/IB/0 +gpua060:467069:467155 [0] NCCL INFO Channel 00/0 : 48[7000] -> 49[46000] via P2P/IPC/read +gpua060:467069:467155 [0] NCCL INFO Channel 01/0 : 48[7000] -> 49[46000] via P2P/IPC/read +gpua060:467069:467155 [0] NCCL INFO Connected all rings +gpua060:467069:467155 [0] NCCL INFO Channel 01/0 : 48[7000] -> 52[7000] [send] via NET/IB/0 +gpua060:467069:467155 [0] NCCL INFO Channel 00/0 : 48[7000] -> 56[7000] [send] via NET/IB/0 +gpua060:467069:467155 [0] NCCL INFO Channel 00/0 : 32[7000] -> 48[7000] [receive] via NET/IB/0 +gpua060:467069:467155 [0] NCCL INFO Channel 00/0 : 48[7000] -> 32[7000] [send] via NET/IB/0 +gpua060:467069:467155 [0] NCCL INFO Channel 00/0 : 56[7000] -> 48[7000] [receive] via NET/IB/0 +gpua060:467069:467155 [0] NCCL INFO Channel 01/0 : 52[7000] -> 48[7000] [receive] via NET/IB/0 +gpua060:467069:467155 [0] NCCL INFO Connected all trees +gpua002:4152610:4152675 [2] NCCL INFO Connected all trees +gpua002:4152610:4152675 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua002:4152610:4152675 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua002:4152610:4152675 [2] NCCL INFO comm 0x559878c438d0 rank 2 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpua060:467069:467155 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua060:467069:467155 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua060:467069:467155 [0] NCCL INFO comm 0x559b215b5360 rank 48 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpua002:4152609:4152609 [1] NCCL INFO cudaDriverVersion 12020 +gpua002:4152609:4152609 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.2<0> +gpua002:4152609:4152609 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua002:4152609:4152673 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.2<0> +gpua002:4152609:4152673 [1] NCCL INFO Using network IB +gpua002:4152609:4152673 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpua002:4152609:4152673 [1] NCCL INFO Trees [0] 2/-1/-1->1->0 [1] 2/-1/-1->1->0 +gpua002:4152609:4152673 [1] NCCL INFO Channel 00/0 : 1[46000] -> 2[85000] via P2P/IPC/read +gpua002:4152609:4152673 [1] NCCL INFO Channel 01/0 : 1[46000] -> 2[85000] via P2P/IPC/read +gpua002:4152609:4152673 [1] NCCL INFO Connected all rings +gpua002:4152609:4152673 [1] NCCL INFO Channel 00/0 : 1[46000] -> 0[7000] via P2P/IPC/read +gpua002:4152609:4152673 [1] NCCL INFO Channel 01/0 : 1[46000] -> 0[7000] via P2P/IPC/read +gpua060:467072:467072 [3] NCCL INFO cudaDriverVersion 12020 +gpua060:467072:467072 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.60<0> +gpua060:467072:467072 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua060:467072:467152 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.60<0> +gpua060:467072:467152 [3] NCCL INFO Using network IB +gpua060:467072:467152 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpua060:467072:467152 [3] NCCL INFO Trees [0] -1/-1/-1->51->50 [1] -1/-1/-1->51->50 +gpua060:467072:467152 [3] NCCL INFO Channel 00/0 : 51[c7000] -> 52[7000] [send] via NET/IB/0 +gpua060:467072:467152 [3] NCCL INFO Channel 01/0 : 51[c7000] -> 52[7000] [send] via NET/IB/0 +gpua060:467072:467152 [3] NCCL INFO Connected all rings +gpua060:467072:467152 [3] NCCL INFO Channel 00/0 : 51[c7000] -> 50[85000] via P2P/IPC/read +gpua060:467072:467152 [3] NCCL INFO Channel 01/0 : 51[c7000] -> 50[85000] via P2P/IPC/read +gpua099:2915465:2915465 [2] NCCL INFO cudaDriverVersion 12020 +gpua099:2915465:2915465 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.99<0> +gpua099:2915465:2915465 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua099:2915465:2915535 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.99<0> +gpua099:2915465:2915535 [2] NCCL INFO Using network IB +gpua099:2915465:2915535 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpua099:2915465:2915535 [2] NCCL INFO Trees [0] 63/-1/-1->62->61 [1] 63/-1/-1->62->61 +gpua099:2915465:2915535 [2] NCCL INFO Channel 00/0 : 62[85000] -> 63[c7000] via P2P/IPC/read +gpua099:2915465:2915535 [2] NCCL INFO Channel 01/0 : 62[85000] -> 63[c7000] via P2P/IPC/read +gpua099:2915465:2915535 [2] NCCL INFO Connected all rings +gpua099:2915465:2915535 [2] NCCL INFO Channel 00/0 : 62[85000] -> 61[46000] via P2P/IPC/read +gpua099:2915465:2915535 [2] NCCL INFO Channel 01/0 : 62[85000] -> 61[46000] via P2P/IPC/read +gpua002:4152609:4152673 [1] NCCL INFO Connected all trees +gpua002:4152609:4152673 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua002:4152609:4152673 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua002:4152609:4152673 [1] NCCL INFO comm 0x56203cd77a80 rank 1 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpua060:467072:467152 [3] NCCL INFO Connected all trees +gpua060:467072:467152 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua060:467072:467152 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua060:467072:467152 [3] NCCL INFO comm 0x556f54580ab0 rank 51 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpua099:2915465:2915535 [2] NCCL INFO Connected all trees +gpua099:2915465:2915535 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua099:2915465:2915535 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua099:2915465:2915535 [2] NCCL INFO comm 0x55b594863de0 rank 62 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpua099:2915463:2915463 [0] NCCL INFO cudaDriverVersion 12020 +gpua099:2915463:2915463 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.99<0> +gpua099:2915463:2915463 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua099:2915463:2915533 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.99<0> +gpua099:2915463:2915533 [0] NCCL INFO Using network IB +gpua099:2915463:2915533 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpua099:2915463:2915533 [0] NCCL INFO Trees [0] 61/-1/-1->60->56 [1] 61/28/-1->60->-1 +gpua099:2915463:2915533 [0] NCCL INFO Channel 00/0 : 59[c7000] -> 60[7000] [receive] via NET/IB/0 +gpua099:2915463:2915533 [0] NCCL INFO Channel 01/0 : 59[c7000] -> 60[7000] [receive] via NET/IB/0 +gpua099:2915463:2915533 [0] NCCL INFO Channel 00/0 : 60[7000] -> 61[46000] via P2P/IPC/read +gpua099:2915463:2915533 [0] NCCL INFO Channel 01/0 : 60[7000] -> 61[46000] via P2P/IPC/read +gpua099:2915463:2915533 [0] NCCL INFO Connected all rings +gpua099:2915463:2915533 [0] NCCL INFO Channel 00/0 : 56[7000] -> 60[7000] [receive] via NET/IB/0 +gpua099:2915463:2915533 [0] NCCL INFO Channel 01/0 : 28[7000] -> 60[7000] [receive] via NET/IB/0 +gpua099:2915463:2915533 [0] NCCL INFO Channel 01/0 : 60[7000] -> 28[7000] [send] via NET/IB/0 +gpua099:2915463:2915533 [0] NCCL INFO Channel 00/0 : 60[7000] -> 56[7000] [send] via NET/IB/0 +gpua099:2915463:2915533 [0] NCCL INFO Connected all trees +gpua099:2915463:2915533 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua099:2915463:2915533 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua099:2915463:2915533 [0] NCCL INFO comm 0x561ce1b01d10 rank 60 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpua002:4152608:4152676 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.2<0> +gpua002:4152608:4152676 [0] NCCL INFO Using network IB +gpua002:4152608:4152676 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpua002:4152608:4152676 [0] NCCL INFO Channel 00/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 +gpua002:4152608:4152676 [0] NCCL INFO Channel 01/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 +gpua002:4152608:4152676 [0] NCCL INFO Trees [0] 1/32/-1->0->-1 [1] 1/-1/-1->0->4 +gpua002:4152608:4152676 [0] NCCL INFO Channel 00/0 : 63[c7000] -> 0[7000] [receive] via NET/IB/0 +gpua002:4152608:4152676 [0] NCCL INFO Channel 01/0 : 63[c7000] -> 0[7000] [receive] via NET/IB/0 +gpua002:4152608:4152676 [0] NCCL INFO Channel 00/0 : 0[7000] -> 1[46000] via P2P/IPC/read +gpua002:4152608:4152676 [0] NCCL INFO Channel 01/0 : 0[7000] -> 1[46000] via P2P/IPC/read +gpua044:218320:218320 [2] NCCL INFO cudaDriverVersion 12020 +gpua044:218320:218320 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.44<0> +gpua044:218320:218320 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua044:218320:218382 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.44<0> +gpua044:218320:218382 [2] NCCL INFO Using network IB +gpua044:218320:218382 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpua044:218320:218382 [2] NCCL INFO Trees [0] 31/-1/-1->30->29 [1] 31/-1/-1->30->29 +gpua044:218320:218382 [2] NCCL INFO Channel 00/0 : 30[85000] -> 31[c7000] via P2P/IPC/read +gpua044:218320:218382 [2] NCCL INFO Channel 01/0 : 30[85000] -> 31[c7000] via P2P/IPC/read +gpua044:218320:218382 [2] NCCL INFO Connected all rings +gpua044:218320:218382 [2] NCCL INFO Channel 00/0 : 30[85000] -> 29[46000] via P2P/IPC/read +gpua044:218320:218382 [2] NCCL INFO Channel 01/0 : 30[85000] -> 29[46000] via P2P/IPC/read +gpua002:4152608:4152676 [0] NCCL INFO Connected all rings +gpua002:4152608:4152676 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [send] via NET/IB/0 +gpua002:4152608:4152676 [0] NCCL INFO Channel 00/0 : 32[7000] -> 0[7000] [receive] via NET/IB/0 +gpua002:4152608:4152676 [0] NCCL INFO Channel 00/0 : 0[7000] -> 32[7000] [send] via NET/IB/0 +gpua002:4152608:4152676 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [receive] via NET/IB/0 +gpua002:4152608:4152676 [0] NCCL INFO Connected all trees +gpua002:4152608:4152676 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua002:4152608:4152676 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua002:4152608:4152676 [0] NCCL INFO comm 0x558835ea9820 rank 0 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpua044:218320:218382 [2] NCCL INFO Connected all trees +gpua044:218320:218382 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua044:218320:218382 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua044:218320:218382 [2] NCCL INFO comm 0x557eec10e440 rank 30 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpua002:4152611:4152611 [3] NCCL INFO cudaDriverVersion 12020 +gpua002:4152611:4152611 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.2<0> +gpua002:4152611:4152611 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua002:4152611:4152674 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.2<0> +gpua002:4152611:4152674 [3] NCCL INFO Using network IB +gpua002:4152611:4152674 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpua002:4152611:4152674 [3] NCCL INFO Trees [0] -1/-1/-1->3->2 [1] -1/-1/-1->3->2 +gpua002:4152611:4152674 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [send] via NET/IB/0 +gpua002:4152611:4152674 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [send] via NET/IB/0 +gpua002:4152611:4152674 [3] NCCL INFO Connected all rings +gpua002:4152611:4152674 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 2[85000] via P2P/IPC/read +gpua002:4152611:4152674 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 2[85000] via P2P/IPC/read +gpua044:218318:218318 [0] NCCL INFO cudaDriverVersion 12020 +gpua044:218318:218318 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.44<0> +gpua044:218318:218318 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua044:218318:218379 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.44<0> +gpua044:218318:218379 [0] NCCL INFO Using network IB +gpua044:218318:218379 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpua044:218318:218379 [0] NCCL INFO Trees [0] 29/-1/-1->28->24 [1] 29/12/-1->28->60 +gpua044:218318:218379 [0] NCCL INFO Channel 00/0 : 27[c7000] -> 28[7000] [receive] via NET/IB/0 +gpua044:218318:218379 [0] NCCL INFO Channel 01/0 : 27[c7000] -> 28[7000] [receive] via NET/IB/0 +gpua044:218318:218379 [0] NCCL INFO Channel 00/0 : 28[7000] -> 29[46000] via P2P/IPC/read +gpua044:218318:218379 [0] NCCL INFO Channel 01/0 : 28[7000] -> 29[46000] via P2P/IPC/read +gpua044:218318:218379 [0] NCCL INFO Connected all rings +gpua099:2915466:2915466 [3] NCCL INFO cudaDriverVersion 12020 +gpua099:2915466:2915466 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.99<0> +gpua099:2915466:2915466 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua099:2915466:2915531 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.99<0> +gpua099:2915466:2915531 [3] NCCL INFO Using network IB +gpua099:2915466:2915531 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpua099:2915466:2915531 [3] NCCL INFO Trees [0] -1/-1/-1->63->62 [1] -1/-1/-1->63->62 +gpua099:2915466:2915531 [3] NCCL INFO Channel 00/0 : 63[c7000] -> 0[7000] [send] via NET/IB/0 +gpua099:2915466:2915531 [3] NCCL INFO Channel 01/0 : 63[c7000] -> 0[7000] [send] via NET/IB/0 +gpua099:2915466:2915531 [3] NCCL INFO Connected all rings +gpua099:2915466:2915531 [3] NCCL INFO Channel 00/0 : 63[c7000] -> 62[85000] via P2P/IPC/read +gpua099:2915466:2915531 [3] NCCL INFO Channel 01/0 : 63[c7000] -> 62[85000] via P2P/IPC/read +gpua002:4152611:4152674 [3] NCCL INFO Connected all trees +gpua002:4152611:4152674 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua002:4152611:4152674 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua002:4152611:4152674 [3] NCCL INFO comm 0x55bce4ed7cf0 rank 3 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpua044:218318:218379 [0] NCCL INFO Channel 00/0 : 24[7000] -> 28[7000] [receive] via NET/IB/0 +gpua044:218318:218379 [0] NCCL INFO Channel 01/0 : 12[7000] -> 28[7000] [receive] via NET/IB/0 +gpua044:218318:218379 [0] NCCL INFO Channel 01/0 : 60[7000] -> 28[7000] [receive] via NET/IB/0 +gpua044:218318:218379 [0] NCCL INFO Channel 01/0 : 28[7000] -> 60[7000] [send] via NET/IB/0 +gpua044:218318:218379 [0] NCCL INFO Channel 01/0 : 28[7000] -> 12[7000] [send] via NET/IB/0 +gpua044:218318:218379 [0] NCCL INFO Channel 00/0 : 28[7000] -> 24[7000] [send] via NET/IB/0 +gpua044:218318:218379 [0] NCCL INFO Connected all trees +gpua044:218318:218379 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua044:218318:218379 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua044:218318:218379 [0] NCCL INFO comm 0x557b7be4a150 rank 28 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpua099:2915466:2915531 [3] NCCL INFO Connected all trees +gpua099:2915466:2915531 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua099:2915466:2915531 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua099:2915466:2915531 [3] NCCL INFO comm 0x55861b50fdd0 rank 63 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpua044:218319:218319 [1] NCCL INFO cudaDriverVersion 12020 +gpua044:218319:218319 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.44<0> +gpua044:218319:218319 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua044:218319:218378 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.44<0> +gpua044:218319:218378 [1] NCCL INFO Using network IB +gpua044:218319:218378 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpua044:218319:218378 [1] NCCL INFO Trees [0] 30/-1/-1->29->28 [1] 30/44/-1->29->28 +gpua044:218319:218378 [1] NCCL INFO Channel 00/0 : 29[46000] -> 30[85000] via P2P/IPC/read +gpua044:218319:218378 [1] NCCL INFO Channel 01/0 : 29[46000] -> 30[85000] via P2P/IPC/read +gpua044:218319:218378 [1] NCCL INFO Connected all rings +gpua044:218319:218378 [1] NCCL INFO Channel 01/0 : 29[46000] -> 44[7000] [send] via NET/IB/0 +gpua044:218319:218378 [1] NCCL INFO Channel 01/0 : 44[7000] -> 29[46000] [receive] via NET/IB/0 +gpua044:218319:218378 [1] NCCL INFO Channel 00/0 : 29[46000] -> 28[7000] via P2P/IPC/read +gpua044:218319:218378 [1] NCCL INFO Channel 01/0 : 29[46000] -> 28[7000] via P2P/IPC/read +gpua044:218319:218378 [1] NCCL INFO Connected all trees +gpua044:218319:218378 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua044:218319:218378 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua044:218319:218378 [1] NCCL INFO comm 0x56328ab61480 rank 29 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpua044:218321:218321 [3] NCCL INFO cudaDriverVersion 12020 +gpua044:218321:218321 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.44<0> +gpua044:218321:218321 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua044:218321:218383 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.44<0> +gpua044:218321:218383 [3] NCCL INFO Using network IB +gpua044:218321:218383 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpua044:218321:218383 [3] NCCL INFO Trees [0] -1/-1/-1->31->30 [1] -1/-1/-1->31->30 +gpua044:218321:218383 [3] NCCL INFO Channel 00/0 : 31[c7000] -> 32[7000] [send] via NET/IB/0 +gpua044:218321:218383 [3] NCCL INFO Channel 01/0 : 31[c7000] -> 32[7000] [send] via NET/IB/0 +gpua044:218321:218383 [3] NCCL INFO Connected all rings +gpua044:218321:218383 [3] NCCL INFO Channel 00/0 : 31[c7000] -> 30[85000] via P2P/IPC/read +gpua044:218321:218383 [3] NCCL INFO Channel 01/0 : 31[c7000] -> 30[85000] via P2P/IPC/read +gpua044:218321:218383 [3] NCCL INFO Connected all trees +gpua044:218321:218383 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua044:218321:218383 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua044:218321:218383 [3] NCCL INFO comm 0x556f37a58da0 rank 31 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +[gpua002:0/64] 2024-01-16 17:06:11,952 (distributed:1027) INFO: Reducer buckets have been rebuilt in this iteration. +[gpua002:0/64] 2024-01-16 17:08:32,943 (trainer:753) INFO: 6epoch:train:1-100batch: iter_time=2.788, forward_time=0.251, loss_ctc=137.028, loss_interctc_layer6=131.521, loss_interctc_layer12=115.320, loss_interctc_layer15=110.222, loss_interctc_layer21=136.264, loss=126.071, backward_time=0.374, grad_norm=88.851, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.137, optim0_lr0=1.788e-04, train_time=6.768 +[gpua002:0/64] 2024-01-16 17:10:58,920 (trainer:753) INFO: 6epoch:train:101-200batch: iter_time=8.602e-05, forward_time=0.140, loss_ctc=109.961, loss_interctc_layer6=112.096, loss_interctc_layer12=98.772, loss_interctc_layer15=93.669, loss_interctc_layer21=110.711, loss=105.042, backward_time=0.357, grad_norm=88.678, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.134, optim0_lr0=1.787e-04, train_time=1.460 +[gpua002:0/64] 2024-01-16 17:14:03,133 (trainer:753) INFO: 6epoch:train:201-300batch: iter_time=8.797e-05, forward_time=0.234, loss_ctc=117.563, loss_interctc_layer6=119.129, loss_interctc_layer12=106.216, loss_interctc_layer15=102.208, loss_interctc_layer21=117.823, loss=112.588, backward_time=0.426, grad_norm=79.014, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.141, optim0_lr0=1.786e-04, train_time=1.838 +[gpua002:0/64] 2024-01-16 17:17:25,210 (trainer:753) INFO: 6epoch:train:301-400batch: iter_time=9.525e-05, forward_time=0.141, loss_ctc=120.128, loss_interctc_layer6=118.044, loss_interctc_layer12=103.356, loss_interctc_layer15=97.746, loss_interctc_layer21=119.741, loss=111.803, backward_time=0.434, grad_norm=72.442, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.135, optim0_lr0=1.785e-04, train_time=2.024 +[gpua002:0/64] 2024-01-16 17:21:40,999 (trainer:753) INFO: 6epoch:train:401-500batch: iter_time=9.500e-05, forward_time=0.139, loss_ctc=112.828, loss_interctc_layer6=109.103, loss_interctc_layer12=96.646, loss_interctc_layer15=92.541, loss_interctc_layer21=113.449, loss=104.913, backward_time=0.538, grad_norm=92.365, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.135, optim0_lr0=1.783e-04, train_time=2.557 +[gpua002:0/64] 2024-01-16 17:25:34,981 (trainer:753) INFO: 6epoch:train:501-600batch: iter_time=9.281e-05, forward_time=0.140, loss_ctc=126.587, loss_interctc_layer6=127.869, loss_interctc_layer12=114.473, loss_interctc_layer15=109.032, loss_interctc_layer21=127.191, loss=121.030, backward_time=0.568, grad_norm=96.323, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.135, optim0_lr0=1.782e-04, train_time=2.341 +[gpua002:0/64] 2024-01-16 17:30:10,158 (trainer:753) INFO: 6epoch:train:601-700batch: iter_time=9.094e-05, forward_time=0.138, loss_ctc=98.137, loss_interctc_layer6=92.723, loss_interctc_layer12=81.709, loss_interctc_layer15=77.542, loss_interctc_layer21=98.768, loss=89.776, backward_time=0.511, grad_norm=87.566, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.134, optim0_lr0=1.781e-04, train_time=2.752 +[gpua002:0/64] 2024-01-16 17:33:05,748 (trainer:753) INFO: 6epoch:train:701-800batch: iter_time=9.079e-05, forward_time=0.277, loss_ctc=92.442, loss_interctc_layer6=95.366, loss_interctc_layer12=83.902, loss_interctc_layer15=79.225, loss_interctc_layer21=92.640, loss=88.715, backward_time=0.369, grad_norm=61.363, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.141, optim0_lr0=1.780e-04, train_time=1.756 +[gpua002:0/64] 2024-01-16 17:36:35,864 (trainer:753) INFO: 6epoch:train:801-900batch: iter_time=9.309e-05, forward_time=0.147, loss_ctc=159.699, loss_interctc_layer6=137.200, loss_interctc_layer12=124.436, loss_interctc_layer15=118.252, loss_interctc_layer21=157.647, loss=139.447, backward_time=0.447, grad_norm=106.102, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.135, optim0_lr0=1.779e-04, train_time=2.099 +[gpua002:0/64] 2024-01-16 17:39:53,179 (trainer:753) INFO: 6epoch:train:901-1000batch: iter_time=9.143e-05, forward_time=0.140, loss_ctc=101.241, loss_interctc_layer6=99.898, loss_interctc_layer12=87.489, loss_interctc_layer15=82.661, loss_interctc_layer21=101.262, loss=94.510, backward_time=0.396, grad_norm=62.125, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.135, optim0_lr0=1.778e-04, train_time=1.975 +[gpua002:0/64] 2024-01-16 17:42:56,419 (trainer:753) INFO: 6epoch:train:1001-1100batch: iter_time=9.078e-05, forward_time=0.139, loss_ctc=98.483, loss_interctc_layer6=102.405, loss_interctc_layer12=89.981, loss_interctc_layer15=85.028, loss_interctc_layer21=98.411, loss=94.862, backward_time=0.373, grad_norm=55.745, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.135, optim0_lr0=1.776e-04, train_time=1.832 +[gpua002:0/64] 2024-01-16 17:45:29,916 (trainer:753) INFO: 6epoch:train:1101-1200batch: iter_time=9.105e-05, forward_time=0.139, loss_ctc=122.744, loss_interctc_layer6=115.443, loss_interctc_layer12=102.102, loss_interctc_layer15=97.018, loss_interctc_layer21=123.208, loss=112.103, backward_time=0.432, grad_norm=70.291, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.135, optim0_lr0=1.775e-04, train_time=1.535 +[gpua002:0/64] 2024-01-16 17:47:29,860 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpua002:0/64] 2024-01-16 17:47:48,790 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua002:0/64] 2024-01-16 17:47:52,443 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpua002:0/64] 2024-01-16 17:47:52,443 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpua002:0/64] 2024-01-16 17:47:52,446 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpua002:0/64] 2024-01-16 17:58:30,517 (trainer:753) INFO: 6epoch:train:1201-1300batch: iter_time=3.285, forward_time=0.197, loss_ctc=133.006, loss_interctc_layer6=126.494, loss_interctc_layer12=111.889, loss_interctc_layer15=106.636, loss_interctc_layer21=133.297, loss=122.264, backward_time=0.387, grad_norm=77.731, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.137, optim0_lr0=1.774e-04, train_time=7.804 +[gpua002:0/64] 2024-01-16 18:01:04,297 (trainer:753) INFO: 6epoch:train:1301-1400batch: iter_time=9.105e-05, forward_time=0.142, loss_ctc=131.019, loss_interctc_layer6=122.314, loss_interctc_layer12=108.426, loss_interctc_layer15=103.046, loss_interctc_layer21=131.786, loss=119.318, backward_time=0.317, grad_norm=83.388, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.135, optim0_lr0=1.773e-04, train_time=1.539 +[gpua002:0/64] 2024-01-16 18:04:11,801 (trainer:753) INFO: 6epoch:train:1401-1500batch: iter_time=9.726e-05, forward_time=0.142, loss_ctc=123.854, loss_interctc_layer6=121.345, loss_interctc_layer12=108.387, loss_interctc_layer15=102.541, loss_interctc_layer21=124.572, loss=116.140, backward_time=0.377, grad_norm=101.579, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.135, optim0_lr0=1.772e-04, train_time=1.875 +[gpua002:0/64] 2024-01-16 18:07:01,731 (trainer:753) INFO: 6epoch:train:1501-1600batch: iter_time=9.568e-05, forward_time=0.141, loss_ctc=111.686, loss_interctc_layer6=105.380, loss_interctc_layer12=92.578, loss_interctc_layer15=87.327, loss_interctc_layer21=112.106, loss=101.816, backward_time=0.351, grad_norm=71.480, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.135, optim0_lr0=1.771e-04, train_time=1.699 +[gpua002:0/64] 2024-01-16 18:10:05,110 (trainer:753) INFO: 6epoch:train:1601-1700batch: iter_time=9.004e-05, forward_time=0.141, loss_ctc=117.470, loss_interctc_layer6=118.322, loss_interctc_layer12=104.175, loss_interctc_layer15=98.416, loss_interctc_layer21=118.017, loss=111.280, backward_time=0.379, grad_norm=80.692, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.134, optim0_lr0=1.769e-04, train_time=1.834 +[gpua002:0/64] 2024-01-16 18:12:24,524 (trainer:753) INFO: 6epoch:train:1701-1800batch: iter_time=8.797e-05, forward_time=0.141, loss_ctc=128.518, loss_interctc_layer6=117.770, loss_interctc_layer12=104.062, loss_interctc_layer15=98.366, loss_interctc_layer21=129.269, loss=115.597, backward_time=0.322, grad_norm=111.222, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.134, optim0_lr0=1.768e-04, train_time=1.394 +[gpua002:0/64] 2024-01-16 18:15:22,508 (trainer:753) INFO: 6epoch:train:1801-1900batch: iter_time=8.736e-05, forward_time=0.143, loss_ctc=110.562, loss_interctc_layer6=110.520, loss_interctc_layer12=98.553, loss_interctc_layer15=94.114, loss_interctc_layer21=110.202, loss=104.790, backward_time=0.374, grad_norm=80.394, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.135, optim0_lr0=1.767e-04, train_time=1.780 +[gpua002:0/64] 2024-01-16 18:18:06,934 (trainer:753) INFO: 6epoch:train:1901-2000batch: iter_time=8.848e-05, forward_time=0.140, loss_ctc=96.532, loss_interctc_layer6=87.929, loss_interctc_layer12=76.836, loss_interctc_layer15=72.693, loss_interctc_layer21=97.188, loss=86.236, backward_time=0.377, grad_norm=54.401, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.135, optim0_lr0=1.766e-04, train_time=1.644 +[gpua002:0/64] 2024-01-16 18:21:21,538 (trainer:753) INFO: 6epoch:train:2001-2100batch: iter_time=8.907e-05, forward_time=0.142, loss_ctc=105.519, loss_interctc_layer6=102.892, loss_interctc_layer12=89.762, loss_interctc_layer15=84.390, loss_interctc_layer21=105.779, loss=97.669, backward_time=0.492, grad_norm=59.484, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.135, optim0_lr0=1.765e-04, train_time=1.946 +[gpua002:0/64] 2024-01-16 18:23:56,066 (trainer:753) INFO: 6epoch:train:2101-2200batch: iter_time=8.803e-05, forward_time=0.141, loss_ctc=146.213, loss_interctc_layer6=125.246, loss_interctc_layer12=111.874, loss_interctc_layer15=106.617, loss_interctc_layer21=146.024, loss=127.195, backward_time=0.340, grad_norm=81.726, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.135, optim0_lr0=1.764e-04, train_time=1.545 +[gpua002:0/64] 2024-01-16 18:26:48,997 (trainer:753) INFO: 6epoch:train:2201-2300batch: iter_time=8.530e-05, forward_time=0.141, loss_ctc=104.791, loss_interctc_layer6=104.369, loss_interctc_layer12=90.920, loss_interctc_layer15=85.463, loss_interctc_layer21=105.353, loss=98.179, backward_time=0.429, grad_norm=65.486, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.135, optim0_lr0=1.763e-04, train_time=1.729 +[gpua002:0/64] 2024-01-16 18:29:41,276 (trainer:753) INFO: 6epoch:train:2301-2400batch: iter_time=8.505e-05, forward_time=0.142, loss_ctc=112.456, loss_interctc_layer6=108.930, loss_interctc_layer12=96.123, loss_interctc_layer15=90.972, loss_interctc_layer21=113.298, loss=104.356, backward_time=0.348, grad_norm=69.111, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.135, optim0_lr0=1.761e-04, train_time=1.723 +[gpua002:0/64] 2024-01-16 18:32:09,152 (trainer:753) INFO: 6epoch:train:2401-2500batch: iter_time=8.793e-05, forward_time=0.141, loss_ctc=124.218, loss_interctc_layer6=111.934, loss_interctc_layer12=98.292, loss_interctc_layer15=92.736, loss_interctc_layer21=124.693, loss=110.374, backward_time=0.355, grad_norm=73.990, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.135, optim0_lr0=1.760e-04, train_time=1.479 +[gpua002:0/64] 2024-01-16 18:32:29,183 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpua002:0/64] 2024-01-16 18:32:48,139 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua002:0/64] 2024-01-16 18:32:51,775 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpua002:0/64] 2024-01-16 18:32:51,775 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpua002:0/64] 2024-01-16 18:32:51,779 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpua002:0/64] 2024-01-16 18:49:08,673 (trainer:753) INFO: 6epoch:train:2501-2600batch: iter_time=4.968, forward_time=0.405, loss_ctc=132.283, loss_interctc_layer6=128.372, loss_interctc_layer12=113.191, loss_interctc_layer15=106.953, loss_interctc_layer21=132.901, loss=122.740, backward_time=0.951, grad_norm=69.655, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.151, optim0_lr0=1.759e-04, train_time=10.194 +[gpua002:0/64] 2024-01-16 18:54:10,276 (trainer:753) INFO: 6epoch:train:2601-2700batch: iter_time=8.464e-05, forward_time=0.318, loss_ctc=114.739, loss_interctc_layer6=111.411, loss_interctc_layer12=97.413, loss_interctc_layer15=91.960, loss_interctc_layer21=115.397, loss=106.184, backward_time=0.649, grad_norm=67.323, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.150, optim0_lr0=1.758e-04, train_time=3.016 +[gpua002:0/64] 2024-01-16 18:59:49,575 (trainer:753) INFO: 6epoch:train:2701-2800batch: iter_time=9.238e-05, forward_time=0.214, loss_ctc=119.964, loss_interctc_layer6=116.978, loss_interctc_layer12=104.833, loss_interctc_layer15=98.902, loss_interctc_layer21=120.384, loss=112.212, backward_time=0.666, grad_norm=82.866, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.146, optim0_lr0=1.757e-04, train_time=3.394 +[gpua002:0/64] 2024-01-16 19:03:26,675 (trainer:753) INFO: 6epoch:train:2801-2900batch: iter_time=0.002, forward_time=0.304, loss_ctc=116.439, loss_interctc_layer6=113.565, loss_interctc_layer12=98.803, loss_interctc_layer15=92.951, loss_interctc_layer21=116.970, loss=107.746, backward_time=0.465, grad_norm=102.740, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.144, optim0_lr0=1.756e-04, train_time=2.170 +[gpua002:0/64] 2024-01-16 19:06:41,941 (trainer:753) INFO: 6epoch:train:2901-3000batch: iter_time=7.550e-04, forward_time=0.213, loss_ctc=110.580, loss_interctc_layer6=106.433, loss_interctc_layer12=93.898, loss_interctc_layer15=89.324, loss_interctc_layer21=110.737, loss=102.194, backward_time=0.469, grad_norm=73.535, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.144, optim0_lr0=1.755e-04, train_time=1.951 +[gpua002:0/64] 2024-01-16 19:10:36,614 (trainer:753) INFO: 6epoch:train:3001-3100batch: iter_time=3.416e-04, forward_time=0.213, loss_ctc=124.684, loss_interctc_layer6=124.731, loss_interctc_layer12=110.663, loss_interctc_layer15=104.707, loss_interctc_layer21=125.870, loss=118.131, backward_time=0.524, grad_norm=90.018, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.146, optim0_lr0=1.754e-04, train_time=2.348 +[gpua002:0/64] 2024-01-16 19:15:28,297 (trainer:753) INFO: 6epoch:train:3101-3200batch: iter_time=8.498e-05, forward_time=0.184, loss_ctc=97.479, loss_interctc_layer6=90.318, loss_interctc_layer12=78.951, loss_interctc_layer15=74.771, loss_interctc_layer21=97.936, loss=87.891, backward_time=0.577, grad_norm=99.930, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.136, optim0_lr0=1.752e-04, train_time=2.915 +[gpua002:0/64] 2024-01-16 19:20:31,913 (trainer:753) INFO: 6epoch:train:3201-3300batch: iter_time=9.762e-05, forward_time=0.268, loss_ctc=93.408, loss_interctc_layer6=93.800, loss_interctc_layer12=82.176, loss_interctc_layer15=77.528, loss_interctc_layer21=93.848, loss=88.152, backward_time=0.670, grad_norm=63.463, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.139, optim0_lr0=1.751e-04, train_time=3.038 +[gpua002:0/64] 2024-01-16 19:27:05,639 (trainer:753) INFO: 6epoch:train:3301-3400batch: iter_time=4.855e-04, forward_time=0.283, loss_ctc=155.660, loss_interctc_layer6=132.156, loss_interctc_layer12=116.857, loss_interctc_layer15=110.308, loss_interctc_layer21=156.192, loss=134.235, backward_time=0.921, grad_norm=87.587, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.141, optim0_lr0=1.750e-04, train_time=3.937 +[gpua002:0/64] 2024-01-16 19:32:03,146 (trainer:753) INFO: 6epoch:train:3401-3500batch: iter_time=9.102e-05, forward_time=0.166, loss_ctc=103.430, loss_interctc_layer6=98.593, loss_interctc_layer12=86.108, loss_interctc_layer15=80.859, loss_interctc_layer21=104.001, loss=94.598, backward_time=0.662, grad_norm=76.547, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.140, optim0_lr0=1.749e-04, train_time=2.975 +[gpua002:0/64] 2024-01-16 19:38:00,416 (trainer:753) INFO: 6epoch:train:3501-3600batch: iter_time=6.176e-04, forward_time=0.240, loss_ctc=97.421, loss_interctc_layer6=99.821, loss_interctc_layer12=87.106, loss_interctc_layer15=82.197, loss_interctc_layer21=97.775, loss=92.864, backward_time=0.711, grad_norm=57.222, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.139, optim0_lr0=1.748e-04, train_time=3.573 +[gpua002:0/64] 2024-01-16 19:42:12,471 (trainer:753) INFO: 6epoch:train:3601-3700batch: iter_time=2.179e-04, forward_time=0.177, loss_ctc=123.494, loss_interctc_layer6=113.811, loss_interctc_layer12=100.034, loss_interctc_layer15=94.398, loss_interctc_layer21=124.217, loss=111.191, backward_time=0.473, grad_norm=77.507, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.138, optim0_lr0=1.747e-04, train_time=2.520 +[gpua002:0/64] 2024-01-16 19:45:59,504 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpua002:0/64] 2024-01-16 19:46:18,657 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua002:0/64] 2024-01-16 19:46:22,333 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpua002:0/64] 2024-01-16 19:46:22,333 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpua002:0/64] 2024-01-16 19:46:22,336 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpua002:0/64] 2024-01-16 19:58:41,259 (trainer:753) INFO: 6epoch:train:3701-3800batch: iter_time=3.289, forward_time=0.240, loss_ctc=130.291, loss_interctc_layer6=124.502, loss_interctc_layer12=109.075, loss_interctc_layer15=103.351, loss_interctc_layer21=130.438, loss=119.531, backward_time=0.880, grad_norm=75.652, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.140, optim0_lr0=1.746e-04, train_time=9.887 +[gpua002:0/64] 2024-01-16 20:12:25,292 (trainer:753) INFO: 6epoch:train:3801-3900batch: iter_time=9.543e-05, forward_time=0.143, loss_ctc=121.173, loss_interctc_layer6=121.069, loss_interctc_layer12=105.131, loss_interctc_layer15=99.272, loss_interctc_layer21=121.832, loss=113.695, backward_time=1.536, grad_norm=85.064, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.135, optim0_lr0=1.745e-04, train_time=8.241 +[gpua002:0/64] 2024-01-16 20:26:34,496 (trainer:753) INFO: 6epoch:train:3901-4000batch: iter_time=9.074e-05, forward_time=0.192, loss_ctc=113.211, loss_interctc_layer6=119.349, loss_interctc_layer12=106.802, loss_interctc_layer15=101.344, loss_interctc_layer21=114.018, loss=110.945, backward_time=1.833, grad_norm=88.693, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.135, optim0_lr0=1.744e-04, train_time=8.491 +[gpua002:0/64] 2024-01-16 20:36:17,494 (trainer:753) INFO: 6epoch:train:4001-4100batch: iter_time=8.809e-05, forward_time=0.214, loss_ctc=104.343, loss_interctc_layer6=102.787, loss_interctc_layer12=89.256, loss_interctc_layer15=83.811, loss_interctc_layer21=104.147, loss=96.869, backward_time=1.538, grad_norm=63.817, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.137, optim0_lr0=1.742e-04, train_time=5.830 +[gpua002:0/64] 2024-01-16 20:42:29,876 (trainer:753) INFO: 6epoch:train:4101-4200batch: iter_time=8.773e-05, forward_time=0.141, loss_ctc=114.208, loss_interctc_layer6=116.308, loss_interctc_layer12=101.702, loss_interctc_layer15=95.667, loss_interctc_layer21=114.583, loss=108.494, backward_time=0.760, grad_norm=70.787, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.134, optim0_lr0=1.741e-04, train_time=3.723 +[gpua002:0/64] 2024-01-16 20:47:56,818 (trainer:753) INFO: 6epoch:train:4201-4300batch: iter_time=8.667e-05, forward_time=0.143, loss_ctc=120.930, loss_interctc_layer6=115.276, loss_interctc_layer12=101.076, loss_interctc_layer15=95.649, loss_interctc_layer21=121.681, loss=110.923, backward_time=0.613, grad_norm=82.367, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.135, optim0_lr0=1.740e-04, train_time=3.270 +[gpua002:0/64] 2024-01-16 20:53:52,247 (trainer:753) INFO: 6epoch:train:4301-4400batch: iter_time=8.978e-05, forward_time=0.210, loss_ctc=106.178, loss_interctc_layer6=109.357, loss_interctc_layer12=96.896, loss_interctc_layer15=91.548, loss_interctc_layer21=106.879, loss=102.172, backward_time=0.801, grad_norm=77.343, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.138, optim0_lr0=1.739e-04, train_time=3.553 +[gpua002:0/64] 2024-01-16 20:58:43,249 (trainer:753) INFO: 6epoch:train:4401-4500batch: iter_time=9.299e-05, forward_time=0.141, loss_ctc=89.543, loss_interctc_layer6=87.366, loss_interctc_layer12=76.496, loss_interctc_layer15=72.265, loss_interctc_layer21=90.911, loss=83.316, backward_time=0.667, grad_norm=75.844, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.135, optim0_lr0=1.738e-04, train_time=2.911 +[gpua002:0/64] 2024-01-16 21:03:28,050 (trainer:753) INFO: 6epoch:train:4501-4600batch: iter_time=8.531e-05, forward_time=0.169, loss_ctc=101.196, loss_interctc_layer6=100.736, loss_interctc_layer12=87.530, loss_interctc_layer15=82.315, loss_interctc_layer21=101.938, loss=94.743, backward_time=0.699, grad_norm=50.680, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.135, optim0_lr0=1.737e-04, train_time=2.847 +[gpua002:0/64] 2024-01-16 21:07:13,637 (trainer:753) INFO: 6epoch:train:4601-4700batch: iter_time=8.985e-05, forward_time=0.211, loss_ctc=137.907, loss_interctc_layer6=122.216, loss_interctc_layer12=108.653, loss_interctc_layer15=104.035, loss_interctc_layer21=137.455, loss=122.053, backward_time=0.426, grad_norm=82.129, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.137, optim0_lr0=1.736e-04, train_time=2.256 +[gpua002:0/64] 2024-01-16 21:11:33,590 (trainer:753) INFO: 6epoch:train:4701-4800batch: iter_time=9.157e-05, forward_time=0.146, loss_ctc=98.483, loss_interctc_layer6=102.418, loss_interctc_layer12=88.967, loss_interctc_layer15=83.878, loss_interctc_layer21=98.693, loss=94.488, backward_time=0.516, grad_norm=65.297, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.135, optim0_lr0=1.735e-04, train_time=2.598 +[gpua002:0/64] 2024-01-16 21:15:57,225 (trainer:753) INFO: 6epoch:train:4801-4900batch: iter_time=8.666e-05, forward_time=0.141, loss_ctc=107.033, loss_interctc_layer6=107.281, loss_interctc_layer12=94.323, loss_interctc_layer15=89.539, loss_interctc_layer21=108.435, loss=101.322, backward_time=0.534, grad_norm=87.981, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.134, optim0_lr0=1.734e-04, train_time=2.637 +[gpua002:0/64] 2024-01-16 21:20:26,197 (trainer:753) INFO: 6epoch:train:4901-5000batch: iter_time=8.361e-05, forward_time=0.165, loss_ctc=117.983, loss_interctc_layer6=111.192, loss_interctc_layer12=97.238, loss_interctc_layer15=91.645, loss_interctc_layer21=118.096, loss=107.231, backward_time=0.529, grad_norm=67.781, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.136, optim0_lr0=1.733e-04, train_time=2.689 +[gpua002:0/64] 2024-01-16 21:20:40,364 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpua002:0/64] 2024-01-16 21:20:59,228 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua002:0/64] 2024-01-16 21:21:03,062 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpua002:0/64] 2024-01-16 21:21:03,062 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpua002:0/64] 2024-01-16 21:21:03,066 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpua002:0/64] 2024-01-16 21:43:41,667 (trainer:753) INFO: 6epoch:train:5001-5100batch: iter_time=3.034, forward_time=1.449, loss_ctc=130.921, loss_interctc_layer6=127.496, loss_interctc_layer12=111.391, loss_interctc_layer15=104.997, loss_interctc_layer21=131.989, loss=121.359, backward_time=1.288, grad_norm=78.911, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.210, optim0_lr0=1.731e-04, train_time=13.953 +[gpua002:0/64] 2024-01-16 21:53:41,423 (trainer:753) INFO: 6epoch:train:5101-5200batch: iter_time=0.004, forward_time=1.277, loss_ctc=112.549, loss_interctc_layer6=110.621, loss_interctc_layer12=96.326, loss_interctc_layer15=90.699, loss_interctc_layer21=113.501, loss=104.739, backward_time=1.234, grad_norm=80.106, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.203, optim0_lr0=1.730e-04, train_time=5.998 +[gpua002:0/64] 2024-01-16 22:01:58,273 (trainer:753) INFO: 6epoch:train:5201-5300batch: iter_time=0.005, forward_time=1.212, loss_ctc=115.886, loss_interctc_layer6=113.034, loss_interctc_layer12=100.005, loss_interctc_layer15=94.399, loss_interctc_layer21=117.191, loss=108.103, backward_time=0.964, grad_norm=76.457, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.279, optim0_lr0=1.729e-04, train_time=4.969 +[gpua002:0/64] 2024-01-16 22:11:27,136 (trainer:753) INFO: 6epoch:train:5301-5400batch: iter_time=0.005, forward_time=1.627, loss_ctc=114.242, loss_interctc_layer6=112.669, loss_interctc_layer12=97.341, loss_interctc_layer15=91.000, loss_interctc_layer21=114.996, loss=106.050, backward_time=1.475, grad_norm=62.492, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.252, optim0_lr0=1.728e-04, train_time=5.688 +[gpua002:0/64] 2024-01-16 22:16:51,272 (trainer:753) INFO: 6epoch:train:5401-5500batch: iter_time=0.001, forward_time=0.399, loss_ctc=107.916, loss_interctc_layer6=104.979, loss_interctc_layer12=91.741, loss_interctc_layer15=86.745, loss_interctc_layer21=108.813, loss=100.039, backward_time=0.842, grad_norm=83.906, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.164, optim0_lr0=1.727e-04, train_time=3.241 +[gpua002:0/64] 2024-01-16 22:21:38,479 (trainer:753) INFO: 6epoch:train:5501-5600batch: iter_time=0.002, forward_time=0.256, loss_ctc=122.940, loss_interctc_layer6=122.666, loss_interctc_layer12=108.031, loss_interctc_layer15=102.243, loss_interctc_layer21=123.360, loss=115.848, backward_time=0.700, grad_norm=79.245, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.148, optim0_lr0=1.726e-04, train_time=2.874 +[gpua002:0/64] 2024-01-16 22:25:59,698 (trainer:753) INFO: 6epoch:train:5601-5700batch: iter_time=1.261e-04, forward_time=0.245, loss_ctc=97.057, loss_interctc_layer6=89.003, loss_interctc_layer12=77.483, loss_interctc_layer15=73.185, loss_interctc_layer21=97.579, loss=86.861, backward_time=0.589, grad_norm=55.393, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.144, optim0_lr0=1.725e-04, train_time=2.611 +[gpua002:0/64] 2024-01-16 22:31:25,585 (trainer:753) INFO: 6epoch:train:5701-5800batch: iter_time=4.304e-04, forward_time=0.290, loss_ctc=92.162, loss_interctc_layer6=92.917, loss_interctc_layer12=80.916, loss_interctc_layer15=76.158, loss_interctc_layer21=92.616, loss=86.954, backward_time=0.653, grad_norm=59.535, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.142, optim0_lr0=1.724e-04, train_time=3.258 +[gpua002:0/64] 2024-01-16 22:36:00,021 (trainer:753) INFO: 6epoch:train:5801-5900batch: iter_time=2.714e-04, forward_time=0.308, loss_ctc=153.621, loss_interctc_layer6=131.558, loss_interctc_layer12=115.529, loss_interctc_layer15=109.414, loss_interctc_layer21=155.091, loss=133.043, backward_time=0.621, grad_norm=86.093, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.141, optim0_lr0=1.723e-04, train_time=2.744 +[gpua002:0/64] 2024-01-16 22:40:22,358 (trainer:753) INFO: 6epoch:train:5901-6000batch: iter_time=0.001, forward_time=0.287, loss_ctc=100.861, loss_interctc_layer6=96.806, loss_interctc_layer12=83.798, loss_interctc_layer15=78.709, loss_interctc_layer21=101.486, loss=92.332, backward_time=0.535, grad_norm=61.911, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.139, optim0_lr0=1.722e-04, train_time=2.624 +[gpua002:0/64] 2024-01-16 22:44:36,879 (trainer:753) INFO: 6epoch:train:6001-6100batch: iter_time=4.938e-04, forward_time=0.238, loss_ctc=95.888, loss_interctc_layer6=98.905, loss_interctc_layer12=86.041, loss_interctc_layer15=80.866, loss_interctc_layer21=96.464, loss=91.633, backward_time=0.553, grad_norm=65.870, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.144, optim0_lr0=1.721e-04, train_time=2.545 +[gpua002:0/64] 2024-01-16 22:49:05,181 (trainer:753) INFO: 6epoch:train:6101-6200batch: iter_time=5.656e-04, forward_time=0.256, loss_ctc=120.958, loss_interctc_layer6=110.556, loss_interctc_layer12=96.731, loss_interctc_layer15=90.932, loss_interctc_layer21=121.121, loss=108.060, backward_time=0.533, grad_norm=60.246, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.145, optim0_lr0=1.720e-04, train_time=2.683 +[gpua002:0/64] 2024-01-16 22:51:48,422 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpua002:0/64] 2024-01-16 22:52:07,120 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua002:0/64] 2024-01-16 22:52:10,678 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpua002:0/64] 2024-01-16 22:52:10,678 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpua002:0/64] 2024-01-16 22:52:10,681 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpua002:0/64] 2024-01-16 23:01:11,155 (trainer:753) INFO: 6epoch:train:6201-6300batch: iter_time=3.036, forward_time=0.223, loss_ctc=128.415, loss_interctc_layer6=122.524, loss_interctc_layer12=107.058, loss_interctc_layer15=100.930, loss_interctc_layer21=129.316, loss=117.648, backward_time=0.413, grad_norm=79.646, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.137, optim0_lr0=1.719e-04, train_time=7.260 +[gpua002:0/64] 2024-01-16 23:04:34,068 (trainer:753) INFO: 6epoch:train:6301-6400batch: iter_time=1.957e-04, forward_time=0.201, loss_ctc=125.523, loss_interctc_layer6=119.213, loss_interctc_layer12=104.256, loss_interctc_layer15=98.202, loss_interctc_layer21=126.027, loss=114.644, backward_time=0.551, grad_norm=76.818, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.142, optim0_lr0=1.718e-04, train_time=2.028 +[gpua002:0/64] 2024-01-16 23:07:36,711 (trainer:753) INFO: 6epoch:train:6401-6500batch: iter_time=8.497e-05, forward_time=0.142, loss_ctc=116.423, loss_interctc_layer6=118.035, loss_interctc_layer12=103.597, loss_interctc_layer15=97.550, loss_interctc_layer21=117.185, loss=110.558, backward_time=0.424, grad_norm=72.569, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.135, optim0_lr0=1.717e-04, train_time=1.827 +[gpua002:0/64] 2024-01-16 23:10:30,124 (trainer:753) INFO: 6epoch:train:6501-6600batch: iter_time=8.843e-05, forward_time=0.147, loss_ctc=105.817, loss_interctc_layer6=101.321, loss_interctc_layer12=88.177, loss_interctc_layer15=82.220, loss_interctc_layer21=105.858, loss=96.679, backward_time=0.353, grad_norm=66.680, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.135, optim0_lr0=1.715e-04, train_time=1.733 +[gpua002:0/64] 2024-01-16 23:13:56,748 (trainer:753) INFO: 6epoch:train:6601-6700batch: iter_time=8.748e-05, forward_time=0.225, loss_ctc=111.953, loss_interctc_layer6=114.206, loss_interctc_layer12=99.691, loss_interctc_layer15=93.581, loss_interctc_layer21=113.028, loss=106.492, backward_time=0.473, grad_norm=76.009, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.143, optim0_lr0=1.714e-04, train_time=2.066 +[gpua002:0/64] 2024-01-16 23:17:41,331 (trainer:753) INFO: 6epoch:train:6701-6800batch: iter_time=8.807e-05, forward_time=0.153, loss_ctc=121.814, loss_interctc_layer6=113.781, loss_interctc_layer12=98.798, loss_interctc_layer15=92.995, loss_interctc_layer21=122.667, loss=110.011, backward_time=0.443, grad_norm=121.057, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.135, optim0_lr0=1.713e-04, train_time=2.243 +[gpua002:0/64] 2024-01-16 23:20:40,681 (trainer:753) INFO: 6epoch:train:6801-6900batch: iter_time=8.626e-05, forward_time=0.141, loss_ctc=102.753, loss_interctc_layer6=107.073, loss_interctc_layer12=93.587, loss_interctc_layer15=88.893, loss_interctc_layer21=103.383, loss=99.138, backward_time=0.396, grad_norm=70.650, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.135, optim0_lr0=1.712e-04, train_time=1.795 +[gpua002:0/64] 2024-01-16 23:23:16,172 (trainer:753) INFO: 6epoch:train:6901-7000batch: iter_time=7.937e-05, forward_time=0.140, loss_ctc=93.013, loss_interctc_layer6=85.735, loss_interctc_layer12=74.505, loss_interctc_layer15=70.074, loss_interctc_layer21=93.603, loss=83.386, backward_time=0.364, grad_norm=54.134, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.135, optim0_lr0=1.711e-04, train_time=1.555 +[gpua002:0/64] 2024-01-16 23:26:07,777 (trainer:753) INFO: 6epoch:train:7001-7100batch: iter_time=8.165e-05, forward_time=0.155, loss_ctc=101.740, loss_interctc_layer6=100.393, loss_interctc_layer12=86.583, loss_interctc_layer15=81.117, loss_interctc_layer21=101.935, loss=94.354, backward_time=0.399, grad_norm=65.045, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.136, optim0_lr0=1.710e-04, train_time=1.715 +[gpua002:0/64] 2024-01-16 23:28:35,031 (trainer:753) INFO: 6epoch:train:7101-7200batch: iter_time=8.514e-05, forward_time=0.206, loss_ctc=137.839, loss_interctc_layer6=121.541, loss_interctc_layer12=106.398, loss_interctc_layer15=100.590, loss_interctc_layer21=139.368, loss=121.147, backward_time=0.322, grad_norm=80.368, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.136, optim0_lr0=1.709e-04, train_time=1.473 +[gpua002:0/64] 2024-01-16 23:31:19,062 (trainer:753) INFO: 6epoch:train:7201-7300batch: iter_time=8.691e-05, forward_time=0.168, loss_ctc=100.053, loss_interctc_layer6=102.019, loss_interctc_layer12=87.908, loss_interctc_layer15=82.634, loss_interctc_layer21=100.964, loss=94.716, backward_time=0.359, grad_norm=59.805, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.136, optim0_lr0=1.708e-04, train_time=1.640 +[gpua002:0/64] 2024-01-16 23:34:44,918 (trainer:753) INFO: 6epoch:train:7301-7400batch: iter_time=8.641e-05, forward_time=0.144, loss_ctc=107.540, loss_interctc_layer6=105.902, loss_interctc_layer12=92.672, loss_interctc_layer15=87.303, loss_interctc_layer21=108.457, loss=100.375, backward_time=0.422, grad_norm=66.283, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.136, optim0_lr0=1.707e-04, train_time=2.059 +[gpua002:0/64] 2024-01-16 23:37:30,745 (trainer:753) INFO: 6epoch:train:7401-7500batch: iter_time=8.960e-05, forward_time=0.141, loss_ctc=118.710, loss_interctc_layer6=109.137, loss_interctc_layer12=95.077, loss_interctc_layer15=89.585, loss_interctc_layer21=119.581, loss=106.418, backward_time=0.412, grad_norm=64.271, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.135, optim0_lr0=1.706e-04, train_time=1.658 +[gpua002:0/64] 2024-01-16 23:37:50,076 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpua002:0/64] 2024-01-16 23:38:09,050 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua002:0/64] 2024-01-16 23:38:12,967 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpua002:0/64] 2024-01-16 23:38:12,967 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpua002:0/64] 2024-01-16 23:38:12,970 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpua002:0/64] 2024-01-16 23:47:25,180 (trainer:753) INFO: 6epoch:train:7501-7600batch: iter_time=2.390, forward_time=0.216, loss_ctc=126.902, loss_interctc_layer6=125.108, loss_interctc_layer12=109.429, loss_interctc_layer15=102.410, loss_interctc_layer21=127.444, loss=118.259, backward_time=0.334, grad_norm=87.049, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.136, optim0_lr0=1.705e-04, train_time=5.944 +[gpua002:0/64] 2024-01-16 23:51:26,034 (trainer:753) INFO: 6epoch:train:7601-7700batch: iter_time=8.360e-05, forward_time=0.142, loss_ctc=104.538, loss_interctc_layer6=109.129, loss_interctc_layer12=94.975, loss_interctc_layer15=89.229, loss_interctc_layer21=104.949, loss=100.564, backward_time=0.499, grad_norm=62.657, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.135, optim0_lr0=1.704e-04, train_time=2.409 +[gpua002:0/64] 2024-01-16 23:53:55,696 (trainer:753) INFO: 6epoch:train:7701-7800batch: iter_time=9.294e-05, forward_time=0.142, loss_ctc=108.925, loss_interctc_layer6=112.283, loss_interctc_layer12=97.996, loss_interctc_layer15=92.336, loss_interctc_layer21=108.263, loss=103.961, backward_time=0.328, grad_norm=66.480, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.136, optim0_lr0=1.703e-04, train_time=1.496 +[gpua002:0/64] 2024-01-16 23:58:03,601 (trainer:753) INFO: 6epoch:train:7801-7900batch: iter_time=2.487e-04, forward_time=0.231, loss_ctc=108.489, loss_interctc_layer6=110.034, loss_interctc_layer12=94.481, loss_interctc_layer15=88.314, loss_interctc_layer21=109.565, loss=102.176, backward_time=0.602, grad_norm=80.259, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.146, optim0_lr0=1.702e-04, train_time=2.476 +[gpua002:0/64] 2024-01-17 00:00:39,543 (trainer:753) INFO: 6epoch:train:7901-8000batch: iter_time=1.096e-04, forward_time=0.171, loss_ctc=103.275, loss_interctc_layer6=102.238, loss_interctc_layer12=89.204, loss_interctc_layer15=83.461, loss_interctc_layer21=103.879, loss=96.412, backward_time=0.313, grad_norm=141.716, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.135, optim0_lr0=1.701e-04, train_time=1.561 +[gpua002:0/64] 2024-01-17 00:03:45,085 (trainer:753) INFO: 6epoch:train:8001-8100batch: iter_time=1.012e-04, forward_time=0.141, loss_ctc=117.008, loss_interctc_layer6=121.756, loss_interctc_layer12=106.584, loss_interctc_layer15=100.084, loss_interctc_layer21=117.963, loss=112.679, backward_time=0.414, grad_norm=83.696, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.135, optim0_lr0=1.700e-04, train_time=1.857 +[gpua002:0/64] 2024-01-17 00:07:10,047 (trainer:753) INFO: 6epoch:train:8101-8200batch: iter_time=2.527e-04, forward_time=0.197, loss_ctc=91.294, loss_interctc_layer6=88.891, loss_interctc_layer12=76.513, loss_interctc_layer15=71.994, loss_interctc_layer21=91.196, loss=83.977, backward_time=0.443, grad_norm=56.923, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.143, optim0_lr0=1.699e-04, train_time=2.049 +[gpua002:0/64] 2024-01-17 00:11:13,754 (trainer:753) INFO: 6epoch:train:8201-8300batch: iter_time=8.526e-05, forward_time=0.141, loss_ctc=87.064, loss_interctc_layer6=91.712, loss_interctc_layer12=79.484, loss_interctc_layer15=75.078, loss_interctc_layer21=87.609, loss=84.189, backward_time=0.436, grad_norm=86.642, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.135, optim0_lr0=1.698e-04, train_time=2.437 +[gpua002:0/64] 2024-01-17 00:14:03,375 (trainer:753) INFO: 6epoch:train:8301-8400batch: iter_time=8.849e-05, forward_time=0.142, loss_ctc=144.761, loss_interctc_layer6=129.426, loss_interctc_layer12=112.662, loss_interctc_layer15=106.484, loss_interctc_layer21=145.757, loss=127.818, backward_time=0.351, grad_norm=82.638, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.135, optim0_lr0=1.697e-04, train_time=1.696 +[gpua002:0/64] 2024-01-17 00:17:19,658 (trainer:753) INFO: 6epoch:train:8401-8500batch: iter_time=8.247e-05, forward_time=0.141, loss_ctc=95.375, loss_interctc_layer6=95.679, loss_interctc_layer12=82.676, loss_interctc_layer15=77.197, loss_interctc_layer21=96.258, loss=89.437, backward_time=0.406, grad_norm=77.584, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.135, optim0_lr0=1.696e-04, train_time=1.963 +[gpua002:0/64] 2024-01-17 00:20:39,208 (trainer:753) INFO: 6epoch:train:8501-8600batch: iter_time=1.789e-04, forward_time=0.298, loss_ctc=91.093, loss_interctc_layer6=97.713, loss_interctc_layer12=84.902, loss_interctc_layer15=79.478, loss_interctc_layer21=91.643, loss=88.966, backward_time=0.415, grad_norm=105.087, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.140, optim0_lr0=1.695e-04, train_time=1.995 +[gpua002:0/64] 2024-01-17 00:23:56,922 (trainer:753) INFO: 6epoch:train:8601-8700batch: iter_time=8.233e-05, forward_time=0.142, loss_ctc=115.201, loss_interctc_layer6=109.848, loss_interctc_layer12=95.722, loss_interctc_layer15=90.006, loss_interctc_layer21=115.192, loss=105.194, backward_time=0.461, grad_norm=86.650, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.134, optim0_lr0=1.694e-04, train_time=1.977 +[gpua002:0/64] 2024-01-17 00:25:31,835 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpua002:0/64] 2024-01-17 00:25:51,108 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua002:0/64] 2024-01-17 00:25:54,816 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpua002:0/64] 2024-01-17 00:25:54,816 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpua002:0/64] 2024-01-17 00:25:54,819 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpua002:0/64] 2024-01-17 00:32:22,927 (trainer:753) INFO: 6epoch:train:8701-8800batch: iter_time=3.425, forward_time=0.211, loss_ctc=124.394, loss_interctc_layer6=120.667, loss_interctc_layer12=104.930, loss_interctc_layer15=99.311, loss_interctc_layer21=125.251, loss=114.910, backward_time=0.332, grad_norm=74.458, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.137, optim0_lr0=1.693e-04, train_time=5.060 +[gpua002:0/64] 2024-01-17 00:35:40,224 (trainer:753) INFO: 6epoch:train:8801-8900batch: iter_time=8.081e-05, forward_time=0.376, loss_ctc=123.364, loss_interctc_layer6=117.489, loss_interctc_layer12=102.199, loss_interctc_layer15=96.317, loss_interctc_layer21=124.216, loss=112.717, backward_time=0.436, grad_norm=111.568, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.164, optim0_lr0=1.692e-04, train_time=1.969 +[gpua002:0/64] 2024-01-17 00:38:56,866 (trainer:753) INFO: 6epoch:train:8901-9000batch: iter_time=0.001, forward_time=0.384, loss_ctc=113.758, loss_interctc_layer6=114.746, loss_interctc_layer12=100.485, loss_interctc_layer15=94.751, loss_interctc_layer21=114.995, loss=107.747, backward_time=0.550, grad_norm=65.457, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.158, optim0_lr0=1.691e-04, train_time=1.968 +[gpua002:0/64] 2024-01-17 00:42:08,794 (trainer:753) INFO: 6epoch:train:9001-9100batch: iter_time=6.851e-04, forward_time=0.309, loss_ctc=102.588, loss_interctc_layer6=98.941, loss_interctc_layer12=85.427, loss_interctc_layer15=80.212, loss_interctc_layer21=103.551, loss=94.144, backward_time=0.519, grad_norm=63.101, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.150, optim0_lr0=1.690e-04, train_time=1.919 +[gpua002:0/64] 2024-01-17 00:45:50,022 (trainer:753) INFO: 6epoch:train:9101-9200batch: iter_time=3.550e-04, forward_time=0.363, loss_ctc=111.314, loss_interctc_layer6=113.698, loss_interctc_layer12=98.299, loss_interctc_layer15=92.236, loss_interctc_layer21=112.165, loss=105.542, backward_time=0.624, grad_norm=84.275, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.159, optim0_lr0=1.689e-04, train_time=2.213 +[gpua002:0/64] 2024-01-17 00:50:02,992 (trainer:753) INFO: 6epoch:train:9201-9300batch: iter_time=5.763e-04, forward_time=0.378, loss_ctc=118.796, loss_interctc_layer6=111.575, loss_interctc_layer12=97.161, loss_interctc_layer15=90.867, loss_interctc_layer21=119.320, loss=107.544, backward_time=0.552, grad_norm=116.855, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.152, optim0_lr0=1.688e-04, train_time=2.529 +[gpua002:0/64] 2024-01-17 00:54:00,917 (trainer:753) INFO: 6epoch:train:9301-9400batch: iter_time=1.031e-04, forward_time=0.288, loss_ctc=101.153, loss_interctc_layer6=104.341, loss_interctc_layer12=90.919, loss_interctc_layer15=85.613, loss_interctc_layer21=102.395, loss=96.884, backward_time=0.514, grad_norm=62.766, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.145, optim0_lr0=1.687e-04, train_time=2.380 +[gpua002:0/64] 2024-01-17 00:57:13,963 (trainer:753) INFO: 6epoch:train:9401-9500batch: iter_time=0.001, forward_time=0.278, loss_ctc=91.672, loss_interctc_layer6=84.859, loss_interctc_layer12=73.437, loss_interctc_layer15=68.741, loss_interctc_layer21=92.510, loss=82.244, backward_time=0.478, grad_norm=78.914, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.152, optim0_lr0=1.686e-04, train_time=1.928 +[gpua002:0/64] 2024-01-17 01:00:26,700 (trainer:753) INFO: 6epoch:train:9501-9600batch: iter_time=2.861e-04, forward_time=0.338, loss_ctc=100.777, loss_interctc_layer6=99.445, loss_interctc_layer12=85.754, loss_interctc_layer15=80.438, loss_interctc_layer21=101.693, loss=93.622, backward_time=0.489, grad_norm=59.067, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.146, optim0_lr0=1.685e-04, train_time=1.928 +[gpua002:0/64] 2024-01-17 01:04:19,709 (trainer:753) INFO: 6epoch:train:9601-9700batch: iter_time=3.818e-04, forward_time=0.341, loss_ctc=133.935, loss_interctc_layer6=119.328, loss_interctc_layer12=104.572, loss_interctc_layer15=98.785, loss_interctc_layer21=135.243, loss=118.372, backward_time=0.529, grad_norm=92.379, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.147, optim0_lr0=1.684e-04, train_time=2.331 +[gpua002:0/64] 2024-01-17 01:07:26,954 (trainer:753) INFO: 6epoch:train:9701-9800batch: iter_time=1.086e-04, forward_time=0.308, loss_ctc=100.127, loss_interctc_layer6=100.839, loss_interctc_layer12=87.006, loss_interctc_layer15=81.369, loss_interctc_layer21=100.889, loss=94.046, backward_time=0.457, grad_norm=58.746, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.150, optim0_lr0=1.683e-04, train_time=1.872 +[gpua002:0/64] 2024-01-17 01:10:10,867 (trainer:753) INFO: 6epoch:train:9801-9900batch: iter_time=9.865e-05, forward_time=0.250, loss_ctc=105.515, loss_interctc_layer6=104.649, loss_interctc_layer12=91.317, loss_interctc_layer15=85.719, loss_interctc_layer21=106.660, loss=98.772, backward_time=0.396, grad_norm=61.997, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.146, optim0_lr0=1.682e-04, train_time=1.638 +[gpua002:0/64] 2024-01-17 01:13:33,269 (trainer:753) INFO: 6epoch:train:9901-10000batch: iter_time=7.525e-04, forward_time=0.324, loss_ctc=118.069, loss_interctc_layer6=107.812, loss_interctc_layer12=93.381, loss_interctc_layer15=87.778, loss_interctc_layer21=119.905, loss=105.389, backward_time=0.504, grad_norm=79.872, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.154, optim0_lr0=1.681e-04, train_time=2.024 +[gpua002:0/64] 2024-01-17 01:13:53,395 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpua002:0/64] 2024-01-17 01:14:13,170 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua002:0/64] 2024-01-17 01:14:16,867 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpua002:0/64] 2024-01-17 01:14:16,867 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpua002:0/64] 2024-01-17 01:14:16,870 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpua002:0/64] 2024-01-17 01:29:31,935 (trainer:753) INFO: 6epoch:train:10001-10100batch: iter_time=3.510, forward_time=0.307, loss_ctc=124.330, loss_interctc_layer6=123.368, loss_interctc_layer12=107.601, loss_interctc_layer15=101.196, loss_interctc_layer21=125.317, loss=116.362, backward_time=0.324, grad_norm=99.455, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.139, optim0_lr0=1.680e-04, train_time=9.587 +[gpua002:0/64] 2024-01-17 01:32:38,753 (trainer:753) INFO: 6epoch:train:10101-10200batch: iter_time=8.944e-05, forward_time=0.158, loss_ctc=103.100, loss_interctc_layer6=108.263, loss_interctc_layer12=93.889, loss_interctc_layer15=88.085, loss_interctc_layer21=103.631, loss=99.394, backward_time=0.365, grad_norm=73.477, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.135, optim0_lr0=1.679e-04, train_time=1.868 +[gpua002:0/64] 2024-01-17 01:35:44,815 (trainer:753) INFO: 6epoch:train:10201-10300batch: iter_time=3.526e-04, forward_time=0.246, loss_ctc=106.450, loss_interctc_layer6=111.621, loss_interctc_layer12=96.666, loss_interctc_layer15=91.372, loss_interctc_layer21=107.363, loss=102.694, backward_time=0.390, grad_norm=68.449, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.151, optim0_lr0=1.678e-04, train_time=1.860 +[gpua002:0/64] 2024-01-17 01:38:31,270 (trainer:753) INFO: 6epoch:train:10301-10400batch: iter_time=9.726e-05, forward_time=0.157, loss_ctc=108.614, loss_interctc_layer6=109.345, loss_interctc_layer12=94.462, loss_interctc_layer15=87.826, loss_interctc_layer21=109.032, loss=101.856, backward_time=0.369, grad_norm=67.381, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.135, optim0_lr0=1.677e-04, train_time=1.665 +[gpua002:0/64] 2024-01-17 01:41:33,750 (trainer:753) INFO: 6epoch:train:10401-10500batch: iter_time=9.647e-05, forward_time=0.260, loss_ctc=100.341, loss_interctc_layer6=100.086, loss_interctc_layer12=86.823, loss_interctc_layer15=81.788, loss_interctc_layer21=102.056, loss=94.219, backward_time=0.365, grad_norm=80.220, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.139, optim0_lr0=1.676e-04, train_time=1.824 +[gpua002:0/64] 2024-01-17 01:44:56,190 (trainer:753) INFO: 6epoch:train:10501-10600batch: iter_time=1.013e-04, forward_time=0.142, loss_ctc=116.710, loss_interctc_layer6=119.335, loss_interctc_layer12=104.140, loss_interctc_layer15=98.797, loss_interctc_layer21=116.583, loss=111.113, backward_time=0.471, grad_norm=91.545, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.134, optim0_lr0=1.675e-04, train_time=2.025 +[gpua002:0/64] 2024-01-17 01:47:56,156 (trainer:753) INFO: 6epoch:train:10601-10700batch: iter_time=3.938e-04, forward_time=0.254, loss_ctc=89.884, loss_interctc_layer6=87.154, loss_interctc_layer12=75.575, loss_interctc_layer15=70.920, loss_interctc_layer21=90.731, loss=82.853, backward_time=0.371, grad_norm=72.279, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.140, optim0_lr0=1.674e-04, train_time=1.799 +[gpua002:0/64] 2024-01-17 01:50:04,406 (trainer:753) INFO: 6epoch:train:10701-10800batch: iter_time=1.021e-04, forward_time=0.143, loss_ctc=85.796, loss_interctc_layer6=90.942, loss_interctc_layer12=78.756, loss_interctc_layer15=73.965, loss_interctc_layer21=88.382, loss=83.568, backward_time=0.300, grad_norm=51.676, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.135, optim0_lr0=1.673e-04, train_time=1.283 +[gpua002:0/64] 2024-01-17 01:53:01,635 (trainer:753) INFO: 6epoch:train:10801-10900batch: iter_time=5.474e-04, forward_time=0.212, loss_ctc=142.232, loss_interctc_layer6=126.184, loss_interctc_layer12=109.682, loss_interctc_layer15=103.807, loss_interctc_layer21=143.638, loss=125.109, backward_time=0.400, grad_norm=117.539, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.143, optim0_lr0=1.672e-04, train_time=1.770 +[gpua002:0/64] 2024-01-17 01:55:41,589 (trainer:753) INFO: 6epoch:train:10901-11000batch: iter_time=9.129e-05, forward_time=0.141, loss_ctc=93.523, loss_interctc_layer6=94.598, loss_interctc_layer12=81.625, loss_interctc_layer15=76.260, loss_interctc_layer21=94.236, loss=88.048, backward_time=0.361, grad_norm=57.654, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.135, optim0_lr0=1.671e-04, train_time=1.602 +[gpua002:0/64] 2024-01-17 01:59:21,455 (trainer:753) INFO: 6epoch:train:11001-11100batch: iter_time=1.090e-04, forward_time=0.283, loss_ctc=89.712, loss_interctc_layer6=95.986, loss_interctc_layer12=83.756, loss_interctc_layer15=78.073, loss_interctc_layer21=90.364, loss=87.578, backward_time=0.433, grad_norm=65.794, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.139, optim0_lr0=1.670e-04, train_time=2.198 +[gpua002:0/64] 2024-01-17 02:01:55,882 (trainer:753) INFO: 6epoch:train:11101-11200batch: iter_time=9.401e-05, forward_time=0.142, loss_ctc=113.643, loss_interctc_layer6=108.492, loss_interctc_layer12=94.495, loss_interctc_layer15=88.821, loss_interctc_layer21=114.399, loss=103.970, backward_time=0.328, grad_norm=95.459, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.134, optim0_lr0=1.669e-04, train_time=1.542 +[gpua002:0/64] 2024-01-17 02:04:00,952 (multiple_iter_factory:32) INFO: Building 9th iter-factory... +[gpua002:0/64] 2024-01-17 02:04:20,161 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua002:0/64] 2024-01-17 02:04:23,810 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpua002:0/64] 2024-01-17 02:04:23,810 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpua002:0/64] 2024-01-17 02:04:23,814 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpua002:0/64] 2024-01-17 02:16:40,843 (trainer:753) INFO: 6epoch:train:11201-11300batch: iter_time=3.255, forward_time=0.192, loss_ctc=124.220, loss_interctc_layer6=120.618, loss_interctc_layer12=104.836, loss_interctc_layer15=98.472, loss_interctc_layer21=125.353, loss=114.700, backward_time=0.371, grad_norm=71.876, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.137, optim0_lr0=1.668e-04, train_time=8.852 +[gpua002:0/64] 2024-01-17 02:20:06,423 (trainer:753) INFO: 6epoch:train:11301-11400batch: iter_time=8.919e-05, forward_time=0.166, loss_ctc=119.959, loss_interctc_layer6=115.794, loss_interctc_layer12=100.620, loss_interctc_layer15=94.002, loss_interctc_layer21=121.171, loss=110.309, backward_time=0.414, grad_norm=90.687, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.135, optim0_lr0=1.667e-04, train_time=2.056 +[gpua002:0/64] 2024-01-17 02:23:59,507 (trainer:753) INFO: 6epoch:train:11401-11500batch: iter_time=8.866e-05, forward_time=0.142, loss_ctc=113.498, loss_interctc_layer6=115.328, loss_interctc_layer12=99.513, loss_interctc_layer15=93.770, loss_interctc_layer21=115.042, loss=107.430, backward_time=0.473, grad_norm=165.062, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.135, optim0_lr0=1.666e-04, train_time=2.330 +[gpua002:0/64] 2024-01-17 02:27:41,917 (trainer:753) INFO: 6epoch:train:11501-11600batch: iter_time=9.260e-05, forward_time=0.141, loss_ctc=101.613, loss_interctc_layer6=98.470, loss_interctc_layer12=84.875, loss_interctc_layer15=79.555, loss_interctc_layer21=102.498, loss=93.402, backward_time=0.446, grad_norm=62.259, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.135, optim0_lr0=1.665e-04, train_time=2.222 +[gpua002:0/64] 2024-01-17 02:31:10,204 (trainer:753) INFO: 6epoch:train:11601-11700batch: iter_time=9.068e-05, forward_time=0.160, loss_ctc=109.604, loss_interctc_layer6=111.457, loss_interctc_layer12=96.721, loss_interctc_layer15=90.502, loss_interctc_layer21=110.294, loss=103.715, backward_time=0.423, grad_norm=70.546, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.136, optim0_lr0=1.664e-04, train_time=2.086 +[gpua002:0/64] 2024-01-17 02:34:55,252 (trainer:753) INFO: 6epoch:train:11701-11800batch: iter_time=9.881e-05, forward_time=0.191, loss_ctc=117.615, loss_interctc_layer6=111.508, loss_interctc_layer12=96.353, loss_interctc_layer15=90.771, loss_interctc_layer21=118.368, loss=106.923, backward_time=0.462, grad_norm=84.457, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.136, optim0_lr0=1.663e-04, train_time=2.250 +[gpua002:0/64] 2024-01-17 02:37:53,501 (trainer:753) INFO: 6epoch:train:11801-11900batch: iter_time=9.173e-05, forward_time=0.164, loss_ctc=100.690, loss_interctc_layer6=103.693, loss_interctc_layer12=90.055, loss_interctc_layer15=84.859, loss_interctc_layer21=101.278, loss=96.115, backward_time=0.412, grad_norm=73.174, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.136, optim0_lr0=1.662e-04, train_time=1.782 +[gpua002:0/64] 2024-01-17 02:40:56,428 (trainer:753) INFO: 6epoch:train:11901-12000batch: iter_time=9.598e-05, forward_time=0.157, loss_ctc=89.854, loss_interctc_layer6=83.603, loss_interctc_layer12=72.168, loss_interctc_layer15=67.607, loss_interctc_layer21=91.136, loss=80.874, backward_time=0.420, grad_norm=62.994, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.136, optim0_lr0=1.661e-04, train_time=1.829 +[gpua002:0/64] 2024-01-17 02:44:46,506 (trainer:753) INFO: 6epoch:train:12001-12100batch: iter_time=9.026e-05, forward_time=0.141, loss_ctc=99.349, loss_interctc_layer6=97.703, loss_interctc_layer12=84.188, loss_interctc_layer15=78.401, loss_interctc_layer21=100.229, loss=91.974, backward_time=0.492, grad_norm=57.525, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.134, optim0_lr0=1.660e-04, train_time=2.300 +[gpua002:0/64] 2024-01-17 02:48:51,293 (trainer:753) INFO: 6epoch:train:12101-12200batch: iter_time=9.138e-05, forward_time=0.141, loss_ctc=131.395, loss_interctc_layer6=118.526, loss_interctc_layer12=103.069, loss_interctc_layer15=97.383, loss_interctc_layer21=133.714, loss=116.817, backward_time=0.510, grad_norm=87.889, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.134, optim0_lr0=1.659e-04, train_time=2.448 +[gpua002:0/64] 2024-01-17 02:52:15,397 (trainer:753) INFO: 6epoch:train:12201-12300batch: iter_time=9.126e-05, forward_time=0.148, loss_ctc=98.119, loss_interctc_layer6=99.276, loss_interctc_layer12=85.292, loss_interctc_layer15=79.935, loss_interctc_layer21=99.112, loss=92.347, backward_time=0.453, grad_norm=64.055, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.135, optim0_lr0=1.659e-04, train_time=2.041 +[gpua002:0/64] 2024-01-17 02:56:06,985 (trainer:753) INFO: 6epoch:train:12301-12400batch: iter_time=9.503e-05, forward_time=0.159, loss_ctc=104.562, loss_interctc_layer6=103.814, loss_interctc_layer12=90.330, loss_interctc_layer15=84.710, loss_interctc_layer21=105.309, loss=97.745, backward_time=0.589, grad_norm=86.472, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.136, optim0_lr0=1.658e-04, train_time=2.316 +[gpua002:0/64] 2024-01-17 02:59:36,482 (trainer:753) INFO: 6epoch:train:12401-12500batch: iter_time=8.966e-05, forward_time=0.158, loss_ctc=117.534, loss_interctc_layer6=107.481, loss_interctc_layer12=93.257, loss_interctc_layer15=87.349, loss_interctc_layer21=118.214, loss=104.767, backward_time=0.449, grad_norm=83.911, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.137, optim0_lr0=1.657e-04, train_time=2.095 +[gpua002:0/64] 2024-01-17 02:59:56,534 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpua002:0/64] 2024-01-17 03:00:17,270 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua002:0/64] 2024-01-17 03:00:21,501 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpua002:0/64] 2024-01-17 03:00:21,501 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpua002:0/64] 2024-01-17 03:00:21,504 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpua002:0/64] 2024-01-17 03:14:16,495 (trainer:753) INFO: 6epoch:train:12501-12600batch: iter_time=3.196, forward_time=0.195, loss_ctc=123.885, loss_interctc_layer6=123.004, loss_interctc_layer12=106.436, loss_interctc_layer15=99.986, loss_interctc_layer21=124.629, loss=115.588, backward_time=0.632, grad_norm=72.962, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.136, optim0_lr0=1.656e-04, train_time=8.800 +[gpua002:0/64] 2024-01-17 03:19:24,072 (trainer:753) INFO: 6epoch:train:12601-12700batch: iter_time=9.438e-05, forward_time=0.142, loss_ctc=100.682, loss_interctc_layer6=106.546, loss_interctc_layer12=92.123, loss_interctc_layer15=86.499, loss_interctc_layer21=102.041, loss=97.578, backward_time=0.552, grad_norm=74.411, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.134, optim0_lr0=1.655e-04, train_time=3.076 +[gpua002:0/64] 2024-01-17 03:24:02,141 (trainer:753) INFO: 6epoch:train:12701-12800batch: iter_time=9.346e-05, forward_time=0.235, loss_ctc=104.629, loss_interctc_layer6=110.719, loss_interctc_layer12=96.476, loss_interctc_layer15=91.277, loss_interctc_layer21=105.362, loss=101.693, backward_time=0.549, grad_norm=73.189, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.135, optim0_lr0=1.654e-04, train_time=2.780 +[gpua002:0/64] 2024-01-17 03:28:07,381 (trainer:753) INFO: 6epoch:train:12801-12900batch: iter_time=1.034e-04, forward_time=0.144, loss_ctc=106.504, loss_interctc_layer6=108.582, loss_interctc_layer12=92.987, loss_interctc_layer15=87.034, loss_interctc_layer21=107.721, loss=100.566, backward_time=0.533, grad_norm=72.790, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.134, optim0_lr0=1.653e-04, train_time=2.452 +[gpua002:0/64] 2024-01-17 03:34:28,403 (trainer:753) INFO: 6epoch:train:12901-13000batch: iter_time=1.073e-04, forward_time=0.140, loss_ctc=99.805, loss_interctc_layer6=100.069, loss_interctc_layer12=86.576, loss_interctc_layer15=81.001, loss_interctc_layer21=101.123, loss=93.715, backward_time=0.822, grad_norm=72.510, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.134, optim0_lr0=1.652e-04, train_time=3.809 +[gpua002:0/64] 2024-01-17 03:39:24,155 (trainer:753) INFO: 6epoch:train:13001-13100batch: iter_time=1.035e-04, forward_time=0.148, loss_ctc=113.639, loss_interctc_layer6=118.398, loss_interctc_layer12=102.967, loss_interctc_layer15=96.677, loss_interctc_layer21=114.196, loss=109.175, backward_time=0.540, grad_norm=103.692, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.135, optim0_lr0=1.651e-04, train_time=2.958 +[gpua002:0/64] 2024-01-17 03:42:46,554 (trainer:753) INFO: 6epoch:train:13101-13200batch: iter_time=9.681e-05, forward_time=0.163, loss_ctc=87.456, loss_interctc_layer6=86.751, loss_interctc_layer12=74.754, loss_interctc_layer15=69.740, loss_interctc_layer21=88.469, loss=81.434, backward_time=0.444, grad_norm=50.008, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.136, optim0_lr0=1.650e-04, train_time=2.023 +[gpua002:0/64] 2024-01-17 03:46:23,373 (trainer:753) INFO: 6epoch:train:13201-13300batch: iter_time=1.087e-04, forward_time=0.195, loss_ctc=84.671, loss_interctc_layer6=89.884, loss_interctc_layer12=77.871, loss_interctc_layer15=72.636, loss_interctc_layer21=84.671, loss=81.947, backward_time=0.504, grad_norm=55.433, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.139, optim0_lr0=1.649e-04, train_time=2.168 +[gpua002:0/64] 2024-01-17 03:51:34,330 (trainer:753) INFO: 6epoch:train:13301-13400batch: iter_time=1.082e-04, forward_time=0.143, loss_ctc=140.455, loss_interctc_layer6=125.612, loss_interctc_layer12=108.195, loss_interctc_layer15=102.315, loss_interctc_layer21=142.167, loss=123.749, backward_time=0.498, grad_norm=75.394, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.135, optim0_lr0=1.648e-04, train_time=3.110 +[gpua002:0/64] 2024-01-17 03:59:09,620 (trainer:753) INFO: 6epoch:train:13401-13500batch: iter_time=1.030e-04, forward_time=0.144, loss_ctc=92.858, loss_interctc_layer6=93.734, loss_interctc_layer12=80.471, loss_interctc_layer15=75.109, loss_interctc_layer21=93.983, loss=87.231, backward_time=0.911, grad_norm=59.272, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.135, optim0_lr0=1.647e-04, train_time=4.552 +[gpua002:0/64] 2024-01-17 04:06:28,867 (trainer:753) INFO: 6epoch:train:13501-13600batch: iter_time=1.085e-04, forward_time=0.174, loss_ctc=89.001, loss_interctc_layer6=95.680, loss_interctc_layer12=82.481, loss_interctc_layer15=77.428, loss_interctc_layer21=89.661, loss=86.850, backward_time=0.990, grad_norm=49.317, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.136, optim0_lr0=1.646e-04, train_time=4.393 +[gpua002:0/64] 2024-01-17 04:15:38,339 (trainer:753) INFO: 6epoch:train:13601-13700batch: iter_time=9.969e-05, forward_time=0.166, loss_ctc=112.218, loss_interctc_layer6=107.898, loss_interctc_layer12=93.655, loss_interctc_layer15=87.796, loss_interctc_layer21=112.434, loss=102.800, backward_time=0.820, grad_norm=67.232, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.136, optim0_lr0=1.645e-04, train_time=5.494 +[gpua002:0/64] 2024-01-17 04:19:20,139 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpua002:0/64] 2024-01-17 04:19:39,567 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua002:0/64] 2024-01-17 04:19:43,539 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpua002:0/64] 2024-01-17 04:19:43,539 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpua002:0/64] 2024-01-17 04:19:43,542 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpua002:0/64] 2024-01-17 04:28:43,177 (trainer:753) INFO: 6epoch:train:13701-13800batch: iter_time=3.115, forward_time=0.199, loss_ctc=120.520, loss_interctc_layer6=119.156, loss_interctc_layer12=103.216, loss_interctc_layer15=97.345, loss_interctc_layer21=122.030, loss=112.453, backward_time=0.483, grad_norm=70.581, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.138, optim0_lr0=1.644e-04, train_time=7.848 +[gpua002:0/64] 2024-01-17 04:31:59,779 (trainer:753) INFO: 6epoch:train:13801-13900batch: iter_time=9.260e-05, forward_time=0.236, loss_ctc=113.975, loss_interctc_layer6=115.176, loss_interctc_layer12=99.668, loss_interctc_layer15=93.392, loss_interctc_layer21=114.994, loss=107.441, backward_time=0.407, grad_norm=75.538, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.140, optim0_lr0=1.644e-04, train_time=1.965 +[gpua002:0/64] 2024-01-17 04:34:56,379 (trainer:753) INFO: 6epoch:train:13901-14000batch: iter_time=9.821e-05, forward_time=0.223, loss_ctc=104.494, loss_interctc_layer6=113.254, loss_interctc_layer12=98.983, loss_interctc_layer15=93.212, loss_interctc_layer21=105.011, loss=102.991, backward_time=0.387, grad_norm=70.785, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.142, optim0_lr0=1.643e-04, train_time=1.765 +[gpua002:0/64] 2024-01-17 04:37:36,598 (trainer:753) INFO: 6epoch:train:14001-14100batch: iter_time=0.001, forward_time=0.160, loss_ctc=98.014, loss_interctc_layer6=97.152, loss_interctc_layer12=83.393, loss_interctc_layer15=77.878, loss_interctc_layer21=98.860, loss=91.059, backward_time=0.369, grad_norm=52.390, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.136, optim0_lr0=1.642e-04, train_time=1.603 +[gpua002:0/64] 2024-01-17 04:40:47,753 (trainer:753) INFO: 6epoch:train:14101-14200batch: iter_time=9.670e-05, forward_time=0.211, loss_ctc=105.455, loss_interctc_layer6=110.392, loss_interctc_layer12=94.919, loss_interctc_layer15=88.792, loss_interctc_layer21=106.578, loss=101.227, backward_time=0.405, grad_norm=64.000, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.138, optim0_lr0=1.641e-04, train_time=1.911 +[gpua002:0/64] 2024-01-17 04:43:51,414 (trainer:753) INFO: 6epoch:train:14201-14300batch: iter_time=3.640e-04, forward_time=0.233, loss_ctc=111.397, loss_interctc_layer6=108.625, loss_interctc_layer12=94.445, loss_interctc_layer15=88.195, loss_interctc_layer21=112.469, loss=103.026, backward_time=0.420, grad_norm=74.537, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.141, optim0_lr0=1.640e-04, train_time=1.837 +[gpua002:0/64] 2024-01-17 04:46:54,297 (trainer:753) INFO: 6epoch:train:14301-14400batch: iter_time=6.239e-04, forward_time=0.241, loss_ctc=97.712, loss_interctc_layer6=102.818, loss_interctc_layer12=89.093, loss_interctc_layer15=83.764, loss_interctc_layer21=97.831, loss=94.244, backward_time=0.435, grad_norm=61.046, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.139, optim0_lr0=1.639e-04, train_time=1.827 +[gpua002:0/64] 2024-01-17 04:49:40,501 (trainer:753) INFO: 6epoch:train:14401-14500batch: iter_time=9.576e-05, forward_time=0.241, loss_ctc=81.593, loss_interctc_layer6=82.493, loss_interctc_layer12=71.055, loss_interctc_layer15=66.274, loss_interctc_layer21=81.844, loss=76.652, backward_time=0.373, grad_norm=53.180, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.139, optim0_lr0=1.638e-04, train_time=1.663 +[gpua002:0/64] 2024-01-17 04:52:31,784 (trainer:753) INFO: 6epoch:train:14501-14600batch: iter_time=2.443e-04, forward_time=0.215, loss_ctc=95.410, loss_interctc_layer6=96.708, loss_interctc_layer12=83.038, loss_interctc_layer15=77.419, loss_interctc_layer21=96.323, loss=89.780, backward_time=0.369, grad_norm=53.127, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.139, optim0_lr0=1.637e-04, train_time=1.712 +[gpua002:0/64] 2024-01-17 04:55:24,298 (trainer:753) INFO: 6epoch:train:14601-14700batch: iter_time=9.512e-05, forward_time=0.205, loss_ctc=124.213, loss_interctc_layer6=115.806, loss_interctc_layer12=100.292, loss_interctc_layer15=94.137, loss_interctc_layer21=126.713, loss=112.232, backward_time=0.363, grad_norm=74.827, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.137, optim0_lr0=1.636e-04, train_time=1.725 +[gpua002:0/64] 2024-01-17 04:58:14,813 (trainer:753) INFO: 6epoch:train:14701-14800batch: iter_time=1.141e-04, forward_time=0.242, loss_ctc=93.357, loss_interctc_layer6=98.695, loss_interctc_layer12=84.422, loss_interctc_layer15=78.944, loss_interctc_layer21=94.307, loss=89.945, backward_time=0.363, grad_norm=78.883, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.140, optim0_lr0=1.635e-04, train_time=1.704 +[gpua002:0/64] 2024-01-17 05:01:03,042 (trainer:753) INFO: 6epoch:train:14801-14900batch: iter_time=3.548e-04, forward_time=0.268, loss_ctc=100.717, loss_interctc_layer6=103.292, loss_interctc_layer12=89.891, loss_interctc_layer15=84.049, loss_interctc_layer21=101.652, loss=95.920, backward_time=0.359, grad_norm=56.234, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.140, optim0_lr0=1.634e-04, train_time=1.684 +[gpua002:0/64] 2024-01-17 05:03:59,492 (trainer:753) INFO: 6epoch:train:14901-15000batch: iter_time=6.199e-04, forward_time=0.237, loss_ctc=111.009, loss_interctc_layer6=105.859, loss_interctc_layer12=91.348, loss_interctc_layer15=85.314, loss_interctc_layer21=112.073, loss=101.121, backward_time=0.401, grad_norm=60.746, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.141, optim0_lr0=1.633e-04, train_time=1.764 +[gpua002:0/64] 2024-01-17 05:33:57,436 (trainer:352) INFO: 6epoch results: [train] iter_time=0.262, forward_time=0.232, loss_ctc=111.245, loss_interctc_layer6=108.782, loss_interctc_layer12=94.934, loss_interctc_layer15=89.435, loss_interctc_layer21=111.951, loss=103.270, backward_time=0.528, grad_norm=75.984, clip=100.000, loss_scale=3.056e+17, optim_step_time=0.141, optim0_lr0=1.707e-04, train_time=2.907, time=12 hours, 7 minutes and 9.12 seconds, total_count=90000, gpu_max_cached_mem_GB=34.396, [valid] loss_ctc=80.336, cer_ctc=0.349, loss_interctc_layer6=77.830, cer_interctc_layer6=0.327, loss_interctc_layer12=66.092, cer_interctc_layer12=0.269, loss_interctc_layer15=61.622, cer_interctc_layer15=0.244, loss_interctc_layer21=81.065, cer_interctc_layer21=0.348, loss=73.389, time=29 minutes and 31.82 seconds, total_count=28026, gpu_max_cached_mem_GB=34.396 +[gpua002:0/64] 2024-01-17 05:34:25,074 (trainer:407) INFO: The best model has been updated: valid.cer_ctc, valid.loss_ctc, valid.total_count +[gpua002:0/64] 2024-01-17 05:34:25,146 (trainer:461) INFO: The model files were removed: exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/1epoch.pth +[gpua002:0/64] 2024-01-17 05:34:25,146 (trainer:286) INFO: 7/45epoch started. Estimated time to finish: 2 weeks, 6 days and 12 hours +[gpua002:0/64] 2024-01-17 05:34:25,161 (multiple_iter_factory:32) INFO: Building 0th iter-factory... +[gpua002:0/64] 2024-01-17 05:34:43,674 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua002:0/64] 2024-01-17 05:34:47,101 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpua002:0/64] 2024-01-17 05:34:47,101 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpua002:0/64] 2024-01-17 05:34:47,104 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpua002:0/64] 2024-01-17 05:44:01,458 (trainer:753) INFO: 7epoch:train:1-100batch: iter_time=2.216, forward_time=0.191, loss_ctc=122.439, loss_interctc_layer6=128.038, loss_interctc_layer12=111.686, loss_interctc_layer15=105.254, loss_interctc_layer21=123.123, loss=118.108, backward_time=0.303, grad_norm=89.726, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.139, optim0_lr0=1.633e-04, train_time=5.763 +[gpua002:0/64] 2024-01-17 05:46:07,339 (trainer:753) INFO: 7epoch:train:101-200batch: iter_time=8.309e-05, forward_time=0.142, loss_ctc=105.989, loss_interctc_layer6=105.588, loss_interctc_layer12=93.514, loss_interctc_layer15=88.735, loss_interctc_layer21=106.476, loss=100.060, backward_time=0.297, grad_norm=63.078, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.138, optim0_lr0=1.632e-04, train_time=1.259 +[gpua002:0/64] 2024-01-17 05:48:25,210 (trainer:753) INFO: 7epoch:train:201-300batch: iter_time=8.795e-05, forward_time=0.142, loss_ctc=112.146, loss_interctc_layer6=109.043, loss_interctc_layer12=94.418, loss_interctc_layer15=88.548, loss_interctc_layer21=113.467, loss=103.524, backward_time=0.320, grad_norm=72.638, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.138, optim0_lr0=1.631e-04, train_time=1.378 +[gpua002:0/64] 2024-01-17 05:51:24,599 (trainer:753) INFO: 7epoch:train:301-400batch: iter_time=9.464e-05, forward_time=0.142, loss_ctc=107.121, loss_interctc_layer6=102.029, loss_interctc_layer12=88.561, loss_interctc_layer15=83.339, loss_interctc_layer21=107.766, loss=97.763, backward_time=0.399, grad_norm=62.206, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.137, optim0_lr0=1.630e-04, train_time=1.794 +[gpua002:0/64] 2024-01-17 05:54:09,452 (trainer:753) INFO: 7epoch:train:401-500batch: iter_time=8.812e-05, forward_time=0.142, loss_ctc=109.763, loss_interctc_layer6=112.051, loss_interctc_layer12=97.762, loss_interctc_layer15=91.981, loss_interctc_layer21=110.689, loss=104.449, backward_time=0.344, grad_norm=97.033, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.138, optim0_lr0=1.629e-04, train_time=1.647 +[gpua002:0/64] 2024-01-17 05:56:56,132 (trainer:753) INFO: 7epoch:train:501-600batch: iter_time=9.417e-05, forward_time=0.142, loss_ctc=109.656, loss_interctc_layer6=106.468, loss_interctc_layer12=91.820, loss_interctc_layer15=86.247, loss_interctc_layer21=110.774, loss=100.993, backward_time=0.365, grad_norm=60.798, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.137, optim0_lr0=1.628e-04, train_time=1.668 +[gpua002:0/64] 2024-01-17 05:59:19,930 (trainer:753) INFO: 7epoch:train:601-700batch: iter_time=9.065e-05, forward_time=0.144, loss_ctc=118.806, loss_interctc_layer6=117.909, loss_interctc_layer12=101.808, loss_interctc_layer15=96.402, loss_interctc_layer21=120.410, loss=111.067, backward_time=0.339, grad_norm=83.799, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.138, optim0_lr0=1.627e-04, train_time=1.438 +[gpua002:0/64] 2024-01-17 06:02:13,252 (trainer:753) INFO: 7epoch:train:701-800batch: iter_time=9.556e-05, forward_time=0.143, loss_ctc=104.902, loss_interctc_layer6=106.492, loss_interctc_layer12=92.809, loss_interctc_layer15=87.611, loss_interctc_layer21=105.501, loss=99.463, backward_time=0.336, grad_norm=69.355, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.138, optim0_lr0=1.626e-04, train_time=1.732 +[gpua002:0/64] 2024-01-17 06:04:53,800 (trainer:753) INFO: 7epoch:train:801-900batch: iter_time=9.148e-05, forward_time=0.144, loss_ctc=88.224, loss_interctc_layer6=96.934, loss_interctc_layer12=84.361, loss_interctc_layer15=79.147, loss_interctc_layer21=88.957, loss=87.525, backward_time=0.341, grad_norm=63.033, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.138, optim0_lr0=1.625e-04, train_time=1.606 +[gpua002:0/64] 2024-01-17 06:07:24,912 (trainer:753) INFO: 7epoch:train:901-1000batch: iter_time=9.601e-05, forward_time=0.142, loss_ctc=117.307, loss_interctc_layer6=106.418, loss_interctc_layer12=92.097, loss_interctc_layer15=86.583, loss_interctc_layer21=118.354, loss=104.152, backward_time=0.317, grad_norm=76.737, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.137, optim0_lr0=1.624e-04, train_time=1.511 +[gpua002:0/64] 2024-01-17 06:10:07,262 (trainer:753) INFO: 7epoch:train:1001-1100batch: iter_time=9.405e-05, forward_time=0.165, loss_ctc=103.741, loss_interctc_layer6=101.919, loss_interctc_layer12=87.572, loss_interctc_layer15=81.675, loss_interctc_layer21=104.730, loss=95.928, backward_time=0.355, grad_norm=63.979, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.140, optim0_lr0=1.624e-04, train_time=1.623 +[gpua002:0/64] 2024-01-17 06:12:53,331 (trainer:753) INFO: 7epoch:train:1101-1200batch: iter_time=8.790e-05, forward_time=0.221, loss_ctc=104.605, loss_interctc_layer6=103.704, loss_interctc_layer12=90.435, loss_interctc_layer15=85.211, loss_interctc_layer21=105.332, loss=97.857, backward_time=0.356, grad_norm=72.429, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.141, optim0_lr0=1.623e-04, train_time=1.661 +[gpua002:0/64] 2024-01-17 06:14:39,538 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpua002:0/64] 2024-01-17 06:14:58,213 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua002:0/64] 2024-01-17 06:15:01,692 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpua002:0/64] 2024-01-17 06:15:01,692 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpua002:0/64] 2024-01-17 06:15:01,699 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpua002:0/64] 2024-01-17 06:22:35,092 (trainer:753) INFO: 7epoch:train:1201-1300batch: iter_time=2.409, forward_time=0.144, loss_ctc=128.786, loss_interctc_layer6=127.861, loss_interctc_layer12=111.770, loss_interctc_layer15=105.661, loss_interctc_layer21=129.063, loss=120.628, backward_time=0.343, grad_norm=73.284, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.137, optim0_lr0=1.622e-04, train_time=5.817 +[gpua002:0/64] 2024-01-17 06:25:15,563 (trainer:753) INFO: 7epoch:train:1301-1400batch: iter_time=9.720e-05, forward_time=0.142, loss_ctc=115.263, loss_interctc_layer6=112.244, loss_interctc_layer12=98.650, loss_interctc_layer15=93.060, loss_interctc_layer21=116.517, loss=107.147, backward_time=0.343, grad_norm=80.461, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.137, optim0_lr0=1.621e-04, train_time=1.604 +[gpua002:0/64] 2024-01-17 06:27:33,876 (trainer:753) INFO: 7epoch:train:1401-1500batch: iter_time=7.616e-05, forward_time=0.145, loss_ctc=106.368, loss_interctc_layer6=105.624, loss_interctc_layer12=92.255, loss_interctc_layer15=86.845, loss_interctc_layer21=107.843, loss=99.787, backward_time=0.303, grad_norm=76.174, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.137, optim0_lr0=1.620e-04, train_time=1.383 +[gpua002:0/64] 2024-01-17 06:30:40,112 (trainer:753) INFO: 7epoch:train:1501-1600batch: iter_time=9.546e-05, forward_time=0.143, loss_ctc=105.515, loss_interctc_layer6=96.087, loss_interctc_layer12=82.208, loss_interctc_layer15=76.864, loss_interctc_layer21=106.860, loss=93.507, backward_time=0.457, grad_norm=77.557, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.138, optim0_lr0=1.619e-04, train_time=1.862 +[gpua002:0/64] 2024-01-17 06:33:23,062 (trainer:753) INFO: 7epoch:train:1601-1700batch: iter_time=9.682e-05, forward_time=0.142, loss_ctc=115.465, loss_interctc_layer6=108.931, loss_interctc_layer12=94.963, loss_interctc_layer15=88.961, loss_interctc_layer21=116.373, loss=104.938, backward_time=0.353, grad_norm=83.765, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.137, optim0_lr0=1.618e-04, train_time=1.629 +[gpua002:0/64] 2024-01-17 06:36:04,596 (trainer:753) INFO: 7epoch:train:1701-1800batch: iter_time=1.037e-04, forward_time=0.145, loss_ctc=114.260, loss_interctc_layer6=110.846, loss_interctc_layer12=96.192, loss_interctc_layer15=90.161, loss_interctc_layer21=115.366, loss=105.365, backward_time=0.338, grad_norm=74.608, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.137, optim0_lr0=1.617e-04, train_time=1.615 +[gpua002:0/64] 2024-01-17 06:38:36,760 (trainer:753) INFO: 7epoch:train:1801-1900batch: iter_time=9.024e-05, forward_time=0.141, loss_ctc=121.219, loss_interctc_layer6=109.265, loss_interctc_layer12=94.826, loss_interctc_layer15=89.438, loss_interctc_layer21=122.169, loss=107.383, backward_time=0.342, grad_norm=78.527, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.136, optim0_lr0=1.616e-04, train_time=1.522 +[gpua002:0/64] 2024-01-17 06:41:18,448 (trainer:753) INFO: 7epoch:train:1901-2000batch: iter_time=9.895e-05, forward_time=0.167, loss_ctc=124.369, loss_interctc_layer6=113.094, loss_interctc_layer12=96.554, loss_interctc_layer15=90.952, loss_interctc_layer21=125.495, loss=110.093, backward_time=0.359, grad_norm=73.073, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.137, optim0_lr0=1.616e-04, train_time=1.617 +[gpua002:0/64] 2024-01-17 06:44:08,808 (trainer:753) INFO: 7epoch:train:2001-2100batch: iter_time=8.762e-05, forward_time=0.181, loss_ctc=95.717, loss_interctc_layer6=99.523, loss_interctc_layer12=86.149, loss_interctc_layer15=80.860, loss_interctc_layer21=96.732, loss=91.796, backward_time=0.343, grad_norm=68.701, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.140, optim0_lr0=1.615e-04, train_time=1.703 +[gpua002:0/64] 2024-01-17 06:46:45,590 (trainer:753) INFO: 7epoch:train:2101-2200batch: iter_time=8.813e-05, forward_time=0.163, loss_ctc=105.389, loss_interctc_layer6=99.971, loss_interctc_layer12=86.315, loss_interctc_layer15=80.886, loss_interctc_layer21=106.222, loss=95.756, backward_time=0.319, grad_norm=61.707, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.139, optim0_lr0=1.614e-04, train_time=1.568 +[gpua002:0/64] 2024-01-17 06:49:22,698 (trainer:753) INFO: 7epoch:train:2201-2300batch: iter_time=9.056e-05, forward_time=0.142, loss_ctc=122.836, loss_interctc_layer6=110.576, loss_interctc_layer12=95.742, loss_interctc_layer15=89.582, loss_interctc_layer21=124.322, loss=108.612, backward_time=0.385, grad_norm=76.328, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.139, optim0_lr0=1.613e-04, train_time=1.571 +[gpua002:0/64] 2024-01-17 06:51:58,005 (trainer:753) INFO: 7epoch:train:2301-2400batch: iter_time=9.060e-05, forward_time=0.141, loss_ctc=90.399, loss_interctc_layer6=89.777, loss_interctc_layer12=76.564, loss_interctc_layer15=71.315, loss_interctc_layer21=91.010, loss=83.813, backward_time=0.345, grad_norm=60.272, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.139, optim0_lr0=1.612e-04, train_time=1.552 +[gpua002:0/64] 2024-01-17 06:54:51,830 (trainer:753) INFO: 7epoch:train:2401-2500batch: iter_time=9.610e-05, forward_time=0.142, loss_ctc=121.852, loss_interctc_layer6=116.012, loss_interctc_layer12=101.330, loss_interctc_layer15=95.847, loss_interctc_layer21=123.174, loss=111.643, backward_time=0.415, grad_norm=68.710, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.139, optim0_lr0=1.611e-04, train_time=1.739 +[gpua002:0/64] 2024-01-17 06:55:11,860 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpua002:0/64] 2024-01-17 06:55:30,636 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua002:0/64] 2024-01-17 06:55:34,345 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpua002:0/64] 2024-01-17 06:55:34,345 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpua002:0/64] 2024-01-17 06:55:34,348 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpua002:0/64] 2024-01-17 07:04:28,062 (trainer:753) INFO: 7epoch:train:2501-2600batch: iter_time=2.450, forward_time=0.147, loss_ctc=118.242, loss_interctc_layer6=124.826, loss_interctc_layer12=108.117, loss_interctc_layer15=101.513, loss_interctc_layer21=119.551, loss=114.450, backward_time=0.305, grad_norm=85.754, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.140, optim0_lr0=1.610e-04, train_time=5.762 +[gpua002:0/64] 2024-01-17 07:06:47,625 (trainer:753) INFO: 7epoch:train:2601-2700batch: iter_time=8.987e-05, forward_time=0.141, loss_ctc=101.960, loss_interctc_layer6=102.839, loss_interctc_layer12=90.197, loss_interctc_layer15=84.880, loss_interctc_layer21=102.789, loss=96.533, backward_time=0.302, grad_norm=62.002, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.139, optim0_lr0=1.609e-04, train_time=1.396 +[gpua002:0/64] 2024-01-17 07:09:06,776 (trainer:753) INFO: 7epoch:train:2701-2800batch: iter_time=8.670e-05, forward_time=0.142, loss_ctc=110.110, loss_interctc_layer6=106.988, loss_interctc_layer12=92.119, loss_interctc_layer15=85.880, loss_interctc_layer21=111.491, loss=101.318, backward_time=0.301, grad_norm=67.197, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.139, optim0_lr0=1.609e-04, train_time=1.391 +[gpua002:0/64] 2024-01-17 07:11:43,945 (trainer:753) INFO: 7epoch:train:2801-2900batch: iter_time=9.078e-05, forward_time=0.152, loss_ctc=105.212, loss_interctc_layer6=101.082, loss_interctc_layer12=87.433, loss_interctc_layer15=81.970, loss_interctc_layer21=106.206, loss=96.381, backward_time=0.363, grad_norm=62.574, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.138, optim0_lr0=1.608e-04, train_time=1.571 +[gpua002:0/64] 2024-01-17 07:14:14,037 (trainer:753) INFO: 7epoch:train:2901-3000batch: iter_time=9.053e-05, forward_time=0.160, loss_ctc=106.164, loss_interctc_layer6=110.060, loss_interctc_layer12=95.483, loss_interctc_layer15=89.345, loss_interctc_layer21=107.140, loss=101.638, backward_time=0.320, grad_norm=77.292, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.139, optim0_lr0=1.607e-04, train_time=1.501 +[gpua002:0/64] 2024-01-17 07:16:47,373 (trainer:753) INFO: 7epoch:train:3001-3100batch: iter_time=9.886e-05, forward_time=0.142, loss_ctc=107.419, loss_interctc_layer6=104.392, loss_interctc_layer12=89.907, loss_interctc_layer15=84.063, loss_interctc_layer21=108.497, loss=98.856, backward_time=0.326, grad_norm=58.855, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.139, optim0_lr0=1.606e-04, train_time=1.533 +[gpua002:0/64] 2024-01-17 07:19:34,670 (trainer:753) INFO: 7epoch:train:3101-3200batch: iter_time=9.214e-05, forward_time=0.156, loss_ctc=115.369, loss_interctc_layer6=114.271, loss_interctc_layer12=99.375, loss_interctc_layer15=92.684, loss_interctc_layer21=116.988, loss=107.737, backward_time=0.337, grad_norm=89.845, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.139, optim0_lr0=1.605e-04, train_time=1.672 +[gpua002:0/64] 2024-01-17 07:22:28,282 (trainer:753) INFO: 7epoch:train:3201-3300batch: iter_time=9.739e-05, forward_time=0.179, loss_ctc=101.082, loss_interctc_layer6=104.180, loss_interctc_layer12=90.337, loss_interctc_layer15=84.665, loss_interctc_layer21=101.911, loss=96.435, backward_time=0.337, grad_norm=68.829, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.138, optim0_lr0=1.604e-04, train_time=1.736 +[gpua002:0/64] 2024-01-17 07:25:14,372 (trainer:753) INFO: 7epoch:train:3301-3400batch: iter_time=9.717e-05, forward_time=0.154, loss_ctc=87.209, loss_interctc_layer6=95.553, loss_interctc_layer12=82.995, loss_interctc_layer15=77.481, loss_interctc_layer21=87.403, loss=86.128, backward_time=0.340, grad_norm=66.379, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.137, optim0_lr0=1.603e-04, train_time=1.661 +[gpua002:0/64] 2024-01-17 07:27:52,998 (trainer:753) INFO: 7epoch:train:3401-3500batch: iter_time=1.039e-04, forward_time=0.143, loss_ctc=114.788, loss_interctc_layer6=105.086, loss_interctc_layer12=89.922, loss_interctc_layer15=84.101, loss_interctc_layer21=116.391, loss=102.058, backward_time=0.348, grad_norm=67.052, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.137, optim0_lr0=1.603e-04, train_time=1.586 +[gpua002:0/64] 2024-01-17 07:30:16,528 (trainer:753) INFO: 7epoch:train:3501-3600batch: iter_time=1.082e-04, forward_time=0.141, loss_ctc=101.514, loss_interctc_layer6=100.327, loss_interctc_layer12=85.911, loss_interctc_layer15=80.173, loss_interctc_layer21=102.423, loss=94.070, backward_time=0.349, grad_norm=61.360, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.137, optim0_lr0=1.602e-04, train_time=1.435 +[gpua002:0/64] 2024-01-17 07:32:38,970 (trainer:753) INFO: 7epoch:train:3601-3700batch: iter_time=1.110e-04, forward_time=0.141, loss_ctc=101.652, loss_interctc_layer6=101.967, loss_interctc_layer12=88.139, loss_interctc_layer15=82.617, loss_interctc_layer21=102.973, loss=95.470, backward_time=0.310, grad_norm=73.964, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.137, optim0_lr0=1.601e-04, train_time=1.423 +[gpua002:0/64] 2024-01-17 07:34:22,152 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpua002:0/64] 2024-01-17 07:34:41,233 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua002:0/64] 2024-01-17 07:34:44,717 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpua002:0/64] 2024-01-17 07:34:44,717 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpua002:0/64] 2024-01-17 07:34:44,720 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpua002:0/64] 2024-01-17 07:40:10,303 (trainer:753) INFO: 7epoch:train:3701-3800batch: iter_time=1.496, forward_time=0.164, loss_ctc=120.499, loss_interctc_layer6=126.444, loss_interctc_layer12=109.917, loss_interctc_layer15=103.367, loss_interctc_layer21=121.834, loss=116.412, backward_time=0.335, grad_norm=82.152, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.138, optim0_lr0=1.600e-04, train_time=4.514 +[gpua002:0/64] 2024-01-17 07:43:22,970 (trainer:753) INFO: 7epoch:train:3801-3900batch: iter_time=0.001, forward_time=0.173, loss_ctc=105.858, loss_interctc_layer6=109.727, loss_interctc_layer12=95.148, loss_interctc_layer15=89.438, loss_interctc_layer21=106.846, loss=101.404, backward_time=0.448, grad_norm=71.682, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.140, optim0_lr0=1.599e-04, train_time=1.926 +[gpua002:0/64] 2024-01-17 07:46:18,490 (trainer:753) INFO: 7epoch:train:3901-4000batch: iter_time=8.261e-05, forward_time=0.183, loss_ctc=98.652, loss_interctc_layer6=105.000, loss_interctc_layer12=90.942, loss_interctc_layer15=85.496, loss_interctc_layer21=99.564, loss=95.931, backward_time=0.405, grad_norm=75.670, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.139, optim0_lr0=1.598e-04, train_time=1.755 +[gpua002:0/64] 2024-01-17 07:49:40,225 (trainer:753) INFO: 7epoch:train:4001-4100batch: iter_time=9.018e-05, forward_time=0.143, loss_ctc=96.284, loss_interctc_layer6=94.311, loss_interctc_layer12=80.369, loss_interctc_layer15=74.822, loss_interctc_layer21=97.450, loss=88.647, backward_time=0.475, grad_norm=101.348, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.138, optim0_lr0=1.597e-04, train_time=2.018 +[gpua002:0/64] 2024-01-17 07:52:46,713 (trainer:753) INFO: 7epoch:train:4101-4200batch: iter_time=9.629e-05, forward_time=0.142, loss_ctc=106.448, loss_interctc_layer6=107.670, loss_interctc_layer12=92.911, loss_interctc_layer15=87.154, loss_interctc_layer21=107.691, loss=100.375, backward_time=0.424, grad_norm=64.759, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.137, optim0_lr0=1.597e-04, train_time=1.865 +[gpua002:0/64] 2024-01-17 07:56:05,726 (trainer:753) INFO: 7epoch:train:4201-4300batch: iter_time=9.305e-05, forward_time=0.142, loss_ctc=107.347, loss_interctc_layer6=109.240, loss_interctc_layer12=94.228, loss_interctc_layer15=87.986, loss_interctc_layer21=108.422, loss=101.445, backward_time=0.475, grad_norm=71.655, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.137, optim0_lr0=1.596e-04, train_time=1.990 +[gpua002:0/64] 2024-01-17 07:59:34,997 (trainer:753) INFO: 7epoch:train:4301-4400batch: iter_time=9.117e-05, forward_time=0.144, loss_ctc=110.986, loss_interctc_layer6=108.565, loss_interctc_layer12=93.915, loss_interctc_layer15=88.144, loss_interctc_layer21=112.261, loss=102.774, backward_time=0.447, grad_norm=69.388, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.137, optim0_lr0=1.595e-04, train_time=2.091 +[gpua002:0/64] 2024-01-17 08:02:37,530 (trainer:753) INFO: 7epoch:train:4401-4500batch: iter_time=9.083e-05, forward_time=0.148, loss_ctc=115.522, loss_interctc_layer6=110.929, loss_interctc_layer12=95.128, loss_interctc_layer15=89.486, loss_interctc_layer21=116.633, loss=105.540, backward_time=0.434, grad_norm=78.376, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.138, optim0_lr0=1.594e-04, train_time=1.826 +[gpua002:0/64] 2024-01-17 08:05:37,609 (trainer:753) INFO: 7epoch:train:4501-4600batch: iter_time=8.064e-05, forward_time=0.147, loss_ctc=91.521, loss_interctc_layer6=98.377, loss_interctc_layer12=85.219, loss_interctc_layer15=79.453, loss_interctc_layer21=92.285, loss=89.371, backward_time=0.379, grad_norm=65.802, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.139, optim0_lr0=1.593e-04, train_time=1.800 +[gpua002:0/64] 2024-01-17 08:08:27,171 (trainer:753) INFO: 7epoch:train:4601-4700batch: iter_time=9.847e-05, forward_time=0.152, loss_ctc=99.855, loss_interctc_layer6=97.897, loss_interctc_layer12=84.047, loss_interctc_layer15=78.298, loss_interctc_layer21=101.105, loss=92.240, backward_time=0.405, grad_norm=69.166, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.139, optim0_lr0=1.592e-04, train_time=1.696 +[gpua002:0/64] 2024-01-17 08:11:50,814 (trainer:753) INFO: 7epoch:train:4701-4800batch: iter_time=8.921e-05, forward_time=0.158, loss_ctc=118.176, loss_interctc_layer6=110.287, loss_interctc_layer12=95.045, loss_interctc_layer15=88.879, loss_interctc_layer21=119.667, loss=106.411, backward_time=0.431, grad_norm=85.068, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.141, optim0_lr0=1.592e-04, train_time=2.036 +[gpua002:0/64] 2024-01-17 08:14:50,865 (trainer:753) INFO: 7epoch:train:4801-4900batch: iter_time=8.821e-05, forward_time=0.141, loss_ctc=87.009, loss_interctc_layer6=89.026, loss_interctc_layer12=75.972, loss_interctc_layer15=70.500, loss_interctc_layer21=87.897, loss=82.081, backward_time=0.423, grad_norm=61.075, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.138, optim0_lr0=1.591e-04, train_time=1.800 +[gpua002:0/64] 2024-01-17 08:17:30,573 (trainer:753) INFO: 7epoch:train:4901-5000batch: iter_time=8.511e-05, forward_time=0.144, loss_ctc=110.503, loss_interctc_layer6=113.840, loss_interctc_layer12=98.800, loss_interctc_layer15=93.090, loss_interctc_layer21=111.635, loss=105.574, backward_time=0.358, grad_norm=67.391, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.138, optim0_lr0=1.590e-04, train_time=1.597 +[gpua002:0/64] 2024-01-17 08:17:50,602 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpua002:0/64] 2024-01-17 08:18:09,691 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua002:0/64] 2024-01-17 08:18:13,145 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpua002:0/64] 2024-01-17 08:18:13,145 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpua002:0/64] 2024-01-17 08:18:13,245 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpua002:0/64] 2024-01-17 08:31:52,119 (trainer:753) INFO: 7epoch:train:5001-5100batch: iter_time=2.963, forward_time=0.143, loss_ctc=116.158, loss_interctc_layer6=124.268, loss_interctc_layer12=107.446, loss_interctc_layer15=100.772, loss_interctc_layer21=117.637, loss=113.256, backward_time=0.315, grad_norm=91.455, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.139, optim0_lr0=1.589e-04, train_time=8.615 +[gpua002:0/64] 2024-01-17 08:34:39,716 (trainer:753) INFO: 7epoch:train:5101-5200batch: iter_time=8.575e-05, forward_time=0.141, loss_ctc=99.393, loss_interctc_layer6=102.385, loss_interctc_layer12=88.792, loss_interctc_layer15=83.322, loss_interctc_layer21=100.609, loss=94.900, backward_time=0.335, grad_norm=62.015, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.137, optim0_lr0=1.588e-04, train_time=1.676 +[gpua002:0/64] 2024-01-17 08:37:49,574 (trainer:753) INFO: 7epoch:train:5201-5300batch: iter_time=8.760e-05, forward_time=0.143, loss_ctc=108.050, loss_interctc_layer6=106.841, loss_interctc_layer12=91.799, loss_interctc_layer15=85.845, loss_interctc_layer21=108.922, loss=100.291, backward_time=0.390, grad_norm=63.952, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.138, optim0_lr0=1.587e-04, train_time=1.898 +[gpua002:0/64] 2024-01-17 08:40:37,189 (trainer:753) INFO: 7epoch:train:5301-5400batch: iter_time=8.814e-05, forward_time=0.152, loss_ctc=102.419, loss_interctc_layer6=99.673, loss_interctc_layer12=85.938, loss_interctc_layer15=80.226, loss_interctc_layer21=103.602, loss=94.372, backward_time=0.351, grad_norm=57.447, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.138, optim0_lr0=1.587e-04, train_time=1.676 +[gpua002:0/64] 2024-01-17 08:43:03,419 (trainer:753) INFO: 7epoch:train:5401-5500batch: iter_time=8.008e-05, forward_time=0.146, loss_ctc=104.849, loss_interctc_layer6=108.873, loss_interctc_layer12=93.910, loss_interctc_layer15=87.733, loss_interctc_layer21=105.796, loss=100.232, backward_time=0.307, grad_norm=71.982, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.138, optim0_lr0=1.586e-04, train_time=1.462 +[gpua002:0/64] 2024-01-17 08:46:09,665 (trainer:753) INFO: 7epoch:train:5501-5600batch: iter_time=1.028e-04, forward_time=0.148, loss_ctc=105.724, loss_interctc_layer6=103.152, loss_interctc_layer12=88.644, loss_interctc_layer15=82.774, loss_interctc_layer21=107.542, loss=97.567, backward_time=0.409, grad_norm=62.284, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.138, optim0_lr0=1.585e-04, train_time=1.862 +[gpua002:0/64] 2024-01-17 08:48:57,673 (trainer:753) INFO: 7epoch:train:5601-5700batch: iter_time=9.940e-05, forward_time=0.155, loss_ctc=113.931, loss_interctc_layer6=113.978, loss_interctc_layer12=98.177, loss_interctc_layer15=92.746, loss_interctc_layer21=115.453, loss=106.857, backward_time=0.357, grad_norm=79.737, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.139, optim0_lr0=1.584e-04, train_time=1.681 +[gpua002:0/64] 2024-01-17 08:51:50,521 (trainer:753) INFO: 7epoch:train:5701-5800batch: iter_time=1.018e-04, forward_time=0.188, loss_ctc=99.176, loss_interctc_layer6=102.172, loss_interctc_layer12=88.138, loss_interctc_layer15=82.910, loss_interctc_layer21=100.257, loss=94.530, backward_time=0.359, grad_norm=63.843, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.139, optim0_lr0=1.583e-04, train_time=1.728 +[gpua002:0/64] 2024-01-17 08:54:33,268 (trainer:753) INFO: 7epoch:train:5801-5900batch: iter_time=9.405e-05, forward_time=0.142, loss_ctc=85.381, loss_interctc_layer6=95.683, loss_interctc_layer12=82.465, loss_interctc_layer15=77.018, loss_interctc_layer21=86.325, loss=85.374, backward_time=0.348, grad_norm=57.489, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.138, optim0_lr0=1.582e-04, train_time=1.627 +[gpua002:0/64] 2024-01-17 08:57:04,096 (trainer:753) INFO: 7epoch:train:5901-6000batch: iter_time=9.145e-05, forward_time=0.142, loss_ctc=113.927, loss_interctc_layer6=104.091, loss_interctc_layer12=89.379, loss_interctc_layer15=83.203, loss_interctc_layer21=115.166, loss=101.153, backward_time=0.335, grad_norm=81.575, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.138, optim0_lr0=1.582e-04, train_time=1.507 +[gpua002:0/64] 2024-01-17 08:59:43,111 (trainer:753) INFO: 7epoch:train:6001-6100batch: iter_time=8.356e-05, forward_time=0.142, loss_ctc=100.350, loss_interctc_layer6=99.654, loss_interctc_layer12=85.121, loss_interctc_layer15=79.384, loss_interctc_layer21=101.363, loss=93.174, backward_time=0.377, grad_norm=55.557, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.138, optim0_lr0=1.581e-04, train_time=1.591 +[gpua002:0/64] 2024-01-17 09:02:29,565 (trainer:753) INFO: 7epoch:train:6101-6200batch: iter_time=9.174e-05, forward_time=0.142, loss_ctc=99.866, loss_interctc_layer6=101.401, loss_interctc_layer12=87.331, loss_interctc_layer15=81.788, loss_interctc_layer21=101.521, loss=94.381, backward_time=0.359, grad_norm=70.872, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.138, optim0_lr0=1.580e-04, train_time=1.664 +[gpua002:0/64] 2024-01-17 09:04:02,529 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpua002:0/64] 2024-01-17 09:04:21,626 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua002:0/64] 2024-01-17 09:04:25,020 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpua002:0/64] 2024-01-17 09:04:25,020 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpua002:0/64] 2024-01-17 09:04:25,036 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpua002:0/64] 2024-01-17 09:10:12,960 (trainer:753) INFO: 7epoch:train:6201-6300batch: iter_time=2.994, forward_time=0.166, loss_ctc=118.540, loss_interctc_layer6=124.318, loss_interctc_layer12=107.650, loss_interctc_layer15=101.237, loss_interctc_layer21=119.398, loss=114.229, backward_time=0.308, grad_norm=100.855, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.138, optim0_lr0=1.579e-04, train_time=4.634 +[gpua002:0/64] 2024-01-17 09:12:27,080 (trainer:753) INFO: 7epoch:train:6301-6400batch: iter_time=8.261e-05, forward_time=0.141, loss_ctc=104.114, loss_interctc_layer6=108.056, loss_interctc_layer12=94.110, loss_interctc_layer15=88.158, loss_interctc_layer21=104.778, loss=99.843, backward_time=0.300, grad_norm=73.422, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.137, optim0_lr0=1.578e-04, train_time=1.341 +[gpua002:0/64] 2024-01-17 09:15:07,205 (trainer:753) INFO: 7epoch:train:6401-6500batch: iter_time=7.659e-05, forward_time=0.143, loss_ctc=96.054, loss_interctc_layer6=103.098, loss_interctc_layer12=89.170, loss_interctc_layer15=83.334, loss_interctc_layer21=97.225, loss=93.776, backward_time=0.329, grad_norm=67.639, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.137, optim0_lr0=1.577e-04, train_time=1.601 +[gpua002:0/64] 2024-01-17 09:17:31,905 (trainer:753) INFO: 7epoch:train:6501-6600batch: iter_time=8.857e-05, forward_time=0.141, loss_ctc=95.990, loss_interctc_layer6=94.084, loss_interctc_layer12=80.231, loss_interctc_layer15=74.396, loss_interctc_layer21=97.550, loss=88.450, backward_time=0.327, grad_norm=62.956, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.137, optim0_lr0=1.577e-04, train_time=1.447 +[gpua002:0/64] 2024-01-17 09:20:23,910 (trainer:753) INFO: 7epoch:train:6601-6700batch: iter_time=8.609e-05, forward_time=0.158, loss_ctc=104.513, loss_interctc_layer6=106.599, loss_interctc_layer12=91.954, loss_interctc_layer15=86.159, loss_interctc_layer21=105.777, loss=99.000, backward_time=0.354, grad_norm=63.388, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.138, optim0_lr0=1.576e-04, train_time=1.720 +[gpua002:0/64] 2024-01-17 09:23:01,146 (trainer:753) INFO: 7epoch:train:6701-6800batch: iter_time=8.474e-05, forward_time=0.176, loss_ctc=104.718, loss_interctc_layer6=108.148, loss_interctc_layer12=92.948, loss_interctc_layer15=87.238, loss_interctc_layer21=106.377, loss=99.886, backward_time=0.354, grad_norm=58.437, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.137, optim0_lr0=1.575e-04, train_time=1.572 +[gpua002:0/64] 2024-01-17 09:25:28,536 (trainer:753) INFO: 7epoch:train:6801-6900batch: iter_time=8.773e-05, forward_time=0.154, loss_ctc=108.151, loss_interctc_layer6=106.744, loss_interctc_layer12=92.425, loss_interctc_layer15=86.829, loss_interctc_layer21=109.817, loss=100.793, backward_time=0.342, grad_norm=66.537, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.138, optim0_lr0=1.574e-04, train_time=1.474 +[gpua002:0/64] 2024-01-17 09:28:10,574 (trainer:753) INFO: 7epoch:train:6901-7000batch: iter_time=8.454e-05, forward_time=0.141, loss_ctc=111.246, loss_interctc_layer6=108.594, loss_interctc_layer12=93.719, loss_interctc_layer15=87.463, loss_interctc_layer21=113.095, loss=102.823, backward_time=0.355, grad_norm=71.334, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.137, optim0_lr0=1.573e-04, train_time=1.620 +[gpua002:0/64] 2024-01-17 09:30:56,456 (trainer:753) INFO: 7epoch:train:7001-7100batch: iter_time=8.555e-05, forward_time=0.141, loss_ctc=89.314, loss_interctc_layer6=97.485, loss_interctc_layer12=83.767, loss_interctc_layer15=78.579, loss_interctc_layer21=90.328, loss=87.895, backward_time=0.347, grad_norm=51.896, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.137, optim0_lr0=1.573e-04, train_time=1.658 +[gpua002:0/64] 2024-01-17 09:33:39,370 (trainer:753) INFO: 7epoch:train:7101-7200batch: iter_time=8.377e-05, forward_time=0.141, loss_ctc=99.041, loss_interctc_layer6=96.842, loss_interctc_layer12=82.735, loss_interctc_layer15=77.211, loss_interctc_layer21=100.249, loss=91.216, backward_time=0.375, grad_norm=58.488, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.137, optim0_lr0=1.572e-04, train_time=1.630 +[gpua002:0/64] 2024-01-17 09:36:38,748 (trainer:753) INFO: 7epoch:train:7201-7300batch: iter_time=8.852e-05, forward_time=0.141, loss_ctc=115.418, loss_interctc_layer6=109.088, loss_interctc_layer12=93.699, loss_interctc_layer15=87.421, loss_interctc_layer21=116.913, loss=104.508, backward_time=0.357, grad_norm=64.298, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.137, optim0_lr0=1.571e-04, train_time=1.793 +[gpua002:0/64] 2024-01-17 09:39:04,067 (trainer:753) INFO: 7epoch:train:7301-7400batch: iter_time=8.539e-05, forward_time=0.141, loss_ctc=85.913, loss_interctc_layer6=87.889, loss_interctc_layer12=75.032, loss_interctc_layer15=69.445, loss_interctc_layer21=86.960, loss=81.048, backward_time=0.337, grad_norm=53.867, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.137, optim0_lr0=1.570e-04, train_time=1.454 +[gpua002:0/64] 2024-01-17 09:41:29,940 (trainer:753) INFO: 7epoch:train:7401-7500batch: iter_time=8.630e-05, forward_time=0.143, loss_ctc=109.214, loss_interctc_layer6=112.150, loss_interctc_layer12=97.493, loss_interctc_layer15=91.484, loss_interctc_layer21=109.915, loss=104.051, backward_time=0.349, grad_norm=86.026, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.137, optim0_lr0=1.569e-04, train_time=1.459 +[gpua002:0/64] 2024-01-17 09:41:49,970 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpua002:0/64] 2024-01-17 09:42:08,888 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua002:0/64] 2024-01-17 09:42:12,489 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpua002:0/64] 2024-01-17 09:42:12,489 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpua002:0/64] 2024-01-17 09:42:12,492 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpua002:0/64] 2024-01-17 09:52:08,116 (trainer:753) INFO: 7epoch:train:7501-7600batch: iter_time=2.703, forward_time=0.193, loss_ctc=120.937, loss_interctc_layer6=122.523, loss_interctc_layer12=105.578, loss_interctc_layer15=98.860, loss_interctc_layer21=122.309, loss=114.041, backward_time=0.307, grad_norm=86.671, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.139, optim0_lr0=1.569e-04, train_time=6.381 +[gpua002:0/64] 2024-01-17 09:54:45,214 (trainer:753) INFO: 7epoch:train:7601-7700batch: iter_time=1.028e-04, forward_time=0.166, loss_ctc=103.274, loss_interctc_layer6=100.690, loss_interctc_layer12=87.945, loss_interctc_layer15=82.485, loss_interctc_layer21=104.895, loss=95.858, backward_time=0.343, grad_norm=66.593, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.138, optim0_lr0=1.568e-04, train_time=1.571 +[gpua002:0/64] 2024-01-17 09:57:40,624 (trainer:753) INFO: 7epoch:train:7701-7800batch: iter_time=1.188e-04, forward_time=0.238, loss_ctc=113.589, loss_interctc_layer6=105.765, loss_interctc_layer12=90.481, loss_interctc_layer15=84.568, loss_interctc_layer21=116.027, loss=102.086, backward_time=0.342, grad_norm=73.308, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.141, optim0_lr0=1.567e-04, train_time=1.754 +[gpua002:0/64] 2024-01-17 10:00:23,752 (trainer:753) INFO: 7epoch:train:7801-7900batch: iter_time=1.157e-04, forward_time=0.142, loss_ctc=108.356, loss_interctc_layer6=98.586, loss_interctc_layer12=84.774, loss_interctc_layer15=79.070, loss_interctc_layer21=109.944, loss=96.146, backward_time=0.448, grad_norm=60.780, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.138, optim0_lr0=1.566e-04, train_time=1.631 +[gpua002:0/64] 2024-01-17 10:03:12,632 (trainer:753) INFO: 7epoch:train:7901-8000batch: iter_time=9.218e-05, forward_time=0.142, loss_ctc=111.199, loss_interctc_layer6=107.840, loss_interctc_layer12=92.959, loss_interctc_layer15=86.705, loss_interctc_layer21=112.571, loss=102.255, backward_time=0.402, grad_norm=85.714, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.139, optim0_lr0=1.565e-04, train_time=1.688 +[gpua002:0/64] 2024-01-17 10:05:41,733 (trainer:753) INFO: 7epoch:train:8001-8100batch: iter_time=1.081e-04, forward_time=0.142, loss_ctc=110.903, loss_interctc_layer6=102.711, loss_interctc_layer12=87.913, loss_interctc_layer15=81.894, loss_interctc_layer21=112.472, loss=99.179, backward_time=0.317, grad_norm=62.199, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.138, optim0_lr0=1.565e-04, train_time=1.491 +[gpua002:0/64] 2024-01-17 10:08:19,554 (trainer:753) INFO: 7epoch:train:8101-8200batch: iter_time=9.929e-05, forward_time=0.163, loss_ctc=119.944, loss_interctc_layer6=112.747, loss_interctc_layer12=96.648, loss_interctc_layer15=90.858, loss_interctc_layer21=121.773, loss=108.394, backward_time=0.315, grad_norm=88.590, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.137, optim0_lr0=1.564e-04, train_time=1.578 +[gpua002:0/64] 2024-01-17 10:11:34,701 (trainer:753) INFO: 7epoch:train:8201-8300batch: iter_time=0.002, forward_time=0.223, loss_ctc=102.266, loss_interctc_layer6=101.774, loss_interctc_layer12=87.637, loss_interctc_layer15=82.044, loss_interctc_layer21=103.695, loss=95.483, backward_time=0.443, grad_norm=66.871, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.141, optim0_lr0=1.563e-04, train_time=1.951 +[gpua002:0/64] 2024-01-17 10:13:53,139 (trainer:753) INFO: 7epoch:train:8301-8400batch: iter_time=9.436e-05, forward_time=0.142, loss_ctc=88.035, loss_interctc_layer6=93.325, loss_interctc_layer12=80.560, loss_interctc_layer15=74.975, loss_interctc_layer21=88.860, loss=85.151, backward_time=0.326, grad_norm=85.042, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.138, optim0_lr0=1.562e-04, train_time=1.384 +[gpua002:0/64] 2024-01-17 10:16:14,074 (trainer:753) INFO: 7epoch:train:8401-8500batch: iter_time=9.117e-05, forward_time=0.142, loss_ctc=115.851, loss_interctc_layer6=102.712, loss_interctc_layer12=87.662, loss_interctc_layer15=81.620, loss_interctc_layer21=117.477, loss=101.064, backward_time=0.308, grad_norm=57.122, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.138, optim0_lr0=1.561e-04, train_time=1.409 +[gpua002:0/64] 2024-01-17 10:18:50,510 (trainer:753) INFO: 7epoch:train:8501-8600batch: iter_time=8.867e-05, forward_time=0.141, loss_ctc=103.605, loss_interctc_layer6=98.508, loss_interctc_layer12=83.769, loss_interctc_layer15=77.842, loss_interctc_layer21=104.650, loss=93.675, backward_time=0.340, grad_norm=62.165, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.137, optim0_lr0=1.561e-04, train_time=1.564 +[gpua002:0/64] 2024-01-17 10:21:35,219 (trainer:753) INFO: 7epoch:train:8601-8700batch: iter_time=9.180e-05, forward_time=0.141, loss_ctc=103.283, loss_interctc_layer6=99.403, loss_interctc_layer12=85.321, loss_interctc_layer15=79.695, loss_interctc_layer21=104.667, loss=94.474, backward_time=0.349, grad_norm=85.793, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.138, optim0_lr0=1.560e-04, train_time=1.647 +[gpua002:0/64] 2024-01-17 10:23:12,015 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpua002:0/64] 2024-01-17 10:23:30,776 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua002:0/64] 2024-01-17 10:23:34,450 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpua002:0/64] 2024-01-17 10:23:34,450 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpua002:0/64] 2024-01-17 10:23:34,453 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpua002:0/64] 2024-01-17 10:32:56,375 (trainer:753) INFO: 7epoch:train:8701-8800batch: iter_time=2.885, forward_time=0.152, loss_ctc=123.011, loss_interctc_layer6=121.669, loss_interctc_layer12=104.739, loss_interctc_layer15=98.220, loss_interctc_layer21=124.181, loss=114.364, backward_time=0.327, grad_norm=117.044, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.140, optim0_lr0=1.559e-04, train_time=6.810 +[gpua002:0/64] 2024-01-17 10:35:17,486 (trainer:753) INFO: 7epoch:train:8801-8900batch: iter_time=8.275e-05, forward_time=0.166, loss_ctc=108.325, loss_interctc_layer6=109.108, loss_interctc_layer12=94.478, loss_interctc_layer15=88.402, loss_interctc_layer21=109.593, loss=101.981, backward_time=0.385, grad_norm=85.850, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.147, optim0_lr0=1.558e-04, train_time=1.412 +[gpua002:0/64] 2024-01-17 10:37:38,734 (trainer:753) INFO: 7epoch:train:8901-9000batch: iter_time=8.473e-05, forward_time=0.175, loss_ctc=100.064, loss_interctc_layer6=102.699, loss_interctc_layer12=88.590, loss_interctc_layer15=82.954, loss_interctc_layer21=101.472, loss=95.156, backward_time=0.314, grad_norm=59.353, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.138, optim0_lr0=1.557e-04, train_time=1.413 +[gpua002:0/64] 2024-01-17 10:40:34,211 (trainer:753) INFO: 7epoch:train:9001-9100batch: iter_time=9.040e-05, forward_time=0.143, loss_ctc=100.895, loss_interctc_layer6=92.686, loss_interctc_layer12=78.721, loss_interctc_layer15=73.264, loss_interctc_layer21=102.319, loss=89.577, backward_time=0.343, grad_norm=58.882, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.138, optim0_lr0=1.557e-04, train_time=1.754 +[gpua002:0/64] 2024-01-17 10:43:31,519 (trainer:753) INFO: 7epoch:train:9101-9200batch: iter_time=9.094e-05, forward_time=0.142, loss_ctc=109.928, loss_interctc_layer6=106.214, loss_interctc_layer12=91.578, loss_interctc_layer15=85.560, loss_interctc_layer21=111.654, loss=100.987, backward_time=0.360, grad_norm=81.458, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.137, optim0_lr0=1.556e-04, train_time=1.773 +[gpua002:0/64] 2024-01-17 10:46:18,148 (trainer:753) INFO: 7epoch:train:9201-9300batch: iter_time=8.814e-05, forward_time=0.142, loss_ctc=109.220, loss_interctc_layer6=106.808, loss_interctc_layer12=91.928, loss_interctc_layer15=85.820, loss_interctc_layer21=110.920, loss=100.939, backward_time=0.375, grad_norm=67.917, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.137, optim0_lr0=1.555e-04, train_time=1.666 +[gpua002:0/64] 2024-01-17 10:49:22,181 (trainer:753) INFO: 7epoch:train:9301-9400batch: iter_time=9.126e-05, forward_time=0.167, loss_ctc=115.111, loss_interctc_layer6=106.116, loss_interctc_layer12=91.437, loss_interctc_layer15=85.821, loss_interctc_layer21=116.690, loss=103.035, backward_time=0.389, grad_norm=95.175, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.137, optim0_lr0=1.554e-04, train_time=1.840 +[gpua002:0/64] 2024-01-17 10:51:37,086 (trainer:753) INFO: 7epoch:train:9401-9500batch: iter_time=9.229e-05, forward_time=0.150, loss_ctc=117.991, loss_interctc_layer6=108.475, loss_interctc_layer12=94.358, loss_interctc_layer15=87.941, loss_interctc_layer21=119.635, loss=105.680, backward_time=0.301, grad_norm=79.905, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.138, optim0_lr0=1.553e-04, train_time=1.348 +[gpua002:0/64] 2024-01-17 10:54:26,758 (trainer:753) INFO: 7epoch:train:9501-9600batch: iter_time=9.456e-05, forward_time=0.150, loss_ctc=91.566, loss_interctc_layer6=95.941, loss_interctc_layer12=82.974, loss_interctc_layer15=77.113, loss_interctc_layer21=92.630, loss=88.045, backward_time=0.360, grad_norm=97.084, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.138, optim0_lr0=1.553e-04, train_time=1.697 +[gpua002:0/64] 2024-01-17 10:57:48,402 (trainer:753) INFO: 7epoch:train:9601-9700batch: iter_time=9.466e-05, forward_time=0.231, loss_ctc=99.306, loss_interctc_layer6=95.857, loss_interctc_layer12=81.715, loss_interctc_layer15=75.840, loss_interctc_layer21=100.661, loss=90.676, backward_time=0.398, grad_norm=64.509, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.141, optim0_lr0=1.552e-04, train_time=2.016 +[gpua002:0/64] 2024-01-17 11:00:32,579 (trainer:753) INFO: 7epoch:train:9701-9800batch: iter_time=8.834e-05, forward_time=0.142, loss_ctc=120.177, loss_interctc_layer6=108.175, loss_interctc_layer12=92.502, loss_interctc_layer15=86.195, loss_interctc_layer21=122.189, loss=105.847, backward_time=0.314, grad_norm=72.929, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.137, optim0_lr0=1.551e-04, train_time=1.642 +[gpua002:0/64] 2024-01-17 11:03:04,128 (trainer:753) INFO: 7epoch:train:9801-9900batch: iter_time=9.514e-05, forward_time=0.141, loss_ctc=86.619, loss_interctc_layer6=87.260, loss_interctc_layer12=73.781, loss_interctc_layer15=68.408, loss_interctc_layer21=87.844, loss=80.782, backward_time=0.321, grad_norm=69.781, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.137, optim0_lr0=1.550e-04, train_time=1.515 +[gpua002:0/64] 2024-01-17 11:05:33,730 (trainer:753) INFO: 7epoch:train:9901-10000batch: iter_time=1.003e-04, forward_time=0.143, loss_ctc=116.246, loss_interctc_layer6=111.821, loss_interctc_layer12=96.408, loss_interctc_layer15=90.982, loss_interctc_layer21=117.188, loss=106.529, backward_time=0.342, grad_norm=73.592, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.137, optim0_lr0=1.550e-04, train_time=1.496 +[gpua002:0/64] 2024-01-17 11:05:53,765 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpua002:0/64] 2024-01-17 11:06:13,011 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua002:0/64] 2024-01-17 11:06:16,725 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpua002:0/64] 2024-01-17 11:06:16,725 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpua002:0/64] 2024-01-17 11:06:16,729 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpua002:0/64] 2024-01-17 11:17:41,664 (trainer:753) INFO: 7epoch:train:10001-10100batch: iter_time=3.258, forward_time=0.145, loss_ctc=113.422, loss_interctc_layer6=121.615, loss_interctc_layer12=104.904, loss_interctc_layer15=98.010, loss_interctc_layer21=114.452, loss=110.481, backward_time=0.309, grad_norm=86.241, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.139, optim0_lr0=1.549e-04, train_time=7.279 +[gpua002:0/64] 2024-01-17 11:20:18,645 (trainer:753) INFO: 7epoch:train:10101-10200batch: iter_time=8.165e-05, forward_time=0.142, loss_ctc=97.512, loss_interctc_layer6=99.572, loss_interctc_layer12=86.239, loss_interctc_layer15=80.839, loss_interctc_layer21=98.676, loss=92.568, backward_time=0.368, grad_norm=66.947, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.139, optim0_lr0=1.548e-04, train_time=1.570 +[gpua002:0/64] 2024-01-17 11:23:10,683 (trainer:753) INFO: 7epoch:train:10201-10300batch: iter_time=8.545e-05, forward_time=0.143, loss_ctc=105.568, loss_interctc_layer6=104.490, loss_interctc_layer12=89.249, loss_interctc_layer15=82.975, loss_interctc_layer21=107.187, loss=97.894, backward_time=0.362, grad_norm=57.303, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.138, optim0_lr0=1.547e-04, train_time=1.720 +[gpua002:0/64] 2024-01-17 11:26:12,947 (trainer:753) INFO: 7epoch:train:10301-10400batch: iter_time=8.847e-05, forward_time=0.141, loss_ctc=100.529, loss_interctc_layer6=97.837, loss_interctc_layer12=83.645, loss_interctc_layer15=77.980, loss_interctc_layer21=101.888, loss=92.376, backward_time=0.362, grad_norm=63.254, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.138, optim0_lr0=1.546e-04, train_time=1.822 +[gpua002:0/64] 2024-01-17 11:29:09,990 (trainer:753) INFO: 7epoch:train:10401-10500batch: iter_time=8.511e-05, forward_time=0.148, loss_ctc=101.789, loss_interctc_layer6=106.707, loss_interctc_layer12=92.080, loss_interctc_layer15=85.716, loss_interctc_layer21=103.080, loss=97.874, backward_time=0.435, grad_norm=280.493, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.138, optim0_lr0=1.546e-04, train_time=1.770 +[gpua002:0/64] 2024-01-17 11:31:55,113 (trainer:753) INFO: 7epoch:train:10501-10600batch: iter_time=8.392e-05, forward_time=0.166, loss_ctc=104.270, loss_interctc_layer6=101.784, loss_interctc_layer12=87.187, loss_interctc_layer15=81.175, loss_interctc_layer21=105.816, loss=96.046, backward_time=0.384, grad_norm=54.946, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.139, optim0_lr0=1.545e-04, train_time=1.650 +[gpua002:0/64] 2024-01-17 11:34:45,160 (trainer:753) INFO: 7epoch:train:10601-10700batch: iter_time=8.225e-05, forward_time=0.182, loss_ctc=111.661, loss_interctc_layer6=111.679, loss_interctc_layer12=95.970, loss_interctc_layer15=89.779, loss_interctc_layer21=113.354, loss=104.489, backward_time=0.379, grad_norm=74.381, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.142, optim0_lr0=1.544e-04, train_time=1.701 +[gpua002:0/64] 2024-01-17 11:37:20,468 (trainer:753) INFO: 7epoch:train:10701-10800batch: iter_time=8.472e-05, forward_time=0.142, loss_ctc=96.351, loss_interctc_layer6=100.474, loss_interctc_layer12=86.112, loss_interctc_layer15=80.532, loss_interctc_layer21=97.885, loss=92.271, backward_time=0.352, grad_norm=66.614, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.139, optim0_lr0=1.543e-04, train_time=1.553 +[gpua002:0/64] 2024-01-17 11:39:51,467 (trainer:753) INFO: 7epoch:train:10801-10900batch: iter_time=8.877e-05, forward_time=0.142, loss_ctc=83.478, loss_interctc_layer6=93.884, loss_interctc_layer12=80.540, loss_interctc_layer15=75.488, loss_interctc_layer21=84.608, loss=83.600, backward_time=0.404, grad_norm=110.063, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.139, optim0_lr0=1.543e-04, train_time=1.510 +[gpua002:0/64] 2024-01-17 11:42:36,555 (trainer:753) INFO: 7epoch:train:10901-11000batch: iter_time=8.833e-05, forward_time=0.143, loss_ctc=110.407, loss_interctc_layer6=101.272, loss_interctc_layer12=86.422, loss_interctc_layer15=80.195, loss_interctc_layer21=111.886, loss=98.036, backward_time=0.364, grad_norm=65.807, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.139, optim0_lr0=1.542e-04, train_time=1.651 +[gpua002:0/64] 2024-01-17 11:45:40,138 (trainer:753) INFO: 7epoch:train:11001-11100batch: iter_time=8.608e-05, forward_time=0.143, loss_ctc=98.367, loss_interctc_layer6=98.178, loss_interctc_layer12=83.377, loss_interctc_layer15=77.196, loss_interctc_layer21=99.840, loss=91.392, backward_time=0.401, grad_norm=63.748, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.139, optim0_lr0=1.541e-04, train_time=1.836 +[gpua002:0/64] 2024-01-17 11:48:26,962 (trainer:753) INFO: 7epoch:train:11101-11200batch: iter_time=9.177e-05, forward_time=0.143, loss_ctc=97.755, loss_interctc_layer6=98.949, loss_interctc_layer12=84.657, loss_interctc_layer15=78.944, loss_interctc_layer21=98.935, loss=91.848, backward_time=0.398, grad_norm=67.733, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.139, optim0_lr0=1.540e-04, train_time=1.668 +[gpua002:0/64] 2024-01-17 11:49:58,966 (multiple_iter_factory:32) INFO: Building 9th iter-factory... +[gpua002:0/64] 2024-01-17 11:50:17,908 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua002:0/64] 2024-01-17 11:50:21,620 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpua002:0/64] 2024-01-17 11:50:21,620 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpua002:0/64] 2024-01-17 11:50:21,623 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpua002:0/64] 2024-01-17 11:59:56,890 (trainer:753) INFO: 7epoch:train:11201-11300batch: iter_time=3.302, forward_time=0.180, loss_ctc=118.012, loss_interctc_layer6=122.032, loss_interctc_layer12=104.754, loss_interctc_layer15=98.015, loss_interctc_layer21=119.971, loss=112.557, backward_time=0.323, grad_norm=98.145, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.140, optim0_lr0=1.540e-04, train_time=6.899 +[gpua002:0/64] 2024-01-17 12:02:06,315 (trainer:753) INFO: 7epoch:train:11301-11400batch: iter_time=9.078e-05, forward_time=0.145, loss_ctc=106.949, loss_interctc_layer6=107.181, loss_interctc_layer12=93.126, loss_interctc_layer15=86.524, loss_interctc_layer21=108.715, loss=100.499, backward_time=0.298, grad_norm=127.046, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.140, optim0_lr0=1.539e-04, train_time=1.294 +[gpua002:0/64] 2024-01-17 12:04:29,484 (trainer:753) INFO: 7epoch:train:11401-11500batch: iter_time=9.218e-05, forward_time=0.143, loss_ctc=100.448, loss_interctc_layer6=102.123, loss_interctc_layer12=88.413, loss_interctc_layer15=82.588, loss_interctc_layer21=101.824, loss=95.079, backward_time=0.326, grad_norm=74.844, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.140, optim0_lr0=1.538e-04, train_time=1.431 +[gpua002:0/64] 2024-01-17 12:07:00,435 (trainer:753) INFO: 7epoch:train:11501-11600batch: iter_time=9.169e-05, forward_time=0.142, loss_ctc=100.632, loss_interctc_layer6=92.843, loss_interctc_layer12=78.672, loss_interctc_layer15=73.019, loss_interctc_layer21=102.366, loss=89.506, backward_time=0.345, grad_norm=58.032, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.140, optim0_lr0=1.537e-04, train_time=1.509 +[gpua002:0/64] 2024-01-17 12:09:37,703 (trainer:753) INFO: 7epoch:train:11601-11700batch: iter_time=9.133e-05, forward_time=0.143, loss_ctc=110.458, loss_interctc_layer6=106.006, loss_interctc_layer12=91.208, loss_interctc_layer15=84.875, loss_interctc_layer21=111.786, loss=100.867, backward_time=0.348, grad_norm=75.077, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.140, optim0_lr0=1.537e-04, train_time=1.572 +[gpua002:0/64] 2024-01-17 12:12:05,961 (trainer:753) INFO: 7epoch:train:11701-11800batch: iter_time=8.191e-05, forward_time=0.142, loss_ctc=107.872, loss_interctc_layer6=106.051, loss_interctc_layer12=90.667, loss_interctc_layer15=84.242, loss_interctc_layer21=109.416, loss=99.649, backward_time=0.340, grad_norm=72.319, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.138, optim0_lr0=1.536e-04, train_time=1.482 +[gpua002:0/64] 2024-01-17 12:15:04,601 (trainer:753) INFO: 7epoch:train:11801-11900batch: iter_time=8.888e-05, forward_time=0.142, loss_ctc=113.739, loss_interctc_layer6=105.400, loss_interctc_layer12=90.463, loss_interctc_layer15=84.975, loss_interctc_layer21=115.624, loss=102.040, backward_time=0.444, grad_norm=85.076, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.139, optim0_lr0=1.535e-04, train_time=1.786 +[gpua002:0/64] 2024-01-17 12:18:15,116 (trainer:753) INFO: 7epoch:train:11901-12000batch: iter_time=8.399e-05, forward_time=0.142, loss_ctc=115.681, loss_interctc_layer6=108.333, loss_interctc_layer12=92.693, loss_interctc_layer15=86.512, loss_interctc_layer21=117.299, loss=104.104, backward_time=0.346, grad_norm=83.568, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.139, optim0_lr0=1.534e-04, train_time=1.905 +[gpua002:0/64] 2024-01-17 12:21:05,711 (trainer:753) INFO: 7epoch:train:12001-12100batch: iter_time=8.460e-05, forward_time=0.144, loss_ctc=90.953, loss_interctc_layer6=95.820, loss_interctc_layer12=81.875, loss_interctc_layer15=76.294, loss_interctc_layer21=92.091, loss=87.407, backward_time=0.348, grad_norm=65.461, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.138, optim0_lr0=1.534e-04, train_time=1.706 +[gpua002:0/64] 2024-01-17 12:23:59,632 (trainer:753) INFO: 7epoch:train:12101-12200batch: iter_time=9.114e-05, forward_time=0.253, loss_ctc=99.407, loss_interctc_layer6=95.329, loss_interctc_layer12=81.117, loss_interctc_layer15=75.299, loss_interctc_layer21=100.814, loss=90.393, backward_time=0.331, grad_norm=66.402, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.142, optim0_lr0=1.533e-04, train_time=1.738 +[gpua002:0/64] 2024-01-17 12:26:43,152 (trainer:753) INFO: 7epoch:train:12201-12300batch: iter_time=9.210e-05, forward_time=0.143, loss_ctc=118.075, loss_interctc_layer6=107.020, loss_interctc_layer12=91.430, loss_interctc_layer15=84.900, loss_interctc_layer21=119.924, loss=104.270, backward_time=0.350, grad_norm=95.319, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.137, optim0_lr0=1.532e-04, train_time=1.636 +[gpua002:0/64] 2024-01-17 12:29:15,402 (trainer:753) INFO: 7epoch:train:12301-12400batch: iter_time=8.706e-05, forward_time=0.141, loss_ctc=86.233, loss_interctc_layer6=87.219, loss_interctc_layer12=73.406, loss_interctc_layer15=67.935, loss_interctc_layer21=87.341, loss=80.427, backward_time=0.317, grad_norm=61.108, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.137, optim0_lr0=1.531e-04, train_time=1.522 +[gpua002:0/64] 2024-01-17 12:31:47,410 (trainer:753) INFO: 7epoch:train:12401-12500batch: iter_time=8.350e-05, forward_time=0.141, loss_ctc=114.963, loss_interctc_layer6=110.833, loss_interctc_layer12=95.275, loss_interctc_layer15=89.501, loss_interctc_layer21=116.070, loss=105.328, backward_time=0.358, grad_norm=77.247, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.137, optim0_lr0=1.531e-04, train_time=1.520 +[gpua002:0/64] 2024-01-17 12:32:07,439 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpua002:0/64] 2024-01-17 12:32:26,430 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua002:0/64] 2024-01-17 12:32:30,406 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpua002:0/64] 2024-01-17 12:32:30,406 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpua002:0/64] 2024-01-17 12:32:30,409 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpua002:0/64] 2024-01-17 12:42:46,959 (trainer:753) INFO: 7epoch:train:12501-12600batch: iter_time=3.049, forward_time=0.143, loss_ctc=116.437, loss_interctc_layer6=120.396, loss_interctc_layer12=103.532, loss_interctc_layer15=96.568, loss_interctc_layer21=118.066, loss=111.000, backward_time=0.308, grad_norm=83.490, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.137, optim0_lr0=1.530e-04, train_time=6.595 +[gpua002:0/64] 2024-01-17 12:45:03,768 (trainer:753) INFO: 7epoch:train:12601-12700batch: iter_time=9.546e-05, forward_time=0.142, loss_ctc=101.470, loss_interctc_layer6=99.747, loss_interctc_layer12=86.349, loss_interctc_layer15=80.757, loss_interctc_layer21=102.834, loss=94.231, backward_time=0.301, grad_norm=60.974, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.138, optim0_lr0=1.529e-04, train_time=1.368 +[gpua002:0/64] 2024-01-17 12:47:55,603 (trainer:753) INFO: 7epoch:train:12701-12800batch: iter_time=9.370e-05, forward_time=0.142, loss_ctc=109.935, loss_interctc_layer6=103.877, loss_interctc_layer12=88.837, loss_interctc_layer15=82.571, loss_interctc_layer21=111.693, loss=99.383, backward_time=0.386, grad_norm=61.979, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.137, optim0_lr0=1.528e-04, train_time=1.718 +[gpua002:0/64] 2024-01-17 12:50:49,431 (trainer:753) INFO: 7epoch:train:12801-12900batch: iter_time=9.590e-05, forward_time=0.141, loss_ctc=104.144, loss_interctc_layer6=97.452, loss_interctc_layer12=83.315, loss_interctc_layer15=77.706, loss_interctc_layer21=105.840, loss=93.692, backward_time=0.347, grad_norm=65.465, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.137, optim0_lr0=1.528e-04, train_time=1.738 +[gpua002:0/64] 2024-01-17 12:54:08,539 (trainer:753) INFO: 7epoch:train:12901-13000batch: iter_time=9.471e-05, forward_time=0.142, loss_ctc=107.087, loss_interctc_layer6=106.438, loss_interctc_layer12=91.306, loss_interctc_layer15=85.123, loss_interctc_layer21=108.129, loss=99.617, backward_time=0.473, grad_norm=70.113, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.137, optim0_lr0=1.527e-04, train_time=1.991 +[gpua002:0/64] 2024-01-17 12:56:42,767 (trainer:753) INFO: 7epoch:train:13001-13100batch: iter_time=9.349e-05, forward_time=0.225, loss_ctc=108.412, loss_interctc_layer6=101.960, loss_interctc_layer12=87.085, loss_interctc_layer15=81.175, loss_interctc_layer21=110.073, loss=97.741, backward_time=0.354, grad_norm=67.207, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.144, optim0_lr0=1.526e-04, train_time=1.541 +[gpua002:0/64] 2024-01-17 12:59:15,078 (trainer:753) INFO: 7epoch:train:13101-13200batch: iter_time=9.030e-05, forward_time=0.142, loss_ctc=118.581, loss_interctc_layer6=110.626, loss_interctc_layer12=95.041, loss_interctc_layer15=88.908, loss_interctc_layer21=120.139, loss=106.659, backward_time=0.319, grad_norm=77.556, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.137, optim0_lr0=1.525e-04, train_time=1.524 +[gpua002:0/64] 2024-01-17 13:02:24,564 (trainer:753) INFO: 7epoch:train:13201-13300batch: iter_time=1.009e-04, forward_time=0.141, loss_ctc=99.751, loss_interctc_layer6=99.376, loss_interctc_layer12=85.169, loss_interctc_layer15=79.331, loss_interctc_layer21=101.366, loss=92.999, backward_time=0.361, grad_norm=65.933, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.137, optim0_lr0=1.525e-04, train_time=1.895 +[gpua002:0/64] 2024-01-17 13:06:01,347 (trainer:753) INFO: 7epoch:train:13301-13400batch: iter_time=9.509e-05, forward_time=0.141, loss_ctc=86.280, loss_interctc_layer6=92.521, loss_interctc_layer12=79.610, loss_interctc_layer15=74.183, loss_interctc_layer21=87.562, loss=84.031, backward_time=0.535, grad_norm=68.228, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.137, optim0_lr0=1.524e-04, train_time=2.168 +[gpua002:0/64] 2024-01-17 13:08:44,070 (trainer:753) INFO: 7epoch:train:13401-13500batch: iter_time=9.215e-05, forward_time=0.142, loss_ctc=111.692, loss_interctc_layer6=101.261, loss_interctc_layer12=86.080, loss_interctc_layer15=79.854, loss_interctc_layer21=113.743, loss=98.526, backward_time=0.370, grad_norm=58.136, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.137, optim0_lr0=1.523e-04, train_time=1.627 +[gpua002:0/64] 2024-01-17 13:11:39,056 (trainer:753) INFO: 7epoch:train:13501-13600batch: iter_time=9.620e-05, forward_time=0.142, loss_ctc=100.755, loss_interctc_layer6=97.285, loss_interctc_layer12=82.361, loss_interctc_layer15=76.482, loss_interctc_layer21=102.350, loss=91.847, backward_time=0.357, grad_norm=59.085, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.137, optim0_lr0=1.522e-04, train_time=1.750 +[gpua002:0/64] 2024-01-17 13:14:17,143 (trainer:753) INFO: 7epoch:train:13601-13700batch: iter_time=9.593e-05, forward_time=0.141, loss_ctc=100.821, loss_interctc_layer6=97.756, loss_interctc_layer12=83.722, loss_interctc_layer15=77.939, loss_interctc_layer21=102.509, loss=92.549, backward_time=0.335, grad_norm=66.240, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.137, optim0_lr0=1.522e-04, train_time=1.581 +[gpua002:0/64] 2024-01-17 13:15:48,732 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpua002:0/64] 2024-01-17 13:16:07,660 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua002:0/64] 2024-01-17 13:16:11,116 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpua002:0/64] 2024-01-17 13:16:11,116 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpua002:0/64] 2024-01-17 13:16:11,123 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpua002:0/64] 2024-01-17 13:25:58,641 (trainer:753) INFO: 7epoch:train:13701-13800batch: iter_time=3.213, forward_time=0.175, loss_ctc=119.534, loss_interctc_layer6=120.356, loss_interctc_layer12=102.781, loss_interctc_layer15=96.481, loss_interctc_layer21=121.357, loss=112.102, backward_time=0.321, grad_norm=75.506, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.138, optim0_lr0=1.521e-04, train_time=7.014 +[gpua002:0/64] 2024-01-17 13:29:08,175 (trainer:753) INFO: 7epoch:train:13801-13900batch: iter_time=7.947e-05, forward_time=0.142, loss_ctc=106.747, loss_interctc_layer6=107.291, loss_interctc_layer12=92.533, loss_interctc_layer15=86.605, loss_interctc_layer21=108.599, loss=100.355, backward_time=0.390, grad_norm=85.195, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.137, optim0_lr0=1.520e-04, train_time=1.896 +[gpua002:0/64] 2024-01-17 13:32:29,088 (trainer:753) INFO: 7epoch:train:13901-14000batch: iter_time=8.367e-05, forward_time=0.142, loss_ctc=97.545, loss_interctc_layer6=100.479, loss_interctc_layer12=86.191, loss_interctc_layer15=80.601, loss_interctc_layer21=99.077, loss=92.779, backward_time=0.415, grad_norm=64.725, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.137, optim0_lr0=1.519e-04, train_time=2.009 +[gpua002:0/64] 2024-01-17 13:34:47,786 (trainer:753) INFO: 7epoch:train:14001-14100batch: iter_time=8.434e-05, forward_time=0.141, loss_ctc=98.704, loss_interctc_layer6=91.886, loss_interctc_layer12=77.894, loss_interctc_layer15=71.999, loss_interctc_layer21=100.468, loss=88.190, backward_time=0.314, grad_norm=56.152, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.137, optim0_lr0=1.519e-04, train_time=1.387 +[gpua002:0/64] 2024-01-17 13:37:47,158 (trainer:753) INFO: 7epoch:train:14101-14200batch: iter_time=8.408e-05, forward_time=0.141, loss_ctc=108.192, loss_interctc_layer6=105.144, loss_interctc_layer12=90.261, loss_interctc_layer15=83.932, loss_interctc_layer21=109.758, loss=99.458, backward_time=0.392, grad_norm=70.043, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.137, optim0_lr0=1.518e-04, train_time=1.794 +[gpua002:0/64] 2024-01-17 13:40:52,327 (trainer:753) INFO: 7epoch:train:14201-14300batch: iter_time=8.368e-05, forward_time=0.142, loss_ctc=105.328, loss_interctc_layer6=105.879, loss_interctc_layer12=90.599, loss_interctc_layer15=84.181, loss_interctc_layer21=106.468, loss=98.491, backward_time=0.371, grad_norm=98.737, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.137, optim0_lr0=1.517e-04, train_time=1.851 +[gpua002:0/64] 2024-01-17 13:44:46,101 (trainer:753) INFO: 7epoch:train:14301-14400batch: iter_time=8.449e-05, forward_time=0.142, loss_ctc=113.777, loss_interctc_layer6=104.435, loss_interctc_layer12=89.501, loss_interctc_layer15=83.753, loss_interctc_layer21=115.772, loss=101.448, backward_time=0.469, grad_norm=82.891, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.137, optim0_lr0=1.517e-04, train_time=2.338 +[gpua002:0/64] 2024-01-17 13:48:16,658 (trainer:753) INFO: 7epoch:train:14401-14500batch: iter_time=8.601e-05, forward_time=0.141, loss_ctc=115.138, loss_interctc_layer6=107.304, loss_interctc_layer12=91.149, loss_interctc_layer15=84.894, loss_interctc_layer21=117.042, loss=103.106, backward_time=0.409, grad_norm=65.271, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.137, optim0_lr0=1.516e-04, train_time=2.105 +[gpua002:0/64] 2024-01-17 13:51:22,181 (trainer:753) INFO: 7epoch:train:14501-14600batch: iter_time=8.388e-05, forward_time=0.141, loss_ctc=89.536, loss_interctc_layer6=94.980, loss_interctc_layer12=81.074, loss_interctc_layer15=75.413, loss_interctc_layer21=90.814, loss=86.363, backward_time=0.422, grad_norm=59.137, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.137, optim0_lr0=1.515e-04, train_time=1.855 +[gpua002:0/64] 2024-01-17 13:54:39,097 (trainer:753) INFO: 7epoch:train:14601-14700batch: iter_time=9.076e-05, forward_time=0.155, loss_ctc=97.890, loss_interctc_layer6=93.973, loss_interctc_layer12=79.925, loss_interctc_layer15=74.315, loss_interctc_layer21=99.183, loss=89.057, backward_time=0.394, grad_norm=52.517, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.142, optim0_lr0=1.514e-04, train_time=1.969 +[gpua002:0/64] 2024-01-17 13:57:46,413 (trainer:753) INFO: 7epoch:train:14701-14800batch: iter_time=0.115, forward_time=0.218, loss_ctc=116.211, loss_interctc_layer6=106.255, loss_interctc_layer12=90.578, loss_interctc_layer15=84.194, loss_interctc_layer21=118.169, loss=103.082, backward_time=0.410, grad_norm=65.200, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.146, optim0_lr0=1.514e-04, train_time=1.872 +[gpua002:0/64] 2024-01-17 14:00:57,467 (trainer:753) INFO: 7epoch:train:14801-14900batch: iter_time=8.585e-05, forward_time=0.141, loss_ctc=85.521, loss_interctc_layer6=86.575, loss_interctc_layer12=73.221, loss_interctc_layer15=67.395, loss_interctc_layer21=87.119, loss=79.966, backward_time=0.442, grad_norm=53.763, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.137, optim0_lr0=1.513e-04, train_time=1.911 +[gpua002:0/64] 2024-01-17 14:04:27,550 (trainer:753) INFO: 7epoch:train:14901-15000batch: iter_time=8.846e-05, forward_time=0.141, loss_ctc=114.142, loss_interctc_layer6=110.663, loss_interctc_layer12=95.728, loss_interctc_layer15=89.201, loss_interctc_layer21=115.785, loss=105.104, backward_time=0.427, grad_norm=74.842, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.138, optim0_lr0=1.512e-04, train_time=2.101 +[gpua002:0/64] 2024-01-17 14:36:24,758 (trainer:352) INFO: 7epoch results: [train] iter_time=0.220, forward_time=0.152, loss_ctc=106.272, loss_interctc_layer6=104.803, loss_interctc_layer12=90.239, loss_interctc_layer15=84.383, loss_interctc_layer21=107.583, loss=98.656, backward_time=0.361, grad_norm=73.599, clip=100.000, loss_scale=5.872e+19, optim_step_time=0.138, optim0_lr0=1.570e-04, train_time=2.040, time=8 hours, 30 minutes and 27.51 seconds, total_count=105000, gpu_max_cached_mem_GB=34.398, [valid] loss_ctc=72.175, cer_ctc=0.322, loss_interctc_layer6=70.044, cer_interctc_layer6=0.311, loss_interctc_layer12=58.495, cer_interctc_layer12=0.244, loss_interctc_layer15=54.408, cer_interctc_layer15=0.220, loss_interctc_layer21=73.290, cer_interctc_layer21=0.327, loss=65.683, time=31 minutes and 31.99 seconds, total_count=32697, gpu_max_cached_mem_GB=34.398 +[gpua002:0/64] 2024-01-17 14:36:44,334 (trainer:407) INFO: The best model has been updated: valid.cer_ctc, valid.loss_ctc, valid.total_count +[gpua002:0/64] 2024-01-17 14:36:44,452 (trainer:461) INFO: The model files were removed: exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/2epoch.pth +[gpua002:0/64] 2024-01-17 14:36:44,452 (trainer:286) INFO: 8/45epoch started. Estimated time to finish: 2 weeks, 3 days and 3 hours +[gpua002:0/64] 2024-01-17 14:36:44,469 (multiple_iter_factory:32) INFO: Building 0th iter-factory... +[gpua002:0/64] 2024-01-17 14:37:02,748 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua002:0/64] 2024-01-17 14:37:06,119 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpua002:0/64] 2024-01-17 14:37:06,119 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpua002:0/64] 2024-01-17 14:37:06,123 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpua002:0/64] 2024-01-17 14:47:12,960 (trainer:753) INFO: 8epoch:train:1-100batch: iter_time=2.980, forward_time=0.191, loss_ctc=99.976, loss_interctc_layer6=104.308, loss_interctc_layer12=90.262, loss_interctc_layer15=84.916, loss_interctc_layer21=100.892, loss=96.071, backward_time=0.310, grad_norm=76.393, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.139, optim0_lr0=1.511e-04, train_time=6.285 +[gpua002:0/64] 2024-01-17 14:49:47,270 (trainer:753) INFO: 8epoch:train:101-200batch: iter_time=8.140e-05, forward_time=0.141, loss_ctc=99.980, loss_interctc_layer6=101.085, loss_interctc_layer12=86.337, loss_interctc_layer15=80.338, loss_interctc_layer21=101.475, loss=93.843, backward_time=0.334, grad_norm=61.534, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.137, optim0_lr0=1.511e-04, train_time=1.543 +[gpua002:0/64] 2024-01-17 14:52:10,038 (trainer:753) INFO: 8epoch:train:201-300batch: iter_time=8.653e-05, forward_time=0.142, loss_ctc=108.169, loss_interctc_layer6=107.029, loss_interctc_layer12=91.901, loss_interctc_layer15=85.996, loss_interctc_layer21=109.493, loss=100.518, backward_time=0.305, grad_norm=61.619, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.137, optim0_lr0=1.510e-04, train_time=1.427 +[gpua002:0/64] 2024-01-17 14:55:19,875 (trainer:753) INFO: 8epoch:train:301-400batch: iter_time=8.734e-05, forward_time=0.141, loss_ctc=98.263, loss_interctc_layer6=101.493, loss_interctc_layer12=86.638, loss_interctc_layer15=81.269, loss_interctc_layer21=99.590, loss=93.451, backward_time=0.351, grad_norm=66.969, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.137, optim0_lr0=1.509e-04, train_time=1.898 +[gpua002:0/64] 2024-01-17 14:58:08,154 (trainer:753) INFO: 8epoch:train:401-500batch: iter_time=9.578e-05, forward_time=0.143, loss_ctc=95.034, loss_interctc_layer6=95.407, loss_interctc_layer12=81.590, loss_interctc_layer15=76.097, loss_interctc_layer21=96.627, loss=88.951, backward_time=0.365, grad_norm=61.851, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.137, optim0_lr0=1.509e-04, train_time=1.682 +[gpua002:0/64] 2024-01-17 15:01:10,031 (trainer:753) INFO: 8epoch:train:501-600batch: iter_time=8.742e-05, forward_time=0.142, loss_ctc=92.126, loss_interctc_layer6=100.294, loss_interctc_layer12=86.140, loss_interctc_layer15=80.566, loss_interctc_layer21=93.108, loss=90.447, backward_time=0.361, grad_norm=70.487, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.137, optim0_lr0=1.508e-04, train_time=1.816 +[gpua002:0/64] 2024-01-17 15:03:29,727 (trainer:753) INFO: 8epoch:train:601-700batch: iter_time=9.384e-05, forward_time=0.142, loss_ctc=114.192, loss_interctc_layer6=107.615, loss_interctc_layer12=93.116, loss_interctc_layer15=87.024, loss_interctc_layer21=115.716, loss=103.533, backward_time=0.311, grad_norm=75.102, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.138, optim0_lr0=1.507e-04, train_time=1.399 +[gpua002:0/64] 2024-01-17 15:06:39,490 (trainer:753) INFO: 8epoch:train:701-800batch: iter_time=9.694e-05, forward_time=0.141, loss_ctc=113.459, loss_interctc_layer6=109.702, loss_interctc_layer12=96.413, loss_interctc_layer15=90.626, loss_interctc_layer21=115.275, loss=105.095, backward_time=0.421, grad_norm=74.427, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.138, optim0_lr0=1.506e-04, train_time=1.897 +[gpua002:0/64] 2024-01-17 15:09:42,841 (trainer:753) INFO: 8epoch:train:801-900batch: iter_time=9.986e-05, forward_time=0.142, loss_ctc=113.439, loss_interctc_layer6=108.614, loss_interctc_layer12=92.570, loss_interctc_layer15=86.381, loss_interctc_layer21=115.195, loss=103.240, backward_time=0.433, grad_norm=89.972, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.137, optim0_lr0=1.506e-04, train_time=1.833 +[gpua002:0/64] 2024-01-17 15:12:46,130 (trainer:753) INFO: 8epoch:train:901-1000batch: iter_time=9.272e-05, forward_time=0.142, loss_ctc=111.536, loss_interctc_layer6=111.911, loss_interctc_layer12=96.718, loss_interctc_layer15=90.921, loss_interctc_layer21=113.457, loss=104.909, backward_time=0.412, grad_norm=68.281, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.137, optim0_lr0=1.505e-04, train_time=1.833 +[gpua002:0/64] 2024-01-17 15:15:53,692 (trainer:753) INFO: 8epoch:train:1001-1100batch: iter_time=9.123e-05, forward_time=0.142, loss_ctc=95.370, loss_interctc_layer6=103.218, loss_interctc_layer12=89.272, loss_interctc_layer15=84.098, loss_interctc_layer21=96.486, loss=93.689, backward_time=0.367, grad_norm=94.057, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.138, optim0_lr0=1.504e-04, train_time=1.875 +[gpua002:0/64] 2024-01-17 15:19:12,732 (trainer:753) INFO: 8epoch:train:1101-1200batch: iter_time=9.034e-05, forward_time=0.215, loss_ctc=107.074, loss_interctc_layer6=109.860, loss_interctc_layer12=95.831, loss_interctc_layer15=90.378, loss_interctc_layer21=108.268, loss=102.282, backward_time=0.377, grad_norm=67.746, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.140, optim0_lr0=1.504e-04, train_time=1.990 +[gpua002:0/64] 2024-01-17 15:21:01,574 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpua002:0/64] 2024-01-17 15:21:20,810 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua002:0/64] 2024-01-17 15:21:24,398 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpua002:0/64] 2024-01-17 15:21:24,398 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpua002:0/64] 2024-01-17 15:21:24,402 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpua002:0/64] 2024-01-17 15:28:01,326 (trainer:753) INFO: 8epoch:train:1201-1300batch: iter_time=3.260, forward_time=0.198, loss_ctc=100.249, loss_interctc_layer6=110.059, loss_interctc_layer12=95.295, loss_interctc_layer15=89.601, loss_interctc_layer21=101.490, loss=99.339, backward_time=0.343, grad_norm=69.144, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.141, optim0_lr0=1.503e-04, train_time=5.286 +[gpua002:0/64] 2024-01-17 15:30:09,141 (trainer:753) INFO: 8epoch:train:1301-1400batch: iter_time=8.010e-05, forward_time=0.142, loss_ctc=110.169, loss_interctc_layer6=110.248, loss_interctc_layer12=95.093, loss_interctc_layer15=88.693, loss_interctc_layer21=111.692, loss=103.179, backward_time=0.295, grad_norm=79.871, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.137, optim0_lr0=1.502e-04, train_time=1.278 +[gpua002:0/64] 2024-01-17 15:32:32,692 (trainer:753) INFO: 8epoch:train:1401-1500batch: iter_time=8.709e-05, forward_time=0.141, loss_ctc=90.924, loss_interctc_layer6=95.611, loss_interctc_layer12=81.793, loss_interctc_layer15=76.238, loss_interctc_layer21=92.291, loss=87.372, backward_time=0.338, grad_norm=63.828, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.137, optim0_lr0=1.502e-04, train_time=1.435 +[gpua002:0/64] 2024-01-17 15:35:48,272 (trainer:753) INFO: 8epoch:train:1501-1600batch: iter_time=8.204e-05, forward_time=0.143, loss_ctc=100.710, loss_interctc_layer6=98.299, loss_interctc_layer12=84.033, loss_interctc_layer15=78.290, loss_interctc_layer21=102.078, loss=92.682, backward_time=0.346, grad_norm=53.508, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.137, optim0_lr0=1.501e-04, train_time=1.956 +[gpua002:0/64] 2024-01-17 15:38:27,676 (trainer:753) INFO: 8epoch:train:1601-1700batch: iter_time=8.332e-05, forward_time=0.142, loss_ctc=99.440, loss_interctc_layer6=104.511, loss_interctc_layer12=89.851, loss_interctc_layer15=83.772, loss_interctc_layer21=100.953, loss=95.705, backward_time=0.345, grad_norm=69.512, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.137, optim0_lr0=1.500e-04, train_time=1.592 +[gpua002:0/64] 2024-01-17 15:41:06,735 (trainer:753) INFO: 8epoch:train:1701-1800batch: iter_time=8.349e-05, forward_time=0.142, loss_ctc=84.407, loss_interctc_layer6=85.568, loss_interctc_layer12=73.412, loss_interctc_layer15=68.130, loss_interctc_layer21=85.810, loss=79.465, backward_time=0.328, grad_norm=66.239, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.138, optim0_lr0=1.499e-04, train_time=1.592 +[gpua002:0/64] 2024-01-17 15:44:24,319 (trainer:753) INFO: 8epoch:train:1801-1900batch: iter_time=8.777e-05, forward_time=0.142, loss_ctc=103.529, loss_interctc_layer6=106.086, loss_interctc_layer12=90.886, loss_interctc_layer15=84.738, loss_interctc_layer21=105.000, loss=98.048, backward_time=0.399, grad_norm=61.828, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.138, optim0_lr0=1.499e-04, train_time=1.976 +[gpua002:0/64] 2024-01-17 15:47:09,704 (trainer:753) INFO: 8epoch:train:1901-2000batch: iter_time=8.166e-05, forward_time=0.141, loss_ctc=119.945, loss_interctc_layer6=111.056, loss_interctc_layer12=96.470, loss_interctc_layer15=90.496, loss_interctc_layer21=121.879, loss=107.969, backward_time=0.371, grad_norm=94.203, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.138, optim0_lr0=1.498e-04, train_time=1.654 +[gpua002:0/64] 2024-01-17 15:50:22,399 (trainer:753) INFO: 8epoch:train:2001-2100batch: iter_time=8.045e-05, forward_time=0.141, loss_ctc=99.510, loss_interctc_layer6=103.246, loss_interctc_layer12=89.222, loss_interctc_layer15=83.792, loss_interctc_layer21=100.857, loss=95.326, backward_time=0.383, grad_norm=59.440, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.137, optim0_lr0=1.497e-04, train_time=1.927 +[gpua002:0/64] 2024-01-17 15:54:12,954 (trainer:753) INFO: 8epoch:train:2101-2200batch: iter_time=8.578e-05, forward_time=0.142, loss_ctc=113.325, loss_interctc_layer6=113.667, loss_interctc_layer12=96.815, loss_interctc_layer15=89.939, loss_interctc_layer21=115.145, loss=105.778, backward_time=0.488, grad_norm=68.566, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.137, optim0_lr0=1.497e-04, train_time=2.305 +[gpua002:0/64] 2024-01-17 15:56:41,292 (trainer:753) INFO: 8epoch:train:2201-2300batch: iter_time=7.963e-05, forward_time=0.141, loss_ctc=99.857, loss_interctc_layer6=101.415, loss_interctc_layer12=88.224, loss_interctc_layer15=82.931, loss_interctc_layer21=101.539, loss=94.793, backward_time=0.351, grad_norm=62.131, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.138, optim0_lr0=1.496e-04, train_time=1.483 +[gpua002:0/64] 2024-01-17 15:59:23,540 (trainer:753) INFO: 8epoch:train:2301-2400batch: iter_time=9.123e-05, forward_time=0.241, loss_ctc=92.442, loss_interctc_layer6=98.693, loss_interctc_layer12=84.252, loss_interctc_layer15=78.607, loss_interctc_layer21=93.718, loss=89.542, backward_time=0.373, grad_norm=56.732, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.144, optim0_lr0=1.495e-04, train_time=1.622 +[gpua002:0/64] 2024-01-17 16:01:53,317 (trainer:753) INFO: 8epoch:train:2401-2500batch: iter_time=0.001, forward_time=0.194, loss_ctc=110.156, loss_interctc_layer6=119.026, loss_interctc_layer12=103.006, loss_interctc_layer15=96.976, loss_interctc_layer21=111.316, loss=108.096, backward_time=0.329, grad_norm=85.160, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.139, optim0_lr0=1.495e-04, train_time=1.498 +[gpua002:0/64] 2024-01-17 16:02:13,347 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpua002:0/64] 2024-01-17 16:02:32,368 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua002:0/64] 2024-01-17 16:02:36,204 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpua002:0/64] 2024-01-17 16:02:36,204 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpua002:0/64] 2024-01-17 16:02:36,207 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpua002:0/64] 2024-01-17 16:11:08,166 (trainer:753) INFO: 8epoch:train:2501-2600batch: iter_time=3.362, forward_time=0.144, loss_ctc=100.733, loss_interctc_layer6=101.370, loss_interctc_layer12=87.243, loss_interctc_layer15=81.750, loss_interctc_layer21=102.359, loss=94.691, backward_time=0.317, grad_norm=66.833, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.139, optim0_lr0=1.494e-04, train_time=5.547 +[gpua002:0/64] 2024-01-17 16:13:21,958 (trainer:753) INFO: 8epoch:train:2601-2700batch: iter_time=8.559e-05, forward_time=0.142, loss_ctc=101.503, loss_interctc_layer6=98.906, loss_interctc_layer12=83.811, loss_interctc_layer15=77.853, loss_interctc_layer21=102.959, loss=93.007, backward_time=0.297, grad_norm=51.539, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.138, optim0_lr0=1.493e-04, train_time=1.339 +[gpua002:0/64] 2024-01-17 16:16:02,224 (trainer:753) INFO: 8epoch:train:2701-2800batch: iter_time=8.687e-05, forward_time=0.143, loss_ctc=110.712, loss_interctc_layer6=105.051, loss_interctc_layer12=89.852, loss_interctc_layer15=83.648, loss_interctc_layer21=112.610, loss=100.375, backward_time=0.327, grad_norm=86.495, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.138, optim0_lr0=1.492e-04, train_time=1.602 +[gpua002:0/64] 2024-01-17 16:18:23,792 (trainer:753) INFO: 8epoch:train:2801-2900batch: iter_time=8.626e-05, forward_time=0.141, loss_ctc=98.153, loss_interctc_layer6=99.537, loss_interctc_layer12=84.738, loss_interctc_layer15=79.081, loss_interctc_layer21=99.537, loss=92.209, backward_time=0.308, grad_norm=79.373, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.138, optim0_lr0=1.492e-04, train_time=1.415 +[gpua002:0/64] 2024-01-17 16:21:14,325 (trainer:753) INFO: 8epoch:train:2901-3000batch: iter_time=8.932e-05, forward_time=0.145, loss_ctc=93.711, loss_interctc_layer6=93.367, loss_interctc_layer12=79.541, loss_interctc_layer15=73.936, loss_interctc_layer21=95.089, loss=87.129, backward_time=0.350, grad_norm=64.059, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.138, optim0_lr0=1.491e-04, train_time=1.705 +[gpua002:0/64] 2024-01-17 16:24:07,603 (trainer:753) INFO: 8epoch:train:3001-3100batch: iter_time=8.689e-05, forward_time=0.142, loss_ctc=93.773, loss_interctc_layer6=98.819, loss_interctc_layer12=84.461, loss_interctc_layer15=78.718, loss_interctc_layer21=95.179, loss=90.190, backward_time=0.380, grad_norm=73.811, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.138, optim0_lr0=1.490e-04, train_time=1.733 +[gpua002:0/64] 2024-01-17 16:28:13,016 (trainer:753) INFO: 8epoch:train:3101-3200batch: iter_time=9.213e-05, forward_time=0.142, loss_ctc=115.700, loss_interctc_layer6=105.978, loss_interctc_layer12=91.191, loss_interctc_layer15=84.609, loss_interctc_layer21=117.842, loss=103.064, backward_time=0.395, grad_norm=72.671, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.138, optim0_lr0=1.490e-04, train_time=2.454 +[gpua002:0/64] 2024-01-17 16:30:48,433 (trainer:753) INFO: 8epoch:train:3201-3300batch: iter_time=9.362e-05, forward_time=0.150, loss_ctc=111.948, loss_interctc_layer6=107.220, loss_interctc_layer12=93.746, loss_interctc_layer15=87.849, loss_interctc_layer21=113.219, loss=102.796, backward_time=0.316, grad_norm=68.645, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.140, optim0_lr0=1.489e-04, train_time=1.554 +[gpua002:0/64] 2024-01-17 16:34:35,630 (trainer:753) INFO: 8epoch:train:3301-3400batch: iter_time=8.870e-05, forward_time=0.303, loss_ctc=118.433, loss_interctc_layer6=106.974, loss_interctc_layer12=90.885, loss_interctc_layer15=84.390, loss_interctc_layer21=120.422, loss=104.221, backward_time=0.533, grad_norm=83.159, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.149, optim0_lr0=1.488e-04, train_time=2.272 +[gpua002:0/64] 2024-01-17 16:37:29,243 (trainer:753) INFO: 8epoch:train:3401-3500batch: iter_time=8.498e-05, forward_time=0.153, loss_ctc=112.971, loss_interctc_layer6=109.579, loss_interctc_layer12=93.850, loss_interctc_layer15=87.788, loss_interctc_layer21=114.491, loss=103.736, backward_time=0.369, grad_norm=65.812, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.138, optim0_lr0=1.488e-04, train_time=1.736 +[gpua002:0/64] 2024-01-17 16:40:13,006 (trainer:753) INFO: 8epoch:train:3501-3600batch: iter_time=8.041e-05, forward_time=0.142, loss_ctc=95.755, loss_interctc_layer6=100.890, loss_interctc_layer12=86.721, loss_interctc_layer15=81.323, loss_interctc_layer21=97.099, loss=92.358, backward_time=0.335, grad_norm=82.237, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.137, optim0_lr0=1.487e-04, train_time=1.637 +[gpua002:0/64] 2024-01-17 16:42:34,842 (trainer:753) INFO: 8epoch:train:3601-3700batch: iter_time=8.510e-05, forward_time=0.144, loss_ctc=106.333, loss_interctc_layer6=105.978, loss_interctc_layer12=91.716, loss_interctc_layer15=86.175, loss_interctc_layer21=107.742, loss=99.589, backward_time=0.308, grad_norm=66.897, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.138, optim0_lr0=1.486e-04, train_time=1.415 +[gpua002:0/64] 2024-01-17 16:44:04,236 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpua002:0/64] 2024-01-17 16:44:23,554 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua002:0/64] 2024-01-17 16:44:27,237 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpua002:0/64] 2024-01-17 16:44:27,238 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpua002:0/64] 2024-01-17 16:44:27,241 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpua002:0/64] 2024-01-17 16:51:07,941 (trainer:753) INFO: 8epoch:train:3701-3800batch: iter_time=3.382, forward_time=0.144, loss_ctc=100.662, loss_interctc_layer6=107.725, loss_interctc_layer12=92.236, loss_interctc_layer15=85.911, loss_interctc_layer21=102.208, loss=97.748, backward_time=0.317, grad_norm=64.341, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.138, optim0_lr0=1.486e-04, train_time=5.134 +[gpua002:0/64] 2024-01-17 16:53:16,140 (trainer:753) INFO: 8epoch:train:3801-3900batch: iter_time=8.131e-05, forward_time=0.142, loss_ctc=113.376, loss_interctc_layer6=108.617, loss_interctc_layer12=92.977, loss_interctc_layer15=86.809, loss_interctc_layer21=115.419, loss=103.440, backward_time=0.297, grad_norm=68.117, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.138, optim0_lr0=1.485e-04, train_time=1.282 +[gpua002:0/64] 2024-01-17 16:55:57,246 (trainer:753) INFO: 8epoch:train:3901-4000batch: iter_time=8.111e-05, forward_time=0.142, loss_ctc=95.643, loss_interctc_layer6=94.643, loss_interctc_layer12=80.641, loss_interctc_layer15=75.182, loss_interctc_layer21=97.097, loss=88.641, backward_time=0.381, grad_norm=75.836, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.138, optim0_lr0=1.484e-04, train_time=1.611 +[gpua002:0/64] 2024-01-17 16:59:45,913 (trainer:753) INFO: 8epoch:train:4001-4100batch: iter_time=8.794e-05, forward_time=0.141, loss_ctc=101.366, loss_interctc_layer6=97.055, loss_interctc_layer12=82.656, loss_interctc_layer15=76.981, loss_interctc_layer21=102.932, loss=92.198, backward_time=0.390, grad_norm=61.296, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.137, optim0_lr0=1.484e-04, train_time=2.286 +[gpua002:0/64] 2024-01-17 17:02:12,647 (trainer:753) INFO: 8epoch:train:4101-4200batch: iter_time=8.419e-05, forward_time=0.142, loss_ctc=102.726, loss_interctc_layer6=104.838, loss_interctc_layer12=89.805, loss_interctc_layer15=83.491, loss_interctc_layer21=104.328, loss=97.038, backward_time=0.300, grad_norm=68.667, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.138, optim0_lr0=1.483e-04, train_time=1.467 +[gpua002:0/64] 2024-01-17 17:05:44,847 (trainer:753) INFO: 8epoch:train:4201-4300batch: iter_time=7.944e-05, forward_time=0.141, loss_ctc=87.846, loss_interctc_layer6=84.757, loss_interctc_layer12=72.532, loss_interctc_layer15=67.171, loss_interctc_layer21=89.246, loss=80.310, backward_time=0.415, grad_norm=65.658, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.138, optim0_lr0=1.482e-04, train_time=2.122 +[gpua002:0/64] 2024-01-17 17:09:30,040 (trainer:753) INFO: 8epoch:train:4301-4400batch: iter_time=8.133e-05, forward_time=0.142, loss_ctc=107.904, loss_interctc_layer6=105.947, loss_interctc_layer12=90.833, loss_interctc_layer15=84.427, loss_interctc_layer21=109.747, loss=99.772, backward_time=0.489, grad_norm=73.890, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.138, optim0_lr0=1.481e-04, train_time=2.252 +[gpua002:0/64] 2024-01-17 17:14:04,881 (trainer:753) INFO: 8epoch:train:4401-4500batch: iter_time=8.898e-05, forward_time=0.286, loss_ctc=118.192, loss_interctc_layer6=109.702, loss_interctc_layer12=94.640, loss_interctc_layer15=88.398, loss_interctc_layer21=119.952, loss=106.177, backward_time=0.598, grad_norm=72.359, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.153, optim0_lr0=1.481e-04, train_time=2.748 +[gpua002:0/64] 2024-01-17 17:18:27,347 (trainer:753) INFO: 8epoch:train:4501-4600batch: iter_time=8.821e-05, forward_time=0.204, loss_ctc=102.776, loss_interctc_layer6=101.752, loss_interctc_layer12=87.753, loss_interctc_layer15=82.027, loss_interctc_layer21=104.622, loss=95.786, backward_time=0.469, grad_norm=76.282, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.138, optim0_lr0=1.480e-04, train_time=2.624 +[gpua002:0/64] 2024-01-17 17:21:15,366 (trainer:753) INFO: 8epoch:train:4601-4700batch: iter_time=8.765e-05, forward_time=0.142, loss_ctc=115.740, loss_interctc_layer6=111.885, loss_interctc_layer12=95.239, loss_interctc_layer15=88.085, loss_interctc_layer21=117.865, loss=105.763, backward_time=0.316, grad_norm=62.740, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.137, optim0_lr0=1.479e-04, train_time=1.680 +[gpua002:0/64] 2024-01-17 17:24:14,116 (trainer:753) INFO: 8epoch:train:4701-4800batch: iter_time=8.618e-05, forward_time=0.142, loss_ctc=103.519, loss_interctc_layer6=100.120, loss_interctc_layer12=86.400, loss_interctc_layer15=80.896, loss_interctc_layer21=105.093, loss=95.205, backward_time=0.427, grad_norm=71.932, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.137, optim0_lr0=1.479e-04, train_time=1.787 +[gpua002:0/64] 2024-01-17 17:27:07,636 (trainer:753) INFO: 8epoch:train:4801-4900batch: iter_time=8.337e-05, forward_time=0.142, loss_ctc=92.835, loss_interctc_layer6=97.961, loss_interctc_layer12=83.731, loss_interctc_layer15=77.860, loss_interctc_layer21=94.332, loss=89.344, backward_time=0.377, grad_norm=101.251, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.137, optim0_lr0=1.478e-04, train_time=1.732 +[gpua002:0/64] 2024-01-17 17:29:46,269 (trainer:753) INFO: 8epoch:train:4901-5000batch: iter_time=8.513e-05, forward_time=0.143, loss_ctc=110.718, loss_interctc_layer6=116.401, loss_interctc_layer12=100.990, loss_interctc_layer15=94.269, loss_interctc_layer21=112.425, loss=106.960, backward_time=0.314, grad_norm=77.197, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.137, optim0_lr0=1.477e-04, train_time=1.588 +[gpua002:0/64] 2024-01-17 17:30:06,300 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpua002:0/64] 2024-01-17 17:30:26,248 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua002:0/64] 2024-01-17 17:30:29,717 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpua002:0/64] 2024-01-17 17:30:29,718 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpua002:0/64] 2024-01-17 17:30:29,721 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpua002:0/64] 2024-01-17 17:38:23,503 (trainer:753) INFO: 8epoch:train:5001-5100batch: iter_time=3.142, forward_time=0.141, loss_ctc=95.133, loss_interctc_layer6=101.344, loss_interctc_layer12=87.330, loss_interctc_layer15=81.755, loss_interctc_layer21=96.648, loss=92.442, backward_time=0.346, grad_norm=66.159, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.137, optim0_lr0=1.477e-04, train_time=5.173 +[gpua002:0/64] 2024-01-17 17:41:05,761 (trainer:753) INFO: 8epoch:train:5101-5200batch: iter_time=8.599e-05, forward_time=0.141, loss_ctc=95.929, loss_interctc_layer6=97.583, loss_interctc_layer12=82.651, loss_interctc_layer15=76.717, loss_interctc_layer21=97.889, loss=90.154, backward_time=0.338, grad_norm=56.682, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.138, optim0_lr0=1.476e-04, train_time=1.622 +[gpua002:0/64] 2024-01-17 17:44:09,842 (trainer:753) INFO: 8epoch:train:5201-5300batch: iter_time=8.668e-05, forward_time=0.142, loss_ctc=105.750, loss_interctc_layer6=105.373, loss_interctc_layer12=90.191, loss_interctc_layer15=83.336, loss_interctc_layer21=107.329, loss=98.396, backward_time=0.332, grad_norm=71.982, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.137, optim0_lr0=1.475e-04, train_time=1.841 +[gpua002:0/64] 2024-01-17 17:47:09,362 (trainer:753) INFO: 8epoch:train:5301-5400batch: iter_time=1.029e-04, forward_time=0.141, loss_ctc=94.946, loss_interctc_layer6=98.343, loss_interctc_layer12=83.835, loss_interctc_layer15=78.210, loss_interctc_layer21=96.603, loss=90.387, backward_time=0.345, grad_norm=62.895, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.137, optim0_lr0=1.475e-04, train_time=1.795 +[gpua002:0/64] 2024-01-17 17:50:36,413 (trainer:753) INFO: 8epoch:train:5401-5500batch: iter_time=1.051e-04, forward_time=0.142, loss_ctc=91.344, loss_interctc_layer6=92.120, loss_interctc_layer12=78.293, loss_interctc_layer15=72.690, loss_interctc_layer21=92.923, loss=85.474, backward_time=0.382, grad_norm=61.797, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.138, optim0_lr0=1.474e-04, train_time=2.070 +[gpua002:0/64] 2024-01-17 17:53:19,443 (trainer:753) INFO: 8epoch:train:5501-5600batch: iter_time=1.021e-04, forward_time=0.236, loss_ctc=88.229, loss_interctc_layer6=97.271, loss_interctc_layer12=83.220, loss_interctc_layer15=77.352, loss_interctc_layer21=89.373, loss=87.089, backward_time=0.355, grad_norm=64.833, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.149, optim0_lr0=1.473e-04, train_time=1.628 +[gpua002:0/64] 2024-01-17 17:55:49,626 (trainer:753) INFO: 8epoch:train:5601-5700batch: iter_time=8.950e-05, forward_time=0.161, loss_ctc=108.835, loss_interctc_layer6=105.047, loss_interctc_layer12=89.986, loss_interctc_layer15=83.737, loss_interctc_layer21=110.467, loss=99.614, backward_time=0.346, grad_norm=65.056, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.143, optim0_lr0=1.473e-04, train_time=1.504 +[gpua002:0/64] 2024-01-17 17:59:29,375 (trainer:753) INFO: 8epoch:train:5701-5800batch: iter_time=8.234e-05, forward_time=0.141, loss_ctc=107.716, loss_interctc_layer6=106.151, loss_interctc_layer12=92.156, loss_interctc_layer15=86.669, loss_interctc_layer21=109.398, loss=100.418, backward_time=0.377, grad_norm=76.292, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.137, optim0_lr0=1.472e-04, train_time=2.197 +[gpua002:0/64] 2024-01-17 18:03:51,344 (trainer:753) INFO: 8epoch:train:5801-5900batch: iter_time=8.296e-05, forward_time=0.144, loss_ctc=108.733, loss_interctc_layer6=105.597, loss_interctc_layer12=89.826, loss_interctc_layer15=82.982, loss_interctc_layer21=110.720, loss=99.571, backward_time=0.517, grad_norm=76.139, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.138, optim0_lr0=1.471e-04, train_time=2.619 +[gpua002:0/64] 2024-01-17 18:06:29,853 (trainer:753) INFO: 8epoch:train:5901-6000batch: iter_time=8.554e-05, forward_time=0.144, loss_ctc=105.721, loss_interctc_layer6=108.572, loss_interctc_layer12=92.909, loss_interctc_layer15=86.523, loss_interctc_layer21=107.674, loss=100.280, backward_time=0.311, grad_norm=70.775, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.137, optim0_lr0=1.471e-04, train_time=1.581 +[gpua002:0/64] 2024-01-17 18:09:36,584 (trainer:753) INFO: 8epoch:train:6001-6100batch: iter_time=8.828e-05, forward_time=0.142, loss_ctc=90.999, loss_interctc_layer6=100.300, loss_interctc_layer12=85.795, loss_interctc_layer15=80.086, loss_interctc_layer21=92.395, loss=89.915, backward_time=0.355, grad_norm=59.043, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.137, optim0_lr0=1.470e-04, train_time=1.870 +[gpua002:0/64] 2024-01-17 18:12:03,721 (trainer:753) INFO: 8epoch:train:6101-6200batch: iter_time=8.696e-05, forward_time=0.141, loss_ctc=100.744, loss_interctc_layer6=105.412, loss_interctc_layer12=90.992, loss_interctc_layer15=84.877, loss_interctc_layer21=102.224, loss=96.850, backward_time=0.304, grad_norm=66.023, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.137, optim0_lr0=1.469e-04, train_time=1.472 +[gpua002:0/64] 2024-01-17 18:14:21,917 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpua002:0/64] 2024-01-17 18:14:42,231 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua002:0/64] 2024-01-17 18:14:46,198 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpua002:0/64] 2024-01-17 18:14:46,198 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpua002:0/64] 2024-01-17 18:14:46,201 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpua002:0/64] 2024-01-17 18:21:53,665 (trainer:753) INFO: 8epoch:train:6201-6300batch: iter_time=3.259, forward_time=0.217, loss_ctc=97.855, loss_interctc_layer6=106.952, loss_interctc_layer12=92.096, loss_interctc_layer15=85.330, loss_interctc_layer21=99.462, loss=96.339, backward_time=0.375, grad_norm=89.579, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.139, optim0_lr0=1.469e-04, train_time=5.899 +[gpua002:0/64] 2024-01-17 18:24:01,657 (trainer:753) INFO: 8epoch:train:6301-6400batch: iter_time=8.550e-05, forward_time=0.143, loss_ctc=106.831, loss_interctc_layer6=107.629, loss_interctc_layer12=91.814, loss_interctc_layer15=85.515, loss_interctc_layer21=108.574, loss=100.072, backward_time=0.297, grad_norm=96.057, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.138, optim0_lr0=1.468e-04, train_time=1.280 +[gpua002:0/64] 2024-01-17 18:26:10,846 (trainer:753) INFO: 8epoch:train:6401-6500batch: iter_time=8.826e-05, forward_time=0.143, loss_ctc=88.654, loss_interctc_layer6=94.780, loss_interctc_layer12=80.755, loss_interctc_layer15=75.100, loss_interctc_layer21=90.365, loss=85.931, backward_time=0.300, grad_norm=59.704, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.138, optim0_lr0=1.467e-04, train_time=1.292 +[gpua002:0/64] 2024-01-17 18:28:51,939 (trainer:753) INFO: 8epoch:train:6501-6600batch: iter_time=1.008e-04, forward_time=0.142, loss_ctc=98.455, loss_interctc_layer6=96.446, loss_interctc_layer12=82.141, loss_interctc_layer15=76.340, loss_interctc_layer21=99.900, loss=90.656, backward_time=0.330, grad_norm=59.662, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.138, optim0_lr0=1.467e-04, train_time=1.611 +[gpua002:0/64] 2024-01-17 18:31:30,463 (trainer:753) INFO: 8epoch:train:6601-6700batch: iter_time=8.579e-05, forward_time=0.156, loss_ctc=97.606, loss_interctc_layer6=103.208, loss_interctc_layer12=88.243, loss_interctc_layer15=81.710, loss_interctc_layer21=99.288, loss=94.011, backward_time=0.313, grad_norm=64.608, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.138, optim0_lr0=1.466e-04, train_time=1.585 +[gpua002:0/64] 2024-01-17 18:33:57,460 (trainer:753) INFO: 8epoch:train:6701-6800batch: iter_time=7.982e-05, forward_time=0.156, loss_ctc=83.198, loss_interctc_layer6=84.707, loss_interctc_layer12=72.380, loss_interctc_layer15=67.058, loss_interctc_layer21=84.526, loss=78.374, backward_time=0.315, grad_norm=66.054, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.138, optim0_lr0=1.465e-04, train_time=1.470 +[gpua002:0/64] 2024-01-17 18:36:46,237 (trainer:753) INFO: 8epoch:train:6801-6900batch: iter_time=7.857e-05, forward_time=0.141, loss_ctc=101.340, loss_interctc_layer6=104.301, loss_interctc_layer12=88.627, loss_interctc_layer15=82.190, loss_interctc_layer21=102.493, loss=95.790, backward_time=0.332, grad_norm=64.915, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.136, optim0_lr0=1.465e-04, train_time=1.688 +[gpua002:0/64] 2024-01-17 18:39:50,331 (trainer:753) INFO: 8epoch:train:6901-7000batch: iter_time=9.199e-05, forward_time=0.142, loss_ctc=115.122, loss_interctc_layer6=108.144, loss_interctc_layer12=93.076, loss_interctc_layer15=87.317, loss_interctc_layer21=116.973, loss=104.127, backward_time=0.377, grad_norm=86.927, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.138, optim0_lr0=1.464e-04, train_time=1.841 +[gpua002:0/64] 2024-01-17 18:42:55,668 (trainer:753) INFO: 8epoch:train:7001-7100batch: iter_time=8.736e-05, forward_time=0.144, loss_ctc=96.135, loss_interctc_layer6=100.249, loss_interctc_layer12=86.373, loss_interctc_layer15=80.823, loss_interctc_layer21=97.749, loss=92.266, backward_time=0.364, grad_norm=65.880, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.138, optim0_lr0=1.464e-04, train_time=1.853 +[gpua002:0/64] 2024-01-17 18:45:50,005 (trainer:753) INFO: 8epoch:train:7101-7200batch: iter_time=8.114e-05, forward_time=0.142, loss_ctc=110.986, loss_interctc_layer6=112.373, loss_interctc_layer12=95.121, loss_interctc_layer15=88.150, loss_interctc_layer21=112.811, loss=103.888, backward_time=0.349, grad_norm=80.016, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.137, optim0_lr0=1.463e-04, train_time=1.741 +[gpua002:0/64] 2024-01-17 18:48:42,408 (trainer:753) INFO: 8epoch:train:7201-7300batch: iter_time=8.678e-05, forward_time=0.143, loss_ctc=96.166, loss_interctc_layer6=99.324, loss_interctc_layer12=85.676, loss_interctc_layer15=80.327, loss_interctc_layer21=97.646, loss=91.828, backward_time=0.370, grad_norm=58.741, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.137, optim0_lr0=1.462e-04, train_time=1.726 +[gpua002:0/64] 2024-01-17 18:51:40,032 (trainer:753) INFO: 8epoch:train:7301-7400batch: iter_time=8.168e-05, forward_time=0.141, loss_ctc=90.288, loss_interctc_layer6=97.666, loss_interctc_layer12=83.189, loss_interctc_layer15=77.511, loss_interctc_layer21=91.675, loss=88.066, backward_time=0.361, grad_norm=61.488, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.137, optim0_lr0=1.462e-04, train_time=1.776 +[gpua002:0/64] 2024-01-17 18:54:20,302 (trainer:753) INFO: 8epoch:train:7401-7500batch: iter_time=7.950e-05, forward_time=0.196, loss_ctc=105.598, loss_interctc_layer6=114.138, loss_interctc_layer12=98.553, loss_interctc_layer15=92.290, loss_interctc_layer21=107.115, loss=103.539, backward_time=0.358, grad_norm=72.271, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.144, optim0_lr0=1.461e-04, train_time=1.602 +[gpua002:0/64] 2024-01-17 18:54:40,332 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpua002:0/64] 2024-01-17 18:54:59,718 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua002:0/64] 2024-01-17 18:55:03,175 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpua002:0/64] 2024-01-17 18:55:03,175 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpua002:0/64] 2024-01-17 18:55:03,335 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpua002:0/64] 2024-01-17 19:02:39,910 (trainer:753) INFO: 8epoch:train:7501-7600batch: iter_time=3.562, forward_time=0.146, loss_ctc=93.043, loss_interctc_layer6=100.109, loss_interctc_layer12=85.983, loss_interctc_layer15=80.151, loss_interctc_layer21=94.639, loss=90.785, backward_time=0.302, grad_norm=57.903, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.138, optim0_lr0=1.460e-04, train_time=4.996 +[gpua002:0/64] 2024-01-17 19:04:47,437 (trainer:753) INFO: 8epoch:train:7601-7700batch: iter_time=8.073e-05, forward_time=0.142, loss_ctc=94.322, loss_interctc_layer6=96.506, loss_interctc_layer12=81.529, loss_interctc_layer15=75.535, loss_interctc_layer21=95.918, loss=88.762, backward_time=0.296, grad_norm=67.060, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.138, optim0_lr0=1.460e-04, train_time=1.275 +[gpua002:0/64] 2024-01-17 19:07:47,016 (trainer:753) INFO: 8epoch:train:7701-7800batch: iter_time=8.637e-05, forward_time=0.164, loss_ctc=103.297, loss_interctc_layer6=103.918, loss_interctc_layer12=88.341, loss_interctc_layer15=82.122, loss_interctc_layer21=105.311, loss=96.598, backward_time=0.384, grad_norm=159.888, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.141, optim0_lr0=1.459e-04, train_time=1.796 +[gpua002:0/64] 2024-01-17 19:10:47,072 (trainer:753) INFO: 8epoch:train:7801-7900batch: iter_time=8.500e-05, forward_time=0.142, loss_ctc=93.638, loss_interctc_layer6=97.669, loss_interctc_layer12=82.940, loss_interctc_layer15=77.121, loss_interctc_layer21=95.134, loss=89.301, backward_time=0.406, grad_norm=89.590, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.138, optim0_lr0=1.458e-04, train_time=1.800 +[gpua002:0/64] 2024-01-17 19:13:47,021 (trainer:753) INFO: 8epoch:train:7901-8000batch: iter_time=8.791e-05, forward_time=0.143, loss_ctc=90.201, loss_interctc_layer6=91.827, loss_interctc_layer12=77.862, loss_interctc_layer15=72.342, loss_interctc_layer21=91.740, loss=84.794, backward_time=0.373, grad_norm=50.278, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.138, optim0_lr0=1.458e-04, train_time=1.799 +[gpua002:0/64] 2024-01-17 19:16:55,930 (trainer:753) INFO: 8epoch:train:8001-8100batch: iter_time=8.284e-05, forward_time=0.142, loss_ctc=86.301, loss_interctc_layer6=96.278, loss_interctc_layer12=81.772, loss_interctc_layer15=76.034, loss_interctc_layer21=87.685, loss=85.614, backward_time=0.361, grad_norm=60.835, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.138, optim0_lr0=1.457e-04, train_time=1.886 +[gpua002:0/64] 2024-01-17 19:19:45,337 (trainer:753) INFO: 8epoch:train:8101-8200batch: iter_time=8.764e-05, forward_time=0.143, loss_ctc=108.230, loss_interctc_layer6=104.794, loss_interctc_layer12=89.207, loss_interctc_layer15=83.048, loss_interctc_layer21=110.402, loss=99.136, backward_time=0.394, grad_norm=75.927, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.137, optim0_lr0=1.456e-04, train_time=1.697 +[gpua002:0/64] 2024-01-17 19:22:37,818 (trainer:753) INFO: 8epoch:train:8201-8300batch: iter_time=9.313e-05, forward_time=0.155, loss_ctc=106.173, loss_interctc_layer6=104.967, loss_interctc_layer12=90.382, loss_interctc_layer15=84.853, loss_interctc_layer21=107.552, loss=98.786, backward_time=0.389, grad_norm=70.821, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.141, optim0_lr0=1.456e-04, train_time=1.725 +[gpua002:0/64] 2024-01-17 19:25:53,143 (trainer:753) INFO: 8epoch:train:8301-8400batch: iter_time=9.102e-05, forward_time=0.161, loss_ctc=109.019, loss_interctc_layer6=105.431, loss_interctc_layer12=89.502, loss_interctc_layer15=82.818, loss_interctc_layer21=110.577, loss=99.469, backward_time=0.397, grad_norm=69.663, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.140, optim0_lr0=1.455e-04, train_time=1.953 +[gpua002:0/64] 2024-01-17 19:29:02,222 (trainer:753) INFO: 8epoch:train:8401-8500batch: iter_time=8.869e-05, forward_time=0.142, loss_ctc=103.696, loss_interctc_layer6=106.857, loss_interctc_layer12=91.531, loss_interctc_layer15=85.605, loss_interctc_layer21=105.805, loss=98.699, backward_time=0.374, grad_norm=73.360, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.136, optim0_lr0=1.454e-04, train_time=1.891 +[gpua002:0/64] 2024-01-17 19:31:53,651 (trainer:753) INFO: 8epoch:train:8501-8600batch: iter_time=8.824e-05, forward_time=0.146, loss_ctc=90.273, loss_interctc_layer6=100.204, loss_interctc_layer12=85.809, loss_interctc_layer15=79.858, loss_interctc_layer21=91.621, loss=89.553, backward_time=0.370, grad_norm=61.750, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.137, optim0_lr0=1.454e-04, train_time=1.714 +[gpua002:0/64] 2024-01-17 19:35:01,792 (trainer:753) INFO: 8epoch:train:8601-8700batch: iter_time=8.773e-05, forward_time=0.143, loss_ctc=99.068, loss_interctc_layer6=104.403, loss_interctc_layer12=89.776, loss_interctc_layer15=83.827, loss_interctc_layer21=100.725, loss=95.560, backward_time=0.328, grad_norm=73.688, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.137, optim0_lr0=1.453e-04, train_time=1.881 +[gpua002:0/64] 2024-01-17 19:36:25,487 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpua002:0/64] 2024-01-17 19:36:44,507 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua002:0/64] 2024-01-17 19:36:47,941 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpua002:0/64] 2024-01-17 19:36:47,941 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpua002:0/64] 2024-01-17 19:36:48,030 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpua002:0/64] 2024-01-17 19:43:10,642 (trainer:753) INFO: 8epoch:train:8701-8800batch: iter_time=3.462, forward_time=0.170, loss_ctc=99.221, loss_interctc_layer6=106.058, loss_interctc_layer12=90.692, loss_interctc_layer15=84.821, loss_interctc_layer21=100.808, loss=96.320, backward_time=0.301, grad_norm=67.040, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.137, optim0_lr0=1.453e-04, train_time=4.888 +[gpua002:0/64] 2024-01-17 19:45:38,722 (trainer:753) INFO: 8epoch:train:8801-8900batch: iter_time=8.786e-05, forward_time=0.142, loss_ctc=111.160, loss_interctc_layer6=106.851, loss_interctc_layer12=90.989, loss_interctc_layer15=84.753, loss_interctc_layer21=113.389, loss=101.428, backward_time=0.392, grad_norm=61.750, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.137, optim0_lr0=1.452e-04, train_time=1.481 +[gpua002:0/64] 2024-01-17 19:48:25,313 (trainer:753) INFO: 8epoch:train:8901-9000batch: iter_time=8.943e-05, forward_time=0.142, loss_ctc=94.733, loss_interctc_layer6=94.307, loss_interctc_layer12=80.057, loss_interctc_layer15=74.145, loss_interctc_layer21=96.940, loss=88.036, backward_time=0.380, grad_norm=68.449, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.137, optim0_lr0=1.451e-04, train_time=1.666 +[gpua002:0/64] 2024-01-17 19:50:56,162 (trainer:753) INFO: 8epoch:train:9001-9100batch: iter_time=9.983e-05, forward_time=0.142, loss_ctc=100.370, loss_interctc_layer6=96.345, loss_interctc_layer12=81.964, loss_interctc_layer15=76.124, loss_interctc_layer21=102.234, loss=91.407, backward_time=0.313, grad_norm=57.637, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.137, optim0_lr0=1.451e-04, train_time=1.508 +[gpua002:0/64] 2024-01-17 19:53:20,670 (trainer:753) INFO: 8epoch:train:9101-9200batch: iter_time=9.907e-05, forward_time=0.142, loss_ctc=99.868, loss_interctc_layer6=102.703, loss_interctc_layer12=87.383, loss_interctc_layer15=81.176, loss_interctc_layer21=101.432, loss=94.513, backward_time=0.354, grad_norm=70.969, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.137, optim0_lr0=1.450e-04, train_time=1.445 +[gpua002:0/64] 2024-01-17 19:56:14,886 (trainer:753) INFO: 8epoch:train:9201-9300batch: iter_time=9.598e-05, forward_time=0.173, loss_ctc=84.726, loss_interctc_layer6=83.938, loss_interctc_layer12=71.279, loss_interctc_layer15=66.088, loss_interctc_layer21=86.363, loss=78.479, backward_time=0.418, grad_norm=59.873, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.140, optim0_lr0=1.449e-04, train_time=1.742 +[gpua002:0/64] 2024-01-17 19:59:07,558 (trainer:753) INFO: 8epoch:train:9301-9400batch: iter_time=8.924e-05, forward_time=0.142, loss_ctc=104.542, loss_interctc_layer6=103.364, loss_interctc_layer12=87.986, loss_interctc_layer15=81.413, loss_interctc_layer21=106.716, loss=96.804, backward_time=0.331, grad_norm=59.249, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.137, optim0_lr0=1.449e-04, train_time=1.726 +[gpua002:0/64] 2024-01-17 20:02:40,461 (trainer:753) INFO: 8epoch:train:9401-9500batch: iter_time=9.232e-05, forward_time=0.141, loss_ctc=118.042, loss_interctc_layer6=107.449, loss_interctc_layer12=92.611, loss_interctc_layer15=86.335, loss_interctc_layer21=119.447, loss=104.777, backward_time=0.462, grad_norm=87.085, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.138, optim0_lr0=1.448e-04, train_time=2.129 +[gpua002:0/64] 2024-01-17 20:05:17,220 (trainer:753) INFO: 8epoch:train:9501-9600batch: iter_time=9.304e-05, forward_time=0.149, loss_ctc=101.410, loss_interctc_layer6=100.072, loss_interctc_layer12=85.948, loss_interctc_layer15=80.499, loss_interctc_layer21=102.881, loss=94.162, backward_time=0.348, grad_norm=68.219, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.138, optim0_lr0=1.447e-04, train_time=1.567 +[gpua002:0/64] 2024-01-17 20:07:52,706 (trainer:753) INFO: 8epoch:train:9601-9700batch: iter_time=9.641e-05, forward_time=0.143, loss_ctc=114.792, loss_interctc_layer6=110.840, loss_interctc_layer12=93.703, loss_interctc_layer15=86.644, loss_interctc_layer21=116.679, loss=104.532, backward_time=0.353, grad_norm=62.694, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.137, optim0_lr0=1.447e-04, train_time=1.555 +[gpua002:0/64] 2024-01-17 20:10:28,039 (trainer:753) INFO: 8epoch:train:9701-9800batch: iter_time=9.196e-05, forward_time=0.143, loss_ctc=101.321, loss_interctc_layer6=98.285, loss_interctc_layer12=84.572, loss_interctc_layer15=79.112, loss_interctc_layer21=102.880, loss=93.234, backward_time=0.327, grad_norm=64.827, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.139, optim0_lr0=1.446e-04, train_time=1.552 +[gpua002:0/64] 2024-01-17 20:13:15,782 (trainer:753) INFO: 8epoch:train:9801-9900batch: iter_time=9.558e-05, forward_time=0.168, loss_ctc=92.275, loss_interctc_layer6=96.495, loss_interctc_layer12=82.061, loss_interctc_layer15=75.819, loss_interctc_layer21=93.999, loss=88.130, backward_time=0.395, grad_norm=75.852, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.141, optim0_lr0=1.446e-04, train_time=1.678 +[gpua002:0/64] 2024-01-17 20:16:03,080 (trainer:753) INFO: 8epoch:train:9901-10000batch: iter_time=1.004e-04, forward_time=0.142, loss_ctc=108.233, loss_interctc_layer6=113.200, loss_interctc_layer12=97.578, loss_interctc_layer15=91.523, loss_interctc_layer21=109.973, loss=104.101, backward_time=0.347, grad_norm=82.361, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.137, optim0_lr0=1.445e-04, train_time=1.673 +[gpua002:0/64] 2024-01-17 20:16:23,222 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpua002:0/64] 2024-01-17 20:16:41,928 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua002:0/64] 2024-01-17 20:16:45,422 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpua002:0/64] 2024-01-17 20:16:45,422 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpua002:0/64] 2024-01-17 20:16:45,559 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpua002:0/64] 2024-01-17 20:24:26,184 (trainer:753) INFO: 8epoch:train:10001-10100batch: iter_time=3.556, forward_time=0.164, loss_ctc=96.699, loss_interctc_layer6=99.280, loss_interctc_layer12=84.891, loss_interctc_layer15=79.368, loss_interctc_layer21=98.339, loss=91.715, backward_time=0.309, grad_norm=65.442, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.139, optim0_lr0=1.444e-04, train_time=5.031 +[gpua002:0/64] 2024-01-17 20:26:50,465 (trainer:753) INFO: 8epoch:train:10101-10200batch: iter_time=9.460e-05, forward_time=0.142, loss_ctc=98.392, loss_interctc_layer6=96.615, loss_interctc_layer12=81.831, loss_interctc_layer15=75.662, loss_interctc_layer21=100.213, loss=90.543, backward_time=0.312, grad_norm=50.372, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.137, optim0_lr0=1.444e-04, train_time=1.443 +[gpua002:0/64] 2024-01-17 20:29:56,691 (trainer:753) INFO: 8epoch:train:10201-10300batch: iter_time=9.008e-05, forward_time=0.142, loss_ctc=106.683, loss_interctc_layer6=102.607, loss_interctc_layer12=87.244, loss_interctc_layer15=80.741, loss_interctc_layer21=108.879, loss=97.231, backward_time=0.346, grad_norm=69.196, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.137, optim0_lr0=1.443e-04, train_time=1.862 +[gpua002:0/64] 2024-01-17 20:32:42,642 (trainer:753) INFO: 8epoch:train:10301-10400batch: iter_time=9.550e-05, forward_time=0.149, loss_ctc=94.260, loss_interctc_layer6=96.472, loss_interctc_layer12=82.371, loss_interctc_layer15=75.863, loss_interctc_layer21=95.876, loss=88.968, backward_time=0.325, grad_norm=64.232, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.138, optim0_lr0=1.442e-04, train_time=1.659 +[gpua002:0/64] 2024-01-17 20:35:28,324 (trainer:753) INFO: 8epoch:train:10401-10500batch: iter_time=9.590e-05, forward_time=0.142, loss_ctc=90.780, loss_interctc_layer6=90.705, loss_interctc_layer12=76.776, loss_interctc_layer15=71.207, loss_interctc_layer21=92.301, loss=84.354, backward_time=0.344, grad_norm=70.641, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.137, optim0_lr0=1.442e-04, train_time=1.657 +[gpua002:0/64] 2024-01-17 20:38:34,680 (trainer:753) INFO: 8epoch:train:10501-10600batch: iter_time=9.518e-05, forward_time=0.157, loss_ctc=90.686, loss_interctc_layer6=96.643, loss_interctc_layer12=82.047, loss_interctc_layer15=76.263, loss_interctc_layer21=92.481, loss=87.624, backward_time=0.490, grad_norm=102.046, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.142, optim0_lr0=1.441e-04, train_time=1.861 +[gpua002:0/64] 2024-01-17 20:41:25,909 (trainer:753) INFO: 8epoch:train:10601-10700batch: iter_time=9.849e-05, forward_time=0.166, loss_ctc=113.625, loss_interctc_layer6=104.154, loss_interctc_layer12=88.893, loss_interctc_layer15=82.408, loss_interctc_layer21=115.473, loss=100.911, backward_time=0.369, grad_norm=70.271, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.137, optim0_lr0=1.441e-04, train_time=1.713 +[gpua002:0/64] 2024-01-17 20:44:34,923 (trainer:753) INFO: 8epoch:train:10701-10800batch: iter_time=8.728e-05, forward_time=0.209, loss_ctc=105.471, loss_interctc_layer6=102.853, loss_interctc_layer12=88.641, loss_interctc_layer15=83.223, loss_interctc_layer21=107.442, loss=97.526, backward_time=0.436, grad_norm=74.115, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.142, optim0_lr0=1.440e-04, train_time=1.891 +[gpua002:0/64] 2024-01-17 20:47:24,762 (trainer:753) INFO: 8epoch:train:10801-10900batch: iter_time=9.521e-05, forward_time=0.142, loss_ctc=114.639, loss_interctc_layer6=104.568, loss_interctc_layer12=88.711, loss_interctc_layer15=82.155, loss_interctc_layer21=116.802, loss=101.375, backward_time=0.340, grad_norm=63.841, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.137, optim0_lr0=1.439e-04, train_time=1.698 +[gpua002:0/64] 2024-01-17 20:50:03,874 (trainer:753) INFO: 8epoch:train:10901-11000batch: iter_time=8.909e-05, forward_time=0.143, loss_ctc=108.470, loss_interctc_layer6=106.486, loss_interctc_layer12=90.935, loss_interctc_layer15=84.843, loss_interctc_layer21=110.664, loss=100.279, backward_time=0.332, grad_norm=75.573, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.137, optim0_lr0=1.439e-04, train_time=1.591 +[gpua002:0/64] 2024-01-17 20:53:26,093 (trainer:753) INFO: 8epoch:train:11001-11100batch: iter_time=9.357e-05, forward_time=0.142, loss_ctc=92.380, loss_interctc_layer6=99.264, loss_interctc_layer12=84.558, loss_interctc_layer15=78.721, loss_interctc_layer21=93.771, loss=89.739, backward_time=0.432, grad_norm=75.394, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.137, optim0_lr0=1.438e-04, train_time=2.022 +[gpua002:0/64] 2024-01-17 20:56:15,085 (trainer:753) INFO: 8epoch:train:11101-11200batch: iter_time=9.082e-05, forward_time=0.141, loss_ctc=102.137, loss_interctc_layer6=103.631, loss_interctc_layer12=88.892, loss_interctc_layer15=82.779, loss_interctc_layer21=103.762, loss=96.240, backward_time=0.383, grad_norm=72.109, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.137, optim0_lr0=1.437e-04, train_time=1.690 +[gpua002:0/64] 2024-01-17 20:57:55,567 (multiple_iter_factory:32) INFO: Building 9th iter-factory... +[gpua002:0/64] 2024-01-17 20:58:14,508 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua002:0/64] 2024-01-17 20:58:17,961 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpua002:0/64] 2024-01-17 20:58:17,961 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpua002:0/64] 2024-01-17 20:58:17,993 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpua002:0/64] 2024-01-17 21:04:38,177 (trainer:753) INFO: 8epoch:train:11201-11300batch: iter_time=3.414, forward_time=0.181, loss_ctc=99.272, loss_interctc_layer6=106.166, loss_interctc_layer12=90.747, loss_interctc_layer15=84.701, loss_interctc_layer21=100.915, loss=96.360, backward_time=0.321, grad_norm=70.237, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.139, optim0_lr0=1.437e-04, train_time=5.030 +[gpua002:0/64] 2024-01-17 21:06:45,180 (trainer:753) INFO: 8epoch:train:11301-11400batch: iter_time=8.644e-05, forward_time=0.142, loss_ctc=109.371, loss_interctc_layer6=106.265, loss_interctc_layer12=90.421, loss_interctc_layer15=83.893, loss_interctc_layer21=111.849, loss=100.360, backward_time=0.296, grad_norm=72.191, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.137, optim0_lr0=1.436e-04, train_time=1.271 +[gpua002:0/64] 2024-01-17 21:09:36,131 (trainer:753) INFO: 8epoch:train:11401-11500batch: iter_time=9.439e-05, forward_time=0.142, loss_ctc=92.902, loss_interctc_layer6=93.115, loss_interctc_layer12=79.131, loss_interctc_layer15=73.478, loss_interctc_layer21=94.816, loss=86.688, backward_time=0.328, grad_norm=65.836, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.137, optim0_lr0=1.436e-04, train_time=1.709 +[gpua002:0/64] 2024-01-17 21:12:26,688 (trainer:753) INFO: 8epoch:train:11501-11600batch: iter_time=9.762e-05, forward_time=0.142, loss_ctc=99.037, loss_interctc_layer6=95.053, loss_interctc_layer12=80.470, loss_interctc_layer15=74.719, loss_interctc_layer21=100.638, loss=89.983, backward_time=0.354, grad_norm=57.043, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.137, optim0_lr0=1.435e-04, train_time=1.705 +[gpua002:0/64] 2024-01-17 21:15:35,382 (trainer:753) INFO: 8epoch:train:11601-11700batch: iter_time=9.550e-05, forward_time=0.143, loss_ctc=98.666, loss_interctc_layer6=102.171, loss_interctc_layer12=86.492, loss_interctc_layer15=80.443, loss_interctc_layer21=100.745, loss=93.703, backward_time=0.421, grad_norm=73.197, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.137, optim0_lr0=1.434e-04, train_time=1.887 +[gpua002:0/64] 2024-01-17 21:18:51,061 (trainer:753) INFO: 8epoch:train:11701-11800batch: iter_time=9.555e-05, forward_time=0.141, loss_ctc=85.796, loss_interctc_layer6=83.374, loss_interctc_layer12=70.738, loss_interctc_layer15=65.744, loss_interctc_layer21=87.417, loss=78.614, backward_time=0.361, grad_norm=60.740, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.137, optim0_lr0=1.434e-04, train_time=1.957 +[gpua002:0/64] 2024-01-17 21:22:30,575 (trainer:753) INFO: 8epoch:train:11801-11900batch: iter_time=9.349e-05, forward_time=0.163, loss_ctc=104.777, loss_interctc_layer6=103.239, loss_interctc_layer12=87.652, loss_interctc_layer15=81.244, loss_interctc_layer21=106.352, loss=96.653, backward_time=0.384, grad_norm=61.799, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.140, optim0_lr0=1.433e-04, train_time=2.195 +[gpua002:0/64] 2024-01-17 21:26:05,821 (trainer:753) INFO: 8epoch:train:11901-12000batch: iter_time=6.791e-04, forward_time=0.223, loss_ctc=113.910, loss_interctc_layer6=106.845, loss_interctc_layer12=91.641, loss_interctc_layer15=85.516, loss_interctc_layer21=116.174, loss=102.817, backward_time=0.530, grad_norm=82.616, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.144, optim0_lr0=1.433e-04, train_time=2.151 +[gpua002:0/64] 2024-01-17 21:28:43,985 (trainer:753) INFO: 8epoch:train:12001-12100batch: iter_time=1.164e-04, forward_time=0.142, loss_ctc=99.447, loss_interctc_layer6=99.300, loss_interctc_layer12=85.115, loss_interctc_layer15=79.490, loss_interctc_layer21=101.058, loss=92.882, backward_time=0.334, grad_norm=69.086, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.137, optim0_lr0=1.432e-04, train_time=1.583 +[gpua002:0/64] 2024-01-17 21:31:24,933 (trainer:753) INFO: 8epoch:train:12101-12200batch: iter_time=1.002e-04, forward_time=0.143, loss_ctc=113.032, loss_interctc_layer6=110.720, loss_interctc_layer12=93.749, loss_interctc_layer15=86.588, loss_interctc_layer21=115.464, loss=103.910, backward_time=0.357, grad_norm=70.279, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.138, optim0_lr0=1.431e-04, train_time=1.609 +[gpua002:0/64] 2024-01-17 21:35:04,296 (trainer:753) INFO: 8epoch:train:12201-12300batch: iter_time=1.005e-04, forward_time=0.142, loss_ctc=100.044, loss_interctc_layer6=98.067, loss_interctc_layer12=84.118, loss_interctc_layer15=78.671, loss_interctc_layer21=101.890, loss=92.558, backward_time=0.402, grad_norm=66.837, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.138, optim0_lr0=1.431e-04, train_time=2.193 +[gpua002:0/64] 2024-01-17 21:38:05,597 (trainer:753) INFO: 8epoch:train:12301-12400batch: iter_time=9.676e-05, forward_time=0.142, loss_ctc=90.163, loss_interctc_layer6=96.311, loss_interctc_layer12=81.601, loss_interctc_layer15=75.754, loss_interctc_layer21=91.700, loss=87.106, backward_time=0.432, grad_norm=64.743, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.138, optim0_lr0=1.430e-04, train_time=1.813 +[gpua002:0/64] 2024-01-17 21:41:37,653 (trainer:753) INFO: 8epoch:train:12401-12500batch: iter_time=9.110e-05, forward_time=0.142, loss_ctc=107.208, loss_interctc_layer6=112.895, loss_interctc_layer12=97.512, loss_interctc_layer15=91.753, loss_interctc_layer21=109.139, loss=103.701, backward_time=0.459, grad_norm=76.293, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.137, optim0_lr0=1.429e-04, train_time=2.120 +[gpua002:0/64] 2024-01-17 21:41:57,683 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpua002:0/64] 2024-01-17 21:42:17,052 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua002:0/64] 2024-01-17 21:42:20,556 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpua002:0/64] 2024-01-17 21:42:20,556 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpua002:0/64] 2024-01-17 21:42:20,751 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpua002:0/64] 2024-01-17 21:50:52,236 (trainer:753) INFO: 8epoch:train:12501-12600batch: iter_time=3.794, forward_time=0.263, loss_ctc=91.334, loss_interctc_layer6=98.624, loss_interctc_layer12=84.034, loss_interctc_layer15=78.188, loss_interctc_layer21=93.238, loss=89.084, backward_time=0.330, grad_norm=95.094, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.140, optim0_lr0=1.429e-04, train_time=5.545 +[gpua002:0/64] 2024-01-17 21:54:04,988 (trainer:753) INFO: 8epoch:train:12601-12700batch: iter_time=8.752e-05, forward_time=0.142, loss_ctc=94.336, loss_interctc_layer6=96.259, loss_interctc_layer12=81.312, loss_interctc_layer15=75.213, loss_interctc_layer21=96.258, loss=88.676, backward_time=0.374, grad_norm=55.004, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.138, optim0_lr0=1.428e-04, train_time=1.928 +[gpua002:0/64] 2024-01-17 21:57:28,656 (trainer:753) INFO: 8epoch:train:12701-12800batch: iter_time=8.842e-05, forward_time=0.142, loss_ctc=101.939, loss_interctc_layer6=102.438, loss_interctc_layer12=86.795, loss_interctc_layer15=80.814, loss_interctc_layer21=103.813, loss=95.160, backward_time=0.470, grad_norm=119.383, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.138, optim0_lr0=1.428e-04, train_time=2.036 +[gpua002:0/64] 2024-01-17 22:01:14,264 (trainer:753) INFO: 8epoch:train:12801-12900batch: iter_time=8.978e-05, forward_time=0.141, loss_ctc=93.121, loss_interctc_layer6=97.071, loss_interctc_layer12=82.494, loss_interctc_layer15=76.457, loss_interctc_layer21=94.850, loss=88.799, backward_time=0.466, grad_norm=59.082, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.138, optim0_lr0=1.427e-04, train_time=2.256 +[gpua002:0/64] 2024-01-17 22:04:16,337 (trainer:753) INFO: 8epoch:train:12901-13000batch: iter_time=8.538e-05, forward_time=0.142, loss_ctc=88.525, loss_interctc_layer6=90.255, loss_interctc_layer12=76.293, loss_interctc_layer15=70.448, loss_interctc_layer21=89.930, loss=83.090, backward_time=0.335, grad_norm=61.529, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.138, optim0_lr0=1.426e-04, train_time=1.821 +[gpua002:0/64] 2024-01-17 22:07:57,593 (trainer:753) INFO: 8epoch:train:13001-13100batch: iter_time=8.636e-05, forward_time=0.141, loss_ctc=85.679, loss_interctc_layer6=95.824, loss_interctc_layer12=81.574, loss_interctc_layer15=75.794, loss_interctc_layer21=87.138, loss=85.202, backward_time=0.391, grad_norm=65.809, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.138, optim0_lr0=1.426e-04, train_time=2.212 +[gpua002:0/64] 2024-01-17 22:10:12,240 (trainer:753) INFO: 8epoch:train:13101-13200batch: iter_time=8.342e-05, forward_time=0.142, loss_ctc=107.520, loss_interctc_layer6=103.530, loss_interctc_layer12=88.014, loss_interctc_layer15=81.601, loss_interctc_layer21=109.701, loss=98.073, backward_time=0.301, grad_norm=83.065, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.138, optim0_lr0=1.425e-04, train_time=1.346 +[gpua002:0/64] 2024-01-17 22:13:42,708 (trainer:753) INFO: 8epoch:train:13201-13300batch: iter_time=8.632e-05, forward_time=0.143, loss_ctc=104.551, loss_interctc_layer6=103.226, loss_interctc_layer12=89.159, loss_interctc_layer15=83.207, loss_interctc_layer21=106.093, loss=97.247, backward_time=0.453, grad_norm=73.055, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.138, optim0_lr0=1.425e-04, train_time=2.104 +[gpua002:0/64] 2024-01-17 22:17:24,490 (trainer:753) INFO: 8epoch:train:13301-13400batch: iter_time=7.287e-04, forward_time=0.210, loss_ctc=105.644, loss_interctc_layer6=103.575, loss_interctc_layer12=87.258, loss_interctc_layer15=80.748, loss_interctc_layer21=107.872, loss=97.019, backward_time=0.405, grad_norm=68.244, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.144, optim0_lr0=1.424e-04, train_time=2.214 +[gpua002:0/64] 2024-01-17 22:20:29,671 (trainer:753) INFO: 8epoch:train:13401-13500batch: iter_time=8.722e-05, forward_time=0.143, loss_ctc=101.593, loss_interctc_layer6=106.455, loss_interctc_layer12=90.730, loss_interctc_layer15=84.288, loss_interctc_layer21=103.699, loss=97.353, backward_time=0.402, grad_norm=84.758, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.138, optim0_lr0=1.423e-04, train_time=1.855 +[gpua002:0/64] 2024-01-17 22:23:34,387 (trainer:753) INFO: 8epoch:train:13501-13600batch: iter_time=8.384e-05, forward_time=0.141, loss_ctc=90.329, loss_interctc_layer6=99.406, loss_interctc_layer12=84.796, loss_interctc_layer15=78.937, loss_interctc_layer21=91.707, loss=89.035, backward_time=0.444, grad_norm=74.220, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.138, optim0_lr0=1.423e-04, train_time=1.847 +[gpua002:0/64] 2024-01-17 22:26:29,883 (trainer:753) INFO: 8epoch:train:13601-13700batch: iter_time=8.446e-05, forward_time=0.141, loss_ctc=96.165, loss_interctc_layer6=103.175, loss_interctc_layer12=88.373, loss_interctc_layer15=82.323, loss_interctc_layer21=97.979, loss=93.603, backward_time=0.375, grad_norm=89.676, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.139, optim0_lr0=1.422e-04, train_time=1.755 +[gpua002:0/64] 2024-01-17 22:28:27,756 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpua002:0/64] 2024-01-17 22:28:46,761 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua002:0/64] 2024-01-17 22:28:50,242 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpua002:0/64] 2024-01-17 22:28:50,242 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpua002:0/64] 2024-01-17 22:28:50,255 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpua002:0/64] 2024-01-17 22:39:23,507 (trainer:753) INFO: 8epoch:train:13701-13800batch: iter_time=3.353, forward_time=0.206, loss_ctc=97.674, loss_interctc_layer6=104.710, loss_interctc_layer12=89.076, loss_interctc_layer15=83.390, loss_interctc_layer21=99.070, loss=94.784, backward_time=0.334, grad_norm=64.375, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.141, optim0_lr0=1.422e-04, train_time=7.736 +[gpua002:0/64] 2024-01-17 22:41:50,194 (trainer:753) INFO: 8epoch:train:13801-13900batch: iter_time=7.795e-05, forward_time=0.143, loss_ctc=108.802, loss_interctc_layer6=106.079, loss_interctc_layer12=90.156, loss_interctc_layer15=83.526, loss_interctc_layer21=111.138, loss=99.940, backward_time=0.335, grad_norm=61.427, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.138, optim0_lr0=1.421e-04, train_time=1.467 +[gpua002:0/64] 2024-01-17 22:44:38,033 (trainer:753) INFO: 8epoch:train:13901-14000batch: iter_time=8.986e-05, forward_time=0.143, loss_ctc=92.023, loss_interctc_layer6=92.347, loss_interctc_layer12=78.064, loss_interctc_layer15=72.530, loss_interctc_layer21=93.849, loss=85.763, backward_time=0.373, grad_norm=58.464, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.138, optim0_lr0=1.420e-04, train_time=1.678 +[gpua002:0/64] 2024-01-17 22:47:41,987 (trainer:753) INFO: 8epoch:train:14001-14100batch: iter_time=9.686e-05, forward_time=0.143, loss_ctc=98.918, loss_interctc_layer6=94.836, loss_interctc_layer12=80.197, loss_interctc_layer15=74.371, loss_interctc_layer21=100.905, loss=89.845, backward_time=0.379, grad_norm=51.696, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.139, optim0_lr0=1.420e-04, train_time=1.839 +[gpua002:0/64] 2024-01-17 22:50:53,772 (trainer:753) INFO: 8epoch:train:14101-14200batch: iter_time=9.646e-05, forward_time=0.145, loss_ctc=98.779, loss_interctc_layer6=101.649, loss_interctc_layer12=86.401, loss_interctc_layer15=80.065, loss_interctc_layer21=100.062, loss=93.391, backward_time=0.447, grad_norm=67.558, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.139, optim0_lr0=1.419e-04, train_time=1.916 +[gpua002:0/64] 2024-01-17 22:53:52,138 (trainer:753) INFO: 8epoch:train:14201-14300batch: iter_time=9.348e-05, forward_time=0.142, loss_ctc=84.904, loss_interctc_layer6=82.510, loss_interctc_layer12=69.799, loss_interctc_layer15=64.548, loss_interctc_layer21=86.485, loss=77.649, backward_time=0.390, grad_norm=71.165, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.138, optim0_lr0=1.419e-04, train_time=1.785 +[gpua002:0/64] 2024-01-17 22:57:05,369 (trainer:753) INFO: 8epoch:train:14301-14400batch: iter_time=8.201e-05, forward_time=0.142, loss_ctc=103.718, loss_interctc_layer6=102.415, loss_interctc_layer12=86.951, loss_interctc_layer15=80.681, loss_interctc_layer21=105.439, loss=95.841, backward_time=0.410, grad_norm=89.180, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.137, optim0_lr0=1.418e-04, train_time=1.931 +[gpua002:0/64] 2024-01-17 22:59:51,958 (trainer:753) INFO: 8epoch:train:14401-14500batch: iter_time=8.299e-05, forward_time=0.141, loss_ctc=113.802, loss_interctc_layer6=105.502, loss_interctc_layer12=90.179, loss_interctc_layer15=83.983, loss_interctc_layer21=115.904, loss=101.874, backward_time=0.413, grad_norm=69.144, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.137, optim0_lr0=1.417e-04, train_time=1.666 +[gpua002:0/64] 2024-01-17 23:02:35,444 (trainer:753) INFO: 8epoch:train:14501-14600batch: iter_time=2.421e-04, forward_time=0.147, loss_ctc=99.130, loss_interctc_layer6=99.450, loss_interctc_layer12=85.360, loss_interctc_layer15=79.420, loss_interctc_layer21=100.672, loss=92.807, backward_time=0.334, grad_norm=62.364, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.137, optim0_lr0=1.417e-04, train_time=1.635 +[gpua002:0/64] 2024-01-17 23:05:20,101 (trainer:753) INFO: 8epoch:train:14601-14700batch: iter_time=8.163e-05, forward_time=0.247, loss_ctc=113.548, loss_interctc_layer6=110.573, loss_interctc_layer12=93.320, loss_interctc_layer15=86.136, loss_interctc_layer21=115.987, loss=103.913, backward_time=0.362, grad_norm=69.025, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.140, optim0_lr0=1.416e-04, train_time=1.646 +[gpua002:0/64] 2024-01-17 23:08:53,831 (trainer:753) INFO: 8epoch:train:14701-14800batch: iter_time=9.114e-05, forward_time=0.144, loss_ctc=99.551, loss_interctc_layer6=97.554, loss_interctc_layer12=83.530, loss_interctc_layer15=77.858, loss_interctc_layer21=101.274, loss=91.953, backward_time=0.526, grad_norm=64.058, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.139, optim0_lr0=1.416e-04, train_time=2.138 +[gpua002:0/64] 2024-01-17 23:12:20,459 (trainer:753) INFO: 8epoch:train:14801-14900batch: iter_time=8.966e-05, forward_time=0.142, loss_ctc=88.799, loss_interctc_layer6=95.033, loss_interctc_layer12=80.520, loss_interctc_layer15=74.640, loss_interctc_layer21=90.352, loss=85.869, backward_time=0.386, grad_norm=62.973, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.138, optim0_lr0=1.415e-04, train_time=2.066 +[gpua002:0/64] 2024-01-17 23:14:57,054 (trainer:753) INFO: 8epoch:train:14901-15000batch: iter_time=8.479e-05, forward_time=0.142, loss_ctc=105.779, loss_interctc_layer6=112.915, loss_interctc_layer12=96.707, loss_interctc_layer15=90.516, loss_interctc_layer21=107.733, loss=102.730, backward_time=0.354, grad_norm=81.759, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.138, optim0_lr0=1.414e-04, train_time=1.566 +[gpua002:0/64] 2024-01-17 23:46:58,944 (trainer:352) INFO: 8epoch results: [train] iter_time=0.270, forward_time=0.154, loss_ctc=101.137, loss_interctc_layer6=102.008, loss_interctc_layer12=87.242, loss_interctc_layer15=81.271, loss_interctc_layer21=102.810, loss=94.894, backward_time=0.369, grad_norm=70.842, clip=100.000, loss_scale=1.002e+22, optim_step_time=0.138, optim0_lr0=1.461e-04, train_time=2.073, time=8 hours, 38 minutes and 36.59 seconds, total_count=120000, gpu_max_cached_mem_GB=34.400, [valid] loss_ctc=67.476, cer_ctc=0.310, loss_interctc_layer6=66.416, cer_interctc_layer6=0.297, loss_interctc_layer12=54.867, cer_interctc_layer12=0.235, loss_interctc_layer15=50.842, cer_interctc_layer15=0.208, loss_interctc_layer21=69.092, cer_interctc_layer21=0.314, loss=61.739, time=31 minutes and 37.84 seconds, total_count=37368, gpu_max_cached_mem_GB=34.400 +[gpua002:0/64] 2024-01-17 23:47:19,059 (trainer:407) INFO: The best model has been updated: valid.cer_ctc, valid.loss_ctc, valid.total_count +[gpua002:0/64] 2024-01-17 23:47:19,211 (trainer:461) INFO: The model files were removed: exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/3epoch.pth +[gpua002:0/64] 2024-01-17 23:47:19,294 (trainer:286) INFO: 9/45epoch started. Estimated time to finish: 2 weeks, 1 day and 20 hours +[gpua002:0/64] 2024-01-17 23:47:19,411 (multiple_iter_factory:32) INFO: Building 0th iter-factory... +[gpua002:0/64] 2024-01-17 23:47:37,832 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua002:0/64] 2024-01-17 23:47:41,291 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpua002:0/64] 2024-01-17 23:47:41,291 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpua002:0/64] 2024-01-17 23:47:41,294 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpua002:0/64] 2024-01-17 23:55:33,559 (trainer:753) INFO: 9epoch:train:1-100batch: iter_time=3.129, forward_time=0.173, loss_ctc=97.970, loss_interctc_layer6=97.010, loss_interctc_layer12=82.543, loss_interctc_layer15=76.695, loss_interctc_layer21=99.733, loss=90.790, backward_time=0.307, grad_norm=70.268, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.140, optim0_lr0=1.414e-04, train_time=4.941 +[gpua002:0/64] 2024-01-17 23:57:46,862 (trainer:753) INFO: 9epoch:train:101-200batch: iter_time=8.907e-05, forward_time=0.141, loss_ctc=96.369, loss_interctc_layer6=103.674, loss_interctc_layer12=88.642, loss_interctc_layer15=82.585, loss_interctc_layer21=98.289, loss=93.912, backward_time=0.317, grad_norm=58.961, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.138, optim0_lr0=1.413e-04, train_time=1.333 +[gpua002:0/64] 2024-01-18 00:00:23,384 (trainer:753) INFO: 9epoch:train:201-300batch: iter_time=8.297e-05, forward_time=0.142, loss_ctc=127.874, loss_interctc_layer6=116.516, loss_interctc_layer12=99.837, loss_interctc_layer15=93.098, loss_interctc_layer21=130.724, loss=113.610, backward_time=0.356, grad_norm=78.913, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.138, optim0_lr0=1.413e-04, train_time=1.565 +[gpua002:0/64] 2024-01-18 00:02:41,384 (trainer:753) INFO: 9epoch:train:301-400batch: iter_time=9.425e-05, forward_time=0.144, loss_ctc=108.774, loss_interctc_layer6=118.997, loss_interctc_layer12=102.397, loss_interctc_layer15=95.752, loss_interctc_layer21=110.654, loss=107.315, backward_time=0.313, grad_norm=106.663, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.138, optim0_lr0=1.412e-04, train_time=1.378 +[gpua002:0/64] 2024-01-18 00:05:01,349 (trainer:753) INFO: 9epoch:train:401-500batch: iter_time=9.745e-05, forward_time=0.143, loss_ctc=88.018, loss_interctc_layer6=94.130, loss_interctc_layer12=80.502, loss_interctc_layer15=74.794, loss_interctc_layer21=89.585, loss=85.406, backward_time=0.298, grad_norm=73.094, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.139, optim0_lr0=1.412e-04, train_time=1.401 +[gpua002:0/64] 2024-01-18 00:07:52,758 (trainer:753) INFO: 9epoch:train:501-600batch: iter_time=1.072e-04, forward_time=0.141, loss_ctc=92.581, loss_interctc_layer6=99.310, loss_interctc_layer12=85.850, loss_interctc_layer15=79.975, loss_interctc_layer21=94.162, loss=90.376, backward_time=0.370, grad_norm=66.431, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.138, optim0_lr0=1.411e-04, train_time=1.714 +[gpua002:0/64] 2024-01-18 00:10:23,621 (trainer:753) INFO: 9epoch:train:601-700batch: iter_time=1.002e-04, forward_time=0.141, loss_ctc=89.389, loss_interctc_layer6=95.868, loss_interctc_layer12=82.146, loss_interctc_layer15=76.609, loss_interctc_layer21=90.736, loss=86.950, backward_time=0.347, grad_norm=150.302, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.138, optim0_lr0=1.410e-04, train_time=1.508 +[gpua002:0/64] 2024-01-18 00:13:11,938 (trainer:753) INFO: 9epoch:train:701-800batch: iter_time=9.307e-05, forward_time=0.143, loss_ctc=73.510, loss_interctc_layer6=88.228, loss_interctc_layer12=76.100, loss_interctc_layer15=71.164, loss_interctc_layer21=74.497, loss=76.700, backward_time=0.326, grad_norm=79.843, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.139, optim0_lr0=1.410e-04, train_time=1.683 +[gpua002:0/64] 2024-01-18 00:16:11,523 (trainer:753) INFO: 9epoch:train:801-900batch: iter_time=9.446e-05, forward_time=0.142, loss_ctc=107.950, loss_interctc_layer6=104.840, loss_interctc_layer12=89.707, loss_interctc_layer15=83.883, loss_interctc_layer21=109.760, loss=99.228, backward_time=0.368, grad_norm=81.916, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.139, optim0_lr0=1.409e-04, train_time=1.796 +[gpua002:0/64] 2024-01-18 00:19:12,286 (trainer:753) INFO: 9epoch:train:901-1000batch: iter_time=2.463e-04, forward_time=0.181, loss_ctc=101.875, loss_interctc_layer6=95.433, loss_interctc_layer12=81.526, loss_interctc_layer15=75.852, loss_interctc_layer21=103.786, loss=91.694, backward_time=0.424, grad_norm=70.674, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.142, optim0_lr0=1.409e-04, train_time=1.807 +[gpua002:0/64] 2024-01-18 00:21:55,910 (trainer:753) INFO: 9epoch:train:1001-1100batch: iter_time=8.844e-05, forward_time=0.177, loss_ctc=102.517, loss_interctc_layer6=105.950, loss_interctc_layer12=91.783, loss_interctc_layer15=85.682, loss_interctc_layer21=103.977, loss=97.982, backward_time=0.320, grad_norm=71.606, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.141, optim0_lr0=1.408e-04, train_time=1.636 +[gpua002:0/64] 2024-01-18 00:24:45,161 (trainer:753) INFO: 9epoch:train:1101-1200batch: iter_time=8.683e-05, forward_time=0.172, loss_ctc=104.131, loss_interctc_layer6=105.718, loss_interctc_layer12=90.856, loss_interctc_layer15=84.612, loss_interctc_layer21=106.190, loss=98.302, backward_time=0.407, grad_norm=67.125, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.139, optim0_lr0=1.407e-04, train_time=1.692 +[gpua002:0/64] 2024-01-18 00:26:20,139 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpua002:0/64] 2024-01-18 00:26:38,779 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua002:0/64] 2024-01-18 00:26:42,581 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpua002:0/64] 2024-01-18 00:26:42,581 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpua002:0/64] 2024-01-18 00:26:42,584 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpua002:0/64] 2024-01-18 00:32:37,058 (trainer:753) INFO: 9epoch:train:1201-1300batch: iter_time=3.063, forward_time=0.143, loss_ctc=93.667, loss_interctc_layer6=92.427, loss_interctc_layer12=78.121, loss_interctc_layer15=72.553, loss_interctc_layer21=94.990, loss=86.352, backward_time=0.313, grad_norm=72.263, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.139, optim0_lr0=1.407e-04, train_time=4.717 +[gpua002:0/64] 2024-01-18 00:34:45,707 (trainer:753) INFO: 9epoch:train:1301-1400batch: iter_time=9.093e-05, forward_time=0.144, loss_ctc=100.191, loss_interctc_layer6=103.808, loss_interctc_layer12=88.852, loss_interctc_layer15=82.677, loss_interctc_layer21=102.274, loss=95.560, backward_time=0.300, grad_norm=75.024, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.139, optim0_lr0=1.406e-04, train_time=1.288 +[gpua002:0/64] 2024-01-18 00:37:00,793 (trainer:753) INFO: 9epoch:train:1401-1500batch: iter_time=8.552e-05, forward_time=0.143, loss_ctc=113.164, loss_interctc_layer6=106.070, loss_interctc_layer12=90.870, loss_interctc_layer15=84.239, loss_interctc_layer21=115.003, loss=101.869, backward_time=0.298, grad_norm=66.290, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.138, optim0_lr0=1.406e-04, train_time=1.351 +[gpua002:0/64] 2024-01-18 00:39:37,737 (trainer:753) INFO: 9epoch:train:1501-1600batch: iter_time=9.378e-05, forward_time=0.145, loss_ctc=128.867, loss_interctc_layer6=123.659, loss_interctc_layer12=105.304, loss_interctc_layer15=97.784, loss_interctc_layer21=131.441, loss=117.411, backward_time=0.330, grad_norm=82.104, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.138, optim0_lr0=1.405e-04, train_time=1.569 +[gpua002:0/64] 2024-01-18 00:42:19,106 (trainer:753) INFO: 9epoch:train:1601-1700batch: iter_time=8.529e-05, forward_time=0.144, loss_ctc=102.774, loss_interctc_layer6=100.866, loss_interctc_layer12=86.342, loss_interctc_layer15=80.736, loss_interctc_layer21=104.369, loss=95.017, backward_time=0.402, grad_norm=71.829, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.138, optim0_lr0=1.405e-04, train_time=1.613 +[gpua002:0/64] 2024-01-18 00:45:39,863 (trainer:753) INFO: 9epoch:train:1701-1800batch: iter_time=2.062e-04, forward_time=0.142, loss_ctc=86.158, loss_interctc_layer6=91.849, loss_interctc_layer12=77.912, loss_interctc_layer15=71.931, loss_interctc_layer21=87.712, loss=83.112, backward_time=0.383, grad_norm=61.922, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.138, optim0_lr0=1.404e-04, train_time=2.007 +[gpua002:0/64] 2024-01-18 00:48:34,850 (trainer:753) INFO: 9epoch:train:1801-1900batch: iter_time=9.235e-05, forward_time=0.231, loss_ctc=102.114, loss_interctc_layer6=102.391, loss_interctc_layer12=87.433, loss_interctc_layer15=81.430, loss_interctc_layer21=103.973, loss=95.468, backward_time=0.326, grad_norm=80.112, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.150, optim0_lr0=1.403e-04, train_time=1.749 +[gpua002:0/64] 2024-01-18 00:51:16,323 (trainer:753) INFO: 9epoch:train:1901-2000batch: iter_time=1.001e-04, forward_time=0.140, loss_ctc=72.896, loss_interctc_layer6=87.014, loss_interctc_layer12=74.601, loss_interctc_layer15=69.629, loss_interctc_layer21=74.147, loss=75.657, backward_time=0.322, grad_norm=65.959, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.138, optim0_lr0=1.403e-04, train_time=1.615 +[gpua002:0/64] 2024-01-18 00:53:59,282 (trainer:753) INFO: 9epoch:train:2001-2100batch: iter_time=1.026e-04, forward_time=0.142, loss_ctc=90.585, loss_interctc_layer6=91.416, loss_interctc_layer12=78.232, loss_interctc_layer15=72.950, loss_interctc_layer21=92.057, loss=85.048, backward_time=0.377, grad_norm=70.241, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.138, optim0_lr0=1.402e-04, train_time=1.629 +[gpua002:0/64] 2024-01-18 00:56:21,658 (trainer:753) INFO: 9epoch:train:2101-2200batch: iter_time=1.010e-04, forward_time=0.141, loss_ctc=110.556, loss_interctc_layer6=100.914, loss_interctc_layer12=86.510, loss_interctc_layer15=80.476, loss_interctc_layer21=112.270, loss=98.145, backward_time=0.311, grad_norm=68.842, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.137, optim0_lr0=1.402e-04, train_time=1.424 +[gpua002:0/64] 2024-01-18 01:00:01,435 (trainer:753) INFO: 9epoch:train:2201-2300batch: iter_time=9.858e-05, forward_time=0.142, loss_ctc=102.528, loss_interctc_layer6=102.423, loss_interctc_layer12=87.640, loss_interctc_layer15=81.487, loss_interctc_layer21=104.135, loss=95.643, backward_time=0.407, grad_norm=62.317, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.137, optim0_lr0=1.401e-04, train_time=2.198 +[gpua002:0/64] 2024-01-18 01:02:45,200 (trainer:753) INFO: 9epoch:train:2301-2400batch: iter_time=9.684e-05, forward_time=0.143, loss_ctc=123.807, loss_interctc_layer6=112.341, loss_interctc_layer12=96.586, loss_interctc_layer15=90.167, loss_interctc_layer21=126.148, loss=109.810, backward_time=0.335, grad_norm=86.701, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.137, optim0_lr0=1.401e-04, train_time=1.637 +[gpua002:0/64] 2024-01-18 01:05:14,244 (trainer:753) INFO: 9epoch:train:2401-2500batch: iter_time=9.010e-05, forward_time=0.141, loss_ctc=85.288, loss_interctc_layer6=93.643, loss_interctc_layer12=79.241, loss_interctc_layer15=73.548, loss_interctc_layer21=86.851, loss=83.714, backward_time=0.318, grad_norm=118.535, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.138, optim0_lr0=1.400e-04, train_time=1.490 +[gpua002:0/64] 2024-01-18 01:05:34,274 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpua002:0/64] 2024-01-18 01:05:53,346 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua002:0/64] 2024-01-18 01:05:56,897 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpua002:0/64] 2024-01-18 01:05:56,897 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpua002:0/64] 2024-01-18 01:05:56,901 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpua002:0/64] 2024-01-18 01:14:11,738 (trainer:753) INFO: 9epoch:train:2501-2600batch: iter_time=3.737, forward_time=0.193, loss_ctc=97.200, loss_interctc_layer6=95.852, loss_interctc_layer12=81.184, loss_interctc_layer15=75.478, loss_interctc_layer21=98.650, loss=89.673, backward_time=0.315, grad_norm=59.131, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.141, optim0_lr0=1.399e-04, train_time=5.373 +[gpua002:0/64] 2024-01-18 01:17:25,084 (trainer:753) INFO: 9epoch:train:2601-2700batch: iter_time=0.608, forward_time=0.142, loss_ctc=93.947, loss_interctc_layer6=103.884, loss_interctc_layer12=88.174, loss_interctc_layer15=82.043, loss_interctc_layer21=95.649, loss=92.739, backward_time=0.296, grad_norm=61.861, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.139, optim0_lr0=1.399e-04, train_time=1.935 +[gpua002:0/64] 2024-01-18 01:19:53,271 (trainer:753) INFO: 9epoch:train:2701-2800batch: iter_time=8.183e-05, forward_time=0.143, loss_ctc=125.138, loss_interctc_layer6=115.446, loss_interctc_layer12=98.548, loss_interctc_layer15=91.554, loss_interctc_layer21=127.747, loss=111.687, backward_time=0.321, grad_norm=81.543, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.139, optim0_lr0=1.398e-04, train_time=1.482 +[gpua002:0/64] 2024-01-18 01:22:50,146 (trainer:753) INFO: 9epoch:train:2801-2900batch: iter_time=8.932e-05, forward_time=0.142, loss_ctc=105.897, loss_interctc_layer6=115.827, loss_interctc_layer12=98.891, loss_interctc_layer15=92.068, loss_interctc_layer21=107.531, loss=104.043, backward_time=0.395, grad_norm=74.883, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.139, optim0_lr0=1.398e-04, train_time=1.768 +[gpua002:0/64] 2024-01-18 01:26:04,765 (trainer:753) INFO: 9epoch:train:2901-3000batch: iter_time=9.733e-05, forward_time=0.144, loss_ctc=86.594, loss_interctc_layer6=92.688, loss_interctc_layer12=78.657, loss_interctc_layer15=72.980, loss_interctc_layer21=88.140, loss=83.812, backward_time=0.517, grad_norm=53.169, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.140, optim0_lr0=1.397e-04, train_time=1.946 +[gpua002:0/64] 2024-01-18 01:28:56,024 (trainer:753) INFO: 9epoch:train:3001-3100batch: iter_time=8.869e-05, forward_time=0.144, loss_ctc=90.546, loss_interctc_layer6=97.301, loss_interctc_layer12=82.886, loss_interctc_layer15=77.001, loss_interctc_layer21=92.490, loss=88.045, backward_time=0.339, grad_norm=62.276, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.140, optim0_lr0=1.397e-04, train_time=1.713 +[gpua002:0/64] 2024-01-18 01:31:47,819 (trainer:753) INFO: 9epoch:train:3101-3200batch: iter_time=8.393e-05, forward_time=0.142, loss_ctc=88.881, loss_interctc_layer6=95.084, loss_interctc_layer12=81.212, loss_interctc_layer15=75.494, loss_interctc_layer21=90.574, loss=86.249, backward_time=0.332, grad_norm=59.219, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.140, optim0_lr0=1.396e-04, train_time=1.718 +[gpua002:0/64] 2024-01-18 01:34:06,742 (trainer:753) INFO: 9epoch:train:3201-3300batch: iter_time=8.697e-05, forward_time=0.142, loss_ctc=71.238, loss_interctc_layer6=86.776, loss_interctc_layer12=74.119, loss_interctc_layer15=69.012, loss_interctc_layer21=72.440, loss=74.717, backward_time=0.321, grad_norm=54.474, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.140, optim0_lr0=1.395e-04, train_time=1.389 +[gpua002:0/64] 2024-01-18 01:37:10,545 (trainer:753) INFO: 9epoch:train:3301-3400batch: iter_time=9.169e-05, forward_time=0.143, loss_ctc=106.539, loss_interctc_layer6=104.214, loss_interctc_layer12=89.029, loss_interctc_layer15=82.664, loss_interctc_layer21=108.656, loss=98.220, backward_time=0.418, grad_norm=93.619, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.139, optim0_lr0=1.395e-04, train_time=1.838 +[gpua002:0/64] 2024-01-18 01:40:01,602 (trainer:753) INFO: 9epoch:train:3401-3500batch: iter_time=9.626e-05, forward_time=0.143, loss_ctc=100.327, loss_interctc_layer6=94.751, loss_interctc_layer12=80.148, loss_interctc_layer15=74.465, loss_interctc_layer21=102.091, loss=90.357, backward_time=0.354, grad_norm=64.507, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.139, optim0_lr0=1.394e-04, train_time=1.710 +[gpua002:0/64] 2024-01-18 01:42:44,046 (trainer:753) INFO: 9epoch:train:3501-3600batch: iter_time=0.004, forward_time=0.264, loss_ctc=97.951, loss_interctc_layer6=103.365, loss_interctc_layer12=88.988, loss_interctc_layer15=83.401, loss_interctc_layer21=99.827, loss=94.706, backward_time=0.345, grad_norm=80.004, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.141, optim0_lr0=1.394e-04, train_time=1.624 +[gpua002:0/64] 2024-01-18 01:45:25,215 (trainer:753) INFO: 9epoch:train:3601-3700batch: iter_time=8.718e-05, forward_time=0.146, loss_ctc=100.954, loss_interctc_layer6=103.416, loss_interctc_layer12=88.706, loss_interctc_layer15=82.192, loss_interctc_layer21=102.931, loss=95.640, backward_time=0.341, grad_norm=64.236, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.137, optim0_lr0=1.393e-04, train_time=1.611 +[gpua002:0/64] 2024-01-18 01:46:54,115 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpua002:0/64] 2024-01-18 01:47:12,883 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua002:0/64] 2024-01-18 01:47:16,442 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpua002:0/64] 2024-01-18 01:47:16,442 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpua002:0/64] 2024-01-18 01:47:16,445 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpua002:0/64] 2024-01-18 01:53:38,396 (trainer:753) INFO: 9epoch:train:3701-3800batch: iter_time=3.403, forward_time=0.142, loss_ctc=91.238, loss_interctc_layer6=91.296, loss_interctc_layer12=76.736, loss_interctc_layer15=70.966, loss_interctc_layer21=93.187, loss=84.685, backward_time=0.306, grad_norm=86.516, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.138, optim0_lr0=1.393e-04, train_time=4.932 +[gpua002:0/64] 2024-01-18 01:56:13,349 (trainer:753) INFO: 9epoch:train:3801-3900batch: iter_time=8.609e-05, forward_time=0.143, loss_ctc=98.552, loss_interctc_layer6=103.012, loss_interctc_layer12=87.316, loss_interctc_layer15=81.154, loss_interctc_layer21=100.329, loss=94.073, backward_time=0.342, grad_norm=67.472, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.138, optim0_lr0=1.392e-04, train_time=1.550 +[gpua002:0/64] 2024-01-18 01:58:28,533 (trainer:753) INFO: 9epoch:train:3901-4000batch: iter_time=8.621e-05, forward_time=0.143, loss_ctc=111.921, loss_interctc_layer6=105.034, loss_interctc_layer12=89.826, loss_interctc_layer15=83.360, loss_interctc_layer21=114.117, loss=100.852, backward_time=0.303, grad_norm=65.899, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.138, optim0_lr0=1.391e-04, train_time=1.352 +[gpua002:0/64] 2024-01-18 02:01:00,500 (trainer:753) INFO: 9epoch:train:4001-4100batch: iter_time=9.760e-05, forward_time=0.145, loss_ctc=126.987, loss_interctc_layer6=122.243, loss_interctc_layer12=103.250, loss_interctc_layer15=95.652, loss_interctc_layer21=129.405, loss=115.507, backward_time=0.328, grad_norm=88.104, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.138, optim0_lr0=1.391e-04, train_time=1.519 +[gpua002:0/64] 2024-01-18 02:03:49,615 (trainer:753) INFO: 9epoch:train:4101-4200batch: iter_time=9.089e-05, forward_time=0.142, loss_ctc=101.938, loss_interctc_layer6=99.662, loss_interctc_layer12=84.796, loss_interctc_layer15=78.664, loss_interctc_layer21=103.962, loss=93.804, backward_time=0.358, grad_norm=93.462, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.137, optim0_lr0=1.390e-04, train_time=1.691 +[gpua002:0/64] 2024-01-18 02:06:41,916 (trainer:753) INFO: 9epoch:train:4201-4300batch: iter_time=9.058e-05, forward_time=0.142, loss_ctc=85.132, loss_interctc_layer6=90.393, loss_interctc_layer12=76.208, loss_interctc_layer15=70.474, loss_interctc_layer21=86.643, loss=81.770, backward_time=0.434, grad_norm=69.885, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.137, optim0_lr0=1.390e-04, train_time=1.723 +[gpua002:0/64] 2024-01-18 02:09:07,803 (trainer:753) INFO: 9epoch:train:4301-4400batch: iter_time=8.981e-05, forward_time=0.144, loss_ctc=101.840, loss_interctc_layer6=101.981, loss_interctc_layer12=86.949, loss_interctc_layer15=80.518, loss_interctc_layer21=103.660, loss=94.990, backward_time=0.307, grad_norm=63.766, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.137, optim0_lr0=1.389e-04, train_time=1.459 +[gpua002:0/64] 2024-01-18 02:12:15,054 (trainer:753) INFO: 9epoch:train:4401-4500batch: iter_time=9.567e-05, forward_time=0.252, loss_ctc=71.657, loss_interctc_layer6=86.225, loss_interctc_layer12=73.631, loss_interctc_layer15=68.590, loss_interctc_layer21=72.651, loss=74.551, backward_time=0.441, grad_norm=56.357, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.145, optim0_lr0=1.389e-04, train_time=1.872 +[gpua002:0/64] 2024-01-18 02:15:23,209 (trainer:753) INFO: 9epoch:train:4501-4600batch: iter_time=9.919e-05, forward_time=0.143, loss_ctc=89.011, loss_interctc_layer6=90.828, loss_interctc_layer12=77.491, loss_interctc_layer15=72.272, loss_interctc_layer21=91.378, loss=84.196, backward_time=0.379, grad_norm=98.673, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.138, optim0_lr0=1.388e-04, train_time=1.882 +[gpua002:0/64] 2024-01-18 02:18:22,842 (trainer:753) INFO: 9epoch:train:4601-4700batch: iter_time=8.992e-05, forward_time=0.142, loss_ctc=108.920, loss_interctc_layer6=99.640, loss_interctc_layer12=84.872, loss_interctc_layer15=78.775, loss_interctc_layer21=110.907, loss=96.623, backward_time=0.369, grad_norm=69.052, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.138, optim0_lr0=1.388e-04, train_time=1.796 +[gpua002:0/64] 2024-01-18 02:20:57,413 (trainer:753) INFO: 9epoch:train:4701-4800batch: iter_time=9.633e-05, forward_time=0.142, loss_ctc=101.135, loss_interctc_layer6=101.123, loss_interctc_layer12=86.003, loss_interctc_layer15=79.701, loss_interctc_layer21=102.819, loss=94.156, backward_time=0.306, grad_norm=62.384, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.138, optim0_lr0=1.387e-04, train_time=1.546 +[gpua002:0/64] 2024-01-18 02:23:44,825 (trainer:753) INFO: 9epoch:train:4801-4900batch: iter_time=1.061e-04, forward_time=0.143, loss_ctc=122.549, loss_interctc_layer6=111.289, loss_interctc_layer12=95.649, loss_interctc_layer15=89.254, loss_interctc_layer21=124.823, loss=108.713, backward_time=0.378, grad_norm=85.004, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.139, optim0_lr0=1.386e-04, train_time=1.673 +[gpua002:0/64] 2024-01-18 02:26:31,005 (trainer:753) INFO: 9epoch:train:4901-5000batch: iter_time=1.101e-04, forward_time=0.143, loss_ctc=83.991, loss_interctc_layer6=91.991, loss_interctc_layer12=77.942, loss_interctc_layer15=71.883, loss_interctc_layer21=85.575, loss=82.277, backward_time=0.362, grad_norm=59.615, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.139, optim0_lr0=1.386e-04, train_time=1.663 +[gpua002:0/64] 2024-01-18 02:26:51,036 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpua002:0/64] 2024-01-18 02:27:10,103 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua002:0/64] 2024-01-18 02:27:13,689 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpua002:0/64] 2024-01-18 02:27:13,689 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpua002:0/64] 2024-01-18 02:27:13,692 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpua002:0/64] 2024-01-18 02:38:24,588 (trainer:753) INFO: 9epoch:train:5001-5100batch: iter_time=3.336, forward_time=0.143, loss_ctc=95.154, loss_interctc_layer6=94.493, loss_interctc_layer12=79.975, loss_interctc_layer15=73.866, loss_interctc_layer21=96.476, loss=87.993, backward_time=0.332, grad_norm=59.432, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.137, optim0_lr0=1.385e-04, train_time=7.136 +[gpua002:0/64] 2024-01-18 02:41:10,878 (trainer:753) INFO: 9epoch:train:5101-5200batch: iter_time=8.533e-05, forward_time=0.142, loss_ctc=93.036, loss_interctc_layer6=102.281, loss_interctc_layer12=86.877, loss_interctc_layer15=80.850, loss_interctc_layer21=94.790, loss=91.567, backward_time=0.387, grad_norm=58.321, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.137, optim0_lr0=1.385e-04, train_time=1.663 +[gpua002:0/64] 2024-01-18 02:44:01,053 (trainer:753) INFO: 9epoch:train:5201-5300batch: iter_time=9.072e-05, forward_time=0.145, loss_ctc=123.759, loss_interctc_layer6=113.958, loss_interctc_layer12=97.204, loss_interctc_layer15=90.285, loss_interctc_layer21=126.238, loss=110.289, backward_time=0.333, grad_norm=77.103, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.137, optim0_lr0=1.384e-04, train_time=1.702 +[gpua002:0/64] 2024-01-18 02:46:24,851 (trainer:753) INFO: 9epoch:train:5301-5400batch: iter_time=9.085e-05, forward_time=0.142, loss_ctc=103.228, loss_interctc_layer6=114.234, loss_interctc_layer12=97.759, loss_interctc_layer15=90.613, loss_interctc_layer21=104.981, loss=102.163, backward_time=0.321, grad_norm=87.011, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.137, optim0_lr0=1.384e-04, train_time=1.438 +[gpua002:0/64] 2024-01-18 02:48:54,628 (trainer:753) INFO: 9epoch:train:5401-5500batch: iter_time=8.735e-05, forward_time=0.146, loss_ctc=84.602, loss_interctc_layer6=92.013, loss_interctc_layer12=78.106, loss_interctc_layer15=72.195, loss_interctc_layer21=86.167, loss=82.617, backward_time=0.332, grad_norm=54.802, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.136, optim0_lr0=1.383e-04, train_time=1.498 +[gpua002:0/64] 2024-01-18 02:52:29,238 (trainer:753) INFO: 9epoch:train:5501-5600batch: iter_time=9.246e-05, forward_time=0.277, loss_ctc=88.866, loss_interctc_layer6=96.473, loss_interctc_layer12=81.789, loss_interctc_layer15=75.798, loss_interctc_layer21=90.505, loss=86.686, backward_time=0.388, grad_norm=69.716, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.139, optim0_lr0=1.383e-04, train_time=2.145 +[gpua002:0/64] 2024-01-18 02:55:06,985 (trainer:753) INFO: 9epoch:train:5601-5700batch: iter_time=8.214e-05, forward_time=0.144, loss_ctc=85.504, loss_interctc_layer6=93.310, loss_interctc_layer12=79.337, loss_interctc_layer15=73.478, loss_interctc_layer21=87.355, loss=83.797, backward_time=0.342, grad_norm=62.599, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.137, optim0_lr0=1.382e-04, train_time=1.578 +[gpua002:0/64] 2024-01-18 02:57:37,917 (trainer:753) INFO: 9epoch:train:5701-5800batch: iter_time=9.053e-05, forward_time=0.141, loss_ctc=69.622, loss_interctc_layer6=85.745, loss_interctc_layer12=73.002, loss_interctc_layer15=67.927, loss_interctc_layer21=70.772, loss=73.414, backward_time=0.335, grad_norm=76.231, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.136, optim0_lr0=1.381e-04, train_time=1.508 +[gpua002:0/64] 2024-01-18 03:00:04,866 (trainer:753) INFO: 9epoch:train:5801-5900batch: iter_time=1.005e-04, forward_time=0.142, loss_ctc=106.323, loss_interctc_layer6=103.252, loss_interctc_layer12=87.753, loss_interctc_layer15=81.704, loss_interctc_layer21=108.910, loss=97.588, backward_time=0.316, grad_norm=79.888, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.136, optim0_lr0=1.381e-04, train_time=1.471 +[gpua002:0/64] 2024-01-18 03:03:03,114 (trainer:753) INFO: 9epoch:train:5901-6000batch: iter_time=9.592e-05, forward_time=0.141, loss_ctc=99.199, loss_interctc_layer6=93.341, loss_interctc_layer12=79.099, loss_interctc_layer15=73.259, loss_interctc_layer21=100.768, loss=89.133, backward_time=0.346, grad_norm=49.630, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.136, optim0_lr0=1.380e-04, train_time=1.782 +[gpua002:0/64] 2024-01-18 03:05:39,713 (trainer:753) INFO: 9epoch:train:6001-6100batch: iter_time=9.311e-05, forward_time=0.142, loss_ctc=98.284, loss_interctc_layer6=102.384, loss_interctc_layer12=87.924, loss_interctc_layer15=82.043, loss_interctc_layer21=99.818, loss=94.091, backward_time=0.352, grad_norm=69.310, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.136, optim0_lr0=1.380e-04, train_time=1.566 +[gpua002:0/64] 2024-01-18 03:08:52,096 (trainer:753) INFO: 9epoch:train:6101-6200batch: iter_time=9.329e-05, forward_time=0.142, loss_ctc=99.771, loss_interctc_layer6=103.296, loss_interctc_layer12=87.841, loss_interctc_layer15=81.547, loss_interctc_layer21=101.992, loss=94.889, backward_time=0.471, grad_norm=108.038, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.137, optim0_lr0=1.379e-04, train_time=1.924 +[gpua002:0/64] 2024-01-18 03:10:48,354 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpua002:0/64] 2024-01-18 03:11:07,200 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua002:0/64] 2024-01-18 03:11:10,663 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpua002:0/64] 2024-01-18 03:11:10,664 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpua002:0/64] 2024-01-18 03:11:10,669 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpua002:0/64] 2024-01-18 03:22:16,364 (trainer:753) INFO: 9epoch:train:6201-6300batch: iter_time=3.527, forward_time=0.187, loss_ctc=91.014, loss_interctc_layer6=90.274, loss_interctc_layer12=76.069, loss_interctc_layer15=70.357, loss_interctc_layer21=92.738, loss=84.090, backward_time=0.374, grad_norm=53.158, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.138, optim0_lr0=1.379e-04, train_time=8.042 +[gpua002:0/64] 2024-01-18 03:24:31,918 (trainer:753) INFO: 9epoch:train:6301-6400batch: iter_time=7.209e-05, forward_time=0.142, loss_ctc=97.917, loss_interctc_layer6=101.875, loss_interctc_layer12=86.284, loss_interctc_layer15=80.284, loss_interctc_layer21=99.733, loss=93.219, backward_time=0.300, grad_norm=64.280, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.136, optim0_lr0=1.378e-04, train_time=1.356 +[gpua002:0/64] 2024-01-18 03:27:16,301 (trainer:753) INFO: 9epoch:train:6401-6500batch: iter_time=8.346e-05, forward_time=0.142, loss_ctc=111.818, loss_interctc_layer6=104.461, loss_interctc_layer12=89.438, loss_interctc_layer15=82.753, loss_interctc_layer21=114.281, loss=100.550, backward_time=0.370, grad_norm=99.065, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.136, optim0_lr0=1.378e-04, train_time=1.644 +[gpua002:0/64] 2024-01-18 03:29:57,894 (trainer:753) INFO: 9epoch:train:6501-6600batch: iter_time=8.559e-05, forward_time=0.143, loss_ctc=126.860, loss_interctc_layer6=121.851, loss_interctc_layer12=103.023, loss_interctc_layer15=95.126, loss_interctc_layer21=129.320, loss=115.236, backward_time=0.342, grad_norm=247.293, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.136, optim0_lr0=1.377e-04, train_time=1.615 +[gpua002:0/64] 2024-01-18 03:33:20,261 (trainer:753) INFO: 9epoch:train:6601-6700batch: iter_time=8.133e-05, forward_time=0.143, loss_ctc=101.972, loss_interctc_layer6=99.396, loss_interctc_layer12=84.291, loss_interctc_layer15=78.118, loss_interctc_layer21=103.710, loss=93.497, backward_time=0.397, grad_norm=92.543, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.136, optim0_lr0=1.377e-04, train_time=2.025 +[gpua002:0/64] 2024-01-18 03:36:20,701 (trainer:753) INFO: 9epoch:train:6701-6800batch: iter_time=8.590e-05, forward_time=0.142, loss_ctc=84.237, loss_interctc_layer6=90.416, loss_interctc_layer12=76.069, loss_interctc_layer15=70.162, loss_interctc_layer21=85.953, loss=81.367, backward_time=0.364, grad_norm=56.626, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.136, optim0_lr0=1.376e-04, train_time=1.804 +[gpua002:0/64] 2024-01-18 03:38:49,408 (trainer:753) INFO: 9epoch:train:6801-6900batch: iter_time=8.634e-05, forward_time=0.141, loss_ctc=99.935, loss_interctc_layer6=101.104, loss_interctc_layer12=85.802, loss_interctc_layer15=79.514, loss_interctc_layer21=101.659, loss=93.603, backward_time=0.321, grad_norm=71.593, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.136, optim0_lr0=1.375e-04, train_time=1.486 +[gpua002:0/64] 2024-01-18 03:41:31,061 (trainer:753) INFO: 9epoch:train:6901-7000batch: iter_time=9.732e-05, forward_time=0.141, loss_ctc=71.293, loss_interctc_layer6=84.692, loss_interctc_layer12=72.331, loss_interctc_layer15=67.198, loss_interctc_layer21=72.124, loss=73.528, backward_time=0.326, grad_norm=92.915, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.136, optim0_lr0=1.375e-04, train_time=1.617 +[gpua002:0/64] 2024-01-18 03:44:39,365 (trainer:753) INFO: 9epoch:train:7001-7100batch: iter_time=9.732e-05, forward_time=0.180, loss_ctc=88.339, loss_interctc_layer6=90.053, loss_interctc_layer12=76.772, loss_interctc_layer15=71.320, loss_interctc_layer21=89.981, loss=83.293, backward_time=0.428, grad_norm=60.160, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.142, optim0_lr0=1.374e-04, train_time=1.883 +[gpua002:0/64] 2024-01-18 03:47:29,302 (trainer:753) INFO: 9epoch:train:7101-7200batch: iter_time=9.443e-05, forward_time=0.148, loss_ctc=108.664, loss_interctc_layer6=98.789, loss_interctc_layer12=83.980, loss_interctc_layer15=78.003, loss_interctc_layer21=110.769, loss=96.041, backward_time=0.371, grad_norm=57.144, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.138, optim0_lr0=1.374e-04, train_time=1.699 +[gpua002:0/64] 2024-01-18 03:50:28,750 (trainer:753) INFO: 9epoch:train:7201-7300batch: iter_time=9.129e-05, forward_time=0.142, loss_ctc=100.728, loss_interctc_layer6=100.263, loss_interctc_layer12=85.283, loss_interctc_layer15=78.882, loss_interctc_layer21=102.474, loss=93.526, backward_time=0.339, grad_norm=156.149, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.137, optim0_lr0=1.373e-04, train_time=1.794 +[gpua002:0/64] 2024-01-18 03:53:21,144 (trainer:753) INFO: 9epoch:train:7301-7400batch: iter_time=8.652e-05, forward_time=0.142, loss_ctc=121.584, loss_interctc_layer6=109.946, loss_interctc_layer12=93.938, loss_interctc_layer15=87.516, loss_interctc_layer21=124.025, loss=107.402, backward_time=0.365, grad_norm=66.904, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.137, optim0_lr0=1.373e-04, train_time=1.724 +[gpua002:0/64] 2024-01-18 03:55:39,554 (trainer:753) INFO: 9epoch:train:7401-7500batch: iter_time=9.429e-05, forward_time=0.141, loss_ctc=82.442, loss_interctc_layer6=91.600, loss_interctc_layer12=77.250, loss_interctc_layer15=71.125, loss_interctc_layer21=84.063, loss=81.296, backward_time=0.313, grad_norm=55.635, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.136, optim0_lr0=1.372e-04, train_time=1.384 +[gpua002:0/64] 2024-01-18 03:55:59,584 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpua002:0/64] 2024-01-18 03:56:18,539 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua002:0/64] 2024-01-18 03:56:22,207 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpua002:0/64] 2024-01-18 03:56:22,207 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpua002:0/64] 2024-01-18 03:56:22,211 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpua002:0/64] 2024-01-18 04:03:22,499 (trainer:753) INFO: 9epoch:train:7501-7600batch: iter_time=3.247, forward_time=0.187, loss_ctc=99.729, loss_interctc_layer6=93.585, loss_interctc_layer12=79.302, loss_interctc_layer15=73.106, loss_interctc_layer21=101.572, loss=89.459, backward_time=0.311, grad_norm=69.460, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.138, optim0_lr0=1.372e-04, train_time=4.628 +[gpua002:0/64] 2024-01-18 04:05:59,470 (trainer:753) INFO: 9epoch:train:7601-7700batch: iter_time=8.881e-05, forward_time=0.142, loss_ctc=96.072, loss_interctc_layer6=101.647, loss_interctc_layer12=85.842, loss_interctc_layer15=79.502, loss_interctc_layer21=98.236, loss=92.260, backward_time=0.311, grad_norm=79.462, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.137, optim0_lr0=1.371e-04, train_time=1.570 +[gpua002:0/64] 2024-01-18 04:09:10,118 (trainer:753) INFO: 9epoch:train:7701-7800batch: iter_time=8.630e-05, forward_time=0.145, loss_ctc=131.394, loss_interctc_layer6=113.558, loss_interctc_layer12=96.231, loss_interctc_layer15=89.318, loss_interctc_layer21=133.759, loss=112.852, backward_time=0.329, grad_norm=76.065, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.137, optim0_lr0=1.371e-04, train_time=1.906 +[gpua002:0/64] 2024-01-18 04:12:05,648 (trainer:753) INFO: 9epoch:train:7801-7900batch: iter_time=9.016e-05, forward_time=0.153, loss_ctc=110.588, loss_interctc_layer6=113.521, loss_interctc_layer12=96.344, loss_interctc_layer15=89.517, loss_interctc_layer21=112.357, loss=104.465, backward_time=0.362, grad_norm=70.018, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.143, optim0_lr0=1.370e-04, train_time=1.755 +[gpua002:0/64] 2024-01-18 04:14:45,625 (trainer:753) INFO: 9epoch:train:7901-8000batch: iter_time=8.135e-05, forward_time=0.232, loss_ctc=87.429, loss_interctc_layer6=91.217, loss_interctc_layer12=77.107, loss_interctc_layer15=71.322, loss_interctc_layer21=89.110, loss=83.237, backward_time=0.365, grad_norm=72.507, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.140, optim0_lr0=1.370e-04, train_time=1.600 +[gpua002:0/64] 2024-01-18 04:17:06,062 (trainer:753) INFO: 9epoch:train:8001-8100batch: iter_time=8.474e-05, forward_time=0.142, loss_ctc=91.525, loss_interctc_layer6=96.043, loss_interctc_layer12=81.497, loss_interctc_layer15=75.527, loss_interctc_layer21=93.393, loss=87.597, backward_time=0.316, grad_norm=62.727, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.136, optim0_lr0=1.369e-04, train_time=1.404 +[gpua002:0/64] 2024-01-18 04:20:04,365 (trainer:753) INFO: 9epoch:train:8101-8200batch: iter_time=8.436e-05, forward_time=0.141, loss_ctc=91.238, loss_interctc_layer6=93.254, loss_interctc_layer12=79.236, loss_interctc_layer15=73.381, loss_interctc_layer21=92.971, loss=86.016, backward_time=0.341, grad_norm=57.112, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.137, optim0_lr0=1.368e-04, train_time=1.782 +[gpua002:0/64] 2024-01-18 04:23:25,536 (trainer:753) INFO: 9epoch:train:8201-8300batch: iter_time=8.692e-05, forward_time=0.141, loss_ctc=72.031, loss_interctc_layer6=85.871, loss_interctc_layer12=73.070, loss_interctc_layer15=67.909, loss_interctc_layer21=73.453, loss=74.467, backward_time=0.442, grad_norm=60.150, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.137, optim0_lr0=1.368e-04, train_time=2.012 +[gpua002:0/64] 2024-01-18 04:26:24,737 (trainer:753) INFO: 9epoch:train:8301-8400batch: iter_time=8.560e-05, forward_time=0.142, loss_ctc=110.018, loss_interctc_layer6=102.766, loss_interctc_layer12=87.213, loss_interctc_layer15=81.020, loss_interctc_layer21=112.632, loss=98.730, backward_time=0.352, grad_norm=93.412, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.137, optim0_lr0=1.367e-04, train_time=1.792 +[gpua002:0/64] 2024-01-18 04:28:49,268 (trainer:753) INFO: 9epoch:train:8401-8500batch: iter_time=8.476e-05, forward_time=0.142, loss_ctc=101.957, loss_interctc_layer6=93.443, loss_interctc_layer12=79.094, loss_interctc_layer15=73.299, loss_interctc_layer21=103.957, loss=90.350, backward_time=0.323, grad_norm=46.674, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.137, optim0_lr0=1.367e-04, train_time=1.445 +[gpua002:0/64] 2024-01-18 04:31:38,015 (trainer:753) INFO: 9epoch:train:8501-8600batch: iter_time=8.816e-05, forward_time=0.142, loss_ctc=102.740, loss_interctc_layer6=102.145, loss_interctc_layer12=88.014, loss_interctc_layer15=81.821, loss_interctc_layer21=104.569, loss=95.858, backward_time=0.326, grad_norm=65.355, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.137, optim0_lr0=1.366e-04, train_time=1.687 +[gpua002:0/64] 2024-01-18 04:34:37,220 (trainer:753) INFO: 9epoch:train:8601-8700batch: iter_time=0.004, forward_time=0.280, loss_ctc=105.421, loss_interctc_layer6=102.287, loss_interctc_layer12=86.432, loss_interctc_layer15=79.899, loss_interctc_layer21=107.642, loss=96.336, backward_time=0.383, grad_norm=66.484, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.142, optim0_lr0=1.366e-04, train_time=1.791 +[gpua002:0/64] 2024-01-18 04:36:15,599 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpua002:0/64] 2024-01-18 04:36:35,210 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua002:0/64] 2024-01-18 04:36:38,653 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpua002:0/64] 2024-01-18 04:36:38,653 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpua002:0/64] 2024-01-18 04:36:38,673 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpua002:0/64] 2024-01-18 04:42:39,499 (trainer:753) INFO: 9epoch:train:8701-8800batch: iter_time=3.242, forward_time=0.148, loss_ctc=89.117, loss_interctc_layer6=89.723, loss_interctc_layer12=75.278, loss_interctc_layer15=69.444, loss_interctc_layer21=91.048, loss=82.922, backward_time=0.336, grad_norm=61.624, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.137, optim0_lr0=1.365e-04, train_time=4.824 +[gpua002:0/64] 2024-01-18 04:44:51,239 (trainer:753) INFO: 9epoch:train:8801-8900batch: iter_time=7.781e-05, forward_time=0.142, loss_ctc=89.330, loss_interctc_layer6=100.605, loss_interctc_layer12=85.147, loss_interctc_layer15=79.081, loss_interctc_layer21=90.974, loss=89.027, backward_time=0.296, grad_norm=65.510, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.136, optim0_lr0=1.365e-04, train_time=1.317 +[gpua002:0/64] 2024-01-18 04:47:07,803 (trainer:753) INFO: 9epoch:train:8901-9000batch: iter_time=8.363e-05, forward_time=0.142, loss_ctc=104.570, loss_interctc_layer6=104.312, loss_interctc_layer12=88.839, loss_interctc_layer15=82.609, loss_interctc_layer21=107.169, loss=97.500, backward_time=0.327, grad_norm=69.236, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.136, optim0_lr0=1.364e-04, train_time=1.365 +[gpua002:0/64] 2024-01-18 04:49:56,195 (trainer:753) INFO: 9epoch:train:9001-9100batch: iter_time=8.936e-05, forward_time=0.143, loss_ctc=116.852, loss_interctc_layer6=120.481, loss_interctc_layer12=101.752, loss_interctc_layer15=94.210, loss_interctc_layer21=119.336, loss=110.526, backward_time=0.351, grad_norm=104.737, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.136, optim0_lr0=1.364e-04, train_time=1.684 +[gpua002:0/64] 2024-01-18 04:52:25,639 (trainer:753) INFO: 9epoch:train:9101-9200batch: iter_time=9.395e-05, forward_time=0.141, loss_ctc=95.825, loss_interctc_layer6=98.095, loss_interctc_layer12=82.957, loss_interctc_layer15=77.013, loss_interctc_layer21=98.077, loss=90.393, backward_time=0.321, grad_norm=57.096, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.136, optim0_lr0=1.363e-04, train_time=1.494 +[gpua002:0/64] 2024-01-18 04:55:33,869 (trainer:753) INFO: 9epoch:train:9201-9300batch: iter_time=9.735e-05, forward_time=0.141, loss_ctc=81.708, loss_interctc_layer6=89.464, loss_interctc_layer12=75.214, loss_interctc_layer15=69.514, loss_interctc_layer21=83.306, loss=79.841, backward_time=0.389, grad_norm=72.329, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.136, optim0_lr0=1.363e-04, train_time=1.882 +[gpua002:0/64] 2024-01-18 04:58:12,461 (trainer:753) INFO: 9epoch:train:9301-9400batch: iter_time=8.935e-05, forward_time=0.141, loss_ctc=93.778, loss_interctc_layer6=100.668, loss_interctc_layer12=85.594, loss_interctc_layer15=78.996, loss_interctc_layer21=95.680, loss=90.943, backward_time=0.309, grad_norm=67.376, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.136, optim0_lr0=1.362e-04, train_time=1.586 +[gpua002:0/64] 2024-01-18 05:00:39,204 (trainer:753) INFO: 9epoch:train:9401-9500batch: iter_time=9.365e-05, forward_time=0.142, loss_ctc=68.774, loss_interctc_layer6=84.507, loss_interctc_layer12=71.863, loss_interctc_layer15=66.550, loss_interctc_layer21=69.949, loss=72.328, backward_time=0.330, grad_norm=52.710, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.137, optim0_lr0=1.362e-04, train_time=1.467 +[gpua002:0/64] 2024-01-18 05:04:06,299 (trainer:753) INFO: 9epoch:train:9501-9600batch: iter_time=8.822e-05, forward_time=0.233, loss_ctc=81.864, loss_interctc_layer6=88.698, loss_interctc_layer12=75.262, loss_interctc_layer15=70.001, loss_interctc_layer21=83.704, loss=79.906, backward_time=0.444, grad_norm=75.887, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.143, optim0_lr0=1.361e-04, train_time=2.070 +[gpua002:0/64] 2024-01-18 05:06:44,975 (trainer:753) INFO: 9epoch:train:9601-9700batch: iter_time=8.617e-05, forward_time=0.144, loss_ctc=102.301, loss_interctc_layer6=98.346, loss_interctc_layer12=83.444, loss_interctc_layer15=77.476, loss_interctc_layer21=104.352, loss=93.184, backward_time=0.357, grad_norm=77.079, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.137, optim0_lr0=1.361e-04, train_time=1.587 +[gpua002:0/64] 2024-01-18 05:09:09,972 (trainer:753) INFO: 9epoch:train:9701-9800batch: iter_time=8.778e-05, forward_time=0.143, loss_ctc=96.963, loss_interctc_layer6=99.930, loss_interctc_layer12=84.704, loss_interctc_layer15=78.217, loss_interctc_layer21=98.873, loss=91.737, backward_time=0.307, grad_norm=83.015, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.137, optim0_lr0=1.360e-04, train_time=1.450 +[gpua002:0/64] 2024-01-18 05:12:00,991 (trainer:753) INFO: 9epoch:train:9801-9900batch: iter_time=8.521e-05, forward_time=0.143, loss_ctc=111.259, loss_interctc_layer6=109.430, loss_interctc_layer12=93.338, loss_interctc_layer15=86.971, loss_interctc_layer21=113.464, loss=102.892, backward_time=0.351, grad_norm=79.977, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.137, optim0_lr0=1.360e-04, train_time=1.710 +[gpua002:0/64] 2024-01-18 05:14:53,888 (trainer:753) INFO: 9epoch:train:9901-10000batch: iter_time=9.214e-05, forward_time=0.142, loss_ctc=77.894, loss_interctc_layer6=90.880, loss_interctc_layer12=76.473, loss_interctc_layer15=70.485, loss_interctc_layer21=79.914, loss=79.129, backward_time=0.350, grad_norm=67.098, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.137, optim0_lr0=1.359e-04, train_time=1.729 +[gpua002:0/64] 2024-01-18 05:15:13,917 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpua002:0/64] 2024-01-18 05:15:33,010 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua002:0/64] 2024-01-18 05:15:36,455 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpua002:0/64] 2024-01-18 05:15:36,455 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpua002:0/64] 2024-01-18 05:15:36,545 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpua002:0/64] 2024-01-18 05:22:51,572 (trainer:753) INFO: 9epoch:train:10001-10100batch: iter_time=3.235, forward_time=0.143, loss_ctc=98.613, loss_interctc_layer6=93.060, loss_interctc_layer12=78.312, loss_interctc_layer15=72.304, loss_interctc_layer21=100.625, loss=88.583, backward_time=0.305, grad_norm=65.008, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.137, optim0_lr0=1.358e-04, train_time=4.776 +[gpua002:0/64] 2024-01-18 05:25:40,163 (trainer:753) INFO: 9epoch:train:10101-10200batch: iter_time=8.775e-05, forward_time=0.143, loss_ctc=95.851, loss_interctc_layer6=101.017, loss_interctc_layer12=85.176, loss_interctc_layer15=78.757, loss_interctc_layer21=97.546, loss=91.670, backward_time=0.327, grad_norm=74.393, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.137, optim0_lr0=1.358e-04, train_time=1.686 +[gpua002:0/64] 2024-01-18 05:28:15,919 (trainer:753) INFO: 9epoch:train:10201-10300batch: iter_time=9.242e-05, forward_time=0.144, loss_ctc=129.952, loss_interctc_layer6=112.751, loss_interctc_layer12=95.605, loss_interctc_layer15=88.710, loss_interctc_layer21=133.360, loss=112.075, backward_time=0.339, grad_norm=78.276, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.137, optim0_lr0=1.357e-04, train_time=1.557 +[gpua002:0/64] 2024-01-18 05:30:55,269 (trainer:753) INFO: 9epoch:train:10301-10400batch: iter_time=8.784e-05, forward_time=0.213, loss_ctc=110.056, loss_interctc_layer6=112.910, loss_interctc_layer12=96.125, loss_interctc_layer15=88.976, loss_interctc_layer21=112.401, loss=104.094, backward_time=0.381, grad_norm=80.023, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.144, optim0_lr0=1.357e-04, train_time=1.593 +[gpua002:0/64] 2024-01-18 05:33:55,784 (trainer:753) INFO: 9epoch:train:10401-10500batch: iter_time=9.026e-05, forward_time=0.143, loss_ctc=86.534, loss_interctc_layer6=90.616, loss_interctc_layer12=76.401, loss_interctc_layer15=70.683, loss_interctc_layer21=88.240, loss=82.495, backward_time=0.402, grad_norm=47.123, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.136, optim0_lr0=1.356e-04, train_time=1.804 +[gpua002:0/64] 2024-01-18 05:36:55,385 (trainer:753) INFO: 9epoch:train:10501-10600batch: iter_time=1.013e-04, forward_time=0.141, loss_ctc=90.333, loss_interctc_layer6=95.056, loss_interctc_layer12=80.456, loss_interctc_layer15=74.375, loss_interctc_layer21=91.887, loss=86.421, backward_time=0.339, grad_norm=74.475, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.136, optim0_lr0=1.356e-04, train_time=1.797 +[gpua002:0/64] 2024-01-18 05:39:49,791 (trainer:753) INFO: 9epoch:train:10601-10700batch: iter_time=8.959e-05, forward_time=0.142, loss_ctc=90.717, loss_interctc_layer6=93.310, loss_interctc_layer12=79.086, loss_interctc_layer15=73.067, loss_interctc_layer21=92.762, loss=85.788, backward_time=0.366, grad_norm=72.004, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.136, optim0_lr0=1.355e-04, train_time=1.744 +[gpua002:0/64] 2024-01-18 05:42:26,976 (trainer:753) INFO: 9epoch:train:10701-10800batch: iter_time=8.848e-05, forward_time=0.141, loss_ctc=71.527, loss_interctc_layer6=85.358, loss_interctc_layer12=72.709, loss_interctc_layer15=67.466, loss_interctc_layer21=72.911, loss=73.994, backward_time=0.353, grad_norm=55.436, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.136, optim0_lr0=1.355e-04, train_time=1.572 +[gpua002:0/64] 2024-01-18 05:45:05,137 (trainer:753) INFO: 9epoch:train:10801-10900batch: iter_time=8.429e-05, forward_time=0.158, loss_ctc=109.725, loss_interctc_layer6=102.266, loss_interctc_layer12=86.509, loss_interctc_layer15=80.160, loss_interctc_layer21=111.919, loss=98.116, backward_time=0.335, grad_norm=81.752, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.137, optim0_lr0=1.354e-04, train_time=1.581 +[gpua002:0/64] 2024-01-18 05:48:30,069 (trainer:753) INFO: 9epoch:train:10901-11000batch: iter_time=8.654e-05, forward_time=0.150, loss_ctc=99.241, loss_interctc_layer6=92.936, loss_interctc_layer12=78.068, loss_interctc_layer15=72.041, loss_interctc_layer21=101.243, loss=88.706, backward_time=0.391, grad_norm=55.844, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.137, optim0_lr0=1.354e-04, train_time=2.049 +[gpua002:0/64] 2024-01-18 05:51:22,811 (trainer:753) INFO: 9epoch:train:11001-11100batch: iter_time=8.223e-05, forward_time=0.142, loss_ctc=102.700, loss_interctc_layer6=102.694, loss_interctc_layer12=87.922, loss_interctc_layer15=81.778, loss_interctc_layer21=104.353, loss=95.890, backward_time=0.366, grad_norm=65.804, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.136, optim0_lr0=1.353e-04, train_time=1.726 +[gpua002:0/64] 2024-01-18 05:53:35,033 (trainer:753) INFO: 9epoch:train:11101-11200batch: iter_time=8.414e-05, forward_time=0.141, loss_ctc=104.204, loss_interctc_layer6=101.170, loss_interctc_layer12=85.596, loss_interctc_layer15=79.338, loss_interctc_layer21=107.077, loss=95.477, backward_time=0.296, grad_norm=73.979, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.136, optim0_lr0=1.353e-04, train_time=1.323 +[gpua002:0/64] 2024-01-18 05:55:37,943 (multiple_iter_factory:32) INFO: Building 9th iter-factory... +[gpua002:0/64] 2024-01-18 05:55:57,386 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua002:0/64] 2024-01-18 05:56:00,903 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpua002:0/64] 2024-01-18 05:56:00,904 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpua002:0/64] 2024-01-18 05:56:00,917 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpua002:0/64] 2024-01-18 06:01:54,820 (trainer:753) INFO: 9epoch:train:11201-11300batch: iter_time=3.204, forward_time=0.182, loss_ctc=92.114, loss_interctc_layer6=89.831, loss_interctc_layer12=75.285, loss_interctc_layer15=69.516, loss_interctc_layer21=93.812, loss=84.112, backward_time=0.334, grad_norm=61.790, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.138, optim0_lr0=1.352e-04, train_time=4.998 +[gpua002:0/64] 2024-01-18 06:04:01,797 (trainer:753) INFO: 9epoch:train:11301-11400batch: iter_time=8.039e-05, forward_time=0.143, loss_ctc=94.692, loss_interctc_layer6=100.822, loss_interctc_layer12=84.851, loss_interctc_layer15=78.891, loss_interctc_layer21=96.834, loss=91.218, backward_time=0.298, grad_norm=74.888, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.137, optim0_lr0=1.352e-04, train_time=1.270 +[gpua002:0/64] 2024-01-18 06:06:30,457 (trainer:753) INFO: 9epoch:train:11401-11500batch: iter_time=8.303e-05, forward_time=0.142, loss_ctc=110.082, loss_interctc_layer6=104.733, loss_interctc_layer12=88.998, loss_interctc_layer15=82.630, loss_interctc_layer21=112.072, loss=99.703, backward_time=0.325, grad_norm=67.152, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.136, optim0_lr0=1.351e-04, train_time=1.486 +[gpua002:0/64] 2024-01-18 06:09:05,382 (trainer:753) INFO: 9epoch:train:11501-11600batch: iter_time=8.990e-05, forward_time=0.143, loss_ctc=121.964, loss_interctc_layer6=119.057, loss_interctc_layer12=100.340, loss_interctc_layer15=92.631, loss_interctc_layer21=124.379, loss=111.674, backward_time=0.325, grad_norm=80.964, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.136, optim0_lr0=1.351e-04, train_time=1.549 +[gpua002:0/64] 2024-01-18 06:12:24,550 (trainer:753) INFO: 9epoch:train:11601-11700batch: iter_time=8.708e-05, forward_time=0.142, loss_ctc=98.618, loss_interctc_layer6=97.788, loss_interctc_layer12=82.622, loss_interctc_layer15=76.496, loss_interctc_layer21=100.847, loss=91.274, backward_time=0.410, grad_norm=66.354, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.136, optim0_lr0=1.350e-04, train_time=1.991 +[gpua002:0/64] 2024-01-18 06:15:12,674 (trainer:753) INFO: 9epoch:train:11701-11800batch: iter_time=8.781e-05, forward_time=0.141, loss_ctc=83.376, loss_interctc_layer6=89.226, loss_interctc_layer12=74.896, loss_interctc_layer15=68.994, loss_interctc_layer21=84.915, loss=80.281, backward_time=0.356, grad_norm=55.189, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.136, optim0_lr0=1.350e-04, train_time=1.681 +[gpua002:0/64] 2024-01-18 06:17:41,448 (trainer:753) INFO: 9epoch:train:11801-11900batch: iter_time=8.398e-05, forward_time=0.141, loss_ctc=98.491, loss_interctc_layer6=100.280, loss_interctc_layer12=84.856, loss_interctc_layer15=78.731, loss_interctc_layer21=100.759, loss=92.624, backward_time=0.316, grad_norm=74.377, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.136, optim0_lr0=1.349e-04, train_time=1.487 +[gpua002:0/64] 2024-01-18 06:20:17,197 (trainer:753) INFO: 9epoch:train:11901-12000batch: iter_time=9.205e-05, forward_time=0.142, loss_ctc=69.272, loss_interctc_layer6=84.490, loss_interctc_layer12=71.677, loss_interctc_layer15=66.648, loss_interctc_layer21=70.522, loss=72.522, backward_time=0.316, grad_norm=56.684, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.136, optim0_lr0=1.349e-04, train_time=1.558 +[gpua002:0/64] 2024-01-18 06:23:02,337 (trainer:753) INFO: 9epoch:train:12001-12100batch: iter_time=8.583e-05, forward_time=0.141, loss_ctc=85.517, loss_interctc_layer6=88.136, loss_interctc_layer12=74.592, loss_interctc_layer15=69.274, loss_interctc_layer21=87.260, loss=80.956, backward_time=0.359, grad_norm=62.403, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.136, optim0_lr0=1.348e-04, train_time=1.651 +[gpua002:0/64] 2024-01-18 06:26:15,601 (trainer:753) INFO: 9epoch:train:12101-12200batch: iter_time=9.749e-05, forward_time=0.222, loss_ctc=105.991, loss_interctc_layer6=97.792, loss_interctc_layer12=83.164, loss_interctc_layer15=77.110, loss_interctc_layer21=108.310, loss=94.473, backward_time=0.399, grad_norm=66.503, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.147, optim0_lr0=1.348e-04, train_time=1.932 +[gpua002:0/64] 2024-01-18 06:28:55,900 (trainer:753) INFO: 9epoch:train:12201-12300batch: iter_time=9.302e-05, forward_time=0.141, loss_ctc=98.176, loss_interctc_layer6=99.047, loss_interctc_layer12=83.688, loss_interctc_layer15=77.216, loss_interctc_layer21=100.196, loss=91.664, backward_time=0.339, grad_norm=59.284, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.136, optim0_lr0=1.347e-04, train_time=1.604 +[gpua002:0/64] 2024-01-18 06:31:47,182 (trainer:753) INFO: 9epoch:train:12301-12400batch: iter_time=9.027e-05, forward_time=0.142, loss_ctc=119.828, loss_interctc_layer6=108.776, loss_interctc_layer12=92.853, loss_interctc_layer15=86.707, loss_interctc_layer21=122.530, loss=106.139, backward_time=0.335, grad_norm=69.099, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.136, optim0_lr0=1.347e-04, train_time=1.713 +[gpua002:0/64] 2024-01-18 06:35:08,470 (trainer:753) INFO: 9epoch:train:12401-12500batch: iter_time=9.076e-05, forward_time=0.141, loss_ctc=81.823, loss_interctc_layer6=90.579, loss_interctc_layer12=76.374, loss_interctc_layer15=70.258, loss_interctc_layer21=83.485, loss=80.504, backward_time=0.386, grad_norm=72.367, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.136, optim0_lr0=1.346e-04, train_time=2.013 +[gpua002:0/64] 2024-01-18 06:35:28,508 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpua002:0/64] 2024-01-18 06:35:47,677 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua002:0/64] 2024-01-18 06:35:51,234 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpua002:0/64] 2024-01-18 06:35:51,234 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpua002:0/64] 2024-01-18 06:35:51,258 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpua002:0/64] 2024-01-18 06:42:43,083 (trainer:753) INFO: 9epoch:train:12501-12600batch: iter_time=3.134, forward_time=0.144, loss_ctc=93.189, loss_interctc_layer6=93.064, loss_interctc_layer12=78.403, loss_interctc_layer15=72.338, loss_interctc_layer21=95.040, loss=86.407, backward_time=0.302, grad_norm=59.182, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.137, optim0_lr0=1.346e-04, train_time=4.545 +[gpua002:0/64] 2024-01-18 06:45:11,320 (trainer:753) INFO: 9epoch:train:12601-12700batch: iter_time=8.397e-05, forward_time=0.142, loss_ctc=90.930, loss_interctc_layer6=101.188, loss_interctc_layer12=85.179, loss_interctc_layer15=78.649, loss_interctc_layer21=93.002, loss=89.790, backward_time=0.375, grad_norm=71.329, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.137, optim0_lr0=1.345e-04, train_time=1.483 +[gpua002:0/64] 2024-01-18 06:48:13,650 (trainer:753) INFO: 9epoch:train:12701-12800batch: iter_time=8.757e-05, forward_time=0.144, loss_ctc=122.466, loss_interctc_layer6=112.466, loss_interctc_layer12=95.279, loss_interctc_layer15=88.048, loss_interctc_layer21=125.313, loss=108.714, backward_time=0.373, grad_norm=85.722, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.137, optim0_lr0=1.345e-04, train_time=1.823 +[gpua002:0/64] 2024-01-18 06:51:00,084 (trainer:753) INFO: 9epoch:train:12801-12900batch: iter_time=9.247e-05, forward_time=0.143, loss_ctc=100.110, loss_interctc_layer6=110.662, loss_interctc_layer12=94.406, loss_interctc_layer15=86.799, loss_interctc_layer21=102.221, loss=98.840, backward_time=0.377, grad_norm=90.692, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.137, optim0_lr0=1.344e-04, train_time=1.664 +[gpua002:0/64] 2024-01-18 06:54:12,234 (trainer:753) INFO: 9epoch:train:12901-13000batch: iter_time=9.244e-05, forward_time=0.205, loss_ctc=82.489, loss_interctc_layer6=89.803, loss_interctc_layer12=75.810, loss_interctc_layer15=70.122, loss_interctc_layer21=84.232, loss=80.491, backward_time=0.415, grad_norm=52.938, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.143, optim0_lr0=1.344e-04, train_time=1.920 +[gpua002:0/64] 2024-01-18 06:58:04,485 (trainer:753) INFO: 9epoch:train:13001-13100batch: iter_time=9.121e-05, forward_time=0.183, loss_ctc=86.539, loss_interctc_layer6=94.439, loss_interctc_layer12=79.930, loss_interctc_layer15=73.965, loss_interctc_layer21=88.292, loss=84.633, backward_time=0.482, grad_norm=89.734, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.140, optim0_lr0=1.343e-04, train_time=2.323 +[gpua002:0/64] 2024-01-18 07:00:35,950 (trainer:753) INFO: 9epoch:train:13101-13200batch: iter_time=8.696e-05, forward_time=0.142, loss_ctc=85.489, loss_interctc_layer6=93.074, loss_interctc_layer12=78.831, loss_interctc_layer15=72.771, loss_interctc_layer21=86.693, loss=83.371, backward_time=0.369, grad_norm=62.432, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.137, optim0_lr0=1.343e-04, train_time=1.515 +[gpua002:0/64] 2024-01-18 07:03:44,302 (trainer:753) INFO: 9epoch:train:13201-13300batch: iter_time=8.642e-05, forward_time=0.146, loss_ctc=67.950, loss_interctc_layer6=84.404, loss_interctc_layer12=71.643, loss_interctc_layer15=66.356, loss_interctc_layer21=69.350, loss=71.940, backward_time=0.440, grad_norm=52.904, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.136, optim0_lr0=1.342e-04, train_time=1.883 +[gpua002:0/64] 2024-01-18 07:06:41,246 (trainer:753) INFO: 9epoch:train:13301-13400batch: iter_time=8.275e-05, forward_time=0.142, loss_ctc=103.952, loss_interctc_layer6=101.587, loss_interctc_layer12=85.908, loss_interctc_layer15=79.585, loss_interctc_layer21=105.863, loss=95.379, backward_time=0.335, grad_norm=64.954, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.136, optim0_lr0=1.342e-04, train_time=1.769 +[gpua002:0/64] 2024-01-18 07:09:17,471 (trainer:753) INFO: 9epoch:train:13401-13500batch: iter_time=8.215e-05, forward_time=0.141, loss_ctc=97.093, loss_interctc_layer6=91.770, loss_interctc_layer12=77.423, loss_interctc_layer15=71.471, loss_interctc_layer21=99.250, loss=87.401, backward_time=0.364, grad_norm=59.233, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.136, optim0_lr0=1.341e-04, train_time=1.563 +[gpua002:0/64] 2024-01-18 07:11:48,590 (trainer:753) INFO: 9epoch:train:13501-13600batch: iter_time=8.562e-05, forward_time=0.142, loss_ctc=96.690, loss_interctc_layer6=101.127, loss_interctc_layer12=86.677, loss_interctc_layer15=80.671, loss_interctc_layer21=98.622, loss=92.757, backward_time=0.327, grad_norm=79.688, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.136, optim0_lr0=1.341e-04, train_time=1.511 +[gpua002:0/64] 2024-01-18 07:14:42,710 (trainer:753) INFO: 9epoch:train:13601-13700batch: iter_time=8.827e-05, forward_time=0.142, loss_ctc=98.210, loss_interctc_layer6=101.407, loss_interctc_layer12=85.755, loss_interctc_layer15=79.204, loss_interctc_layer21=100.327, loss=92.981, backward_time=0.446, grad_norm=66.438, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.136, optim0_lr0=1.340e-04, train_time=1.741 +[gpua002:0/64] 2024-01-18 07:16:12,484 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpua002:0/64] 2024-01-18 07:16:31,866 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua002:0/64] 2024-01-18 07:16:35,349 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpua002:0/64] 2024-01-18 07:16:35,350 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpua002:0/64] 2024-01-18 07:16:35,354 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpua002:0/64] 2024-01-18 07:21:59,981 (trainer:753) INFO: 9epoch:train:13701-13800batch: iter_time=2.815, forward_time=0.206, loss_ctc=86.589, loss_interctc_layer6=89.177, loss_interctc_layer12=74.619, loss_interctc_layer15=68.858, loss_interctc_layer21=88.305, loss=81.510, backward_time=0.316, grad_norm=75.959, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.138, optim0_lr0=1.340e-04, train_time=4.372 +[gpua002:0/64] 2024-01-18 07:24:22,148 (trainer:753) INFO: 9epoch:train:13801-13900batch: iter_time=7.887e-05, forward_time=0.145, loss_ctc=87.258, loss_interctc_layer6=99.888, loss_interctc_layer12=84.102, loss_interctc_layer15=77.671, loss_interctc_layer21=88.983, loss=87.580, backward_time=0.330, grad_norm=76.014, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.137, optim0_lr0=1.339e-04, train_time=1.422 +[gpua002:0/64] 2024-01-18 07:27:25,409 (trainer:753) INFO: 9epoch:train:13901-14000batch: iter_time=7.981e-05, forward_time=0.142, loss_ctc=102.847, loss_interctc_layer6=103.639, loss_interctc_layer12=87.927, loss_interctc_layer15=81.479, loss_interctc_layer21=105.231, loss=96.224, backward_time=0.345, grad_norm=64.226, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.137, optim0_lr0=1.339e-04, train_time=1.832 +[gpua002:0/64] 2024-01-18 07:30:04,459 (trainer:753) INFO: 9epoch:train:14001-14100batch: iter_time=8.181e-05, forward_time=0.233, loss_ctc=114.835, loss_interctc_layer6=119.361, loss_interctc_layer12=100.521, loss_interctc_layer15=92.800, loss_interctc_layer21=117.319, loss=108.967, backward_time=0.348, grad_norm=78.577, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.160, optim0_lr0=1.338e-04, train_time=1.589 +[gpua002:0/64] 2024-01-18 07:33:02,298 (trainer:753) INFO: 9epoch:train:14101-14200batch: iter_time=8.366e-05, forward_time=0.142, loss_ctc=93.752, loss_interctc_layer6=96.760, loss_interctc_layer12=81.687, loss_interctc_layer15=75.625, loss_interctc_layer21=95.800, loss=88.725, backward_time=0.354, grad_norm=116.665, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.137, optim0_lr0=1.338e-04, train_time=1.778 +[gpua002:0/64] 2024-01-18 07:35:42,354 (trainer:753) INFO: 9epoch:train:14201-14300batch: iter_time=8.332e-05, forward_time=0.142, loss_ctc=80.269, loss_interctc_layer6=89.025, loss_interctc_layer12=74.811, loss_interctc_layer15=68.829, loss_interctc_layer21=81.933, loss=78.973, backward_time=0.316, grad_norm=50.290, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.136, optim0_lr0=1.337e-04, train_time=1.601 +[gpua002:0/64] 2024-01-18 07:39:05,903 (trainer:753) INFO: 9epoch:train:14301-14400batch: iter_time=8.524e-05, forward_time=0.147, loss_ctc=92.232, loss_interctc_layer6=98.770, loss_interctc_layer12=83.760, loss_interctc_layer15=77.182, loss_interctc_layer21=94.343, loss=89.257, backward_time=0.375, grad_norm=68.000, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.140, optim0_lr0=1.337e-04, train_time=2.035 +[gpua002:0/64] 2024-01-18 07:42:19,969 (trainer:753) INFO: 9epoch:train:14401-14500batch: iter_time=8.299e-05, forward_time=0.224, loss_ctc=68.624, loss_interctc_layer6=84.505, loss_interctc_layer12=71.746, loss_interctc_layer15=66.959, loss_interctc_layer21=69.947, loss=72.356, backward_time=0.404, grad_norm=52.312, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.143, optim0_lr0=1.336e-04, train_time=1.940 +[gpua002:0/64] 2024-01-18 07:45:18,330 (trainer:753) INFO: 9epoch:train:14501-14600batch: iter_time=8.117e-05, forward_time=0.142, loss_ctc=80.317, loss_interctc_layer6=87.883, loss_interctc_layer12=74.315, loss_interctc_layer15=68.989, loss_interctc_layer21=82.258, loss=78.752, backward_time=0.371, grad_norm=65.984, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.137, optim0_lr0=1.336e-04, train_time=1.784 +[gpua002:0/64] 2024-01-18 07:48:46,485 (trainer:753) INFO: 9epoch:train:14601-14700batch: iter_time=8.618e-05, forward_time=0.144, loss_ctc=100.333, loss_interctc_layer6=97.017, loss_interctc_layer12=82.134, loss_interctc_layer15=76.008, loss_interctc_layer21=102.576, loss=91.614, backward_time=0.444, grad_norm=60.213, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.137, optim0_lr0=1.335e-04, train_time=2.081 +[gpua002:0/64] 2024-01-18 07:52:06,967 (trainer:753) INFO: 9epoch:train:14701-14800batch: iter_time=8.113e-05, forward_time=0.142, loss_ctc=95.451, loss_interctc_layer6=98.516, loss_interctc_layer12=83.083, loss_interctc_layer15=76.729, loss_interctc_layer21=97.505, loss=90.257, backward_time=0.401, grad_norm=76.590, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.137, optim0_lr0=1.335e-04, train_time=2.005 +[gpua002:0/64] 2024-01-18 07:55:02,111 (trainer:753) INFO: 9epoch:train:14801-14900batch: iter_time=8.784e-05, forward_time=0.143, loss_ctc=110.144, loss_interctc_layer6=108.675, loss_interctc_layer12=92.735, loss_interctc_layer15=86.079, loss_interctc_layer21=112.366, loss=102.000, backward_time=0.403, grad_norm=80.466, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.137, optim0_lr0=1.334e-04, train_time=1.751 +[gpua002:0/64] 2024-01-18 07:57:52,038 (trainer:753) INFO: 9epoch:train:14901-15000batch: iter_time=8.703e-05, forward_time=0.197, loss_ctc=77.439, loss_interctc_layer6=89.651, loss_interctc_layer12=75.727, loss_interctc_layer15=69.881, loss_interctc_layer21=78.759, loss=78.291, backward_time=0.360, grad_norm=71.965, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.138, optim0_lr0=1.334e-04, train_time=1.699 +[gpua002:0/64] 2024-01-18 08:29:25,445 (trainer:352) INFO: 9epoch results: [train] iter_time=0.265, forward_time=0.154, loss_ctc=97.152, loss_interctc_layer6=99.156, loss_interctc_layer12=84.238, loss_interctc_layer15=78.133, loss_interctc_layer21=99.044, loss=91.544, backward_time=0.354, grad_norm=73.169, clip=100.000, loss_scale=1.924e+24, optim_step_time=0.138, optim0_lr0=1.373e-04, train_time=1.962, time=8 hours, 10 minutes and 56.57 seconds, total_count=135000, gpu_max_cached_mem_GB=34.400, [valid] loss_ctc=67.871, cer_ctc=0.297, loss_interctc_layer6=67.889, cer_interctc_layer6=0.292, loss_interctc_layer12=55.806, cer_interctc_layer12=0.227, loss_interctc_layer15=51.717, cer_interctc_layer15=0.202, loss_interctc_layer21=69.172, cer_interctc_layer21=0.297, loss=62.491, time=31 minutes and 9.36 seconds, total_count=42039, gpu_max_cached_mem_GB=34.400 +[gpua002:0/64] 2024-01-18 08:29:46,061 (trainer:407) INFO: The best model has been updated: valid.cer_ctc, valid.total_count +[gpua002:0/64] 2024-01-18 08:29:46,109 (trainer:461) INFO: The model files were removed: exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/4epoch.pth +[gpua002:0/64] 2024-01-18 08:29:46,109 (trainer:286) INFO: 10/45epoch started. Estimated time to finish: 2 weeks, 19 hours and 52 minutes +[gpua002:0/64] 2024-01-18 08:29:47,069 (multiple_iter_factory:32) INFO: Building 0th iter-factory... +[gpua002:0/64] 2024-01-18 08:30:05,287 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua002:0/64] 2024-01-18 08:30:08,664 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpua002:0/64] 2024-01-18 08:30:08,664 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpua002:0/64] 2024-01-18 08:30:08,668 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpua002:0/64] 2024-01-18 08:39:01,100 (trainer:753) INFO: 10epoch:train:1-100batch: iter_time=3.237, forward_time=0.213, loss_ctc=98.259, loss_interctc_layer6=100.620, loss_interctc_layer12=85.825, loss_interctc_layer15=79.979, loss_interctc_layer21=99.967, loss=92.930, backward_time=0.309, grad_norm=66.081, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.138, optim0_lr0=1.333e-04, train_time=5.546 +[gpua002:0/64] 2024-01-18 08:41:45,702 (trainer:753) INFO: 10epoch:train:101-200batch: iter_time=9.468e-05, forward_time=0.140, loss_ctc=76.346, loss_interctc_layer6=86.167, loss_interctc_layer12=73.335, loss_interctc_layer15=68.116, loss_interctc_layer21=77.992, loss=76.391, backward_time=0.341, grad_norm=54.852, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.136, optim0_lr0=1.333e-04, train_time=1.646 +[gpua002:0/64] 2024-01-18 08:45:12,481 (trainer:753) INFO: 10epoch:train:201-300batch: iter_time=9.487e-05, forward_time=0.144, loss_ctc=121.362, loss_interctc_layer6=110.041, loss_interctc_layer12=94.742, loss_interctc_layer15=88.755, loss_interctc_layer21=123.700, loss=107.720, backward_time=0.525, grad_norm=75.280, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.137, optim0_lr0=1.332e-04, train_time=2.068 +srun: Job step aborted: Waiting up to 32 seconds for job step to finish. +slurmstepd: error: *** STEP 2863115.0 ON gpua002 CANCELLED AT 2024-01-18T08:47:12 ***