# Running on gpua049.delta.ncsa.illinois.edu # Started at Sun Jan 14 14:31:59 CST 2024 # SLURMD_NODENAME=gpua049 # SLURM_CLUSTER_NAME=delta # SLURM_CONF=/var/spool/slurmd/conf-cache/slurm.conf # SLURM_CPUS_ON_NODE=64 # SLURM_CPUS_PER_TASK=64 # SLURM_EXPORT_ENV=PATH # SLURM_GET_USER_ENV=1 # SLURM_GPUS_ON_NODE=4 # SLURM_GTIDS=0 # SLURM_JOBID=2858185 # SLURM_JOB_ACCOUNT=bbjs-delta-gpu # SLURM_JOB_CPUS_PER_NODE='64(x16)' # SLURM_JOB_END_TIME=1705437102 # SLURM_JOB_GID=202 # SLURM_JOB_GPUS=0,1,2,3 # SLURM_JOB_ID=2858185 # SLURM_JOB_NAME=exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/train.log # SLURM_JOB_NODELIST='gpua[049-064]' # SLURM_JOB_NUM_NODES=16 # SLURM_JOB_PARTITION=gpuA100x4 # SLURM_JOB_QOS=bbjs-delta-gpu # SLURM_JOB_START_TIME=1705264302 # SLURM_JOB_UID=68077 # SLURM_JOB_USER=peng6 # SLURM_LOCALID=0 # SLURM_MEM_PER_NODE=240000 # SLURM_NNODES=16 # SLURM_NODEID=0 # SLURM_NODELIST='gpua[049-064]' # SLURM_NODE_ALIASES='(null)' # SLURM_OPEN_MODE=a # SLURM_PRIO_PROCESS=0 # SLURM_PROCID=0 # SLURM_SUBMIT_DIR=/scratch/bbjs/peng6/espnet-owsm-ctc/egs2/owsm_v3.1_ctc/s2t1 # SLURM_SUBMIT_HOST=dt-login01.delta.ncsa.illinois.edu # SLURM_TASKS_PER_NODE='1(x16)' # SLURM_TASK_PID=3562997 # SLURM_TOPOLOGY_ADDR=ss00.ss07.gpua049 # SLURM_TOPOLOGY_ADDR_PATTERN=switch.switch.node # SLURM_WORKING_CLUSTER=delta:dt-sched:6817:9984:109 # srun --export=ALL python3 -m espnet2.bin.s2t_train --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-owsm-ctc/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_aea042be-d383-45ce-ab8c-7d7e82f775bc /scratch/bbjs/peng6/espnet-owsm-ctc/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-ctc/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_/scratch/bbjs/peng6/espnet-owsm-ctc/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-ctc/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_/scratch/bbjs/peng6/espnet-owsm-ctc/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-ctc/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_/scratch/bbjs/peng6/espnet-owsm-ctc/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-ctc/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_/scratch/bbjs/peng6/espnet-owsm-ctc/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-ctc/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_/scratch/bbjs/peng6/espnet-owsm-ctc/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-ctc/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_/scratch/bbjs/peng6/espnet-owsm-ctc/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-ctc/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_/scratch/bbjs/peng6/espnet-owsm-ctc/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-ctc/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_/scratch/bbjs/peng6/espnet-owsm-ctc/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-ctc/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_/scratch/bbjs/peng6/espnet-owsm-ctc/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-ctc/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method f/scratch/bbjs/peng6/espnet-owsm-ctc/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-ctc/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_ile:///scratch/bbjs/peng6/espnet-owsm-ctc/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_aea042be-d383-45ce-ab8c-7d7e82f775bc /scratch/bbjs/peng6/espnet-owsm-ctc/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-ctc/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_/scratch/bbjs/peng6/espnet-owsm-ctc/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-ctc/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_/scratch/bbjs/peng6/espnet-owsm-ctc/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-ctc/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_/scratch/bbjs/peng6/espnet-owsm-ctc/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-ctc/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_/scratch/bbjs/peng6/espnet-owsm-ctc/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-ctc/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-owsm-ctc/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_aea042be-d383-45ce-ab8c-7d7e82f775bc raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method fraw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-owsm-ctc/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_aea042be-d383-45ce-ab8c-7d7e82f775bc ile:///scratch/bbjs/peng6/espnet-owsm-ctc/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_aea042be-d383-45ce-ab8c-7d7e82f775bc raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method fraw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-owsm-ctc/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_aea042be-d383-45ce-ab8c-7d7e82f775bc ile:///scratch/bbjs/peng6/espnet-owsm-ctc/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_aea042be-d383-45ce-ab8c-7d7e82f775bc raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method fraw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method fraw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-owsm-ctc/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_aea042be-d383-45ce-ab8c-7d7e82f775bc ile:///scratch/bbjs/peng6/espnet-owsm-ctc/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_aea042be-d383-45ce-ab8c-7d7e82f775bc ile:///scratch/bbjs/peng6/espnet-owsm-ctc/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_aea042be-d383-45ce-ab8c-7d7e82f775bc raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-owsm-ctc/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_aea042be-d383-45ce-ab8c-7d7e82f775bc raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-owsm-ctc/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_aea042be-d383-45ce-ab8c-7d7e82f775bc raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method fraw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method fraw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method fraw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-owsm-ctc/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_aea042be-d383-45ce-ab8c-7d7e82f775bc ile:///scratch/bbjs/peng6/espnet-owsm-ctc/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_aea042be-d383-45ce-ab8c-7d7e82f775bc ile:///scratch/bbjs/peng6/espnet-owsm-ctc/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_aea042be-d383-45ce-ab8c-7d7e82f775bc ile:///scratch/bbjs/peng6/espnet-owsm-ctc/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_aea042be-d383-45ce-ab8c-7d7e82f775bc raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-owsm-ctc/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_aea042be-d383-45ce-ab8c-7d7e82f775bc [gpua049:0/64] 2024-01-14 14:35:19,079 (distributed_c10d:319) INFO: Added key: store_based_barrier_key:1 to store for rank: 0 [gpua049:0/64] 2024-01-14 14:35:19,294 (distributed_c10d:353) INFO: Rank 0: Completed store-based barrier for key:store_based_barrier_key:1 with 64 nodes. [gpua049:0/64] 2024-01-14 14:35:19,320 (s2t:420) INFO: Vocabulary size: 50002 [gpua049:0/64] 2024-01-14 14:35:33,463 (abs_task:1270) INFO: pytorch.version=1.13.1, cuda.available=True, cudnn.version=8500, cudnn.benchmark=False, cudnn.deterministic=True [gpua049:0/64] 2024-01-14 14:35:33,475 (abs_task:1271) INFO: Model structure: ESPnetS2TCTCModel( (frontend): DefaultFrontend( (stft): Stft(n_fft=512, win_length=400, hop_length=160, center=True, normalized=False, onesided=True) (frontend): Frontend() (logmel): LogMel(sr=16000, n_fft=512, n_mels=80, fmin=0, fmax=8000.0, htk=False) ) (specaug): SpecAug( (freq_mask): MaskAlongAxis(mask_width_range=[0, 27], num_mask=2, axis=freq) (time_mask): MaskAlongAxisVariableMaxWidth(mask_width_ratio_range=[0.0, 0.05], num_mask=10, axis=time) ) (normalize): GlobalMVN(stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz, norm_means=True, norm_vars=True) (encoder): EBranchformerCTCEncoder( (embed): Conv2dSubsampling8( (conv): Sequential( (0): Conv2d(1, 1024, kernel_size=(3, 3), stride=(2, 2)) (1): ReLU() (2): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(2, 2)) (3): ReLU() (4): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(2, 2)) (5): ReLU() ) (out): Linear(in_features=9216, out_features=1024, bias=True) (pos_enc): PositionalEncoding( (dropout): Dropout(p=0.1, inplace=False) ) ) (encoders): MultiSequential( (0): EBranchformerEncoderLayer( (attn): MultiHeadedAttention( (linear_q): Linear(in_features=1024, out_features=1024, bias=True) (linear_k): Linear(in_features=1024, out_features=1024, bias=True) (linear_v): Linear(in_features=1024, out_features=1024, bias=True) (linear_out): Linear(in_features=1024, out_features=1024, bias=True) (dropout): Identity() ) (cgmlp): ConvolutionalGatingMLP( (channel_proj1): Sequential( (0): Linear(in_features=1024, out_features=4096, bias=True) (1): GELU(approximate='none') ) (csgu): ConvolutionalSpatialGatingUnit( (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) (act): Identity() (dropout): Dropout(p=0.1, inplace=False) ) (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) ) (feed_forward): PositionwiseFeedForward( (w_1): Linear(in_features=1024, out_features=4096, bias=True) (w_2): Linear(in_features=4096, out_features=1024, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (feed_forward_macaron): PositionwiseFeedForward( (w_1): Linear(in_features=1024, out_features=4096, bias=True) (w_2): Linear(in_features=4096, out_features=1024, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) ) (1): EBranchformerEncoderLayer( (attn): MultiHeadedAttention( (linear_q): Linear(in_features=1024, out_features=1024, bias=True) (linear_k): Linear(in_features=1024, out_features=1024, bias=True) (linear_v): Linear(in_features=1024, out_features=1024, bias=True) (linear_out): Linear(in_features=1024, out_features=1024, bias=True) (dropout): Identity() ) (cgmlp): ConvolutionalGatingMLP( (channel_proj1): Sequential( (0): Linear(in_features=1024, out_features=4096, bias=True) (1): GELU(approximate='none') ) (csgu): ConvolutionalSpatialGatingUnit( (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) (act): Identity() (dropout): Dropout(p=0.1, inplace=False) ) (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) ) (feed_forward): PositionwiseFeedForward( (w_1): Linear(in_features=1024, out_features=4096, bias=True) (w_2): Linear(in_features=4096, out_features=1024, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (feed_forward_macaron): PositionwiseFeedForward( (w_1): Linear(in_features=1024, out_features=4096, bias=True) (w_2): Linear(in_features=4096, out_features=1024, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) ) (2): EBranchformerEncoderLayer( (attn): MultiHeadedAttention( (linear_q): Linear(in_features=1024, out_features=1024, bias=True) (linear_k): Linear(in_features=1024, out_features=1024, bias=True) (linear_v): Linear(in_features=1024, out_features=1024, bias=True) (linear_out): Linear(in_features=1024, out_features=1024, bias=True) (dropout): Identity() ) (cgmlp): ConvolutionalGatingMLP( (channel_proj1): Sequential( (0): Linear(in_features=1024, out_features=4096, bias=True) (1): GELU(approximate='none') ) (csgu): ConvolutionalSpatialGatingUnit( (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) (act): Identity() (dropout): Dropout(p=0.1, inplace=False) ) (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) ) (feed_forward): PositionwiseFeedForward( (w_1): Linear(in_features=1024, out_features=4096, bias=True) (w_2): Linear(in_features=4096, out_features=1024, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (feed_forward_macaron): PositionwiseFeedForward( (w_1): Linear(in_features=1024, out_features=4096, bias=True) (w_2): Linear(in_features=4096, out_features=1024, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (cross_attn): MultiHeadedAttention( (linear_q): Linear(in_features=1024, out_features=1024, bias=True) (linear_k): Linear(in_features=1024, out_features=1024, bias=True) (linear_v): Linear(in_features=1024, out_features=1024, bias=True) (linear_out): Linear(in_features=1024, out_features=1024, bias=True) (dropout): Identity() ) (norm_cross_attn): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) ) (3): EBranchformerEncoderLayer( (attn): MultiHeadedAttention( (linear_q): Linear(in_features=1024, out_features=1024, bias=True) (linear_k): Linear(in_features=1024, out_features=1024, bias=True) (linear_v): Linear(in_features=1024, out_features=1024, bias=True) (linear_out): Linear(in_features=1024, out_features=1024, bias=True) (dropout): Identity() ) (cgmlp): ConvolutionalGatingMLP( (channel_proj1): Sequential( (0): Linear(in_features=1024, out_features=4096, bias=True) (1): GELU(approximate='none') ) (csgu): ConvolutionalSpatialGatingUnit( (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) (act): Identity() (dropout): Dropout(p=0.1, inplace=False) ) (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) ) (feed_forward): PositionwiseFeedForward( (w_1): Linear(in_features=1024, out_features=4096, bias=True) (w_2): Linear(in_features=4096, out_features=1024, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (feed_forward_macaron): PositionwiseFeedForward( (w_1): Linear(in_features=1024, out_features=4096, bias=True) (w_2): Linear(in_features=4096, out_features=1024, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) ) (4): EBranchformerEncoderLayer( (attn): MultiHeadedAttention( (linear_q): Linear(in_features=1024, out_features=1024, bias=True) (linear_k): Linear(in_features=1024, out_features=1024, bias=True) (linear_v): Linear(in_features=1024, out_features=1024, bias=True) (linear_out): Linear(in_features=1024, out_features=1024, bias=True) (dropout): Identity() ) (cgmlp): ConvolutionalGatingMLP( (channel_proj1): Sequential( (0): Linear(in_features=1024, out_features=4096, bias=True) (1): GELU(approximate='none') ) (csgu): ConvolutionalSpatialGatingUnit( (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) (act): Identity() (dropout): Dropout(p=0.1, inplace=False) ) (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) ) (feed_forward): PositionwiseFeedForward( (w_1): Linear(in_features=1024, out_features=4096, bias=True) (w_2): Linear(in_features=4096, out_features=1024, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (feed_forward_macaron): PositionwiseFeedForward( (w_1): Linear(in_features=1024, out_features=4096, bias=True) (w_2): Linear(in_features=4096, out_features=1024, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) ) (5): EBranchformerEncoderLayer( (attn): MultiHeadedAttention( (linear_q): Linear(in_features=1024, out_features=1024, bias=True) (linear_k): Linear(in_features=1024, out_features=1024, bias=True) (linear_v): Linear(in_features=1024, out_features=1024, bias=True) (linear_out): Linear(in_features=1024, out_features=1024, bias=True) (dropout): Identity() ) (cgmlp): ConvolutionalGatingMLP( (channel_proj1): Sequential( (0): Linear(in_features=1024, out_features=4096, bias=True) (1): GELU(approximate='none') ) (csgu): ConvolutionalSpatialGatingUnit( (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) (act): Identity() (dropout): Dropout(p=0.1, inplace=False) ) (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) ) (feed_forward): PositionwiseFeedForward( (w_1): Linear(in_features=1024, out_features=4096, bias=True) (w_2): Linear(in_features=4096, out_features=1024, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (feed_forward_macaron): PositionwiseFeedForward( (w_1): Linear(in_features=1024, out_features=4096, bias=True) (w_2): Linear(in_features=4096, out_features=1024, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (cross_attn): MultiHeadedAttention( (linear_q): Linear(in_features=1024, out_features=1024, bias=True) (linear_k): Linear(in_features=1024, out_features=1024, bias=True) (linear_v): Linear(in_features=1024, out_features=1024, bias=True) (linear_out): Linear(in_features=1024, out_features=1024, bias=True) (dropout): Identity() ) (norm_cross_attn): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) ) (6): EBranchformerEncoderLayer( (attn): MultiHeadedAttention( (linear_q): Linear(in_features=1024, out_features=1024, bias=True) (linear_k): Linear(in_features=1024, out_features=1024, bias=True) (linear_v): Linear(in_features=1024, out_features=1024, bias=True) (linear_out): Linear(in_features=1024, out_features=1024, bias=True) (dropout): Identity() ) (cgmlp): ConvolutionalGatingMLP( (channel_proj1): Sequential( (0): Linear(in_features=1024, out_features=4096, bias=True) (1): GELU(approximate='none') ) (csgu): ConvolutionalSpatialGatingUnit( (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) (act): Identity() (dropout): Dropout(p=0.1, inplace=False) ) (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) ) (feed_forward): PositionwiseFeedForward( (w_1): Linear(in_features=1024, out_features=4096, bias=True) (w_2): Linear(in_features=4096, out_features=1024, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (feed_forward_macaron): PositionwiseFeedForward( (w_1): Linear(in_features=1024, out_features=4096, bias=True) (w_2): Linear(in_features=4096, out_features=1024, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) ) (7): EBranchformerEncoderLayer( (attn): MultiHeadedAttention( (linear_q): Linear(in_features=1024, out_features=1024, bias=True) (linear_k): Linear(in_features=1024, out_features=1024, bias=True) (linear_v): Linear(in_features=1024, out_features=1024, bias=True) (linear_out): Linear(in_features=1024, out_features=1024, bias=True) (dropout): Identity() ) (cgmlp): ConvolutionalGatingMLP( (channel_proj1): Sequential( (0): Linear(in_features=1024, out_features=4096, bias=True) (1): GELU(approximate='none') ) (csgu): ConvolutionalSpatialGatingUnit( (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) (act): Identity() (dropout): Dropout(p=0.1, inplace=False) ) (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) ) (feed_forward): PositionwiseFeedForward( (w_1): Linear(in_features=1024, out_features=4096, bias=True) (w_2): Linear(in_features=4096, out_features=1024, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (feed_forward_macaron): PositionwiseFeedForward( (w_1): Linear(in_features=1024, out_features=4096, bias=True) (w_2): Linear(in_features=4096, out_features=1024, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) ) (8): EBranchformerEncoderLayer( (attn): MultiHeadedAttention( (linear_q): Linear(in_features=1024, out_features=1024, bias=True) (linear_k): Linear(in_features=1024, out_features=1024, bias=True) (linear_v): Linear(in_features=1024, out_features=1024, bias=True) (linear_out): Linear(in_features=1024, out_features=1024, bias=True) (dropout): Identity() ) (cgmlp): ConvolutionalGatingMLP( (channel_proj1): Sequential( (0): Linear(in_features=1024, out_features=4096, bias=True) (1): GELU(approximate='none') ) (csgu): ConvolutionalSpatialGatingUnit( (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) (act): Identity() (dropout): Dropout(p=0.1, inplace=False) ) (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) ) (feed_forward): PositionwiseFeedForward( (w_1): Linear(in_features=1024, out_features=4096, bias=True) (w_2): Linear(in_features=4096, out_features=1024, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (feed_forward_macaron): PositionwiseFeedForward( (w_1): Linear(in_features=1024, out_features=4096, bias=True) (w_2): Linear(in_features=4096, out_features=1024, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (cross_attn): MultiHeadedAttention( (linear_q): Linear(in_features=1024, out_features=1024, bias=True) (linear_k): Linear(in_features=1024, out_features=1024, bias=True) (linear_v): Linear(in_features=1024, out_features=1024, bias=True) (linear_out): Linear(in_features=1024, out_features=1024, bias=True) (dropout): Identity() ) (norm_cross_attn): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) ) (9): EBranchformerEncoderLayer( (attn): MultiHeadedAttention( (linear_q): Linear(in_features=1024, out_features=1024, bias=True) (linear_k): Linear(in_features=1024, out_features=1024, bias=True) (linear_v): Linear(in_features=1024, out_features=1024, bias=True) (linear_out): Linear(in_features=1024, out_features=1024, bias=True) (dropout): Identity() ) (cgmlp): ConvolutionalGatingMLP( (channel_proj1): Sequential( (0): Linear(in_features=1024, out_features=4096, bias=True) (1): GELU(approximate='none') ) (csgu): ConvolutionalSpatialGatingUnit( (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) (act): Identity() (dropout): Dropout(p=0.1, inplace=False) ) (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) ) (feed_forward): PositionwiseFeedForward( (w_1): Linear(in_features=1024, out_features=4096, bias=True) (w_2): Linear(in_features=4096, out_features=1024, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (feed_forward_macaron): PositionwiseFeedForward( (w_1): Linear(in_features=1024, out_features=4096, bias=True) (w_2): Linear(in_features=4096, out_features=1024, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) ) (10): EBranchformerEncoderLayer( (attn): MultiHeadedAttention( (linear_q): Linear(in_features=1024, out_features=1024, bias=True) (linear_k): Linear(in_features=1024, out_features=1024, bias=True) (linear_v): Linear(in_features=1024, out_features=1024, bias=True) (linear_out): Linear(in_features=1024, out_features=1024, bias=True) (dropout): Identity() ) (cgmlp): ConvolutionalGatingMLP( (channel_proj1): Sequential( (0): Linear(in_features=1024, out_features=4096, bias=True) (1): GELU(approximate='none') ) (csgu): ConvolutionalSpatialGatingUnit( (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) (act): Identity() (dropout): Dropout(p=0.1, inplace=False) ) (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) ) (feed_forward): PositionwiseFeedForward( (w_1): Linear(in_features=1024, out_features=4096, bias=True) (w_2): Linear(in_features=4096, out_features=1024, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (feed_forward_macaron): PositionwiseFeedForward( (w_1): Linear(in_features=1024, out_features=4096, bias=True) (w_2): Linear(in_features=4096, out_features=1024, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) ) (11): EBranchformerEncoderLayer( (attn): MultiHeadedAttention( (linear_q): Linear(in_features=1024, out_features=1024, bias=True) (linear_k): Linear(in_features=1024, out_features=1024, bias=True) (linear_v): Linear(in_features=1024, out_features=1024, bias=True) (linear_out): Linear(in_features=1024, out_features=1024, bias=True) (dropout): Identity() ) (cgmlp): ConvolutionalGatingMLP( (channel_proj1): Sequential( (0): Linear(in_features=1024, out_features=4096, bias=True) (1): GELU(approximate='none') ) (csgu): ConvolutionalSpatialGatingUnit( (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) (act): Identity() (dropout): Dropout(p=0.1, inplace=False) ) (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) ) (feed_forward): PositionwiseFeedForward( (w_1): Linear(in_features=1024, out_features=4096, bias=True) (w_2): Linear(in_features=4096, out_features=1024, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (feed_forward_macaron): PositionwiseFeedForward( (w_1): Linear(in_features=1024, out_features=4096, bias=True) (w_2): Linear(in_features=4096, out_features=1024, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (cross_attn): MultiHeadedAttention( (linear_q): Linear(in_features=1024, out_features=1024, bias=True) (linear_k): Linear(in_features=1024, out_features=1024, bias=True) (linear_v): Linear(in_features=1024, out_features=1024, bias=True) (linear_out): Linear(in_features=1024, out_features=1024, bias=True) (dropout): Identity() ) (norm_cross_attn): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) ) (12): EBranchformerEncoderLayer( (attn): MultiHeadedAttention( (linear_q): Linear(in_features=1024, out_features=1024, bias=True) (linear_k): Linear(in_features=1024, out_features=1024, bias=True) (linear_v): Linear(in_features=1024, out_features=1024, bias=True) (linear_out): Linear(in_features=1024, out_features=1024, bias=True) (dropout): Identity() ) (cgmlp): ConvolutionalGatingMLP( (channel_proj1): Sequential( (0): Linear(in_features=1024, out_features=4096, bias=True) (1): GELU(approximate='none') ) (csgu): ConvolutionalSpatialGatingUnit( (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) (act): Identity() (dropout): Dropout(p=0.1, inplace=False) ) (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) ) (feed_forward): PositionwiseFeedForward( (w_1): Linear(in_features=1024, out_features=4096, bias=True) (w_2): Linear(in_features=4096, out_features=1024, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (feed_forward_macaron): PositionwiseFeedForward( (w_1): Linear(in_features=1024, out_features=4096, bias=True) (w_2): Linear(in_features=4096, out_features=1024, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) ) (13): EBranchformerEncoderLayer( (attn): MultiHeadedAttention( (linear_q): Linear(in_features=1024, out_features=1024, bias=True) (linear_k): Linear(in_features=1024, out_features=1024, bias=True) (linear_v): Linear(in_features=1024, out_features=1024, bias=True) (linear_out): Linear(in_features=1024, out_features=1024, bias=True) (dropout): Identity() ) (cgmlp): ConvolutionalGatingMLP( (channel_proj1): Sequential( (0): Linear(in_features=1024, out_features=4096, bias=True) (1): GELU(approximate='none') ) (csgu): ConvolutionalSpatialGatingUnit( (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) (act): Identity() (dropout): Dropout(p=0.1, inplace=False) ) (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) ) (feed_forward): PositionwiseFeedForward( (w_1): Linear(in_features=1024, out_features=4096, bias=True) (w_2): Linear(in_features=4096, out_features=1024, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (feed_forward_macaron): PositionwiseFeedForward( (w_1): Linear(in_features=1024, out_features=4096, bias=True) (w_2): Linear(in_features=4096, out_features=1024, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) ) (14): EBranchformerEncoderLayer( (attn): MultiHeadedAttention( (linear_q): Linear(in_features=1024, out_features=1024, bias=True) (linear_k): Linear(in_features=1024, out_features=1024, bias=True) (linear_v): Linear(in_features=1024, out_features=1024, bias=True) (linear_out): Linear(in_features=1024, out_features=1024, bias=True) (dropout): Identity() ) (cgmlp): ConvolutionalGatingMLP( (channel_proj1): Sequential( (0): Linear(in_features=1024, out_features=4096, bias=True) (1): GELU(approximate='none') ) (csgu): ConvolutionalSpatialGatingUnit( (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) (act): Identity() (dropout): Dropout(p=0.1, inplace=False) ) (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) ) (feed_forward): PositionwiseFeedForward( (w_1): Linear(in_features=1024, out_features=4096, bias=True) (w_2): Linear(in_features=4096, out_features=1024, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (feed_forward_macaron): PositionwiseFeedForward( (w_1): Linear(in_features=1024, out_features=4096, bias=True) (w_2): Linear(in_features=4096, out_features=1024, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (cross_attn): MultiHeadedAttention( (linear_q): Linear(in_features=1024, out_features=1024, bias=True) (linear_k): Linear(in_features=1024, out_features=1024, bias=True) (linear_v): Linear(in_features=1024, out_features=1024, bias=True) (linear_out): Linear(in_features=1024, out_features=1024, bias=True) (dropout): Identity() ) (norm_cross_attn): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) ) (15): EBranchformerEncoderLayer( (attn): MultiHeadedAttention( (linear_q): Linear(in_features=1024, out_features=1024, bias=True) (linear_k): Linear(in_features=1024, out_features=1024, bias=True) (linear_v): Linear(in_features=1024, out_features=1024, bias=True) (linear_out): Linear(in_features=1024, out_features=1024, bias=True) (dropout): Identity() ) (cgmlp): ConvolutionalGatingMLP( (channel_proj1): Sequential( (0): Linear(in_features=1024, out_features=4096, bias=True) (1): GELU(approximate='none') ) (csgu): ConvolutionalSpatialGatingUnit( (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) (act): Identity() (dropout): Dropout(p=0.1, inplace=False) ) (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) ) (feed_forward): PositionwiseFeedForward( (w_1): Linear(in_features=1024, out_features=4096, bias=True) (w_2): Linear(in_features=4096, out_features=1024, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (feed_forward_macaron): PositionwiseFeedForward( (w_1): Linear(in_features=1024, out_features=4096, bias=True) (w_2): Linear(in_features=4096, out_features=1024, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) ) (16): EBranchformerEncoderLayer( (attn): MultiHeadedAttention( (linear_q): Linear(in_features=1024, out_features=1024, bias=True) (linear_k): Linear(in_features=1024, out_features=1024, bias=True) (linear_v): Linear(in_features=1024, out_features=1024, bias=True) (linear_out): Linear(in_features=1024, out_features=1024, bias=True) (dropout): Identity() ) (cgmlp): ConvolutionalGatingMLP( (channel_proj1): Sequential( (0): Linear(in_features=1024, out_features=4096, bias=True) (1): GELU(approximate='none') ) (csgu): ConvolutionalSpatialGatingUnit( (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) (act): Identity() (dropout): Dropout(p=0.1, inplace=False) ) (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) ) (feed_forward): PositionwiseFeedForward( (w_1): Linear(in_features=1024, out_features=4096, bias=True) (w_2): Linear(in_features=4096, out_features=1024, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (feed_forward_macaron): PositionwiseFeedForward( (w_1): Linear(in_features=1024, out_features=4096, bias=True) (w_2): Linear(in_features=4096, out_features=1024, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) ) (17): EBranchformerEncoderLayer( (attn): MultiHeadedAttention( (linear_q): Linear(in_features=1024, out_features=1024, bias=True) (linear_k): Linear(in_features=1024, out_features=1024, bias=True) (linear_v): Linear(in_features=1024, out_features=1024, bias=True) (linear_out): Linear(in_features=1024, out_features=1024, bias=True) (dropout): Identity() ) (cgmlp): ConvolutionalGatingMLP( (channel_proj1): Sequential( (0): Linear(in_features=1024, out_features=4096, bias=True) (1): GELU(approximate='none') ) (csgu): ConvolutionalSpatialGatingUnit( (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) (act): Identity() (dropout): Dropout(p=0.1, inplace=False) ) (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) ) (feed_forward): PositionwiseFeedForward( (w_1): Linear(in_features=1024, out_features=4096, bias=True) (w_2): Linear(in_features=4096, out_features=1024, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (feed_forward_macaron): PositionwiseFeedForward( (w_1): Linear(in_features=1024, out_features=4096, bias=True) (w_2): Linear(in_features=4096, out_features=1024, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (cross_attn): MultiHeadedAttention( (linear_q): Linear(in_features=1024, out_features=1024, bias=True) (linear_k): Linear(in_features=1024, out_features=1024, bias=True) (linear_v): Linear(in_features=1024, out_features=1024, bias=True) (linear_out): Linear(in_features=1024, out_features=1024, bias=True) (dropout): Identity() ) (norm_cross_attn): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) ) (18): EBranchformerEncoderLayer( (attn): MultiHeadedAttention( (linear_q): Linear(in_features=1024, out_features=1024, bias=True) (linear_k): Linear(in_features=1024, out_features=1024, bias=True) (linear_v): Linear(in_features=1024, out_features=1024, bias=True) (linear_out): Linear(in_features=1024, out_features=1024, bias=True) (dropout): Identity() ) (cgmlp): ConvolutionalGatingMLP( (channel_proj1): Sequential( (0): Linear(in_features=1024, out_features=4096, bias=True) (1): GELU(approximate='none') ) (csgu): ConvolutionalSpatialGatingUnit( (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) (act): Identity() (dropout): Dropout(p=0.1, inplace=False) ) (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) ) (feed_forward): PositionwiseFeedForward( (w_1): Linear(in_features=1024, out_features=4096, bias=True) (w_2): Linear(in_features=4096, out_features=1024, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (feed_forward_macaron): PositionwiseFeedForward( (w_1): Linear(in_features=1024, out_features=4096, bias=True) (w_2): Linear(in_features=4096, out_features=1024, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) ) (19): EBranchformerEncoderLayer( (attn): MultiHeadedAttention( (linear_q): Linear(in_features=1024, out_features=1024, bias=True) (linear_k): Linear(in_features=1024, out_features=1024, bias=True) (linear_v): Linear(in_features=1024, out_features=1024, bias=True) (linear_out): Linear(in_features=1024, out_features=1024, bias=True) (dropout): Identity() ) (cgmlp): ConvolutionalGatingMLP( (channel_proj1): Sequential( (0): Linear(in_features=1024, out_features=4096, bias=True) (1): GELU(approximate='none') ) (csgu): ConvolutionalSpatialGatingUnit( (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) (act): Identity() (dropout): Dropout(p=0.1, inplace=False) ) (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) ) (feed_forward): PositionwiseFeedForward( (w_1): Linear(in_features=1024, out_features=4096, bias=True) (w_2): Linear(in_features=4096, out_features=1024, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (feed_forward_macaron): PositionwiseFeedForward( (w_1): Linear(in_features=1024, out_features=4096, bias=True) (w_2): Linear(in_features=4096, out_features=1024, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) ) (20): EBranchformerEncoderLayer( (attn): MultiHeadedAttention( (linear_q): Linear(in_features=1024, out_features=1024, bias=True) (linear_k): Linear(in_features=1024, out_features=1024, bias=True) (linear_v): Linear(in_features=1024, out_features=1024, bias=True) (linear_out): Linear(in_features=1024, out_features=1024, bias=True) (dropout): Identity() ) (cgmlp): ConvolutionalGatingMLP( (channel_proj1): Sequential( (0): Linear(in_features=1024, out_features=4096, bias=True) (1): GELU(approximate='none') ) (csgu): ConvolutionalSpatialGatingUnit( (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) (act): Identity() (dropout): Dropout(p=0.1, inplace=False) ) (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) ) (feed_forward): PositionwiseFeedForward( (w_1): Linear(in_features=1024, out_features=4096, bias=True) (w_2): Linear(in_features=4096, out_features=1024, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (feed_forward_macaron): PositionwiseFeedForward( (w_1): Linear(in_features=1024, out_features=4096, bias=True) (w_2): Linear(in_features=4096, out_features=1024, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (cross_attn): MultiHeadedAttention( (linear_q): Linear(in_features=1024, out_features=1024, bias=True) (linear_k): Linear(in_features=1024, out_features=1024, bias=True) (linear_v): Linear(in_features=1024, out_features=1024, bias=True) (linear_out): Linear(in_features=1024, out_features=1024, bias=True) (dropout): Identity() ) (norm_cross_attn): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) ) (21): EBranchformerEncoderLayer( (attn): MultiHeadedAttention( (linear_q): Linear(in_features=1024, out_features=1024, bias=True) (linear_k): Linear(in_features=1024, out_features=1024, bias=True) (linear_v): Linear(in_features=1024, out_features=1024, bias=True) (linear_out): Linear(in_features=1024, out_features=1024, bias=True) (dropout): Identity() ) (cgmlp): ConvolutionalGatingMLP( (channel_proj1): Sequential( (0): Linear(in_features=1024, out_features=4096, bias=True) (1): GELU(approximate='none') ) (csgu): ConvolutionalSpatialGatingUnit( (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) (act): Identity() (dropout): Dropout(p=0.1, inplace=False) ) (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) ) (feed_forward): PositionwiseFeedForward( (w_1): Linear(in_features=1024, out_features=4096, bias=True) (w_2): Linear(in_features=4096, out_features=1024, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (feed_forward_macaron): PositionwiseFeedForward( (w_1): Linear(in_features=1024, out_features=4096, bias=True) (w_2): Linear(in_features=4096, out_features=1024, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) ) (22): EBranchformerEncoderLayer( (attn): MultiHeadedAttention( (linear_q): Linear(in_features=1024, out_features=1024, bias=True) (linear_k): Linear(in_features=1024, out_features=1024, bias=True) (linear_v): Linear(in_features=1024, out_features=1024, bias=True) (linear_out): Linear(in_features=1024, out_features=1024, bias=True) (dropout): Identity() ) (cgmlp): ConvolutionalGatingMLP( (channel_proj1): Sequential( (0): Linear(in_features=1024, out_features=4096, bias=True) (1): GELU(approximate='none') ) (csgu): ConvolutionalSpatialGatingUnit( (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) (act): Identity() (dropout): Dropout(p=0.1, inplace=False) ) (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) ) (feed_forward): PositionwiseFeedForward( (w_1): Linear(in_features=1024, out_features=4096, bias=True) (w_2): Linear(in_features=4096, out_features=1024, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (feed_forward_macaron): PositionwiseFeedForward( (w_1): Linear(in_features=1024, out_features=4096, bias=True) (w_2): Linear(in_features=4096, out_features=1024, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) ) (23): EBranchformerEncoderLayer( (attn): MultiHeadedAttention( (linear_q): Linear(in_features=1024, out_features=1024, bias=True) (linear_k): Linear(in_features=1024, out_features=1024, bias=True) (linear_v): Linear(in_features=1024, out_features=1024, bias=True) (linear_out): Linear(in_features=1024, out_features=1024, bias=True) (dropout): Identity() ) (cgmlp): ConvolutionalGatingMLP( (channel_proj1): Sequential( (0): Linear(in_features=1024, out_features=4096, bias=True) (1): GELU(approximate='none') ) (csgu): ConvolutionalSpatialGatingUnit( (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) (act): Identity() (dropout): Dropout(p=0.1, inplace=False) ) (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) ) (feed_forward): PositionwiseFeedForward( (w_1): Linear(in_features=1024, out_features=4096, bias=True) (w_2): Linear(in_features=4096, out_features=1024, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (feed_forward_macaron): PositionwiseFeedForward( (w_1): Linear(in_features=1024, out_features=4096, bias=True) (w_2): Linear(in_features=4096, out_features=1024, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (cross_attn): MultiHeadedAttention( (linear_q): Linear(in_features=1024, out_features=1024, bias=True) (linear_k): Linear(in_features=1024, out_features=1024, bias=True) (linear_v): Linear(in_features=1024, out_features=1024, bias=True) (linear_out): Linear(in_features=1024, out_features=1024, bias=True) (dropout): Identity() ) (norm_cross_attn): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) ) (24): EBranchformerEncoderLayer( (attn): MultiHeadedAttention( (linear_q): Linear(in_features=1024, out_features=1024, bias=True) (linear_k): Linear(in_features=1024, out_features=1024, bias=True) (linear_v): Linear(in_features=1024, out_features=1024, bias=True) (linear_out): Linear(in_features=1024, out_features=1024, bias=True) (dropout): Identity() ) (cgmlp): ConvolutionalGatingMLP( (channel_proj1): Sequential( (0): Linear(in_features=1024, out_features=4096, bias=True) (1): GELU(approximate='none') ) (csgu): ConvolutionalSpatialGatingUnit( (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) (act): Identity() (dropout): Dropout(p=0.1, inplace=False) ) (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) ) (feed_forward): PositionwiseFeedForward( (w_1): Linear(in_features=1024, out_features=4096, bias=True) (w_2): Linear(in_features=4096, out_features=1024, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (feed_forward_macaron): PositionwiseFeedForward( (w_1): Linear(in_features=1024, out_features=4096, bias=True) (w_2): Linear(in_features=4096, out_features=1024, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) ) (25): EBranchformerEncoderLayer( (attn): MultiHeadedAttention( (linear_q): Linear(in_features=1024, out_features=1024, bias=True) (linear_k): Linear(in_features=1024, out_features=1024, bias=True) (linear_v): Linear(in_features=1024, out_features=1024, bias=True) (linear_out): Linear(in_features=1024, out_features=1024, bias=True) (dropout): Identity() ) (cgmlp): ConvolutionalGatingMLP( (channel_proj1): Sequential( (0): Linear(in_features=1024, out_features=4096, bias=True) (1): GELU(approximate='none') ) (csgu): ConvolutionalSpatialGatingUnit( (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) (act): Identity() (dropout): Dropout(p=0.1, inplace=False) ) (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) ) (feed_forward): PositionwiseFeedForward( (w_1): Linear(in_features=1024, out_features=4096, bias=True) (w_2): Linear(in_features=4096, out_features=1024, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (feed_forward_macaron): PositionwiseFeedForward( (w_1): Linear(in_features=1024, out_features=4096, bias=True) (w_2): Linear(in_features=4096, out_features=1024, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) ) (26): EBranchformerEncoderLayer( (attn): MultiHeadedAttention( (linear_q): Linear(in_features=1024, out_features=1024, bias=True) (linear_k): Linear(in_features=1024, out_features=1024, bias=True) (linear_v): Linear(in_features=1024, out_features=1024, bias=True) (linear_out): Linear(in_features=1024, out_features=1024, bias=True) (dropout): Identity() ) (cgmlp): ConvolutionalGatingMLP( (channel_proj1): Sequential( (0): Linear(in_features=1024, out_features=4096, bias=True) (1): GELU(approximate='none') ) (csgu): ConvolutionalSpatialGatingUnit( (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) (act): Identity() (dropout): Dropout(p=0.1, inplace=False) ) (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) ) (feed_forward): PositionwiseFeedForward( (w_1): Linear(in_features=1024, out_features=4096, bias=True) (w_2): Linear(in_features=4096, out_features=1024, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (feed_forward_macaron): PositionwiseFeedForward( (w_1): Linear(in_features=1024, out_features=4096, bias=True) (w_2): Linear(in_features=4096, out_features=1024, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): Swish() ) (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (cross_attn): MultiHeadedAttention( (linear_q): Linear(in_features=1024, out_features=1024, bias=True) (linear_k): Linear(in_features=1024, out_features=1024, bias=True) (linear_v): Linear(in_features=1024, out_features=1024, bias=True) (linear_out): Linear(in_features=1024, out_features=1024, bias=True) (dropout): Identity() ) (norm_cross_attn): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) ) ) (after_norm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) (conditioning_layer): Linear(in_features=50002, out_features=1024, bias=True) ) (prompt_encoder): TransformerEncoder( (encoders): MultiSequential( (0): EncoderLayer( (self_attn): MultiHeadedAttention( (linear_q): Linear(in_features=512, out_features=512, bias=True) (linear_k): Linear(in_features=512, out_features=512, bias=True) (linear_v): Linear(in_features=512, out_features=512, bias=True) (linear_out): Linear(in_features=512, out_features=512, bias=True) (dropout): Identity() ) (feed_forward): PositionwiseFeedForward( (w_1): Linear(in_features=512, out_features=2048, bias=True) (w_2): Linear(in_features=2048, out_features=512, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): ReLU() ) (norm1): LayerNorm((512,), eps=1e-12, elementwise_affine=True) (norm2): LayerNorm((512,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) (1): EncoderLayer( (self_attn): MultiHeadedAttention( (linear_q): Linear(in_features=512, out_features=512, bias=True) (linear_k): Linear(in_features=512, out_features=512, bias=True) (linear_v): Linear(in_features=512, out_features=512, bias=True) (linear_out): Linear(in_features=512, out_features=512, bias=True) (dropout): Identity() ) (feed_forward): PositionwiseFeedForward( (w_1): Linear(in_features=512, out_features=2048, bias=True) (w_2): Linear(in_features=2048, out_features=512, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): ReLU() ) (norm1): LayerNorm((512,), eps=1e-12, elementwise_affine=True) (norm2): LayerNorm((512,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) (2): EncoderLayer( (self_attn): MultiHeadedAttention( (linear_q): Linear(in_features=512, out_features=512, bias=True) (linear_k): Linear(in_features=512, out_features=512, bias=True) (linear_v): Linear(in_features=512, out_features=512, bias=True) (linear_out): Linear(in_features=512, out_features=512, bias=True) (dropout): Identity() ) (feed_forward): PositionwiseFeedForward( (w_1): Linear(in_features=512, out_features=2048, bias=True) (w_2): Linear(in_features=2048, out_features=512, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): ReLU() ) (norm1): LayerNorm((512,), eps=1e-12, elementwise_affine=True) (norm2): LayerNorm((512,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) (3): EncoderLayer( (self_attn): MultiHeadedAttention( (linear_q): Linear(in_features=512, out_features=512, bias=True) (linear_k): Linear(in_features=512, out_features=512, bias=True) (linear_v): Linear(in_features=512, out_features=512, bias=True) (linear_out): Linear(in_features=512, out_features=512, bias=True) (dropout): Identity() ) (feed_forward): PositionwiseFeedForward( (w_1): Linear(in_features=512, out_features=2048, bias=True) (w_2): Linear(in_features=2048, out_features=512, bias=True) (dropout): Dropout(p=0.1, inplace=False) (activation): ReLU() ) (norm1): LayerNorm((512,), eps=1e-12, elementwise_affine=True) (norm2): LayerNorm((512,), eps=1e-12, elementwise_affine=True) (dropout): Dropout(p=0.1, inplace=False) ) ) (after_norm): LayerNorm((512,), eps=1e-12, elementwise_affine=True) ) (embed): Embedding(50002, 512) (pos_enc): PositionalEncoding( (dropout): Dropout(p=0.0, inplace=False) ) (embed_proj): Linear(in_features=512, out_features=1024, bias=True) (prompt_proj): Linear(in_features=512, out_features=1024, bias=True) (ctc): CTC( (ctc_lo): Linear(in_features=1024, out_features=50002, bias=True) (ctc_loss): CTCLoss() ) ) Model summary: Class Name: ESPnetS2TCTCModel Total Number of model parameters: 1.01 B Number of trainable parameters: 1.01 B (100.0%) Size: 4.02 GB Type: torch.float32 [gpua049:0/64] 2024-01-14 14:35:33,476 (abs_task:1274) INFO: Optimizer: AdamW ( Parameter Group 0 amsgrad: False betas: [0.9, 0.98] capturable: False eps: 1e-06 foreach: None initial_lr: 0.0002 lr: 1.6666666666666667e-09 maximize: False weight_decay: 0.0 ) [gpua049:0/64] 2024-01-14 14:35:33,476 (abs_task:1275) INFO: Scheduler: PiecewiseLinearWarmupLR(warmup_steps_list=[0, 30000, 60000], warmup_lr_list=[0.0, 5e-05, 0.0002]) [gpua049:0/64] 2024-01-14 14:35:33,506 (abs_task:1284) INFO: Saving the configuration in exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/config.yaml [gpua049:0/64] 2024-01-14 14:35:39,351 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') [gpua049:0/64] 2024-01-14 14:35:40,300 (abs_task:1660) INFO: [valid] dataset: ESPnetDataset( speech: {"path": "dump/raw/dev_v3/wav.scp", "type": "kaldi_ark"} text_prev: {"path": "dump/raw/dev_v3/text.prev", "type": "text"} text_ctc: {"path": "dump/raw/dev_v3/text.ctc", "type": "text"} text: {"path": "dump/raw/dev_v3/text", "type": "text"} preprocess: ) [gpua049:0/64] 2024-01-14 14:35:40,300 (abs_task:1661) INFO: [valid] Batch sampler: UnsortedBatchSampler(N-batch=4671, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/valid/speech_shape, [gpua049:0/64] 2024-01-14 14:35:40,301 (abs_task:1662) INFO: [valid] mini-batch sizes summary: N-batch=4671, mean=256.0, min=256, max=257 gpua049:3563089:3563089 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.49<0> gpua049:3563089:3563089 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation gpua049:3563089:3563089 [0] NCCL INFO cudaDriverVersion 12020 NCCL version 2.14.3+cuda11.7 [gpua049:0/64] 2024-01-14 14:35:45,781 (trainer:298) INFO: 1/45epoch started [gpua049:0/64] 2024-01-14 14:35:45,832 (multiple_iter_factory:32) INFO: Building 0th iter-factory... [gpua049:0/64] 2024-01-14 14:36:05,511 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') [gpua049:0/64] 2024-01-14 14:36:09,049 (abs_task:1660) INFO: [train] dataset: ESPnetDataset( speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} preprocess: ) [gpua049:0/64] 2024-01-14 14:36:09,049 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, [gpua049:0/64] 2024-01-14 14:36:09,053 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 gpua062:238245:238245 [0] NCCL INFO cudaDriverVersion 12020 gpua062:238245:238245 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.62<0> gpua062:238245:238245 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation gpua062:238245:238298 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.62<0> gpua062:238245:238298 [0] NCCL INFO Using network IB gpua062:238245:238298 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 gpua062:238245:238298 [0] NCCL INFO Trees [0] 53/-1/-1->52->57 [1] 53/48/-1->52->45 gpua062:238245:238298 [0] NCCL INFO Channel 00/0 : 51[c7000] -> 52[7000] [receive] via NET/IB/0 gpua062:238245:238298 [0] NCCL INFO Channel 01/0 : 51[c7000] -> 52[7000] [receive] via NET/IB/0 gpua062:238245:238298 [0] NCCL INFO Channel 00/0 : 52[7000] -> 53[46000] via P2P/IPC/read gpua062:238245:238298 [0] NCCL INFO Channel 01/0 : 52[7000] -> 53[46000] via P2P/IPC/read gpua062:238245:238298 [0] NCCL INFO Connected all rings gpua062:238245:238298 [0] NCCL INFO Channel 01/0 : 48[7000] -> 52[7000] [receive] via NET/IB/0 gpua062:238245:238298 [0] NCCL INFO Channel 00/0 : 52[7000] -> 57[46000] [send] via NET/IB/0 gpua062:238245:238298 [0] NCCL INFO Channel 01/0 : 45[46000] -> 52[7000] [receive] via NET/IB/0 gpua062:238245:238298 [0] NCCL INFO Channel 01/0 : 52[7000] -> 45[46000] [send] via NET/IB/0 gpua062:238245:238298 [0] NCCL INFO Channel 00/0 : 57[46000] -> 52[7000] [receive] via NET/IB/0 gpua062:238245:238298 [0] NCCL INFO Channel 01/0 : 52[7000] -> 48[7000] [send] via NET/IB/0 gpua062:238245:238298 [0] NCCL INFO Connected all trees gpua062:238245:238298 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 gpua062:238245:238298 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer gpua062:238245:238298 [0] NCCL INFO comm 0x55730f7d4440 rank 52 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE gpua055:284216:284216 [1] NCCL INFO cudaDriverVersion 12020 gpua055:284216:284216 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.55<0> gpua055:284216:284216 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation gpua055:284216:284270 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.55<0> gpua055:284216:284270 [1] NCCL INFO Using network IB gpua055:284216:284270 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 gpua055:284216:284270 [1] NCCL INFO Trees [0] 26/20/-1->25->24 [1] 26/-1/-1->25->24 gpua055:284216:284270 [1] NCCL INFO Channel 00/0 : 25[46000] -> 26[85000] via P2P/IPC/read gpua055:284216:284270 [1] NCCL INFO Channel 01/0 : 25[46000] -> 26[85000] via P2P/IPC/read gpua055:284216:284270 [1] NCCL INFO Connected all rings gpua055:284216:284270 [1] NCCL INFO Channel 00/0 : 20[7000] -> 25[46000] [receive] via NET/IB/0 gpua055:284216:284270 [1] NCCL INFO Channel 00/0 : 25[46000] -> 20[7000] [send] via NET/IB/0 gpua059:191941:191941 [2] NCCL INFO cudaDriverVersion 12020 gpua059:191941:191941 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.59<0> gpua059:191941:191941 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation gpua059:191941:191992 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.59<0> gpua059:191941:191992 [2] NCCL INFO Using network IB gpua059:191941:191992 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 gpua059:191941:191992 [2] NCCL INFO Trees [0] 43/-1/-1->42->41 [1] 43/-1/-1->42->41 gpua059:191941:191992 [2] NCCL INFO Channel 00/0 : 42[85000] -> 43[c7000] via P2P/IPC/read gpua059:191941:191992 [2] NCCL INFO Channel 01/0 : 42[85000] -> 43[c7000] via P2P/IPC/read gpua059:191941:191992 [2] NCCL INFO Connected all rings gpua059:191941:191992 [2] NCCL INFO Channel 00/0 : 42[85000] -> 41[46000] via P2P/IPC/read gpua059:191941:191992 [2] NCCL INFO Channel 01/0 : 42[85000] -> 41[46000] via P2P/IPC/read gpua060:348865:348865 [0] NCCL INFO cudaDriverVersion 12020 gpua060:348865:348865 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.60<0> gpua060:348865:348865 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation gpua060:348865:348917 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.60<0> gpua060:348865:348917 [0] NCCL INFO Using network IB gpua060:348865:348917 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 gpua060:348865:348917 [0] NCCL INFO Trees [0] 45/-1/-1->44->40 [1] 45/36/-1->44->29 gpua060:348865:348917 [0] NCCL INFO Channel 00/0 : 43[c7000] -> 44[7000] [receive] via NET/IB/0 gpua060:348865:348917 [0] NCCL INFO Channel 01/0 : 43[c7000] -> 44[7000] [receive] via NET/IB/0 gpua060:348865:348917 [0] NCCL INFO Channel 00/0 : 44[7000] -> 45[46000] via P2P/IPC/read gpua060:348865:348917 [0] NCCL INFO Channel 01/0 : 44[7000] -> 45[46000] via P2P/IPC/read gpua060:348865:348917 [0] NCCL INFO Connected all rings gpua062:238246:238246 [1] NCCL INFO cudaDriverVersion 12020 gpua062:238246:238246 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.62<0> gpua062:238246:238246 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation gpua062:238246:238301 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.62<0> gpua062:238246:238301 [1] NCCL INFO Using network IB gpua062:238246:238301 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 gpua062:238246:238301 [1] NCCL INFO Trees [0] 54/-1/-1->53->52 [1] 54/56/-1->53->52 gpua062:238246:238301 [1] NCCL INFO Channel 00/0 : 53[46000] -> 54[85000] via P2P/IPC/read gpua062:238246:238301 [1] NCCL INFO Channel 01/0 : 53[46000] -> 54[85000] via P2P/IPC/read gpua062:238246:238301 [1] NCCL INFO Connected all rings gpua062:238246:238301 [1] NCCL INFO Channel 01/0 : 53[46000] -> 56[7000] [send] via NET/IB/0 gpua062:238246:238301 [1] NCCL INFO Channel 01/0 : 56[7000] -> 53[46000] [receive] via NET/IB/0 gpua055:284216:284270 [1] NCCL INFO Channel 00/0 : 25[46000] -> 24[7000] via P2P/IPC/read gpua055:284216:284270 [1] NCCL INFO Channel 01/0 : 25[46000] -> 24[7000] via P2P/IPC/read gpua055:284216:284270 [1] NCCL INFO Connected all trees gpua055:284216:284270 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 gpua055:284216:284270 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer gpua055:284216:284270 [1] NCCL INFO comm 0x56035c2b2c60 rank 25 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE gpua059:191941:191992 [2] NCCL INFO Connected all trees gpua059:191941:191992 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 gpua059:191941:191992 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer gpua059:191941:191992 [2] NCCL INFO comm 0x55e80f26d110 rank 42 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE gpua060:348865:348917 [0] NCCL INFO Channel 00/0 : 40[7000] -> 44[7000] [receive] via NET/IB/0 gpua060:348865:348917 [0] NCCL INFO Channel 01/0 : 36[7000] -> 44[7000] [receive] via NET/IB/0 gpua060:348865:348917 [0] NCCL INFO Channel 01/0 : 29[46000] -> 44[7000] [receive] via NET/IB/0 gpua060:348865:348917 [0] NCCL INFO Channel 01/0 : 44[7000] -> 29[46000] [send] via NET/IB/0 gpua060:348865:348917 [0] NCCL INFO Channel 01/0 : 44[7000] -> 36[7000] [send] via NET/IB/0 gpua060:348865:348917 [0] NCCL INFO Channel 00/0 : 44[7000] -> 40[7000] [send] via NET/IB/0 gpua060:348865:348917 [0] NCCL INFO Connected all trees gpua060:348865:348917 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 gpua060:348865:348917 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer gpua060:348865:348917 [0] NCCL INFO comm 0x55fa5f6785c0 rank 44 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE gpua062:238246:238301 [1] NCCL INFO Channel 00/0 : 53[46000] -> 52[7000] via P2P/IPC/read gpua062:238246:238301 [1] NCCL INFO Channel 01/0 : 53[46000] -> 52[7000] via P2P/IPC/read gpua062:238246:238301 [1] NCCL INFO Connected all trees gpua062:238246:238301 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 gpua062:238246:238301 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer gpua062:238246:238301 [1] NCCL INFO comm 0x55dfcd44eec0 rank 53 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE gpua055:284217:284217 [2] NCCL INFO cudaDriverVersion 12020 gpua055:284217:284217 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.55<0> gpua055:284217:284217 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation gpua055:284217:284268 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.55<0> gpua055:284217:284268 [2] NCCL INFO Using network IB gpua055:284217:284268 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 gpua055:284217:284268 [2] NCCL INFO Trees [0] 27/-1/-1->26->25 [1] 27/-1/-1->26->25 gpua055:284217:284268 [2] NCCL INFO Channel 00/0 : 26[85000] -> 27[c7000] via P2P/IPC/read gpua055:284217:284268 [2] NCCL INFO Channel 01/0 : 26[85000] -> 27[c7000] via P2P/IPC/read gpua055:284217:284268 [2] NCCL INFO Connected all rings gpua055:284217:284268 [2] NCCL INFO Channel 00/0 : 26[85000] -> 25[46000] via P2P/IPC/read gpua055:284217:284268 [2] NCCL INFO Channel 01/0 : 26[85000] -> 25[46000] via P2P/IPC/read gpua059:191942:191942 [3] NCCL INFO cudaDriverVersion 12020 gpua059:191942:191942 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.59<0> gpua059:191942:191942 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation gpua059:191942:191995 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.59<0> gpua059:191942:191995 [3] NCCL INFO Using network IB gpua059:191942:191995 [3] NCCL INFO Setting affinity for GPU 3 to ffff gpua059:191942:191995 [3] NCCL INFO Trees [0] -1/-1/-1->43->42 [1] -1/-1/-1->43->42 gpua059:191942:191995 [3] NCCL INFO Channel 00/0 : 43[c7000] -> 44[7000] [send] via NET/IB/0 gpua059:191942:191995 [3] NCCL INFO Channel 01/0 : 43[c7000] -> 44[7000] [send] via NET/IB/0 gpua059:191942:191995 [3] NCCL INFO Connected all rings gpua059:191942:191995 [3] NCCL INFO Channel 00/0 : 43[c7000] -> 42[85000] via P2P/IPC/read gpua059:191942:191995 [3] NCCL INFO Channel 01/0 : 43[c7000] -> 42[85000] via P2P/IPC/read gpua060:348867:348867 [2] NCCL INFO cudaDriverVersion 12020 gpua060:348867:348867 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.60<0> gpua060:348867:348867 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation gpua060:348867:348916 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.60<0> gpua060:348867:348916 [2] NCCL INFO Using network IB gpua060:348867:348916 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 gpua060:348867:348916 [2] NCCL INFO Trees [0] 47/-1/-1->46->45 [1] 47/-1/-1->46->45 gpua060:348867:348916 [2] NCCL INFO Channel 00/0 : 46[85000] -> 47[c7000] via P2P/IPC/read gpua060:348867:348916 [2] NCCL INFO Channel 01/0 : 46[85000] -> 47[c7000] via P2P/IPC/read gpua060:348867:348916 [2] NCCL INFO Connected all rings gpua060:348867:348916 [2] NCCL INFO Channel 00/0 : 46[85000] -> 45[46000] via P2P/IPC/read gpua060:348867:348916 [2] NCCL INFO Channel 01/0 : 46[85000] -> 45[46000] via P2P/IPC/read gpua062:238247:238247 [2] NCCL INFO cudaDriverVersion 12020 gpua062:238247:238247 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.62<0> gpua062:238247:238247 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation gpua062:238247:238299 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.62<0> gpua062:238247:238299 [2] NCCL INFO Using network IB gpua062:238247:238299 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 gpua062:238247:238299 [2] NCCL INFO Trees [0] 55/-1/-1->54->53 [1] 55/-1/-1->54->53 gpua062:238247:238299 [2] NCCL INFO Channel 00/0 : 54[85000] -> 55[c7000] via P2P/IPC/read gpua062:238247:238299 [2] NCCL INFO Channel 01/0 : 54[85000] -> 55[c7000] via P2P/IPC/read gpua062:238247:238299 [2] NCCL INFO Connected all rings gpua062:238247:238299 [2] NCCL INFO Channel 00/0 : 54[85000] -> 53[46000] via P2P/IPC/read gpua062:238247:238299 [2] NCCL INFO Channel 01/0 : 54[85000] -> 53[46000] via P2P/IPC/read gpua055:284217:284268 [2] NCCL INFO Connected all trees gpua055:284217:284268 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 gpua055:284217:284268 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer gpua055:284217:284268 [2] NCCL INFO comm 0x560d32459ce0 rank 26 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE gpua059:191942:191995 [3] NCCL INFO Connected all trees gpua059:191942:191995 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 gpua059:191942:191995 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer gpua059:191942:191995 [3] NCCL INFO comm 0x555a286cabe0 rank 43 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE gpua060:348867:348916 [2] NCCL INFO Connected all trees gpua060:348867:348916 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 gpua060:348867:348916 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer gpua060:348867:348916 [2] NCCL INFO comm 0x55f84739cde0 rank 46 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE gpua062:238247:238299 [2] NCCL INFO Connected all trees gpua062:238247:238299 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 gpua062:238247:238299 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer gpua062:238247:238299 [2] NCCL INFO comm 0x557309baf080 rank 54 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE gpua059:191940:191940 [1] NCCL INFO cudaDriverVersion 12020 gpua059:191940:191940 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.59<0> gpua059:191940:191940 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation gpua059:191940:191993 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.59<0> gpua059:191940:191993 [1] NCCL INFO Using network IB gpua059:191940:191993 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 gpua059:191940:191993 [1] NCCL INFO Trees [0] 42/36/-1->41->40 [1] 42/-1/-1->41->40 gpua059:191940:191993 [1] NCCL INFO Channel 00/0 : 41[46000] -> 42[85000] via P2P/IPC/read gpua059:191940:191993 [1] NCCL INFO Channel 01/0 : 41[46000] -> 42[85000] via P2P/IPC/read gpua059:191940:191993 [1] NCCL INFO Connected all rings gpua059:191940:191993 [1] NCCL INFO Channel 00/0 : 36[7000] -> 41[46000] [receive] via NET/IB/0 gpua059:191940:191993 [1] NCCL INFO Channel 00/0 : 41[46000] -> 36[7000] [send] via NET/IB/0 gpua060:348866:348866 [1] NCCL INFO cudaDriverVersion 12020 gpua060:348866:348866 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.60<0> gpua060:348866:348866 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation gpua060:348866:348915 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.60<0> gpua060:348866:348915 [1] NCCL INFO Using network IB gpua060:348866:348915 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 gpua060:348866:348915 [1] NCCL INFO Trees [0] 46/-1/-1->45->44 [1] 46/52/-1->45->44 gpua060:348866:348915 [1] NCCL INFO Channel 00/0 : 45[46000] -> 46[85000] via P2P/IPC/read gpua060:348866:348915 [1] NCCL INFO Channel 01/0 : 45[46000] -> 46[85000] via P2P/IPC/read gpua060:348866:348915 [1] NCCL INFO Connected all rings gpua060:348866:348915 [1] NCCL INFO Channel 01/0 : 45[46000] -> 52[7000] [send] via NET/IB/0 gpua060:348866:348915 [1] NCCL INFO Channel 01/0 : 52[7000] -> 45[46000] [receive] via NET/IB/0 gpua059:191940:191993 [1] NCCL INFO Channel 00/0 : 41[46000] -> 40[7000] via P2P/IPC/read gpua059:191940:191993 [1] NCCL INFO Channel 01/0 : 41[46000] -> 40[7000] via P2P/IPC/read gpua059:191940:191993 [1] NCCL INFO Connected all trees gpua059:191940:191993 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 gpua059:191940:191993 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer gpua059:191940:191993 [1] NCCL INFO comm 0x55c992a34460 rank 41 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE gpua060:348866:348915 [1] NCCL INFO Channel 00/0 : 45[46000] -> 44[7000] via P2P/IPC/read gpua060:348866:348915 [1] NCCL INFO Channel 01/0 : 45[46000] -> 44[7000] via P2P/IPC/read gpua060:348866:348915 [1] NCCL INFO Connected all trees gpua060:348866:348915 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 gpua060:348866:348915 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer gpua060:348866:348915 [1] NCCL INFO comm 0x55e76c0c43c0 rank 45 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE gpua060:348868:348868 [3] NCCL INFO cudaDriverVersion 12020 gpua060:348868:348868 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.60<0> gpua060:348868:348868 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation gpua060:348868:348918 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.60<0> gpua060:348868:348918 [3] NCCL INFO Using network IB gpua060:348868:348918 [3] NCCL INFO Setting affinity for GPU 3 to ffff gpua060:348868:348918 [3] NCCL INFO Trees [0] -1/-1/-1->47->46 [1] -1/-1/-1->47->46 gpua060:348868:348918 [3] NCCL INFO Channel 00/0 : 47[c7000] -> 48[7000] [send] via NET/IB/0 gpua060:348868:348918 [3] NCCL INFO Channel 01/0 : 47[c7000] -> 48[7000] [send] via NET/IB/0 gpua060:348868:348918 [3] NCCL INFO Connected all rings gpua060:348868:348918 [3] NCCL INFO Channel 00/0 : 47[c7000] -> 46[85000] via P2P/IPC/read gpua060:348868:348918 [3] NCCL INFO Channel 01/0 : 47[c7000] -> 46[85000] via P2P/IPC/read gpua062:238248:238248 [3] NCCL INFO cudaDriverVersion 12020 gpua062:238248:238248 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.62<0> gpua062:238248:238248 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation gpua062:238248:238300 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.62<0> gpua062:238248:238300 [3] NCCL INFO Using network IB gpua062:238248:238300 [3] NCCL INFO Setting affinity for GPU 3 to ffff gpua062:238248:238300 [3] NCCL INFO Trees [0] -1/-1/-1->55->54 [1] -1/-1/-1->55->54 gpua062:238248:238300 [3] NCCL INFO Channel 00/0 : 55[c7000] -> 56[7000] [send] via NET/IB/0 gpua062:238248:238300 [3] NCCL INFO Channel 01/0 : 55[c7000] -> 56[7000] [send] via NET/IB/0 gpua062:238248:238300 [3] NCCL INFO Connected all rings gpua062:238248:238300 [3] NCCL INFO Channel 00/0 : 55[c7000] -> 54[85000] via P2P/IPC/read gpua062:238248:238300 [3] NCCL INFO Channel 01/0 : 55[c7000] -> 54[85000] via P2P/IPC/read gpua060:348868:348918 [3] NCCL INFO Connected all trees gpua060:348868:348918 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 gpua060:348868:348918 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer gpua060:348868:348918 [3] NCCL INFO comm 0x55fb8bb7dc90 rank 47 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE gpua062:238248:238300 [3] NCCL INFO Connected all trees gpua062:238248:238300 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 gpua062:238248:238300 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer gpua062:238248:238300 [3] NCCL INFO comm 0x55e0af8b6ca0 rank 55 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE gpua059:191939:191939 [0] NCCL INFO cudaDriverVersion 12020 gpua059:191939:191939 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.59<0> gpua059:191939:191939 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation gpua059:191939:191994 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.59<0> gpua059:191939:191994 [0] NCCL INFO Using network IB gpua059:191939:191994 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 gpua059:191939:191994 [0] NCCL INFO Trees [0] 41/44/-1->40->49 [1] 41/-1/-1->40->37 gpua059:191939:191994 [0] NCCL INFO Channel 00/0 : 39[c7000] -> 40[7000] [receive] via NET/IB/0 gpua059:191939:191994 [0] NCCL INFO Channel 01/0 : 39[c7000] -> 40[7000] [receive] via NET/IB/0 gpua059:191939:191994 [0] NCCL INFO Channel 00/0 : 40[7000] -> 41[46000] via P2P/IPC/read gpua059:191939:191994 [0] NCCL INFO Channel 01/0 : 40[7000] -> 41[46000] via P2P/IPC/read gpua059:191939:191994 [0] NCCL INFO Connected all rings gpua055:284215:284215 [0] NCCL INFO cudaDriverVersion 12020 gpua055:284215:284215 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.55<0> gpua055:284215:284215 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation gpua055:284215:284271 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.55<0> gpua055:284215:284271 [0] NCCL INFO Using network IB gpua055:284215:284271 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 gpua055:284215:284271 [0] NCCL INFO Trees [0] 25/28/-1->24->16 [1] 25/-1/-1->24->21 gpua055:284215:284271 [0] NCCL INFO Channel 00/0 : 23[c7000] -> 24[7000] [receive] via NET/IB/0 gpua055:284215:284271 [0] NCCL INFO Channel 01/0 : 23[c7000] -> 24[7000] [receive] via NET/IB/0 gpua055:284215:284271 [0] NCCL INFO Channel 00/0 : 24[7000] -> 25[46000] via P2P/IPC/read gpua055:284215:284271 [0] NCCL INFO Channel 01/0 : 24[7000] -> 25[46000] via P2P/IPC/read gpua055:284215:284271 [0] NCCL INFO Connected all rings gpua059:191939:191994 [0] NCCL INFO Channel 01/0 : 37[46000] -> 40[7000] [receive] via NET/IB/0 gpua059:191939:191994 [0] NCCL INFO Channel 00/0 : 40[7000] -> 44[7000] [send] via NET/IB/0 gpua059:191939:191994 [0] NCCL INFO Channel 00/0 : 40[7000] -> 49[46000] [send] via NET/IB/0 gpua059:191939:191994 [0] NCCL INFO Channel 00/0 : 49[46000] -> 40[7000] [receive] via NET/IB/0 gpua059:191939:191994 [0] NCCL INFO Channel 00/0 : 44[7000] -> 40[7000] [receive] via NET/IB/0 gpua059:191939:191994 [0] NCCL INFO Channel 01/0 : 40[7000] -> 37[46000] [send] via NET/IB/0 gpua059:191939:191994 [0] NCCL INFO Connected all trees gpua059:191939:191994 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 gpua059:191939:191994 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer gpua059:191939:191994 [0] NCCL INFO comm 0x5588acab4260 rank 40 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE gpua055:284215:284271 [0] NCCL INFO Channel 01/0 : 21[46000] -> 24[7000] [receive] via NET/IB/0 gpua055:284215:284271 [0] NCCL INFO Channel 00/0 : 24[7000] -> 28[7000] [send] via NET/IB/0 gpua055:284215:284271 [0] NCCL INFO Channel 00/0 : 16[7000] -> 24[7000] [receive] via NET/IB/0 gpua055:284215:284271 [0] NCCL INFO Channel 00/0 : 24[7000] -> 16[7000] [send] via NET/IB/0 gpua055:284215:284271 [0] NCCL INFO Channel 00/0 : 28[7000] -> 24[7000] [receive] via NET/IB/0 gpua055:284215:284271 [0] NCCL INFO Channel 01/0 : 24[7000] -> 21[46000] [send] via NET/IB/0 gpua055:284215:284271 [0] NCCL INFO Connected all trees gpua055:284215:284271 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 gpua055:284215:284271 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer gpua055:284215:284271 [0] NCCL INFO comm 0x5576e44c2a60 rank 24 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE gpua055:284218:284218 [3] NCCL INFO cudaDriverVersion 12020 gpua055:284218:284218 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.55<0> gpua055:284218:284218 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation gpua055:284218:284269 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.55<0> gpua055:284218:284269 [3] NCCL INFO Using network IB gpua055:284218:284269 [3] NCCL INFO Setting affinity for GPU 3 to ffff gpua055:284218:284269 [3] NCCL INFO Trees [0] -1/-1/-1->27->26 [1] -1/-1/-1->27->26 gpua055:284218:284269 [3] NCCL INFO Channel 00/0 : 27[c7000] -> 28[7000] [send] via NET/IB/0 gpua055:284218:284269 [3] NCCL INFO Channel 01/0 : 27[c7000] -> 28[7000] [send] via NET/IB/0 gpua055:284218:284269 [3] NCCL INFO Connected all rings gpua055:284218:284269 [3] NCCL INFO Channel 00/0 : 27[c7000] -> 26[85000] via P2P/IPC/read gpua055:284218:284269 [3] NCCL INFO Channel 01/0 : 27[c7000] -> 26[85000] via P2P/IPC/read gpua055:284218:284269 [3] NCCL INFO Connected all trees gpua055:284218:284269 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 gpua055:284218:284269 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer gpua055:284218:284269 [3] NCCL INFO comm 0x55b306d123e0 rank 27 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE gpua050:169710:169710 [3] NCCL INFO cudaDriverVersion 12020 gpua050:169710:169710 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.50<0> gpua050:169710:169710 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation gpua050:169710:169764 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.50<0> gpua050:169710:169764 [3] NCCL INFO Using network IB gpua050:169710:169764 [3] NCCL INFO Setting affinity for GPU 3 to ffff gpua050:169710:169764 [3] NCCL INFO Trees [0] -1/-1/-1->7->6 [1] -1/-1/-1->7->6 gpua050:169710:169764 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [send] via NET/IB/0 gpua050:169710:169764 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [send] via NET/IB/0 gpua050:169710:169764 [3] NCCL INFO Connected all rings gpua050:169710:169764 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 6[85000] via P2P/IPC/read gpua050:169710:169764 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 6[85000] via P2P/IPC/read gpua050:169710:169764 [3] NCCL INFO Connected all trees gpua050:169710:169764 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 gpua050:169710:169764 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer gpua050:169710:169764 [3] NCCL INFO comm 0x55ba399329a0 rank 7 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE gpua054:154664:154664 [1] NCCL INFO cudaDriverVersion 12020 gpua054:154664:154664 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.54<0> gpua054:154664:154664 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation gpua054:154664:154725 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.54<0> gpua054:154664:154725 [1] NCCL INFO Using network IB gpua054:154664:154725 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 gpua054:154664:154725 [1] NCCL INFO Trees [0] 22/-1/-1->21->20 [1] 22/24/-1->21->20 gpua054:154664:154725 [1] NCCL INFO Channel 00/0 : 21[46000] -> 22[85000] via P2P/IPC/read gpua054:154664:154725 [1] NCCL INFO Channel 01/0 : 21[46000] -> 22[85000] via P2P/IPC/read gpua054:154664:154725 [1] NCCL INFO Connected all rings gpua054:154664:154725 [1] NCCL INFO Channel 01/0 : 21[46000] -> 24[7000] [send] via NET/IB/0 gpua054:154664:154725 [1] NCCL INFO Channel 01/0 : 24[7000] -> 21[46000] [receive] via NET/IB/0 gpua050:169707:169707 [0] NCCL INFO cudaDriverVersion 12020 gpua050:169707:169707 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.50<0> gpua050:169707:169707 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation gpua050:169707:169763 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.50<0> gpua050:169707:169763 [0] NCCL INFO Using network IB gpua050:169707:169763 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 gpua050:169707:169763 [0] NCCL INFO Trees [0] 5/-1/-1->4->9 [1] 5/0/-1->4->12 gpua050:169707:169763 [0] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [receive] via NET/IB/0 gpua050:169707:169763 [0] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [receive] via NET/IB/0 gpua050:169707:169763 [0] NCCL INFO Channel 00/0 : 4[7000] -> 5[46000] via P2P/IPC/read gpua050:169707:169763 [0] NCCL INFO Channel 01/0 : 4[7000] -> 5[46000] via P2P/IPC/read gpua050:169707:169763 [0] NCCL INFO Connected all rings gpua054:154664:154725 [1] NCCL INFO Channel 00/0 : 21[46000] -> 20[7000] via P2P/IPC/read gpua054:154664:154725 [1] NCCL INFO Channel 01/0 : 21[46000] -> 20[7000] via P2P/IPC/read gpua054:154664:154725 [1] NCCL INFO Connected all trees gpua054:154664:154725 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 gpua054:154664:154725 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer gpua054:154664:154725 [1] NCCL INFO comm 0x5652ad2c5b80 rank 21 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE gpua051:179961:179961 [3] NCCL INFO cudaDriverVersion 12020 gpua051:179961:179961 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.51<0> gpua051:179961:179961 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation gpua051:179961:180012 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.51<0> gpua051:179961:180012 [3] NCCL INFO Using network IB gpua051:179961:180012 [3] NCCL INFO Setting affinity for GPU 3 to ffff gpua051:179961:180012 [3] NCCL INFO Trees [0] -1/-1/-1->11->10 [1] -1/-1/-1->11->10 gpua051:179961:180012 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [send] via NET/IB/0 gpua051:179961:180012 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [send] via NET/IB/0 gpua051:179961:180012 [3] NCCL INFO Connected all rings gpua051:179961:180012 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 10[85000] via P2P/IPC/read gpua051:179961:180012 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 10[85000] via P2P/IPC/read gpua064:196994:196994 [3] NCCL INFO cudaDriverVersion 12020 gpua064:196994:196994 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.64<0> gpua064:196994:196994 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation gpua064:196994:197048 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.64<0> gpua064:196994:197048 [3] NCCL INFO Using network IB gpua064:196994:197048 [3] NCCL INFO Setting affinity for GPU 3 to ffff gpua064:196994:197048 [3] NCCL INFO Trees [0] -1/-1/-1->63->62 [1] -1/-1/-1->63->62 gpua064:196994:197048 [3] NCCL INFO Channel 00/0 : 63[c7000] -> 0[7000] [send] via NET/IB/0 gpua064:196994:197048 [3] NCCL INFO Channel 01/0 : 63[c7000] -> 0[7000] [send] via NET/IB/0 gpua064:196994:197048 [3] NCCL INFO Connected all rings gpua064:196994:197048 [3] NCCL INFO Channel 00/0 : 63[c7000] -> 62[85000] via P2P/IPC/read gpua064:196994:197048 [3] NCCL INFO Channel 01/0 : 63[c7000] -> 62[85000] via P2P/IPC/read gpua050:169707:169763 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [receive] via NET/IB/0 gpua050:169707:169763 [0] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [send] via NET/IB/0 gpua050:169707:169763 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [send] via NET/IB/0 gpua050:169707:169763 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [receive] via NET/IB/0 gpua050:169707:169763 [0] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [receive] via NET/IB/0 gpua050:169707:169763 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [send] via NET/IB/0 gpua050:169707:169763 [0] NCCL INFO Connected all trees gpua050:169707:169763 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 gpua050:169707:169763 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer gpua050:169707:169763 [0] NCCL INFO comm 0x555a776973c0 rank 4 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE gpua054:154666:154666 [3] NCCL INFO cudaDriverVersion 12020 gpua054:154666:154666 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.54<0> gpua054:154666:154666 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation gpua054:154666:154724 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.54<0> gpua054:154666:154724 [3] NCCL INFO Using network IB gpua054:154666:154724 [3] NCCL INFO Setting affinity for GPU 3 to ffff gpua054:154666:154724 [3] NCCL INFO Trees [0] -1/-1/-1->23->22 [1] -1/-1/-1->23->22 gpua054:154666:154724 [3] NCCL INFO Channel 00/0 : 23[c7000] -> 24[7000] [send] via NET/IB/0 gpua054:154666:154724 [3] NCCL INFO Channel 01/0 : 23[c7000] -> 24[7000] [send] via NET/IB/0 gpua054:154666:154724 [3] NCCL INFO Connected all rings gpua054:154666:154724 [3] NCCL INFO Channel 00/0 : 23[c7000] -> 22[85000] via P2P/IPC/read gpua054:154666:154724 [3] NCCL INFO Channel 01/0 : 23[c7000] -> 22[85000] via P2P/IPC/read gpua051:179961:180012 [3] NCCL INFO Connected all trees gpua051:179961:180012 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 gpua051:179961:180012 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer gpua051:179961:180012 [3] NCCL INFO comm 0x55d28f9b4a20 rank 11 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE gpua064:196994:197048 [3] NCCL INFO Connected all trees gpua064:196994:197048 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 gpua064:196994:197048 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer gpua064:196994:197048 [3] NCCL INFO comm 0x5645bf734790 rank 63 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE gpua050:169709:169709 [2] NCCL INFO cudaDriverVersion 12020 gpua050:169709:169709 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.50<0> gpua050:169709:169709 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation gpua050:169709:169759 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.50<0> gpua050:169709:169759 [2] NCCL INFO Using network IB gpua050:169709:169759 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 gpua050:169709:169759 [2] NCCL INFO Trees [0] 7/-1/-1->6->5 [1] 7/-1/-1->6->5 gpua050:169709:169759 [2] NCCL INFO Channel 00/0 : 6[85000] -> 7[c7000] via P2P/IPC/read gpua050:169709:169759 [2] NCCL INFO Channel 01/0 : 6[85000] -> 7[c7000] via P2P/IPC/read gpua050:169709:169759 [2] NCCL INFO Connected all rings gpua050:169709:169759 [2] NCCL INFO Channel 00/0 : 6[85000] -> 5[46000] via P2P/IPC/read gpua050:169709:169759 [2] NCCL INFO Channel 01/0 : 6[85000] -> 5[46000] via P2P/IPC/read gpua050:169709:169759 [2] NCCL INFO Connected all trees gpua054:154666:154724 [3] NCCL INFO Connected all trees gpua054:154666:154724 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 gpua054:154666:154724 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer gpua054:154666:154724 [3] NCCL INFO comm 0x55e8a174eda0 rank 23 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE gpua051:179960:179960 [2] NCCL INFO cudaDriverVersion 12020 gpua051:179960:179960 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.51<0> gpua051:179960:179960 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation gpua051:179960:180011 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.51<0> gpua051:179960:180011 [2] NCCL INFO Using network IB gpua051:179960:180011 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 gpua051:179960:180011 [2] NCCL INFO Trees [0] 11/-1/-1->10->9 [1] 11/-1/-1->10->9 gpua051:179960:180011 [2] NCCL INFO Channel 00/0 : 10[85000] -> 11[c7000] via P2P/IPC/read gpua051:179960:180011 [2] NCCL INFO Channel 01/0 : 10[85000] -> 11[c7000] via P2P/IPC/read gpua051:179960:180011 [2] NCCL INFO Connected all rings gpua051:179960:180011 [2] NCCL INFO Channel 00/0 : 10[85000] -> 9[46000] via P2P/IPC/read gpua051:179960:180011 [2] NCCL INFO Channel 01/0 : 10[85000] -> 9[46000] via P2P/IPC/read gpua064:196992:196992 [1] NCCL INFO cudaDriverVersion 12020 gpua064:196992:196992 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.64<0> gpua064:196992:196992 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation gpua064:196992:197049 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.64<0> gpua064:196992:197049 [1] NCCL INFO Using network IB gpua064:196992:197049 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 gpua064:196992:197049 [1] NCCL INFO Trees [0] 62/-1/-1->61->60 [1] 62/-1/-1->61->60 gpua064:196992:197049 [1] NCCL INFO Channel 00/0 : 61[46000] -> 62[85000] via P2P/IPC/read gpua064:196992:197049 [1] NCCL INFO Channel 01/0 : 61[46000] -> 62[85000] via P2P/IPC/read gpua064:196992:197049 [1] NCCL INFO Connected all rings gpua064:196992:197049 [1] NCCL INFO Channel 00/0 : 61[46000] -> 60[7000] via P2P/IPC/read gpua064:196992:197049 [1] NCCL INFO Channel 01/0 : 61[46000] -> 60[7000] via P2P/IPC/read gpua050:169709:169759 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 gpua050:169709:169759 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer gpua050:169709:169759 [2] NCCL INFO comm 0x5581bffc13a0 rank 6 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE gpua054:154665:154665 [2] NCCL INFO cudaDriverVersion 12020 gpua054:154665:154665 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.54<0> gpua054:154665:154665 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation gpua054:154665:154723 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.54<0> gpua054:154665:154723 [2] NCCL INFO Using network IB gpua054:154665:154723 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 gpua054:154665:154723 [2] NCCL INFO Trees [0] 23/-1/-1->22->21 [1] 23/-1/-1->22->21 gpua054:154665:154723 [2] NCCL INFO Channel 00/0 : 22[85000] -> 23[c7000] via P2P/IPC/read gpua054:154665:154723 [2] NCCL INFO Channel 01/0 : 22[85000] -> 23[c7000] via P2P/IPC/read gpua054:154665:154723 [2] NCCL INFO Connected all rings gpua054:154665:154723 [2] NCCL INFO Channel 00/0 : 22[85000] -> 21[46000] via P2P/IPC/read gpua054:154665:154723 [2] NCCL INFO Channel 01/0 : 22[85000] -> 21[46000] via P2P/IPC/read gpua051:179960:180011 [2] NCCL INFO Connected all trees gpua051:179960:180011 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 gpua051:179960:180011 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer gpua051:179960:180011 [2] NCCL INFO comm 0x556d6e357bc0 rank 10 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE gpua064:196992:197049 [1] NCCL INFO Connected all trees gpua064:196992:197049 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 gpua064:196992:197049 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer gpua064:196992:197049 [1] NCCL INFO comm 0x55c49450ba20 rank 61 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE gpua054:154665:154723 [2] NCCL INFO Connected all trees gpua054:154665:154723 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 gpua054:154665:154723 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer gpua054:154665:154723 [2] NCCL INFO comm 0x558687e0b2a0 rank 22 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE gpua051:179958:179958 [0] NCCL INFO cudaDriverVersion 12020 gpua051:179958:179958 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.51<0> gpua051:179958:179958 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation gpua051:179958:180007 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.51<0> gpua051:179958:180007 [0] NCCL INFO Using network IB gpua051:179958:180007 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 gpua051:179958:180007 [0] NCCL INFO Trees [0] 9/12/-1->8->17 [1] 9/-1/-1->8->5 gpua051:179958:180007 [0] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [receive] via NET/IB/0 gpua051:179958:180007 [0] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [receive] via NET/IB/0 gpua051:179958:180007 [0] NCCL INFO Channel 00/0 : 8[7000] -> 9[46000] via P2P/IPC/read gpua051:179958:180007 [0] NCCL INFO Channel 01/0 : 8[7000] -> 9[46000] via P2P/IPC/read gpua051:179958:180007 [0] NCCL INFO Connected all rings gpua064:196991:196991 [0] NCCL INFO cudaDriverVersion 12020 gpua064:196991:196991 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.64<0> gpua064:196991:196991 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation gpua064:196991:197050 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.64<0> gpua064:196991:197050 [0] NCCL INFO Using network IB gpua064:196991:197050 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 gpua064:196991:197050 [0] NCCL INFO Trees [0] 61/-1/-1->60->56 [1] 61/28/-1->60->-1 gpua064:196991:197050 [0] NCCL INFO Channel 00/0 : 59[c7000] -> 60[7000] [receive] via NET/IB/0 gpua064:196991:197050 [0] NCCL INFO Channel 01/0 : 59[c7000] -> 60[7000] [receive] via NET/IB/0 gpua064:196991:197050 [0] NCCL INFO Channel 00/0 : 60[7000] -> 61[46000] via P2P/IPC/read gpua064:196991:197050 [0] NCCL INFO Channel 01/0 : 60[7000] -> 61[46000] via P2P/IPC/read gpua064:196991:197050 [0] NCCL INFO Connected all rings gpua054:154663:154663 [0] NCCL INFO cudaDriverVersion 12020 gpua054:154663:154663 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.54<0> gpua054:154663:154663 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation gpua054:154663:154726 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.54<0> gpua054:154663:154726 [0] NCCL INFO Using network IB gpua054:154663:154726 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 gpua054:154663:154726 [0] NCCL INFO Trees [0] 21/-1/-1->20->25 [1] 21/16/-1->20->13 gpua054:154663:154726 [0] NCCL INFO Channel 00/0 : 19[c7000] -> 20[7000] [receive] via NET/IB/0 gpua054:154663:154726 [0] NCCL INFO Channel 01/0 : 19[c7000] -> 20[7000] [receive] via NET/IB/0 gpua054:154663:154726 [0] NCCL INFO Channel 00/0 : 20[7000] -> 21[46000] via P2P/IPC/read gpua054:154663:154726 [0] NCCL INFO Channel 01/0 : 20[7000] -> 21[46000] via P2P/IPC/read gpua054:154663:154726 [0] NCCL INFO Connected all rings gpua051:179958:180007 [0] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [receive] via NET/IB/0 gpua051:179958:180007 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [send] via NET/IB/0 gpua051:179958:180007 [0] NCCL INFO Channel 00/0 : 8[7000] -> 17[46000] [send] via NET/IB/0 gpua051:179958:180007 [0] NCCL INFO Channel 00/0 : 17[46000] -> 8[7000] [receive] via NET/IB/0 gpua051:179958:180007 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [receive] via NET/IB/0 gpua051:179958:180007 [0] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [send] via NET/IB/0 gpua051:179958:180007 [0] NCCL INFO Connected all trees gpua051:179958:180007 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 gpua051:179958:180007 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer gpua051:179958:180007 [0] NCCL INFO comm 0x55e98c238680 rank 8 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE gpua064:196991:197050 [0] NCCL INFO Channel 00/0 : 56[7000] -> 60[7000] [receive] via NET/IB/0 gpua064:196991:197050 [0] NCCL INFO Channel 01/0 : 28[7000] -> 60[7000] [receive] via NET/IB/0 gpua064:196991:197050 [0] NCCL INFO Channel 01/0 : 60[7000] -> 28[7000] [send] via NET/IB/0 gpua064:196991:197050 [0] NCCL INFO Channel 00/0 : 60[7000] -> 56[7000] [send] via NET/IB/0 gpua064:196991:197050 [0] NCCL INFO Connected all trees gpua064:196991:197050 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 gpua064:196991:197050 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer gpua064:196991:197050 [0] NCCL INFO comm 0x561ce7d9fd20 rank 60 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE gpua054:154663:154726 [0] NCCL INFO Channel 01/0 : 16[7000] -> 20[7000] [receive] via NET/IB/0 gpua054:154663:154726 [0] NCCL INFO Channel 00/0 : 20[7000] -> 25[46000] [send] via NET/IB/0 gpua054:154663:154726 [0] NCCL INFO Channel 01/0 : 13[46000] -> 20[7000] [receive] via NET/IB/0 gpua054:154663:154726 [0] NCCL INFO Channel 01/0 : 20[7000] -> 13[46000] [send] via NET/IB/0 gpua054:154663:154726 [0] NCCL INFO Channel 00/0 : 25[46000] -> 20[7000] [receive] via NET/IB/0 gpua054:154663:154726 [0] NCCL INFO Channel 01/0 : 20[7000] -> 16[7000] [send] via NET/IB/0 gpua054:154663:154726 [0] NCCL INFO Connected all trees gpua054:154663:154726 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 gpua054:154663:154726 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer gpua054:154663:154726 [0] NCCL INFO comm 0x55b902fb5300 rank 20 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE gpua064:196993:196993 [2] NCCL INFO cudaDriverVersion 12020 gpua064:196993:196993 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.64<0> gpua064:196993:196993 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation gpua064:196993:197046 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.64<0> gpua064:196993:197046 [2] NCCL INFO Using network IB gpua064:196993:197046 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 gpua064:196993:197046 [2] NCCL INFO Trees [0] 63/-1/-1->62->61 [1] 63/-1/-1->62->61 gpua064:196993:197046 [2] NCCL INFO Channel 00/0 : 62[85000] -> 63[c7000] via P2P/IPC/read gpua064:196993:197046 [2] NCCL INFO Channel 01/0 : 62[85000] -> 63[c7000] via P2P/IPC/read gpua064:196993:197046 [2] NCCL INFO Connected all rings gpua064:196993:197046 [2] NCCL INFO Channel 00/0 : 62[85000] -> 61[46000] via P2P/IPC/read gpua064:196993:197046 [2] NCCL INFO Channel 01/0 : 62[85000] -> 61[46000] via P2P/IPC/read gpua050:169708:169708 [1] NCCL INFO cudaDriverVersion 12020 gpua050:169708:169708 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.50<0> gpua050:169708:169708 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation gpua050:169708:169760 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.50<0> gpua050:169708:169760 [1] NCCL INFO Using network IB gpua050:169708:169760 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 gpua050:169708:169760 [1] NCCL INFO Trees [0] 6/-1/-1->5->4 [1] 6/8/-1->5->4 gpua050:169708:169760 [1] NCCL INFO Channel 00/0 : 5[46000] -> 6[85000] via P2P/IPC/read gpua050:169708:169760 [1] NCCL INFO Channel 01/0 : 5[46000] -> 6[85000] via P2P/IPC/read gpua050:169708:169760 [1] NCCL INFO Connected all rings gpua050:169708:169760 [1] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [send] via NET/IB/0 gpua050:169708:169760 [1] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [receive] via NET/IB/0 gpua051:179959:179959 [1] NCCL INFO cudaDriverVersion 12020 gpua051:179959:179959 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.51<0> gpua051:179959:179959 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation gpua051:179959:180008 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.51<0> gpua051:179959:180008 [1] NCCL INFO Using network IB gpua051:179959:180008 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 gpua051:179959:180008 [1] NCCL INFO Trees [0] 10/4/-1->9->8 [1] 10/-1/-1->9->8 gpua051:179959:180008 [1] NCCL INFO Channel 00/0 : 9[46000] -> 10[85000] via P2P/IPC/read gpua051:179959:180008 [1] NCCL INFO Channel 01/0 : 9[46000] -> 10[85000] via P2P/IPC/read gpua051:179959:180008 [1] NCCL INFO Connected all rings gpua051:179959:180008 [1] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [receive] via NET/IB/0 gpua051:179959:180008 [1] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [send] via NET/IB/0 gpua064:196993:197046 [2] NCCL INFO Connected all trees gpua064:196993:197046 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 gpua064:196993:197046 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer gpua064:196993:197046 [2] NCCL INFO comm 0x556dfe573370 rank 62 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE gpua050:169708:169760 [1] NCCL INFO Channel 00/0 : 5[46000] -> 4[7000] via P2P/IPC/read gpua050:169708:169760 [1] NCCL INFO Channel 01/0 : 5[46000] -> 4[7000] via P2P/IPC/read gpua050:169708:169760 [1] NCCL INFO Connected all trees gpua050:169708:169760 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 gpua050:169708:169760 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer gpua050:169708:169760 [1] NCCL INFO comm 0x55abbbf4d4c0 rank 5 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE gpua051:179959:180008 [1] NCCL INFO Channel 00/0 : 9[46000] -> 8[7000] via P2P/IPC/read gpua051:179959:180008 [1] NCCL INFO Channel 01/0 : 9[46000] -> 8[7000] via P2P/IPC/read gpua051:179959:180008 [1] NCCL INFO Connected all trees gpua051:179959:180008 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 gpua051:179959:180008 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer gpua051:179959:180008 [1] NCCL INFO comm 0x55f0f6917fa0 rank 9 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE gpua053:2281091:2281091 [1] NCCL INFO cudaDriverVersion 12020 gpua053:2281091:2281091 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.53<0> gpua053:2281091:2281091 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation gpua053:2281091:2281138 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.53<0> gpua053:2281091:2281138 [1] NCCL INFO Using network IB gpua053:2281091:2281138 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 gpua053:2281091:2281138 [1] NCCL INFO Trees [0] 18/8/-1->17->16 [1] 18/-1/-1->17->16 gpua053:2281091:2281138 [1] NCCL INFO Channel 00/0 : 17[46000] -> 18[85000] via P2P/IPC/read gpua053:2281091:2281138 [1] NCCL INFO Channel 01/0 : 17[46000] -> 18[85000] via P2P/IPC/read gpua053:2281091:2281138 [1] NCCL INFO Connected all rings gpua053:2281091:2281138 [1] NCCL INFO Channel 00/0 : 8[7000] -> 17[46000] [receive] via NET/IB/0 gpua053:2281091:2281138 [1] NCCL INFO Channel 00/0 : 17[46000] -> 8[7000] [send] via NET/IB/0 gpua058:180896:180896 [0] NCCL INFO cudaDriverVersion 12020 gpua058:180896:180896 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.58<0> gpua058:180896:180896 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation gpua058:180896:180957 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.58<0> gpua058:180896:180957 [0] NCCL INFO Using network IB gpua058:180896:180957 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 gpua058:180896:180957 [0] NCCL INFO Trees [0] 37/-1/-1->36->41 [1] 37/32/-1->36->44 gpua058:180896:180957 [0] NCCL INFO Channel 00/0 : 35[c7000] -> 36[7000] [receive] via NET/IB/0 gpua058:180896:180957 [0] NCCL INFO Channel 01/0 : 35[c7000] -> 36[7000] [receive] via NET/IB/0 gpua058:180896:180957 [0] NCCL INFO Channel 00/0 : 36[7000] -> 37[46000] via P2P/IPC/read gpua058:180896:180957 [0] NCCL INFO Channel 01/0 : 36[7000] -> 37[46000] via P2P/IPC/read gpua058:180896:180957 [0] NCCL INFO Connected all rings gpua053:2281091:2281138 [1] NCCL INFO Channel 00/0 : 17[46000] -> 16[7000] via P2P/IPC/read gpua053:2281091:2281138 [1] NCCL INFO Channel 01/0 : 17[46000] -> 16[7000] via P2P/IPC/read gpua053:2281091:2281138 [1] NCCL INFO Connected all trees gpua053:2281091:2281138 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 gpua053:2281091:2281138 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer gpua053:2281091:2281138 [1] NCCL INFO comm 0x56012e4c8bf0 rank 17 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE gpua058:180896:180957 [0] NCCL INFO Channel 01/0 : 32[7000] -> 36[7000] [receive] via NET/IB/0 gpua058:180896:180957 [0] NCCL INFO Channel 00/0 : 36[7000] -> 41[46000] [send] via NET/IB/0 gpua058:180896:180957 [0] NCCL INFO Channel 01/0 : 36[7000] -> 44[7000] [send] via NET/IB/0 gpua058:180896:180957 [0] NCCL INFO Channel 01/0 : 44[7000] -> 36[7000] [receive] via NET/IB/0 gpua058:180896:180957 [0] NCCL INFO Channel 00/0 : 41[46000] -> 36[7000] [receive] via NET/IB/0 gpua058:180896:180957 [0] NCCL INFO Channel 01/0 : 36[7000] -> 32[7000] [send] via NET/IB/0 gpua058:180896:180957 [0] NCCL INFO Connected all trees gpua058:180896:180957 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 gpua058:180896:180957 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer gpua058:180896:180957 [0] NCCL INFO comm 0x557aab43f3a0 rank 36 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE gpua058:180897:180897 [1] NCCL INFO cudaDriverVersion 12020 gpua058:180897:180897 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.58<0> gpua058:180897:180897 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation gpua058:180897:180953 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.58<0> gpua058:180897:180953 [1] NCCL INFO Using network IB gpua058:180897:180953 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 gpua058:180897:180953 [1] NCCL INFO Trees [0] 38/-1/-1->37->36 [1] 38/40/-1->37->36 gpua058:180897:180953 [1] NCCL INFO Channel 00/0 : 37[46000] -> 38[85000] via P2P/IPC/read gpua058:180897:180953 [1] NCCL INFO Channel 01/0 : 37[46000] -> 38[85000] via P2P/IPC/read gpua058:180897:180953 [1] NCCL INFO Connected all rings gpua058:180897:180953 [1] NCCL INFO Channel 01/0 : 37[46000] -> 40[7000] [send] via NET/IB/0 gpua058:180897:180953 [1] NCCL INFO Channel 01/0 : 40[7000] -> 37[46000] [receive] via NET/IB/0 gpua063:407502:407502 [3] NCCL INFO cudaDriverVersion 12020 gpua063:407502:407502 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.63<0> gpua063:407502:407502 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation gpua063:407502:407555 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.63<0> gpua063:407502:407555 [3] NCCL INFO Using network IB gpua063:407502:407555 [3] NCCL INFO Setting affinity for GPU 3 to ffff gpua063:407502:407555 [3] NCCL INFO Trees [0] -1/-1/-1->59->58 [1] -1/-1/-1->59->58 gpua063:407502:407555 [3] NCCL INFO Channel 00/0 : 59[c7000] -> 60[7000] [send] via NET/IB/0 gpua063:407502:407555 [3] NCCL INFO Channel 01/0 : 59[c7000] -> 60[7000] [send] via NET/IB/0 gpua063:407502:407555 [3] NCCL INFO Connected all rings gpua063:407502:407555 [3] NCCL INFO Channel 00/0 : 59[c7000] -> 58[85000] via P2P/IPC/read gpua063:407502:407555 [3] NCCL INFO Channel 01/0 : 59[c7000] -> 58[85000] via P2P/IPC/read gpua052:157615:157615 [3] NCCL INFO cudaDriverVersion 12020 gpua052:157615:157615 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.52<0> gpua052:157615:157615 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation gpua052:157615:157670 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.52<0> gpua052:157615:157670 [3] NCCL INFO Using network IB gpua052:157615:157670 [3] NCCL INFO Setting affinity for GPU 3 to ffff gpua052:157615:157670 [3] NCCL INFO Trees [0] -1/-1/-1->15->14 [1] -1/-1/-1->15->14 gpua052:157615:157670 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 16[7000] [send] via NET/IB/0 gpua052:157615:157670 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 16[7000] [send] via NET/IB/0 gpua052:157615:157670 [3] NCCL INFO Connected all rings gpua052:157615:157670 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 14[85000] via P2P/IPC/read gpua052:157615:157670 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 14[85000] via P2P/IPC/read gpua053:2281093:2281093 [3] NCCL INFO cudaDriverVersion 12020 gpua053:2281093:2281093 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.53<0> gpua053:2281093:2281093 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation gpua053:2281093:2281137 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.53<0> gpua053:2281093:2281137 [3] NCCL INFO Using network IB gpua053:2281093:2281137 [3] NCCL INFO Setting affinity for GPU 3 to ffff gpua053:2281093:2281137 [3] NCCL INFO Trees [0] -1/-1/-1->19->18 [1] -1/-1/-1->19->18 gpua053:2281093:2281137 [3] NCCL INFO Channel 00/0 : 19[c7000] -> 20[7000] [send] via NET/IB/0 gpua053:2281093:2281137 [3] NCCL INFO Channel 01/0 : 19[c7000] -> 20[7000] [send] via NET/IB/0 gpua053:2281093:2281137 [3] NCCL INFO Connected all rings gpua053:2281093:2281137 [3] NCCL INFO Channel 00/0 : 19[c7000] -> 18[85000] via P2P/IPC/read gpua053:2281093:2281137 [3] NCCL INFO Channel 01/0 : 19[c7000] -> 18[85000] via P2P/IPC/read gpua058:180897:180953 [1] NCCL INFO Channel 00/0 : 37[46000] -> 36[7000] via P2P/IPC/read gpua058:180897:180953 [1] NCCL INFO Channel 01/0 : 37[46000] -> 36[7000] via P2P/IPC/read gpua058:180897:180953 [1] NCCL INFO Connected all trees gpua058:180897:180953 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 gpua058:180897:180953 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer gpua058:180897:180953 [1] NCCL INFO comm 0x55b957da0ee0 rank 37 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE gpua063:407502:407555 [3] NCCL INFO Connected all trees gpua063:407502:407555 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 gpua063:407502:407555 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer gpua063:407502:407555 [3] NCCL INFO comm 0x562bc7459e00 rank 59 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE gpua052:157615:157670 [3] NCCL INFO Connected all trees gpua052:157615:157670 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 gpua052:157615:157670 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer gpua052:157615:157670 [3] NCCL INFO comm 0x55a7a3f3a070 rank 15 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE gpua053:2281093:2281137 [3] NCCL INFO Connected all trees gpua053:2281093:2281137 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 gpua053:2281093:2281137 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer gpua053:2281093:2281137 [3] NCCL INFO comm 0x55aa8e5602c0 rank 19 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE gpua063:407499:407499 [0] NCCL INFO cudaDriverVersion 12020 gpua063:407499:407499 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.63<0> gpua063:407499:407499 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation gpua063:407499:407556 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.63<0> gpua063:407499:407556 [0] NCCL INFO Using network IB gpua063:407499:407556 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 gpua063:407499:407556 [0] NCCL INFO Trees [0] 57/60/-1->56->48 [1] 57/-1/-1->56->53 gpua063:407499:407556 [0] NCCL INFO Channel 00/0 : 55[c7000] -> 56[7000] [receive] via NET/IB/0 gpua063:407499:407556 [0] NCCL INFO Channel 01/0 : 55[c7000] -> 56[7000] [receive] via NET/IB/0 gpua063:407499:407556 [0] NCCL INFO Channel 00/0 : 56[7000] -> 57[46000] via P2P/IPC/read gpua063:407499:407556 [0] NCCL INFO Channel 01/0 : 56[7000] -> 57[46000] via P2P/IPC/read gpua063:407499:407556 [0] NCCL INFO Connected all rings gpua052:157612:157612 [0] NCCL INFO cudaDriverVersion 12020 gpua052:157612:157612 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.52<0> gpua052:157612:157612 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation gpua052:157612:157666 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.52<0> gpua052:157612:157666 [0] NCCL INFO Using network IB gpua052:157612:157666 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 gpua052:157612:157666 [0] NCCL INFO Trees [0] 13/-1/-1->12->8 [1] 13/4/-1->12->28 gpua052:157612:157666 [0] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [receive] via NET/IB/0 gpua052:157612:157666 [0] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [receive] via NET/IB/0 gpua052:157612:157666 [0] NCCL INFO Channel 00/0 : 12[7000] -> 13[46000] via P2P/IPC/read gpua052:157612:157666 [0] NCCL INFO Channel 01/0 : 12[7000] -> 13[46000] via P2P/IPC/read gpua052:157612:157666 [0] NCCL INFO Connected all rings gpua063:407499:407556 [0] NCCL INFO Channel 01/0 : 53[46000] -> 56[7000] [receive] via NET/IB/0 gpua063:407499:407556 [0] NCCL INFO Channel 00/0 : 56[7000] -> 60[7000] [send] via NET/IB/0 gpua063:407499:407556 [0] NCCL INFO Channel 00/0 : 48[7000] -> 56[7000] [receive] via NET/IB/0 gpua063:407499:407556 [0] NCCL INFO Channel 00/0 : 56[7000] -> 48[7000] [send] via NET/IB/0 gpua063:407499:407556 [0] NCCL INFO Channel 00/0 : 60[7000] -> 56[7000] [receive] via NET/IB/0 gpua063:407499:407556 [0] NCCL INFO Channel 01/0 : 56[7000] -> 53[46000] [send] via NET/IB/0 gpua063:407499:407556 [0] NCCL INFO Connected all trees gpua063:407499:407556 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 gpua063:407499:407556 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer gpua063:407499:407556 [0] NCCL INFO comm 0x556c67433be0 rank 56 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE gpua052:157612:157666 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [receive] via NET/IB/0 gpua052:157612:157666 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [receive] via NET/IB/0 gpua052:157612:157666 [0] NCCL INFO Channel 01/0 : 12[7000] -> 28[7000] [send] via NET/IB/0 gpua052:157612:157666 [0] NCCL INFO Channel 01/0 : 28[7000] -> 12[7000] [receive] via NET/IB/0 gpua052:157612:157666 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [send] via NET/IB/0 gpua052:157612:157666 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [send] via NET/IB/0 gpua052:157612:157666 [0] NCCL INFO Connected all trees gpua052:157612:157666 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 gpua052:157612:157666 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer gpua052:157612:157666 [0] NCCL INFO comm 0x563a7fff66a0 rank 12 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE gpua052:157614:157614 [2] NCCL INFO cudaDriverVersion 12020 gpua063:407501:407501 [2] NCCL INFO cudaDriverVersion 12020 gpua063:407501:407501 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.63<0> gpua063:407501:407501 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation gpua063:407501:407552 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.63<0> gpua063:407501:407552 [2] NCCL INFO Using network IB gpua063:407501:407552 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 gpua063:407501:407552 [2] NCCL INFO Trees [0] 59/-1/-1->58->57 [1] 59/-1/-1->58->57 gpua063:407501:407552 [2] NCCL INFO Channel 00/0 : 58[85000] -> 59[c7000] via P2P/IPC/read gpua063:407501:407552 [2] NCCL INFO Channel 01/0 : 58[85000] -> 59[c7000] via P2P/IPC/read gpua063:407501:407552 [2] NCCL INFO Connected all rings gpua063:407501:407552 [2] NCCL INFO Channel 00/0 : 58[85000] -> 57[46000] via P2P/IPC/read gpua063:407501:407552 [2] NCCL INFO Channel 01/0 : 58[85000] -> 57[46000] via P2P/IPC/read gpua052:157614:157614 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.52<0> gpua052:157614:157614 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation gpua052:157614:157671 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.52<0> gpua052:157614:157671 [2] NCCL INFO Using network IB gpua052:157614:157671 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 gpua052:157614:157671 [2] NCCL INFO Trees [0] 15/-1/-1->14->13 [1] 15/-1/-1->14->13 gpua052:157614:157671 [2] NCCL INFO Channel 00/0 : 14[85000] -> 15[c7000] via P2P/IPC/read gpua052:157614:157671 [2] NCCL INFO Channel 01/0 : 14[85000] -> 15[c7000] via P2P/IPC/read gpua052:157614:157671 [2] NCCL INFO Connected all rings gpua052:157614:157671 [2] NCCL INFO Channel 00/0 : 14[85000] -> 13[46000] via P2P/IPC/read gpua052:157614:157671 [2] NCCL INFO Channel 01/0 : 14[85000] -> 13[46000] via P2P/IPC/read gpua052:157614:157671 [2] NCCL INFO Connected all trees gpua063:407501:407552 [2] NCCL INFO Connected all trees gpua063:407501:407552 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 gpua063:407501:407552 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer gpua063:407501:407552 [2] NCCL INFO comm 0x560deb2cdf40 rank 58 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE gpua052:157614:157671 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 gpua052:157614:157671 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer gpua052:157614:157671 [2] NCCL INFO comm 0x55636c2e3dc0 rank 14 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE gpua063:407500:407500 [1] NCCL INFO cudaDriverVersion 12020 gpua063:407500:407500 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.63<0> gpua063:407500:407500 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation gpua063:407500:407551 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.63<0> gpua063:407500:407551 [1] NCCL INFO Using network IB gpua063:407500:407551 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 gpua063:407500:407551 [1] NCCL INFO Trees [0] 58/52/-1->57->56 [1] 58/-1/-1->57->56 gpua063:407500:407551 [1] NCCL INFO Channel 00/0 : 57[46000] -> 58[85000] via P2P/IPC/read gpua063:407500:407551 [1] NCCL INFO Channel 01/0 : 57[46000] -> 58[85000] via P2P/IPC/read gpua063:407500:407551 [1] NCCL INFO Connected all rings gpua063:407500:407551 [1] NCCL INFO Channel 00/0 : 52[7000] -> 57[46000] [receive] via NET/IB/0 gpua063:407500:407551 [1] NCCL INFO Channel 00/0 : 57[46000] -> 52[7000] [send] via NET/IB/0 gpua052:157613:157613 [1] NCCL INFO cudaDriverVersion 12020 gpua052:157613:157613 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.52<0> gpua052:157613:157613 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation gpua052:157613:157667 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.52<0> gpua052:157613:157667 [1] NCCL INFO Using network IB gpua052:157613:157667 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 gpua052:157613:157667 [1] NCCL INFO Trees [0] 14/-1/-1->13->12 [1] 14/20/-1->13->12 gpua052:157613:157667 [1] NCCL INFO Channel 00/0 : 13[46000] -> 14[85000] via P2P/IPC/read gpua052:157613:157667 [1] NCCL INFO Channel 01/0 : 13[46000] -> 14[85000] via P2P/IPC/read gpua052:157613:157667 [1] NCCL INFO Connected all rings gpua052:157613:157667 [1] NCCL INFO Channel 01/0 : 13[46000] -> 20[7000] [send] via NET/IB/0 gpua052:157613:157667 [1] NCCL INFO Channel 01/0 : 20[7000] -> 13[46000] [receive] via NET/IB/0 gpua053:2281090:2281090 [0] NCCL INFO cudaDriverVersion 12020 gpua053:2281090:2281090 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.53<0> gpua053:2281090:2281090 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation gpua053:2281090:2281139 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.53<0> gpua053:2281090:2281139 [0] NCCL INFO Using network IB gpua053:2281090:2281139 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 gpua053:2281090:2281139 [0] NCCL INFO Trees [0] 17/24/-1->16->33 [1] 17/-1/-1->16->20 gpua053:2281090:2281139 [0] NCCL INFO Channel 00/0 : 15[c7000] -> 16[7000] [receive] via NET/IB/0 gpua053:2281090:2281139 [0] NCCL INFO Channel 01/0 : 15[c7000] -> 16[7000] [receive] via NET/IB/0 gpua053:2281090:2281139 [0] NCCL INFO Channel 00/0 : 16[7000] -> 17[46000] via P2P/IPC/read gpua053:2281090:2281139 [0] NCCL INFO Channel 01/0 : 16[7000] -> 17[46000] via P2P/IPC/read gpua053:2281090:2281139 [0] NCCL INFO Connected all rings gpua058:180899:180899 [3] NCCL INFO cudaDriverVersion 12020 gpua058:180899:180899 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.58<0> gpua058:180899:180899 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation gpua058:180899:180951 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.58<0> gpua058:180899:180951 [3] NCCL INFO Using network IB gpua058:180899:180951 [3] NCCL INFO Setting affinity for GPU 3 to ffff gpua058:180899:180951 [3] NCCL INFO Trees [0] -1/-1/-1->39->38 [1] -1/-1/-1->39->38 gpua058:180899:180951 [3] NCCL INFO Channel 00/0 : 39[c7000] -> 40[7000] [send] via NET/IB/0 gpua058:180899:180951 [3] NCCL INFO Channel 01/0 : 39[c7000] -> 40[7000] [send] via NET/IB/0 gpua058:180899:180951 [3] NCCL INFO Connected all rings gpua058:180899:180951 [3] NCCL INFO Channel 00/0 : 39[c7000] -> 38[85000] via P2P/IPC/read gpua058:180899:180951 [3] NCCL INFO Channel 01/0 : 39[c7000] -> 38[85000] via P2P/IPC/read gpua063:407500:407551 [1] NCCL INFO Channel 00/0 : 57[46000] -> 56[7000] via P2P/IPC/read gpua063:407500:407551 [1] NCCL INFO Channel 01/0 : 57[46000] -> 56[7000] via P2P/IPC/read gpua063:407500:407551 [1] NCCL INFO Connected all trees gpua063:407500:407551 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 gpua063:407500:407551 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer gpua063:407500:407551 [1] NCCL INFO comm 0x55e5c42b4cb0 rank 57 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE gpua052:157613:157667 [1] NCCL INFO Channel 00/0 : 13[46000] -> 12[7000] via P2P/IPC/read gpua052:157613:157667 [1] NCCL INFO Channel 01/0 : 13[46000] -> 12[7000] via P2P/IPC/read gpua052:157613:157667 [1] NCCL INFO Connected all trees gpua052:157613:157667 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 gpua052:157613:157667 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer gpua052:157613:157667 [1] NCCL INFO comm 0x559455f2b900 rank 13 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE gpua053:2281090:2281139 [0] NCCL INFO Channel 01/0 : 16[7000] -> 20[7000] [send] via NET/IB/0 gpua053:2281090:2281139 [0] NCCL INFO Channel 00/0 : 16[7000] -> 24[7000] [send] via NET/IB/0 gpua053:2281090:2281139 [0] NCCL INFO Channel 00/0 : 16[7000] -> 33[46000] [send] via NET/IB/0 gpua053:2281090:2281139 [0] NCCL INFO Channel 00/0 : 33[46000] -> 16[7000] [receive] via NET/IB/0 gpua053:2281090:2281139 [0] NCCL INFO Channel 00/0 : 24[7000] -> 16[7000] [receive] via NET/IB/0 gpua053:2281090:2281139 [0] NCCL INFO Channel 01/0 : 20[7000] -> 16[7000] [receive] via NET/IB/0 gpua053:2281090:2281139 [0] NCCL INFO Connected all trees gpua053:2281090:2281139 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 gpua053:2281090:2281139 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer gpua053:2281090:2281139 [0] NCCL INFO comm 0x5588b42d3e20 rank 16 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE gpua058:180899:180951 [3] NCCL INFO Connected all trees gpua058:180899:180951 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 gpua058:180899:180951 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer gpua058:180899:180951 [3] NCCL INFO comm 0x561f7c2b5d60 rank 39 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE gpua053:2281092:2281092 [2] NCCL INFO cudaDriverVersion 12020 gpua053:2281092:2281092 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.53<0> gpua053:2281092:2281092 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation gpua053:2281092:2281136 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.53<0> gpua053:2281092:2281136 [2] NCCL INFO Using network IB gpua053:2281092:2281136 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 gpua053:2281092:2281136 [2] NCCL INFO Trees [0] 19/-1/-1->18->17 [1] 19/-1/-1->18->17 gpua053:2281092:2281136 [2] NCCL INFO Channel 00/0 : 18[85000] -> 19[c7000] via P2P/IPC/read gpua053:2281092:2281136 [2] NCCL INFO Channel 01/0 : 18[85000] -> 19[c7000] via P2P/IPC/read gpua053:2281092:2281136 [2] NCCL INFO Connected all rings gpua053:2281092:2281136 [2] NCCL INFO Channel 00/0 : 18[85000] -> 17[46000] via P2P/IPC/read gpua053:2281092:2281136 [2] NCCL INFO Channel 01/0 : 18[85000] -> 17[46000] via P2P/IPC/read gpua053:2281092:2281136 [2] NCCL INFO Connected all trees gpua053:2281092:2281136 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 gpua053:2281092:2281136 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer gpua053:2281092:2281136 [2] NCCL INFO comm 0x55b33ffe07e0 rank 18 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE gpua058:180898:180898 [2] NCCL INFO cudaDriverVersion 12020 gpua058:180898:180898 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.58<0> gpua058:180898:180898 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation gpua058:180898:180952 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.58<0> gpua058:180898:180952 [2] NCCL INFO Using network IB gpua058:180898:180952 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 gpua058:180898:180952 [2] NCCL INFO Trees [0] 39/-1/-1->38->37 [1] 39/-1/-1->38->37 gpua058:180898:180952 [2] NCCL INFO Channel 00/0 : 38[85000] -> 39[c7000] via P2P/IPC/read gpua058:180898:180952 [2] NCCL INFO Channel 01/0 : 38[85000] -> 39[c7000] via P2P/IPC/read gpua058:180898:180952 [2] NCCL INFO Connected all rings gpua058:180898:180952 [2] NCCL INFO Channel 00/0 : 38[85000] -> 37[46000] via P2P/IPC/read gpua058:180898:180952 [2] NCCL INFO Channel 01/0 : 38[85000] -> 37[46000] via P2P/IPC/read gpua058:180898:180952 [2] NCCL INFO Connected all trees gpua058:180898:180952 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 gpua058:180898:180952 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer gpua058:180898:180952 [2] NCCL INFO comm 0x5604896172b0 rank 38 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE gpua049:3563091:3563091 [2] NCCL INFO cudaDriverVersion 12020 gpua049:3563091:3563091 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.49<0> gpua049:3563091:3563091 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation gpua049:3563091:3563143 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.49<0> gpua049:3563091:3563143 [2] NCCL INFO Using network IB gpua049:3563091:3563143 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 gpua049:3563091:3563143 [2] NCCL INFO Trees [0] 3/-1/-1->2->1 [1] 3/-1/-1->2->1 gpua049:3563091:3563143 [2] NCCL INFO Channel 00/0 : 2[85000] -> 3[c7000] via P2P/IPC/read gpua049:3563091:3563143 [2] NCCL INFO Channel 01/0 : 2[85000] -> 3[c7000] via P2P/IPC/read gpua049:3563091:3563143 [2] NCCL INFO Connected all rings gpua049:3563091:3563143 [2] NCCL INFO Channel 00/0 : 2[85000] -> 1[46000] via P2P/IPC/read gpua049:3563091:3563143 [2] NCCL INFO Channel 01/0 : 2[85000] -> 1[46000] via P2P/IPC/read gpua049:3563091:3563143 [2] NCCL INFO Connected all trees gpua049:3563091:3563143 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 gpua049:3563091:3563143 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer gpua049:3563091:3563143 [2] NCCL INFO comm 0x55939414b5c0 rank 2 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE gpua049:3563090:3563090 [1] NCCL INFO cudaDriverVersion 12020 gpua049:3563090:3563090 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.49<0> gpua049:3563090:3563090 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation gpua049:3563090:3563141 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.49<0> gpua049:3563090:3563141 [1] NCCL INFO Using network IB gpua049:3563090:3563141 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 gpua049:3563090:3563141 [1] NCCL INFO Trees [0] 2/-1/-1->1->0 [1] 2/-1/-1->1->0 gpua049:3563090:3563141 [1] NCCL INFO Channel 00/0 : 1[46000] -> 2[85000] via P2P/IPC/read gpua049:3563090:3563141 [1] NCCL INFO Channel 01/0 : 1[46000] -> 2[85000] via P2P/IPC/read gpua049:3563090:3563141 [1] NCCL INFO Connected all rings gpua049:3563090:3563141 [1] NCCL INFO Channel 00/0 : 1[46000] -> 0[7000] via P2P/IPC/read gpua049:3563090:3563141 [1] NCCL INFO Channel 01/0 : 1[46000] -> 0[7000] via P2P/IPC/read gpua049:3563090:3563141 [1] NCCL INFO Connected all trees gpua049:3563090:3563141 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 gpua049:3563090:3563141 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer gpua049:3563090:3563141 [1] NCCL INFO comm 0x5590462e1060 rank 1 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE gpua049:3563092:3563092 [3] NCCL INFO cudaDriverVersion 12020 gpua049:3563092:3563092 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.49<0> gpua049:3563092:3563092 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation gpua049:3563092:3563144 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.49<0> gpua049:3563092:3563144 [3] NCCL INFO Using network IB gpua049:3563092:3563144 [3] NCCL INFO Setting affinity for GPU 3 to ffff gpua049:3563092:3563144 [3] NCCL INFO Trees [0] -1/-1/-1->3->2 [1] -1/-1/-1->3->2 gpua049:3563092:3563144 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [send] via NET/IB/0 gpua049:3563092:3563144 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [send] via NET/IB/0 gpua049:3563092:3563144 [3] NCCL INFO Connected all rings gpua049:3563092:3563144 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 2[85000] via P2P/IPC/read gpua049:3563092:3563144 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 2[85000] via P2P/IPC/read gpua049:3563092:3563144 [3] NCCL INFO Connected all trees gpua049:3563092:3563144 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 gpua049:3563092:3563144 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer gpua049:3563092:3563144 [3] NCCL INFO comm 0x558304c587c0 rank 3 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE gpua049:3563089:3563142 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.49<0> gpua049:3563089:3563142 [0] NCCL INFO Using network IB gpua049:3563089:3563142 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 gpua049:3563089:3563142 [0] NCCL INFO Channel 00/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 gpua049:3563089:3563142 [0] NCCL INFO Channel 01/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 gpua049:3563089:3563142 [0] NCCL INFO Trees [0] 1/32/-1->0->-1 [1] 1/-1/-1->0->4 gpua049:3563089:3563142 [0] NCCL INFO Channel 00/0 : 63[c7000] -> 0[7000] [receive] via NET/IB/0 gpua049:3563089:3563142 [0] NCCL INFO Channel 01/0 : 63[c7000] -> 0[7000] [receive] via NET/IB/0 gpua049:3563089:3563142 [0] NCCL INFO Channel 00/0 : 0[7000] -> 1[46000] via P2P/IPC/read gpua049:3563089:3563142 [0] NCCL INFO Channel 01/0 : 0[7000] -> 1[46000] via P2P/IPC/read gpua049:3563089:3563142 [0] NCCL INFO Connected all rings gpua049:3563089:3563142 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [send] via NET/IB/0 gpua049:3563089:3563142 [0] NCCL INFO Channel 00/0 : 32[7000] -> 0[7000] [receive] via NET/IB/0 gpua049:3563089:3563142 [0] NCCL INFO Channel 00/0 : 0[7000] -> 32[7000] [send] via NET/IB/0 gpua049:3563089:3563142 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [receive] via NET/IB/0 gpua049:3563089:3563142 [0] NCCL INFO Connected all trees gpua049:3563089:3563142 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 gpua049:3563089:3563142 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer gpua049:3563089:3563142 [0] NCCL INFO comm 0x55fd274326c0 rank 0 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE gpua061:400022:400022 [3] NCCL INFO cudaDriverVersion 12020 gpua061:400022:400022 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.61<0> gpua061:400022:400022 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation gpua061:400022:400077 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.61<0> gpua061:400022:400077 [3] NCCL INFO Using network IB gpua061:400022:400077 [3] NCCL INFO Setting affinity for GPU 3 to ffff gpua061:400022:400077 [3] NCCL INFO Trees [0] -1/-1/-1->51->50 [1] -1/-1/-1->51->50 gpua061:400022:400077 [3] NCCL INFO Channel 00/0 : 51[c7000] -> 52[7000] [send] via NET/IB/0 gpua061:400022:400077 [3] NCCL INFO Channel 01/0 : 51[c7000] -> 52[7000] [send] via NET/IB/0 gpua061:400022:400077 [3] NCCL INFO Connected all rings gpua061:400022:400077 [3] NCCL INFO Channel 00/0 : 51[c7000] -> 50[85000] via P2P/IPC/read gpua061:400022:400077 [3] NCCL INFO Channel 01/0 : 51[c7000] -> 50[85000] via P2P/IPC/read gpua061:400022:400077 [3] NCCL INFO Connected all trees gpua061:400022:400077 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 gpua061:400022:400077 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer gpua061:400022:400077 [3] NCCL INFO comm 0x5602ebd700a0 rank 51 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE gpua061:400020:400020 [1] NCCL INFO cudaDriverVersion 12020 gpua061:400020:400020 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.61<0> gpua061:400020:400020 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation gpua061:400020:400079 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.61<0> gpua061:400020:400079 [1] NCCL INFO Using network IB gpua061:400020:400079 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 gpua061:400020:400079 [1] NCCL INFO Trees [0] 50/40/-1->49->48 [1] 50/-1/-1->49->48 gpua061:400020:400079 [1] NCCL INFO Channel 00/0 : 49[46000] -> 50[85000] via P2P/IPC/read gpua061:400020:400079 [1] NCCL INFO Channel 01/0 : 49[46000] -> 50[85000] via P2P/IPC/read gpua061:400020:400079 [1] NCCL INFO Connected all rings gpua061:400020:400079 [1] NCCL INFO Channel 00/0 : 40[7000] -> 49[46000] [receive] via NET/IB/0 gpua061:400020:400079 [1] NCCL INFO Channel 00/0 : 49[46000] -> 40[7000] [send] via NET/IB/0 gpua061:400020:400079 [1] NCCL INFO Channel 00/0 : 49[46000] -> 48[7000] via P2P/IPC/read gpua061:400020:400079 [1] NCCL INFO Channel 01/0 : 49[46000] -> 48[7000] via P2P/IPC/read gpua061:400020:400079 [1] NCCL INFO Connected all trees gpua061:400020:400079 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 gpua061:400020:400079 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer gpua061:400020:400079 [1] NCCL INFO comm 0x558949fdd880 rank 49 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE gpua061:400019:400019 [0] NCCL INFO cudaDriverVersion 12020 gpua061:400019:400019 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.61<0> gpua061:400019:400019 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation gpua061:400019:400076 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.61<0> gpua061:400019:400076 [0] NCCL INFO Using network IB gpua061:400019:400076 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 gpua061:400019:400076 [0] NCCL INFO Trees [0] 49/56/-1->48->32 [1] 49/-1/-1->48->52 gpua061:400019:400076 [0] NCCL INFO Channel 00/0 : 47[c7000] -> 48[7000] [receive] via NET/IB/0 gpua061:400019:400076 [0] NCCL INFO Channel 01/0 : 47[c7000] -> 48[7000] [receive] via NET/IB/0 gpua061:400019:400076 [0] NCCL INFO Channel 00/0 : 48[7000] -> 49[46000] via P2P/IPC/read gpua061:400019:400076 [0] NCCL INFO Channel 01/0 : 48[7000] -> 49[46000] via P2P/IPC/read gpua061:400019:400076 [0] NCCL INFO Connected all rings gpua061:400019:400076 [0] NCCL INFO Channel 01/0 : 48[7000] -> 52[7000] [send] via NET/IB/0 gpua061:400019:400076 [0] NCCL INFO Channel 00/0 : 48[7000] -> 56[7000] [send] via NET/IB/0 gpua061:400019:400076 [0] NCCL INFO Channel 00/0 : 32[7000] -> 48[7000] [receive] via NET/IB/0 gpua061:400019:400076 [0] NCCL INFO Channel 00/0 : 48[7000] -> 32[7000] [send] via NET/IB/0 gpua061:400019:400076 [0] NCCL INFO Channel 00/0 : 56[7000] -> 48[7000] [receive] via NET/IB/0 gpua061:400019:400076 [0] NCCL INFO Channel 01/0 : 52[7000] -> 48[7000] [receive] via NET/IB/0 gpua061:400019:400076 [0] NCCL INFO Connected all trees gpua061:400019:400076 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 gpua061:400019:400076 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer gpua061:400019:400076 [0] NCCL INFO comm 0x5580c987cf00 rank 48 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE gpua057:178769:178769 [3] NCCL INFO cudaDriverVersion 12020 gpua057:178769:178769 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.57<0> gpua057:178769:178769 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation gpua057:178769:178820 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.57<0> gpua057:178769:178820 [3] NCCL INFO Using network IB gpua057:178769:178820 [3] NCCL INFO Setting affinity for GPU 3 to ffff gpua057:178769:178820 [3] NCCL INFO Trees [0] -1/-1/-1->35->34 [1] -1/-1/-1->35->34 gpua057:178769:178820 [3] NCCL INFO Channel 00/0 : 35[c7000] -> 36[7000] [send] via NET/IB/0 gpua057:178769:178820 [3] NCCL INFO Channel 01/0 : 35[c7000] -> 36[7000] [send] via NET/IB/0 gpua057:178769:178820 [3] NCCL INFO Connected all rings gpua057:178769:178820 [3] NCCL INFO Channel 00/0 : 35[c7000] -> 34[85000] via P2P/IPC/read gpua057:178769:178820 [3] NCCL INFO Channel 01/0 : 35[c7000] -> 34[85000] via P2P/IPC/read gpua056:2720517:2720517 [1] NCCL INFO cudaDriverVersion 12020 gpua056:2720517:2720517 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.56<0> gpua056:2720517:2720517 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation gpua056:2720517:2720563 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.56<0> gpua056:2720517:2720563 [1] NCCL INFO Using network IB gpua056:2720517:2720563 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 gpua056:2720517:2720563 [1] NCCL INFO Trees [0] 30/-1/-1->29->28 [1] 30/44/-1->29->28 gpua056:2720517:2720563 [1] NCCL INFO Channel 00/0 : 29[46000] -> 30[85000] via P2P/IPC/read gpua056:2720517:2720563 [1] NCCL INFO Channel 01/0 : 29[46000] -> 30[85000] via P2P/IPC/read gpua056:2720517:2720563 [1] NCCL INFO Connected all rings gpua056:2720517:2720563 [1] NCCL INFO Channel 01/0 : 29[46000] -> 44[7000] [send] via NET/IB/0 gpua056:2720517:2720563 [1] NCCL INFO Channel 01/0 : 44[7000] -> 29[46000] [receive] via NET/IB/0 gpua057:178769:178820 [3] NCCL INFO Connected all trees gpua057:178769:178820 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 gpua057:178769:178820 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer gpua057:178769:178820 [3] NCCL INFO comm 0x55ae8f8bce40 rank 35 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE gpua057:178768:178768 [2] NCCL INFO cudaDriverVersion 12020 gpua057:178768:178768 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.57<0> gpua057:178768:178768 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation gpua057:178768:178817 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.57<0> gpua057:178768:178817 [2] NCCL INFO Using network IB gpua057:178768:178817 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 gpua057:178768:178817 [2] NCCL INFO Trees [0] 35/-1/-1->34->33 [1] 35/-1/-1->34->33 gpua057:178768:178817 [2] NCCL INFO Channel 00/0 : 34[85000] -> 35[c7000] via P2P/IPC/read gpua056:2720517:2720563 [1] NCCL INFO Channel 00/0 : 29[46000] -> 28[7000] via P2P/IPC/read gpua056:2720517:2720563 [1] NCCL INFO Channel 01/0 : 29[46000] -> 28[7000] via P2P/IPC/read gpua056:2720517:2720563 [1] NCCL INFO Connected all trees gpua056:2720517:2720563 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 gpua056:2720517:2720563 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer gpua056:2720517:2720563 [1] NCCL INFO comm 0x562448403fc0 rank 29 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE gpua057:178768:178817 [2] NCCL INFO Channel 01/0 : 34[85000] -> 35[c7000] via P2P/IPC/read gpua057:178768:178817 [2] NCCL INFO Connected all rings gpua057:178768:178817 [2] NCCL INFO Channel 00/0 : 34[85000] -> 33[46000] via P2P/IPC/read gpua057:178768:178817 [2] NCCL INFO Channel 01/0 : 34[85000] -> 33[46000] via P2P/IPC/read gpua057:178768:178817 [2] NCCL INFO Connected all trees gpua057:178768:178817 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 gpua057:178768:178817 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer gpua057:178768:178817 [2] NCCL INFO comm 0x5587a9be5030 rank 34 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE gpua056:2720519:2720519 [3] NCCL INFO cudaDriverVersion 12020 gpua056:2720519:2720519 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.56<0> gpua056:2720519:2720519 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation gpua056:2720519:2720565 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.56<0> gpua056:2720519:2720565 [3] NCCL INFO Using network IB gpua056:2720519:2720565 [3] NCCL INFO Setting affinity for GPU 3 to ffff gpua056:2720519:2720565 [3] NCCL INFO Trees [0] -1/-1/-1->31->30 [1] -1/-1/-1->31->30 gpua056:2720519:2720565 [3] NCCL INFO Channel 00/0 : 31[c7000] -> 32[7000] [send] via NET/IB/0 gpua056:2720519:2720565 [3] NCCL INFO Channel 01/0 : 31[c7000] -> 32[7000] [send] via NET/IB/0 gpua056:2720519:2720565 [3] NCCL INFO Connected all rings gpua056:2720519:2720565 [3] NCCL INFO Channel 00/0 : 31[c7000] -> 30[85000] via P2P/IPC/read gpua056:2720519:2720565 [3] NCCL INFO Channel 01/0 : 31[c7000] -> 30[85000] via P2P/IPC/read gpua057:178767:178767 [1] NCCL INFO cudaDriverVersion 12020 gpua057:178767:178767 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.57<0> gpua057:178767:178767 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation gpua057:178767:178818 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.57<0> gpua057:178767:178818 [1] NCCL INFO Using network IB gpua057:178767:178818 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 gpua057:178767:178818 [1] NCCL INFO Trees [0] 34/16/-1->33->32 [1] 34/-1/-1->33->32 gpua057:178767:178818 [1] NCCL INFO Channel 00/0 : 33[46000] -> 34[85000] via P2P/IPC/read gpua057:178767:178818 [1] NCCL INFO Channel 01/0 : 33[46000] -> 34[85000] via P2P/IPC/read gpua057:178767:178818 [1] NCCL INFO Connected all rings gpua057:178767:178818 [1] NCCL INFO Channel 00/0 : 16[7000] -> 33[46000] [receive] via NET/IB/0 gpua057:178767:178818 [1] NCCL INFO Channel 00/0 : 33[46000] -> 16[7000] [send] via NET/IB/0 gpua061:400021:400021 [2] NCCL INFO cudaDriverVersion 12020 gpua061:400021:400021 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.61<0> gpua061:400021:400021 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation gpua061:400021:400078 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.61<0> gpua061:400021:400078 [2] NCCL INFO Using network IB gpua061:400021:400078 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 gpua061:400021:400078 [2] NCCL INFO Trees [0] 51/-1/-1->50->49 [1] 51/-1/-1->50->49 gpua061:400021:400078 [2] NCCL INFO Channel 00/0 : 50[85000] -> 51[c7000] via P2P/IPC/read gpua061:400021:400078 [2] NCCL INFO Channel 01/0 : 50[85000] -> 51[c7000] via P2P/IPC/read gpua061:400021:400078 [2] NCCL INFO Connected all rings gpua061:400021:400078 [2] NCCL INFO Channel 00/0 : 50[85000] -> 49[46000] via P2P/IPC/read gpua061:400021:400078 [2] NCCL INFO Channel 01/0 : 50[85000] -> 49[46000] via P2P/IPC/read gpua056:2720519:2720565 [3] NCCL INFO Connected all trees gpua056:2720519:2720565 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 gpua056:2720519:2720565 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer gpua056:2720519:2720565 [3] NCCL INFO comm 0x5608127703c0 rank 31 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE gpua057:178767:178818 [1] NCCL INFO Channel 00/0 : 33[46000] -> 32[7000] via P2P/IPC/read gpua057:178767:178818 [1] NCCL INFO Channel 01/0 : 33[46000] -> 32[7000] via P2P/IPC/read gpua057:178767:178818 [1] NCCL INFO Connected all trees gpua057:178767:178818 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 gpua057:178767:178818 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer gpua057:178767:178818 [1] NCCL INFO comm 0x555ef34603c0 rank 33 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE gpua061:400021:400078 [2] NCCL INFO Connected all trees gpua061:400021:400078 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 gpua061:400021:400078 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer gpua061:400021:400078 [2] NCCL INFO comm 0x56297ceea4c0 rank 50 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE gpua056:2720518:2720518 [2] NCCL INFO cudaDriverVersion 12020 gpua056:2720518:2720518 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.56<0> gpua056:2720518:2720518 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation gpua056:2720518:2720564 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.56<0> gpua056:2720518:2720564 [2] NCCL INFO Using network IB gpua056:2720518:2720564 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 gpua056:2720518:2720564 [2] NCCL INFO Trees [0] 31/-1/-1->30->29 [1] 31/-1/-1->30->29 gpua056:2720518:2720564 [2] NCCL INFO Channel 00/0 : 30[85000] -> 31[c7000] via P2P/IPC/read gpua056:2720518:2720564 [2] NCCL INFO Channel 01/0 : 30[85000] -> 31[c7000] via P2P/IPC/read gpua056:2720518:2720564 [2] NCCL INFO Connected all rings gpua056:2720518:2720564 [2] NCCL INFO Channel 00/0 : 30[85000] -> 29[46000] via P2P/IPC/read gpua056:2720518:2720564 [2] NCCL INFO Channel 01/0 : 30[85000] -> 29[46000] via P2P/IPC/read gpua057:178766:178766 [0] NCCL INFO cudaDriverVersion 12020 gpua057:178766:178766 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.57<0> gpua057:178766:178766 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation gpua057:178766:178819 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.57<0> gpua057:178766:178819 [0] NCCL INFO Using network IB gpua057:178766:178819 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 gpua057:178766:178819 [0] NCCL INFO Trees [0] 33/48/-1->32->0 [1] 33/-1/-1->32->36 gpua057:178766:178819 [0] NCCL INFO Channel 00/0 : 31[c7000] -> 32[7000] [receive] via NET/IB/0 gpua057:178766:178819 [0] NCCL INFO Channel 01/0 : 31[c7000] -> 32[7000] [receive] via NET/IB/0 gpua057:178766:178819 [0] NCCL INFO Channel 00/0 : 32[7000] -> 33[46000] via P2P/IPC/read gpua057:178766:178819 [0] NCCL INFO Channel 01/0 : 32[7000] -> 33[46000] via P2P/IPC/read gpua057:178766:178819 [0] NCCL INFO Connected all rings gpua056:2720518:2720564 [2] NCCL INFO Connected all trees gpua056:2720518:2720564 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 gpua056:2720518:2720564 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer gpua056:2720518:2720564 [2] NCCL INFO comm 0x55d04a945e90 rank 30 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE gpua056:2720516:2720516 [0] NCCL INFO cudaDriverVersion 12020 gpua056:2720516:2720516 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.56<0> gpua056:2720516:2720516 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation gpua056:2720516:2720566 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.56<0> gpua056:2720516:2720566 [0] NCCL INFO Using network IB gpua056:2720516:2720566 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 gpua056:2720516:2720566 [0] NCCL INFO Trees [0] 29/-1/-1->28->24 [1] 29/12/-1->28->60 gpua057:178766:178819 [0] NCCL INFO Channel 01/0 : 32[7000] -> 36[7000] [send] via NET/IB/0 gpua057:178766:178819 [0] NCCL INFO Channel 00/0 : 32[7000] -> 48[7000] [send] via NET/IB/0 gpua057:178766:178819 [0] NCCL INFO Channel 00/0 : 0[7000] -> 32[7000] [receive] via NET/IB/0 gpua057:178766:178819 [0] NCCL INFO Channel 00/0 : 32[7000] -> 0[7000] [send] via NET/IB/0 gpua057:178766:178819 [0] NCCL INFO Channel 00/0 : 48[7000] -> 32[7000] [receive] via NET/IB/0 gpua057:178766:178819 [0] NCCL INFO Channel 01/0 : 36[7000] -> 32[7000] [receive] via NET/IB/0 gpua057:178766:178819 [0] NCCL INFO Connected all trees gpua057:178766:178819 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 gpua057:178766:178819 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer gpua057:178766:178819 [0] NCCL INFO comm 0x559e46decd10 rank 32 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE gpua056:2720516:2720566 [0] NCCL INFO Channel 00/0 : 27[c7000] -> 28[7000] [receive] via NET/IB/0 gpua056:2720516:2720566 [0] NCCL INFO Channel 01/0 : 27[c7000] -> 28[7000] [receive] via NET/IB/0 gpua056:2720516:2720566 [0] NCCL INFO Channel 00/0 : 28[7000] -> 29[46000] via P2P/IPC/read gpua056:2720516:2720566 [0] NCCL INFO Channel 01/0 : 28[7000] -> 29[46000] via P2P/IPC/read gpua056:2720516:2720566 [0] NCCL INFO Connected all rings gpua056:2720516:2720566 [0] NCCL INFO Channel 00/0 : 24[7000] -> 28[7000] [receive] via NET/IB/0 gpua056:2720516:2720566 [0] NCCL INFO Channel 01/0 : 12[7000] -> 28[7000] [receive] via NET/IB/0 gpua056:2720516:2720566 [0] NCCL INFO Channel 01/0 : 60[7000] -> 28[7000] [receive] via NET/IB/0 gpua056:2720516:2720566 [0] NCCL INFO Channel 01/0 : 28[7000] -> 60[7000] [send] via NET/IB/0 gpua056:2720516:2720566 [0] NCCL INFO Channel 01/0 : 28[7000] -> 12[7000] [send] via NET/IB/0 gpua056:2720516:2720566 [0] NCCL INFO Channel 00/0 : 28[7000] -> 24[7000] [send] via NET/IB/0 gpua056:2720516:2720566 [0] NCCL INFO Connected all trees gpua056:2720516:2720566 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 gpua056:2720516:2720566 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer gpua056:2720516:2720566 [0] NCCL INFO comm 0x5644faf6d380 rank 28 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE [gpua049:0/64] 2024-01-14 14:42:38,968 (distributed:1027) INFO: Reducer buckets have been rebuilt in this iteration. [gpua049:0/64] 2024-01-14 14:47:17,484 (trainer:753) INFO: 1epoch:train:1-100batch: iter_time=3.668, forward_time=0.227, loss_ctc=3.120e+03, loss_interctc_layer6=3.646e+03, loss_interctc_layer12=3.481e+03, loss_interctc_layer15=3.404e+03, loss_interctc_layer21=3.279e+03, loss=3.386e+03, backward_time=0.758, grad_norm=2.185e+04, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.081, optim0_lr0=8.583e-08, train_time=6.916 [gpua049:0/64] 2024-01-14 14:59:59,100 (trainer:753) INFO: 1epoch:train:101-200batch: iter_time=1.147e-04, forward_time=0.140, loss_ctc=794.786, loss_interctc_layer6=1.854e+03, loss_interctc_layer12=1.273e+03, loss_interctc_layer15=1.085e+03, loss_interctc_layer21=902.727, loss=1.182e+03, backward_time=1.402, grad_norm=1.612e+04, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.079, optim0_lr0=2.525e-07, train_time=7.616 [gpua049:0/64] 2024-01-14 15:05:43,147 (trainer:753) INFO: 1epoch:train:201-300batch: iter_time=9.594e-05, forward_time=0.139, loss_ctc=396.331, loss_interctc_layer6=405.750, loss_interctc_layer12=384.603, loss_interctc_layer15=382.808, loss_interctc_layer21=398.968, loss=393.692, backward_time=0.757, grad_norm=971.141, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.078, optim0_lr0=4.192e-07, train_time=3.440 [gpua049:0/64] 2024-01-14 15:10:13,045 (trainer:753) INFO: 1epoch:train:301-400batch: iter_time=9.295e-05, forward_time=0.259, loss_ctc=343.816, loss_interctc_layer6=333.911, loss_interctc_layer12=327.094, loss_interctc_layer15=324.909, loss_interctc_layer21=344.644, loss=334.875, backward_time=0.636, grad_norm=620.039, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.086, optim0_lr0=5.858e-07, train_time=2.698 [gpua049:0/64] 2024-01-14 15:13:22,453 (trainer:753) INFO: 1epoch:train:401-500batch: iter_time=8.670e-05, forward_time=0.167, loss_ctc=425.649, loss_interctc_layer6=417.282, loss_interctc_layer12=408.314, loss_interctc_layer15=406.275, loss_interctc_layer21=425.788, loss=416.661, backward_time=0.371, grad_norm=869.272, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.078, optim0_lr0=7.525e-07, train_time=1.895 [gpua049:0/64] 2024-01-14 15:16:39,473 (trainer:753) INFO: 1epoch:train:501-600batch: iter_time=9.448e-05, forward_time=0.140, loss_ctc=386.906, loss_interctc_layer6=369.961, loss_interctc_layer12=362.886, loss_interctc_layer15=361.809, loss_interctc_layer21=386.855, loss=373.683, backward_time=0.436, grad_norm=1.044e+03, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.079, optim0_lr0=9.192e-07, train_time=1.969 [gpua049:0/64] 2024-01-14 15:19:45,497 (trainer:753) INFO: 1epoch:train:601-700batch: iter_time=8.982e-05, forward_time=0.140, loss_ctc=398.883, loss_interctc_layer6=390.927, loss_interctc_layer12=384.820, loss_interctc_layer15=383.710, loss_interctc_layer21=398.558, loss=391.380, backward_time=0.400, grad_norm=986.675, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.078, optim0_lr0=1.086e-06, train_time=1.861 [gpua049:0/64] 2024-01-14 15:22:59,165 (trainer:753) INFO: 1epoch:train:701-800batch: iter_time=8.896e-05, forward_time=0.140, loss_ctc=360.477, loss_interctc_layer6=349.876, loss_interctc_layer12=345.459, loss_interctc_layer15=344.629, loss_interctc_layer21=360.500, loss=352.188, backward_time=0.445, grad_norm=780.747, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.079, optim0_lr0=1.252e-06, train_time=1.936 [gpua049:0/64] 2024-01-14 15:25:58,209 (trainer:753) INFO: 1epoch:train:801-900batch: iter_time=8.871e-05, forward_time=0.195, loss_ctc=365.048, loss_interctc_layer6=351.992, loss_interctc_layer12=347.678, loss_interctc_layer15=346.833, loss_interctc_layer21=364.900, loss=355.290, backward_time=0.407, grad_norm=790.960, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.080, optim0_lr0=1.419e-06, train_time=1.790 [gpua049:0/64] 2024-01-14 15:29:14,419 (trainer:753) INFO: 1epoch:train:901-1000batch: iter_time=8.583e-05, forward_time=0.197, loss_ctc=378.631, loss_interctc_layer6=367.518, loss_interctc_layer12=363.419, loss_interctc_layer15=362.671, loss_interctc_layer21=378.273, loss=370.102, backward_time=0.429, grad_norm=877.885, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.085, optim0_lr0=1.586e-06, train_time=1.962 [gpua049:0/64] 2024-01-14 15:31:58,727 (trainer:753) INFO: 1epoch:train:1001-1100batch: iter_time=8.590e-05, forward_time=0.141, loss_ctc=363.272, loss_interctc_layer6=346.376, loss_interctc_layer12=342.631, loss_interctc_layer15=341.843, loss_interctc_layer21=363.238, loss=351.472, backward_time=0.370, grad_norm=633.701, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.078, optim0_lr0=1.752e-06, train_time=1.643 [gpua049:0/64] 2024-01-14 15:34:57,847 (trainer:753) INFO: 1epoch:train:1101-1200batch: iter_time=8.250e-05, forward_time=0.140, loss_ctc=337.841, loss_interctc_layer6=323.630, loss_interctc_layer12=320.130, loss_interctc_layer15=319.549, loss_interctc_layer21=337.432, loss=327.717, backward_time=0.378, grad_norm=606.538, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.078, optim0_lr0=1.919e-06, train_time=1.791 [gpua049:0/64] 2024-01-14 15:36:38,794 (multiple_iter_factory:32) INFO: Building 1th iter-factory... [gpua049:0/64] 2024-01-14 15:36:58,952 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') [gpua049:0/64] 2024-01-14 15:37:02,608 (abs_task:1660) INFO: [train] dataset: ESPnetDataset( speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} preprocess: ) [gpua049:0/64] 2024-01-14 15:37:02,608 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, [gpua049:0/64] 2024-01-14 15:37:02,611 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 [gpua049:0/64] 2024-01-14 15:45:29,323 (trainer:753) INFO: 1epoch:train:1201-1300batch: iter_time=4.747, forward_time=0.250, loss_ctc=341.089, loss_interctc_layer6=327.723, loss_interctc_layer12=324.600, loss_interctc_layer15=324.107, loss_interctc_layer21=340.926, loss=331.689, backward_time=0.362, grad_norm=572.242, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.084, optim0_lr0=2.086e-06, train_time=6.314 [gpua049:0/64] 2024-01-14 15:47:55,983 (trainer:753) INFO: 1epoch:train:1301-1400batch: iter_time=9.698e-05, forward_time=0.166, loss_ctc=301.724, loss_interctc_layer6=292.632, loss_interctc_layer12=289.972, loss_interctc_layer15=289.515, loss_interctc_layer21=301.427, loss=295.054, backward_time=0.330, grad_norm=436.346, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.081, optim0_lr0=2.253e-06, train_time=1.467 [gpua049:0/64] 2024-01-14 15:50:43,936 (trainer:753) INFO: 1epoch:train:1401-1500batch: iter_time=9.087e-05, forward_time=0.224, loss_ctc=316.176, loss_interctc_layer6=310.319, loss_interctc_layer12=308.236, loss_interctc_layer15=307.937, loss_interctc_layer21=315.808, loss=311.695, backward_time=0.350, grad_norm=550.249, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.082, optim0_lr0=2.419e-06, train_time=1.679 [gpua049:0/64] 2024-01-14 15:53:52,396 (trainer:753) INFO: 1epoch:train:1501-1600batch: iter_time=8.711e-05, forward_time=0.142, loss_ctc=304.464, loss_interctc_layer6=294.021, loss_interctc_layer12=291.847, loss_interctc_layer15=291.481, loss_interctc_layer21=304.245, loss=297.212, backward_time=0.384, grad_norm=432.328, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.078, optim0_lr0=2.586e-06, train_time=1.884 [gpua049:0/64] 2024-01-14 15:56:12,615 (trainer:753) INFO: 1epoch:train:1601-1700batch: iter_time=9.079e-05, forward_time=0.154, loss_ctc=298.677, loss_interctc_layer6=296.300, loss_interctc_layer12=294.557, loss_interctc_layer15=294.298, loss_interctc_layer21=298.597, loss=296.486, backward_time=0.324, grad_norm=421.836, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.078, optim0_lr0=2.752e-06, train_time=1.402 [gpua049:0/64] 2024-01-14 15:59:01,781 (trainer:753) INFO: 1epoch:train:1701-1800batch: iter_time=8.380e-05, forward_time=0.228, loss_ctc=343.073, loss_interctc_layer6=329.871, loss_interctc_layer12=328.280, loss_interctc_layer15=327.996, loss_interctc_layer21=342.604, loss=334.365, backward_time=0.379, grad_norm=504.901, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.087, optim0_lr0=2.919e-06, train_time=1.691 [gpua049:0/64] 2024-01-14 16:01:53,304 (trainer:753) INFO: 1epoch:train:1801-1900batch: iter_time=8.557e-05, forward_time=0.166, loss_ctc=358.672, loss_interctc_layer6=349.547, loss_interctc_layer12=348.399, loss_interctc_layer15=348.309, loss_interctc_layer21=358.718, loss=352.729, backward_time=0.348, grad_norm=564.936, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.078, optim0_lr0=3.086e-06, train_time=1.715 [gpua049:0/64] 2024-01-14 16:04:05,246 (trainer:753) INFO: 1epoch:train:1901-2000batch: iter_time=8.687e-05, forward_time=0.142, loss_ctc=299.012, loss_interctc_layer6=294.019, loss_interctc_layer12=292.761, loss_interctc_layer15=292.579, loss_interctc_layer21=298.936, loss=295.461, backward_time=0.311, grad_norm=361.330, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.078, optim0_lr0=3.253e-06, train_time=1.319 [gpua049:0/64] 2024-01-14 16:06:30,516 (trainer:753) INFO: 1epoch:train:2001-2100batch: iter_time=8.147e-05, forward_time=0.180, loss_ctc=326.563, loss_interctc_layer6=318.927, loss_interctc_layer12=317.595, loss_interctc_layer15=317.485, loss_interctc_layer21=326.299, loss=321.374, backward_time=0.336, grad_norm=388.865, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.080, optim0_lr0=3.419e-06, train_time=1.453 [gpua049:0/64] 2024-01-14 16:09:05,828 (trainer:753) INFO: 1epoch:train:2101-2200batch: iter_time=8.589e-05, forward_time=0.208, loss_ctc=305.578, loss_interctc_layer6=296.505, loss_interctc_layer12=295.284, loss_interctc_layer15=295.227, loss_interctc_layer21=305.441, loss=299.607, backward_time=0.375, grad_norm=362.850, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.083, optim0_lr0=3.586e-06, train_time=1.552 [gpua049:0/64] 2024-01-14 16:11:48,164 (trainer:753) INFO: 1epoch:train:2201-2300batch: iter_time=8.431e-05, forward_time=0.142, loss_ctc=288.850, loss_interctc_layer6=286.030, loss_interctc_layer12=284.985, loss_interctc_layer15=284.889, loss_interctc_layer21=288.561, loss=286.663, backward_time=0.362, grad_norm=271.725, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.079, optim0_lr0=3.753e-06, train_time=1.624 [gpua049:0/64] 2024-01-14 16:14:26,620 (trainer:753) INFO: 1epoch:train:2301-2400batch: iter_time=8.607e-05, forward_time=0.144, loss_ctc=334.099, loss_interctc_layer6=316.557, loss_interctc_layer12=315.215, loss_interctc_layer15=315.031, loss_interctc_layer21=333.956, loss=322.972, backward_time=0.317, grad_norm=285.591, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.079, optim0_lr0=3.919e-06, train_time=1.584 [gpua049:0/64] 2024-01-14 16:16:56,034 (trainer:753) INFO: 1epoch:train:2401-2500batch: iter_time=7.974e-05, forward_time=0.251, loss_ctc=274.856, loss_interctc_layer6=265.445, loss_interctc_layer12=264.506, loss_interctc_layer15=264.468, loss_interctc_layer21=274.620, loss=268.779, backward_time=0.343, grad_norm=283.735, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.085, optim0_lr0=4.086e-06, train_time=1.494 [gpua049:0/64] 2024-01-14 16:17:16,081 (multiple_iter_factory:32) INFO: Building 2th iter-factory... [gpua049:0/64] 2024-01-14 16:17:36,420 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') [gpua049:0/64] 2024-01-14 16:17:40,216 (abs_task:1660) INFO: [train] dataset: ESPnetDataset( speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} preprocess: ) [gpua049:0/64] 2024-01-14 16:17:40,216 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, [gpua049:0/64] 2024-01-14 16:17:40,219 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 [gpua049:0/64] 2024-01-14 16:24:33,905 (trainer:753) INFO: 1epoch:train:2501-2600batch: iter_time=3.178, forward_time=0.181, loss_ctc=290.280, loss_interctc_layer6=269.202, loss_interctc_layer12=268.173, loss_interctc_layer15=268.112, loss_interctc_layer21=289.544, loss=277.062, backward_time=0.324, grad_norm=279.624, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.080, optim0_lr0=4.253e-06, train_time=4.578 [gpua049:0/64] 2024-01-14 16:27:01,617 (trainer:753) INFO: 1epoch:train:2601-2700batch: iter_time=8.443e-05, forward_time=0.142, loss_ctc=283.097, loss_interctc_layer6=268.949, loss_interctc_layer12=267.959, loss_interctc_layer15=267.800, loss_interctc_layer21=282.731, loss=274.107, backward_time=0.331, grad_norm=225.507, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.078, optim0_lr0=4.419e-06, train_time=1.477 [gpua049:0/64] 2024-01-14 16:29:28,683 (trainer:753) INFO: 1epoch:train:2701-2800batch: iter_time=8.398e-05, forward_time=0.141, loss_ctc=283.245, loss_interctc_layer6=271.886, loss_interctc_layer12=271.183, loss_interctc_layer15=271.086, loss_interctc_layer21=283.073, loss=276.095, backward_time=0.326, grad_norm=285.167, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.078, optim0_lr0=4.586e-06, train_time=1.470 [gpua049:0/64] 2024-01-14 16:32:14,801 (trainer:753) INFO: 1epoch:train:2801-2900batch: iter_time=9.649e-05, forward_time=0.235, loss_ctc=262.028, loss_interctc_layer6=244.431, loss_interctc_layer12=243.801, loss_interctc_layer15=243.692, loss_interctc_layer21=261.604, loss=251.111, backward_time=0.402, grad_norm=230.784, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.088, optim0_lr0=4.752e-06, train_time=1.660 [gpua049:0/64] 2024-01-14 16:34:40,051 (trainer:753) INFO: 1epoch:train:2901-3000batch: iter_time=8.320e-05, forward_time=0.143, loss_ctc=333.677, loss_interctc_layer6=317.038, loss_interctc_layer12=316.271, loss_interctc_layer15=316.241, loss_interctc_layer21=333.246, loss=323.294, backward_time=0.368, grad_norm=317.149, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.077, optim0_lr0=4.919e-06, train_time=1.453 [gpua049:0/64] 2024-01-14 16:37:01,012 (trainer:753) INFO: 1epoch:train:3001-3100batch: iter_time=7.493e-05, forward_time=0.141, loss_ctc=300.562, loss_interctc_layer6=282.811, loss_interctc_layer12=282.153, loss_interctc_layer15=281.886, loss_interctc_layer21=300.216, loss=289.526, backward_time=0.307, grad_norm=304.782, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.077, optim0_lr0=5.086e-06, train_time=1.410 [gpua049:0/64] 2024-01-14 16:39:37,803 (trainer:753) INFO: 1epoch:train:3101-3200batch: iter_time=8.235e-05, forward_time=0.143, loss_ctc=316.059, loss_interctc_layer6=304.569, loss_interctc_layer12=303.654, loss_interctc_layer15=303.265, loss_interctc_layer21=315.677, loss=308.645, backward_time=0.330, grad_norm=304.518, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.078, optim0_lr0=5.252e-06, train_time=1.568 [gpua049:0/64] 2024-01-14 16:42:39,568 (trainer:753) INFO: 1epoch:train:3201-3300batch: iter_time=8.448e-05, forward_time=0.202, loss_ctc=294.564, loss_interctc_layer6=283.404, loss_interctc_layer12=282.818, loss_interctc_layer15=282.684, loss_interctc_layer21=294.164, loss=287.527, backward_time=0.414, grad_norm=286.513, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.089, optim0_lr0=5.419e-06, train_time=1.816 [gpua049:0/64] 2024-01-14 16:45:14,614 (trainer:753) INFO: 1epoch:train:3301-3400batch: iter_time=8.767e-05, forward_time=0.143, loss_ctc=289.257, loss_interctc_layer6=281.758, loss_interctc_layer12=280.767, loss_interctc_layer15=280.627, loss_interctc_layer21=288.736, loss=284.229, backward_time=0.379, grad_norm=240.319, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.078, optim0_lr0=5.586e-06, train_time=1.551 [gpua049:0/64] 2024-01-14 16:47:31,647 (trainer:753) INFO: 1epoch:train:3401-3500batch: iter_time=9.091e-05, forward_time=0.142, loss_ctc=305.635, loss_interctc_layer6=295.997, loss_interctc_layer12=295.327, loss_interctc_layer15=295.189, loss_interctc_layer21=305.225, loss=299.475, backward_time=0.317, grad_norm=259.556, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.078, optim0_lr0=5.753e-06, train_time=1.371 [gpua049:0/64] 2024-01-14 16:49:55,094 (trainer:753) INFO: 1epoch:train:3501-3600batch: iter_time=7.899e-05, forward_time=0.165, loss_ctc=298.018, loss_interctc_layer6=282.454, loss_interctc_layer12=281.345, loss_interctc_layer15=281.008, loss_interctc_layer21=297.641, loss=288.093, backward_time=0.327, grad_norm=237.730, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.078, optim0_lr0=5.919e-06, train_time=1.434 [gpua049:0/64] 2024-01-14 16:52:28,366 (trainer:753) INFO: 1epoch:train:3601-3700batch: iter_time=8.386e-05, forward_time=0.242, loss_ctc=272.434, loss_interctc_layer6=262.363, loss_interctc_layer12=261.549, loss_interctc_layer15=261.290, loss_interctc_layer21=272.537, loss=266.034, backward_time=0.343, grad_norm=220.113, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.083, optim0_lr0=6.086e-06, train_time=1.529 [gpua049:0/64] 2024-01-14 16:54:06,304 (multiple_iter_factory:32) INFO: Building 3th iter-factory... [gpua049:0/64] 2024-01-14 16:54:26,528 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') [gpua049:0/64] 2024-01-14 16:54:30,271 (abs_task:1660) INFO: [train] dataset: ESPnetDataset( speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} preprocess: ) [gpua049:0/64] 2024-01-14 16:54:30,271 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, [gpua049:0/64] 2024-01-14 16:54:30,274 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 [gpua049:0/64] 2024-01-14 17:00:10,663 (trainer:753) INFO: 1epoch:train:3701-3800batch: iter_time=3.093, forward_time=0.249, loss_ctc=293.224, loss_interctc_layer6=272.692, loss_interctc_layer12=271.722, loss_interctc_layer15=271.551, loss_interctc_layer21=293.484, loss=280.535, backward_time=0.335, grad_norm=263.471, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.083, optim0_lr0=6.253e-06, train_time=4.626 [gpua049:0/64] 2024-01-14 17:02:48,693 (trainer:753) INFO: 1epoch:train:3801-3900batch: iter_time=8.218e-05, forward_time=0.143, loss_ctc=259.708, loss_interctc_layer6=244.700, loss_interctc_layer12=243.460, loss_interctc_layer15=243.267, loss_interctc_layer21=258.984, loss=250.024, backward_time=0.395, grad_norm=190.265, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.078, optim0_lr0=6.419e-06, train_time=1.578 [gpua049:0/64] 2024-01-14 17:06:02,291 (trainer:753) INFO: 1epoch:train:3901-4000batch: iter_time=8.216e-05, forward_time=0.175, loss_ctc=277.081, loss_interctc_layer6=265.182, loss_interctc_layer12=263.737, loss_interctc_layer15=263.387, loss_interctc_layer21=276.871, loss=269.252, backward_time=0.469, grad_norm=260.066, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.084, optim0_lr0=6.586e-06, train_time=1.938 [gpua049:0/64] 2024-01-14 17:08:53,055 (trainer:753) INFO: 1epoch:train:4001-4100batch: iter_time=8.282e-05, forward_time=0.155, loss_ctc=265.363, loss_interctc_layer6=250.665, loss_interctc_layer12=249.003, loss_interctc_layer15=248.640, loss_interctc_layer21=265.596, loss=255.854, backward_time=0.399, grad_norm=212.080, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.080, optim0_lr0=6.752e-06, train_time=1.707 [gpua049:0/64] 2024-01-14 17:12:03,464 (trainer:753) INFO: 1epoch:train:4101-4200batch: iter_time=8.426e-05, forward_time=0.142, loss_ctc=263.771, loss_interctc_layer6=258.139, loss_interctc_layer12=256.610, loss_interctc_layer15=256.371, loss_interctc_layer21=263.555, loss=259.689, backward_time=0.486, grad_norm=206.996, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.077, optim0_lr0=6.919e-06, train_time=1.904 [gpua049:0/64] 2024-01-14 17:15:14,245 (trainer:753) INFO: 1epoch:train:4201-4300batch: iter_time=9.251e-05, forward_time=0.213, loss_ctc=314.279, loss_interctc_layer6=297.373, loss_interctc_layer12=294.922, loss_interctc_layer15=294.822, loss_interctc_layer21=313.939, loss=303.067, backward_time=0.495, grad_norm=313.537, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.084, optim0_lr0=7.086e-06, train_time=1.908 [gpua049:0/64] 2024-01-14 17:18:21,874 (trainer:753) INFO: 1epoch:train:4301-4400batch: iter_time=8.996e-05, forward_time=0.143, loss_ctc=322.288, loss_interctc_layer6=310.599, loss_interctc_layer12=308.408, loss_interctc_layer15=308.644, loss_interctc_layer21=322.849, loss=314.558, backward_time=0.474, grad_norm=312.272, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.079, optim0_lr0=7.253e-06, train_time=1.876 [gpua049:0/64] 2024-01-14 17:21:33,280 (trainer:753) INFO: 1epoch:train:4401-4500batch: iter_time=8.691e-05, forward_time=0.172, loss_ctc=276.229, loss_interctc_layer6=267.589, loss_interctc_layer12=264.973, loss_interctc_layer15=264.945, loss_interctc_layer21=275.931, loss=269.933, backward_time=0.447, grad_norm=236.691, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.084, optim0_lr0=7.419e-06, train_time=1.913 [gpua049:0/64] 2024-01-14 17:24:29,083 (trainer:753) INFO: 1epoch:train:4501-4600batch: iter_time=9.441e-05, forward_time=0.160, loss_ctc=300.422, loss_interctc_layer6=292.870, loss_interctc_layer12=289.561, loss_interctc_layer15=289.660, loss_interctc_layer21=300.153, loss=294.533, backward_time=0.478, grad_norm=247.837, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.080, optim0_lr0=7.586e-06, train_time=1.759 [gpua049:0/64] 2024-01-14 17:27:21,433 (trainer:753) INFO: 1epoch:train:4601-4700batch: iter_time=9.394e-05, forward_time=0.142, loss_ctc=278.550, loss_interctc_layer6=271.935, loss_interctc_layer12=268.594, loss_interctc_layer15=268.384, loss_interctc_layer21=278.950, loss=273.283, backward_time=0.412, grad_norm=248.769, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.079, optim0_lr0=7.753e-06, train_time=1.723 [gpua049:0/64] 2024-01-14 17:30:16,441 (trainer:753) INFO: 1epoch:train:4701-4800batch: iter_time=9.071e-05, forward_time=0.169, loss_ctc=265.769, loss_interctc_layer6=263.659, loss_interctc_layer12=260.559, loss_interctc_layer15=260.403, loss_interctc_layer21=264.503, loss=262.979, backward_time=0.413, grad_norm=215.343, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.084, optim0_lr0=7.919e-06, train_time=1.750 [gpua049:0/64] 2024-01-14 17:33:07,255 (trainer:753) INFO: 1epoch:train:4801-4900batch: iter_time=1.710e-04, forward_time=0.168, loss_ctc=307.585, loss_interctc_layer6=294.075, loss_interctc_layer12=291.435, loss_interctc_layer15=291.203, loss_interctc_layer21=308.285, loss=298.517, backward_time=0.427, grad_norm=204.425, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.081, optim0_lr0=8.086e-06, train_time=1.708 [gpua049:0/64] 2024-01-14 17:35:50,579 (trainer:753) INFO: 1epoch:train:4901-5000batch: iter_time=8.363e-05, forward_time=0.142, loss_ctc=257.676, loss_interctc_layer6=251.201, loss_interctc_layer12=247.925, loss_interctc_layer15=247.691, loss_interctc_layer21=256.911, loss=252.281, backward_time=0.403, grad_norm=214.563, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.079, optim0_lr0=8.252e-06, train_time=1.633 [gpua049:0/64] 2024-01-14 17:36:10,690 (multiple_iter_factory:32) INFO: Building 4th iter-factory... [gpua049:0/64] 2024-01-14 17:36:30,979 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') [gpua049:0/64] 2024-01-14 17:36:34,613 (abs_task:1660) INFO: [train] dataset: ESPnetDataset( speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} preprocess: ) [gpua049:0/64] 2024-01-14 17:36:34,614 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, [gpua049:0/64] 2024-01-14 17:36:34,617 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 [gpua049:0/64] 2024-01-14 17:46:14,472 (trainer:753) INFO: 1epoch:train:5001-5100batch: iter_time=3.002, forward_time=0.186, loss_ctc=268.801, loss_interctc_layer6=252.285, loss_interctc_layer12=249.230, loss_interctc_layer15=248.838, loss_interctc_layer21=269.050, loss=257.641, backward_time=0.478, grad_norm=240.842, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.082, optim0_lr0=8.419e-06, train_time=6.239 [gpua049:0/64] 2024-01-14 17:49:38,080 (trainer:753) INFO: 1epoch:train:5101-5200batch: iter_time=8.702e-05, forward_time=0.141, loss_ctc=260.803, loss_interctc_layer6=250.311, loss_interctc_layer12=247.237, loss_interctc_layer15=247.266, loss_interctc_layer21=260.821, loss=253.287, backward_time=0.419, grad_norm=209.802, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.079, optim0_lr0=8.586e-06, train_time=2.036 [gpua049:0/64] 2024-01-14 17:52:52,504 (trainer:753) INFO: 1epoch:train:5201-5300batch: iter_time=8.499e-05, forward_time=0.153, loss_ctc=263.859, loss_interctc_layer6=259.882, loss_interctc_layer12=256.188, loss_interctc_layer15=255.949, loss_interctc_layer21=263.589, loss=259.893, backward_time=0.412, grad_norm=230.342, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.079, optim0_lr0=8.753e-06, train_time=1.943 [gpua049:0/64] 2024-01-14 17:55:35,713 (trainer:753) INFO: 1epoch:train:5301-5400batch: iter_time=9.038e-05, forward_time=0.141, loss_ctc=241.901, loss_interctc_layer6=229.865, loss_interctc_layer12=226.559, loss_interctc_layer15=226.522, loss_interctc_layer21=242.775, loss=233.524, backward_time=0.332, grad_norm=156.321, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.078, optim0_lr0=8.919e-06, train_time=1.633 [gpua049:0/64] 2024-01-14 17:58:54,229 (trainer:753) INFO: 1epoch:train:5401-5500batch: iter_time=9.069e-05, forward_time=0.221, loss_ctc=312.316, loss_interctc_layer6=303.848, loss_interctc_layer12=298.995, loss_interctc_layer15=298.843, loss_interctc_layer21=310.463, loss=304.893, backward_time=0.425, grad_norm=251.018, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.087, optim0_lr0=9.086e-06, train_time=1.985 [gpua049:0/64] 2024-01-14 18:01:58,421 (trainer:753) INFO: 1epoch:train:5501-5600batch: iter_time=9.058e-05, forward_time=0.142, loss_ctc=282.139, loss_interctc_layer6=271.066, loss_interctc_layer12=266.928, loss_interctc_layer15=267.102, loss_interctc_layer21=281.569, loss=273.761, backward_time=0.423, grad_norm=263.161, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.078, optim0_lr0=9.252e-06, train_time=1.842 [gpua049:0/64] 2024-01-14 18:05:23,877 (trainer:753) INFO: 1epoch:train:5601-5700batch: iter_time=8.381e-05, forward_time=0.141, loss_ctc=297.027, loss_interctc_layer6=292.954, loss_interctc_layer12=287.642, loss_interctc_layer15=287.513, loss_interctc_layer21=297.020, loss=292.431, backward_time=0.499, grad_norm=271.146, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.078, optim0_lr0=9.419e-06, train_time=2.054 [gpua049:0/64] 2024-01-14 18:09:25,458 (trainer:753) INFO: 1epoch:train:5701-5800batch: iter_time=8.293e-05, forward_time=0.142, loss_ctc=276.570, loss_interctc_layer6=271.830, loss_interctc_layer12=267.590, loss_interctc_layer15=267.634, loss_interctc_layer21=276.391, loss=272.003, backward_time=0.531, grad_norm=232.724, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.078, optim0_lr0=9.586e-06, train_time=2.415 [gpua049:0/64] 2024-01-14 18:14:17,589 (trainer:753) INFO: 1epoch:train:5801-5900batch: iter_time=8.985e-05, forward_time=0.187, loss_ctc=270.513, loss_interctc_layer6=269.325, loss_interctc_layer12=265.028, loss_interctc_layer15=264.725, loss_interctc_layer21=269.597, loss=267.838, backward_time=0.575, grad_norm=208.338, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.082, optim0_lr0=9.753e-06, train_time=2.922 [gpua049:0/64] 2024-01-14 18:17:57,468 (trainer:753) INFO: 1epoch:train:5901-6000batch: iter_time=9.080e-05, forward_time=0.157, loss_ctc=282.722, loss_interctc_layer6=279.328, loss_interctc_layer12=275.763, loss_interctc_layer15=275.641, loss_interctc_layer21=282.147, loss=279.120, backward_time=0.497, grad_norm=204.439, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.079, optim0_lr0=9.919e-06, train_time=2.198 [gpua049:0/64] 2024-01-14 18:21:25,276 (trainer:753) INFO: 1epoch:train:6001-6100batch: iter_time=9.717e-05, forward_time=0.143, loss_ctc=278.136, loss_interctc_layer6=267.450, loss_interctc_layer12=264.247, loss_interctc_layer15=264.010, loss_interctc_layer21=277.165, loss=270.202, backward_time=0.432, grad_norm=200.486, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.079, optim0_lr0=1.009e-05, train_time=2.078 [gpua049:0/64] 2024-01-14 18:24:36,082 (trainer:753) INFO: 1epoch:train:6101-6200batch: iter_time=8.864e-05, forward_time=0.142, loss_ctc=253.928, loss_interctc_layer6=250.327, loss_interctc_layer12=247.559, loss_interctc_layer15=247.399, loss_interctc_layer21=253.726, loss=250.588, backward_time=0.421, grad_norm=189.051, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.079, optim0_lr0=1.025e-05, train_time=1.908 [gpua049:0/64] 2024-01-14 18:26:21,123 (multiple_iter_factory:32) INFO: Building 5th iter-factory... [gpua049:0/64] 2024-01-14 18:26:41,144 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') [gpua049:0/64] 2024-01-14 18:26:44,811 (abs_task:1660) INFO: [train] dataset: ESPnetDataset( speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} preprocess: ) [gpua049:0/64] 2024-01-14 18:26:44,811 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, [gpua049:0/64] 2024-01-14 18:26:44,814 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 [gpua049:0/64] 2024-01-14 18:36:14,593 (trainer:753) INFO: 1epoch:train:6201-6300batch: iter_time=3.425, forward_time=0.188, loss_ctc=285.589, loss_interctc_layer6=260.437, loss_interctc_layer12=257.149, loss_interctc_layer15=256.909, loss_interctc_layer21=285.986, loss=269.214, backward_time=0.370, grad_norm=229.892, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.081, optim0_lr0=1.042e-05, train_time=6.985 [gpua049:0/64] 2024-01-14 18:39:14,474 (trainer:753) INFO: 1epoch:train:6301-6400batch: iter_time=9.294e-05, forward_time=0.168, loss_ctc=250.060, loss_interctc_layer6=233.158, loss_interctc_layer12=230.763, loss_interctc_layer15=230.802, loss_interctc_layer21=250.257, loss=239.008, backward_time=0.397, grad_norm=173.791, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.081, optim0_lr0=1.059e-05, train_time=1.799 [gpua049:0/64] 2024-01-14 18:42:02,350 (trainer:753) INFO: 1epoch:train:6401-6500batch: iter_time=8.190e-05, forward_time=0.215, loss_ctc=276.121, loss_interctc_layer6=253.653, loss_interctc_layer12=250.409, loss_interctc_layer15=250.263, loss_interctc_layer21=276.318, loss=261.353, backward_time=0.406, grad_norm=227.591, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.087, optim0_lr0=1.075e-05, train_time=1.678 [gpua049:0/64] 2024-01-14 18:45:26,997 (trainer:753) INFO: 1epoch:train:6501-6600batch: iter_time=8.002e-04, forward_time=0.228, loss_ctc=262.419, loss_interctc_layer6=237.334, loss_interctc_layer12=234.425, loss_interctc_layer15=234.863, loss_interctc_layer21=261.089, loss=246.026, backward_time=0.472, grad_norm=185.529, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.093, optim0_lr0=1.092e-05, train_time=2.045 [gpua049:0/64] 2024-01-14 18:48:40,917 (trainer:753) INFO: 1epoch:train:6601-6700batch: iter_time=8.783e-05, forward_time=0.152, loss_ctc=263.622, loss_interctc_layer6=246.128, loss_interctc_layer12=244.195, loss_interctc_layer15=243.902, loss_interctc_layer21=262.967, loss=252.163, backward_time=0.424, grad_norm=197.033, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.079, optim0_lr0=1.109e-05, train_time=1.939 [gpua049:0/64] 2024-01-14 18:51:51,239 (trainer:753) INFO: 1epoch:train:6701-6800batch: iter_time=8.871e-05, forward_time=0.227, loss_ctc=304.658, loss_interctc_layer6=278.879, loss_interctc_layer12=276.888, loss_interctc_layer15=276.978, loss_interctc_layer21=303.959, loss=288.272, backward_time=0.440, grad_norm=243.526, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.086, optim0_lr0=1.125e-05, train_time=1.904 [gpua049:0/64] 2024-01-14 18:54:51,676 (trainer:753) INFO: 1epoch:train:6801-6900batch: iter_time=8.376e-05, forward_time=0.245, loss_ctc=320.053, loss_interctc_layer6=296.364, loss_interctc_layer12=292.639, loss_interctc_layer15=292.719, loss_interctc_layer21=319.295, loss=304.214, backward_time=0.414, grad_norm=226.159, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.088, optim0_lr0=1.142e-05, train_time=1.804 [gpua049:0/64] 2024-01-14 18:57:53,734 (trainer:753) INFO: 1epoch:train:6901-7000batch: iter_time=2.937e-04, forward_time=0.206, loss_ctc=265.422, loss_interctc_layer6=254.017, loss_interctc_layer12=250.084, loss_interctc_layer15=250.052, loss_interctc_layer21=266.104, loss=257.136, backward_time=0.414, grad_norm=212.006, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.084, optim0_lr0=1.159e-05, train_time=1.820 [gpua049:0/64] 2024-01-14 19:00:50,539 (trainer:753) INFO: 1epoch:train:7001-7100batch: iter_time=8.722e-05, forward_time=0.159, loss_ctc=300.099, loss_interctc_layer6=280.595, loss_interctc_layer12=278.222, loss_interctc_layer15=277.196, loss_interctc_layer21=300.001, loss=287.223, backward_time=0.424, grad_norm=246.823, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.083, optim0_lr0=1.175e-05, train_time=1.768 [gpua049:0/64] 2024-01-14 19:03:57,569 (trainer:753) INFO: 1epoch:train:7101-7200batch: iter_time=1.864e-04, forward_time=0.239, loss_ctc=271.668, loss_interctc_layer6=258.260, loss_interctc_layer12=254.963, loss_interctc_layer15=254.758, loss_interctc_layer21=271.328, loss=262.195, backward_time=0.495, grad_norm=216.025, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.088, optim0_lr0=1.192e-05, train_time=1.869 [gpua049:0/64] 2024-01-14 19:06:56,519 (trainer:753) INFO: 1epoch:train:7201-7300batch: iter_time=8.781e-05, forward_time=0.149, loss_ctc=265.216, loss_interctc_layer6=253.861, loss_interctc_layer12=250.141, loss_interctc_layer15=250.098, loss_interctc_layer21=264.507, loss=256.765, backward_time=0.423, grad_norm=187.153, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.083, optim0_lr0=1.209e-05, train_time=1.789 [gpua049:0/64] 2024-01-14 19:09:38,205 (trainer:753) INFO: 1epoch:train:7301-7400batch: iter_time=4.180e-04, forward_time=0.244, loss_ctc=306.177, loss_interctc_layer6=280.290, loss_interctc_layer12=277.620, loss_interctc_layer15=277.793, loss_interctc_layer21=305.957, loss=289.567, backward_time=0.393, grad_norm=199.830, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.091, optim0_lr0=1.225e-05, train_time=1.618 [gpua049:0/64] 2024-01-14 19:12:26,081 (trainer:753) INFO: 1epoch:train:7401-7500batch: iter_time=7.924e-05, forward_time=0.148, loss_ctc=252.596, loss_interctc_layer6=240.400, loss_interctc_layer12=238.280, loss_interctc_layer15=238.468, loss_interctc_layer21=251.725, loss=244.294, backward_time=0.352, grad_norm=209.951, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.081, optim0_lr0=1.242e-05, train_time=1.679 [gpua049:0/64] 2024-01-14 19:12:44,281 (multiple_iter_factory:32) INFO: Building 6th iter-factory... [gpua049:0/64] 2024-01-14 19:13:04,330 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') [gpua049:0/64] 2024-01-14 19:13:08,090 (abs_task:1660) INFO: [train] dataset: ESPnetDataset( speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} preprocess: ) [gpua049:0/64] 2024-01-14 19:13:08,090 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, [gpua049:0/64] 2024-01-14 19:13:08,094 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 [gpua049:0/64] 2024-01-14 19:20:08,977 (trainer:753) INFO: 1epoch:train:7501-7600batch: iter_time=2.691, forward_time=0.191, loss_ctc=266.793, loss_interctc_layer6=241.153, loss_interctc_layer12=237.769, loss_interctc_layer15=237.694, loss_interctc_layer21=266.492, loss=249.980, backward_time=0.428, grad_norm=191.445, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.081, optim0_lr0=1.259e-05, train_time=4.629 [gpua049:0/64] 2024-01-14 19:23:53,325 (trainer:753) INFO: 1epoch:train:7601-7700batch: iter_time=9.420e-05, forward_time=0.156, loss_ctc=251.191, loss_interctc_layer6=239.762, loss_interctc_layer12=236.799, loss_interctc_layer15=236.799, loss_interctc_layer21=251.364, loss=243.183, backward_time=0.488, grad_norm=163.054, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.081, optim0_lr0=1.275e-05, train_time=2.243 [gpua049:0/64] 2024-01-14 19:26:38,987 (trainer:753) INFO: 1epoch:train:7701-7800batch: iter_time=8.720e-05, forward_time=0.210, loss_ctc=266.469, loss_interctc_layer6=248.762, loss_interctc_layer12=245.511, loss_interctc_layer15=245.413, loss_interctc_layer21=266.049, loss=254.441, backward_time=0.371, grad_norm=191.895, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.084, optim0_lr0=1.292e-05, train_time=1.657 [gpua049:0/64] 2024-01-14 19:29:13,581 (trainer:753) INFO: 1epoch:train:7801-7900batch: iter_time=8.916e-05, forward_time=0.141, loss_ctc=238.246, loss_interctc_layer6=221.008, loss_interctc_layer12=219.092, loss_interctc_layer15=218.139, loss_interctc_layer21=238.245, loss=226.946, backward_time=0.321, grad_norm=188.696, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.078, optim0_lr0=1.309e-05, train_time=1.546 [gpua049:0/64] 2024-01-14 19:31:43,187 (trainer:753) INFO: 1epoch:train:7901-8000batch: iter_time=9.040e-05, forward_time=0.178, loss_ctc=306.349, loss_interctc_layer6=288.847, loss_interctc_layer12=286.293, loss_interctc_layer15=286.477, loss_interctc_layer21=305.975, loss=294.788, backward_time=0.352, grad_norm=235.405, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.080, optim0_lr0=1.325e-05, train_time=1.494 [gpua049:0/64] 2024-01-14 19:34:19,264 (trainer:753) INFO: 1epoch:train:8001-8100batch: iter_time=8.701e-05, forward_time=0.182, loss_ctc=280.060, loss_interctc_layer6=257.973, loss_interctc_layer12=255.076, loss_interctc_layer15=254.970, loss_interctc_layer21=278.763, loss=265.368, backward_time=0.360, grad_norm=210.853, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.083, optim0_lr0=1.342e-05, train_time=1.563 [gpua049:0/64] 2024-01-14 19:36:39,442 (trainer:753) INFO: 1epoch:train:8101-8200batch: iter_time=8.795e-05, forward_time=0.143, loss_ctc=293.114, loss_interctc_layer6=281.390, loss_interctc_layer12=277.922, loss_interctc_layer15=277.514, loss_interctc_layer21=291.396, loss=284.267, backward_time=0.345, grad_norm=230.178, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.079, optim0_lr0=1.359e-05, train_time=1.402 [gpua049:0/64] 2024-01-14 19:39:11,677 (trainer:753) INFO: 1epoch:train:8201-8300batch: iter_time=8.456e-05, forward_time=0.142, loss_ctc=270.040, loss_interctc_layer6=258.466, loss_interctc_layer12=255.177, loss_interctc_layer15=255.705, loss_interctc_layer21=270.832, loss=262.044, backward_time=0.352, grad_norm=225.134, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.079, optim0_lr0=1.375e-05, train_time=1.521 [gpua049:0/64] 2024-01-14 19:42:37,487 (trainer:753) INFO: 1epoch:train:8301-8400batch: iter_time=8.211e-05, forward_time=0.212, loss_ctc=276.900, loss_interctc_layer6=261.342, loss_interctc_layer12=258.152, loss_interctc_layer15=257.747, loss_interctc_layer21=275.651, loss=265.958, backward_time=0.516, grad_norm=229.482, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.083, optim0_lr0=1.392e-05, train_time=2.056 [gpua049:0/64] 2024-01-14 19:46:02,602 (trainer:753) INFO: 1epoch:train:8401-8500batch: iter_time=9.926e-05, forward_time=0.163, loss_ctc=280.967, loss_interctc_layer6=270.879, loss_interctc_layer12=267.751, loss_interctc_layer15=267.658, loss_interctc_layer21=281.148, loss=273.681, backward_time=0.429, grad_norm=210.009, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.081, optim0_lr0=1.409e-05, train_time=2.053 [gpua049:0/64] 2024-01-14 19:49:11,862 (trainer:753) INFO: 1epoch:train:8501-8600batch: iter_time=8.673e-05, forward_time=0.143, loss_ctc=276.092, loss_interctc_layer6=259.669, loss_interctc_layer12=256.853, loss_interctc_layer15=256.911, loss_interctc_layer21=277.385, loss=265.382, backward_time=0.392, grad_norm=181.193, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.078, optim0_lr0=1.425e-05, train_time=1.893 [gpua049:0/64] 2024-01-14 19:52:17,733 (trainer:753) INFO: 1epoch:train:8601-8700batch: iter_time=9.448e-05, forward_time=0.185, loss_ctc=256.579, loss_interctc_layer6=242.782, loss_interctc_layer12=240.377, loss_interctc_layer15=240.597, loss_interctc_layer21=256.310, loss=247.329, backward_time=0.459, grad_norm=169.268, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.082, optim0_lr0=1.442e-05, train_time=1.858 [gpua049:0/64] 2024-01-14 19:54:01,605 (multiple_iter_factory:32) INFO: Building 7th iter-factory... [gpua049:0/64] 2024-01-14 19:54:21,685 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') [gpua049:0/64] 2024-01-14 19:54:25,548 (abs_task:1660) INFO: [train] dataset: ESPnetDataset( speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} preprocess: ) [gpua049:0/64] 2024-01-14 19:54:25,548 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, [gpua049:0/64] 2024-01-14 19:54:25,551 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 [gpua049:0/64] 2024-01-14 20:03:17,131 (trainer:753) INFO: 1epoch:train:8701-8800batch: iter_time=3.409, forward_time=0.198, loss_ctc=270.754, loss_interctc_layer6=250.665, loss_interctc_layer12=248.368, loss_interctc_layer15=248.119, loss_interctc_layer21=271.004, loss=257.782, backward_time=0.326, grad_norm=206.455, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.083, optim0_lr0=1.459e-05, train_time=6.594 [gpua049:0/64] 2024-01-14 20:06:08,312 (trainer:753) INFO: 1epoch:train:8801-8900batch: iter_time=8.783e-05, forward_time=0.142, loss_ctc=238.074, loss_interctc_layer6=225.897, loss_interctc_layer12=223.600, loss_interctc_layer15=223.656, loss_interctc_layer21=237.561, loss=229.758, backward_time=0.353, grad_norm=158.926, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.078, optim0_lr0=1.475e-05, train_time=1.712 [gpua049:0/64] 2024-01-14 20:08:22,015 (trainer:753) INFO: 1epoch:train:8901-9000batch: iter_time=9.387e-05, forward_time=0.197, loss_ctc=258.913, loss_interctc_layer6=244.839, loss_interctc_layer12=242.665, loss_interctc_layer15=243.519, loss_interctc_layer21=258.249, loss=249.637, backward_time=0.302, grad_norm=205.997, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.087, optim0_lr0=1.492e-05, train_time=1.336 [gpua049:0/64] 2024-01-14 20:11:03,016 (trainer:753) INFO: 1epoch:train:9001-9100batch: iter_time=8.727e-05, forward_time=0.227, loss_ctc=246.611, loss_interctc_layer6=231.952, loss_interctc_layer12=228.259, loss_interctc_layer15=228.184, loss_interctc_layer21=245.553, loss=236.112, backward_time=0.358, grad_norm=197.581, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.081, optim0_lr0=1.509e-05, train_time=1.610 [gpua049:0/64] 2024-01-14 20:13:38,284 (trainer:753) INFO: 1epoch:train:9101-9200batch: iter_time=9.475e-05, forward_time=0.143, loss_ctc=249.381, loss_interctc_layer6=239.041, loss_interctc_layer12=237.061, loss_interctc_layer15=236.773, loss_interctc_layer21=250.167, loss=242.485, backward_time=0.384, grad_norm=180.404, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.078, optim0_lr0=1.525e-05, train_time=1.553 [gpua049:0/64] 2024-01-14 20:16:12,026 (trainer:753) INFO: 1epoch:train:9201-9300batch: iter_time=8.319e-05, forward_time=0.142, loss_ctc=290.321, loss_interctc_layer6=270.460, loss_interctc_layer12=269.408, loss_interctc_layer15=268.182, loss_interctc_layer21=290.651, loss=277.804, backward_time=0.344, grad_norm=229.185, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.078, optim0_lr0=1.542e-05, train_time=1.537 [gpua049:0/64] 2024-01-14 20:18:50,006 (trainer:753) INFO: 1epoch:train:9301-9400batch: iter_time=9.217e-05, forward_time=0.240, loss_ctc=304.964, loss_interctc_layer6=290.807, loss_interctc_layer12=285.908, loss_interctc_layer15=285.632, loss_interctc_layer21=305.804, loss=294.623, backward_time=0.377, grad_norm=204.308, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.087, optim0_lr0=1.559e-05, train_time=1.579 [gpua049:0/64] 2024-01-14 20:21:36,804 (trainer:753) INFO: 1epoch:train:9401-9500batch: iter_time=8.347e-05, forward_time=0.143, loss_ctc=252.030, loss_interctc_layer6=243.598, loss_interctc_layer12=240.819, loss_interctc_layer15=239.941, loss_interctc_layer21=251.450, loss=245.568, backward_time=0.352, grad_norm=217.515, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.078, optim0_lr0=1.575e-05, train_time=1.668 [gpua049:0/64] 2024-01-14 20:24:13,215 (trainer:753) INFO: 1epoch:train:9501-9600batch: iter_time=9.103e-05, forward_time=0.143, loss_ctc=285.500, loss_interctc_layer6=272.933, loss_interctc_layer12=270.643, loss_interctc_layer15=269.420, loss_interctc_layer21=287.138, loss=277.127, backward_time=0.367, grad_norm=253.729, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.078, optim0_lr0=1.592e-05, train_time=1.564 [gpua049:0/64] 2024-01-14 20:27:00,767 (trainer:753) INFO: 1epoch:train:9601-9700batch: iter_time=4.682e-04, forward_time=0.254, loss_ctc=263.776, loss_interctc_layer6=252.120, loss_interctc_layer12=249.898, loss_interctc_layer15=250.112, loss_interctc_layer21=263.296, loss=255.841, backward_time=0.408, grad_norm=219.105, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.082, optim0_lr0=1.609e-05, train_time=1.674 [gpua049:0/64] 2024-01-14 20:29:34,766 (trainer:753) INFO: 1epoch:train:9701-9800batch: iter_time=9.073e-05, forward_time=0.142, loss_ctc=252.664, loss_interctc_layer6=245.638, loss_interctc_layer12=242.427, loss_interctc_layer15=242.265, loss_interctc_layer21=251.338, loss=246.866, backward_time=0.378, grad_norm=177.545, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.078, optim0_lr0=1.625e-05, train_time=1.540 [gpua049:0/64] 2024-01-14 20:31:56,258 (trainer:753) INFO: 1epoch:train:9801-9900batch: iter_time=8.309e-05, forward_time=0.144, loss_ctc=292.575, loss_interctc_layer6=273.512, loss_interctc_layer12=270.150, loss_interctc_layer15=271.100, loss_interctc_layer21=292.184, loss=279.904, backward_time=0.375, grad_norm=186.403, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.079, optim0_lr0=1.642e-05, train_time=1.415 [gpua049:0/64] 2024-01-14 20:34:45,034 (trainer:753) INFO: 1epoch:train:9901-10000batch: iter_time=3.336e-04, forward_time=0.247, loss_ctc=239.618, loss_interctc_layer6=231.967, loss_interctc_layer12=230.102, loss_interctc_layer15=229.697, loss_interctc_layer21=239.448, loss=234.166, backward_time=0.383, grad_norm=224.781, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.087, optim0_lr0=1.659e-05, train_time=1.688 [gpua049:0/64] 2024-01-14 20:35:05,116 (multiple_iter_factory:32) INFO: Building 8th iter-factory... [gpua049:0/64] 2024-01-14 20:35:25,659 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') [gpua049:0/64] 2024-01-14 20:35:29,819 (abs_task:1660) INFO: [train] dataset: ESPnetDataset( speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} preprocess: ) [gpua049:0/64] 2024-01-14 20:35:29,819 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, [gpua049:0/64] 2024-01-14 20:35:29,822 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 [gpua049:0/64] 2024-01-14 20:46:17,414 (trainer:753) INFO: 1epoch:train:10001-10100batch: iter_time=3.384, forward_time=0.142, loss_ctc=245.844, loss_interctc_layer6=235.603, loss_interctc_layer12=232.830, loss_interctc_layer15=232.669, loss_interctc_layer21=245.950, loss=238.579, backward_time=0.311, grad_norm=179.759, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.078, optim0_lr0=1.675e-05, train_time=6.924 [gpua049:0/64] 2024-01-14 20:48:50,880 (trainer:753) INFO: 1epoch:train:10101-10200batch: iter_time=9.103e-05, forward_time=0.141, loss_ctc=241.760, loss_interctc_layer6=235.783, loss_interctc_layer12=232.352, loss_interctc_layer15=232.486, loss_interctc_layer21=242.092, loss=236.895, backward_time=0.374, grad_norm=153.413, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.078, optim0_lr0=1.692e-05, train_time=1.533 [gpua049:0/64] 2024-01-14 20:50:57,696 (trainer:753) INFO: 1epoch:train:10201-10300batch: iter_time=9.659e-05, forward_time=0.142, loss_ctc=244.721, loss_interctc_layer6=243.008, loss_interctc_layer12=240.519, loss_interctc_layer15=240.062, loss_interctc_layer21=245.369, loss=242.736, backward_time=0.301, grad_norm=186.936, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.079, optim0_lr0=1.709e-05, train_time=1.269 [gpua049:0/64] 2024-01-14 20:53:36,427 (trainer:753) INFO: 1epoch:train:10301-10400batch: iter_time=9.638e-05, forward_time=0.141, loss_ctc=222.342, loss_interctc_layer6=215.094, loss_interctc_layer12=212.815, loss_interctc_layer15=213.119, loss_interctc_layer21=223.316, loss=217.337, backward_time=0.363, grad_norm=174.739, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.079, optim0_lr0=1.725e-05, train_time=1.587 [gpua049:0/64] 2024-01-14 20:56:29,351 (trainer:753) INFO: 1epoch:train:10401-10500batch: iter_time=0.008, forward_time=0.279, loss_ctc=289.281, loss_interctc_layer6=282.410, loss_interctc_layer12=279.483, loss_interctc_layer15=279.009, loss_interctc_layer21=288.453, loss=283.727, backward_time=0.375, grad_norm=212.977, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.084, optim0_lr0=1.742e-05, train_time=1.729 [gpua049:0/64] 2024-01-14 20:58:57,139 (trainer:753) INFO: 1epoch:train:10501-10600batch: iter_time=9.243e-05, forward_time=0.141, loss_ctc=259.763, loss_interctc_layer6=253.684, loss_interctc_layer12=250.612, loss_interctc_layer15=250.327, loss_interctc_layer21=259.958, loss=254.869, backward_time=0.329, grad_norm=224.268, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.078, optim0_lr0=1.759e-05, train_time=1.478 [gpua049:0/64] 2024-01-14 21:01:29,362 (trainer:753) INFO: 1epoch:train:10601-10700batch: iter_time=9.059e-05, forward_time=0.141, loss_ctc=275.846, loss_interctc_layer6=274.226, loss_interctc_layer12=270.745, loss_interctc_layer15=270.644, loss_interctc_layer21=275.729, loss=273.438, backward_time=0.340, grad_norm=184.274, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.078, optim0_lr0=1.775e-05, train_time=1.522 [gpua049:0/64] 2024-01-14 21:04:10,115 (trainer:753) INFO: 1epoch:train:10701-10800batch: iter_time=8.795e-05, forward_time=0.141, loss_ctc=255.198, loss_interctc_layer6=251.283, loss_interctc_layer12=247.709, loss_interctc_layer15=247.966, loss_interctc_layer21=254.431, loss=251.317, backward_time=0.360, grad_norm=205.709, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.078, optim0_lr0=1.792e-05, train_time=1.607 [gpua049:0/64] 2024-01-14 21:06:45,966 (trainer:753) INFO: 1epoch:train:10801-10900batch: iter_time=8.632e-05, forward_time=0.141, loss_ctc=252.335, loss_interctc_layer6=253.487, loss_interctc_layer12=250.570, loss_interctc_layer15=250.519, loss_interctc_layer21=253.520, loss=252.086, backward_time=0.387, grad_norm=211.231, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.078, optim0_lr0=1.809e-05, train_time=1.558 [gpua049:0/64] 2024-01-14 21:09:38,377 (trainer:753) INFO: 1epoch:train:10901-11000batch: iter_time=8.981e-05, forward_time=0.142, loss_ctc=267.336, loss_interctc_layer6=265.443, loss_interctc_layer12=262.501, loss_interctc_layer15=262.446, loss_interctc_layer21=266.767, loss=264.899, backward_time=0.375, grad_norm=180.580, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.078, optim0_lr0=1.825e-05, train_time=1.723 [gpua049:0/64] 2024-01-14 21:11:58,719 (trainer:753) INFO: 1epoch:train:11001-11100batch: iter_time=8.673e-05, forward_time=0.142, loss_ctc=260.222, loss_interctc_layer6=253.419, loss_interctc_layer12=250.772, loss_interctc_layer15=250.504, loss_interctc_layer21=260.756, loss=255.135, backward_time=0.333, grad_norm=189.385, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.078, optim0_lr0=1.842e-05, train_time=1.404 [gpua049:0/64] 2024-01-14 21:14:29,505 (trainer:753) INFO: 1epoch:train:11101-11200batch: iter_time=8.638e-05, forward_time=0.143, loss_ctc=240.364, loss_interctc_layer6=237.123, loss_interctc_layer12=235.081, loss_interctc_layer15=235.004, loss_interctc_layer21=239.828, loss=237.480, backward_time=0.313, grad_norm=171.659, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.078, optim0_lr0=1.859e-05, train_time=1.508 [gpua049:0/64] 2024-01-14 21:16:03,474 (multiple_iter_factory:32) INFO: Building 9th iter-factory... [gpua049:0/64] 2024-01-14 21:16:23,960 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') [gpua049:0/64] 2024-01-14 21:16:27,767 (abs_task:1660) INFO: [train] dataset: ESPnetDataset( speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} preprocess: ) [gpua049:0/64] 2024-01-14 21:16:27,767 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, [gpua049:0/64] 2024-01-14 21:16:27,771 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 [gpua049:0/64] 2024-01-14 21:25:26,605 (trainer:753) INFO: 1epoch:train:11201-11300batch: iter_time=3.239, forward_time=0.185, loss_ctc=251.367, loss_interctc_layer6=244.658, loss_interctc_layer12=241.851, loss_interctc_layer15=241.715, loss_interctc_layer21=251.326, loss=246.183, backward_time=0.335, grad_norm=185.063, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.081, optim0_lr0=1.875e-05, train_time=6.570 [gpua049:0/64] 2024-01-14 21:27:54,573 (trainer:753) INFO: 1epoch:train:11301-11400batch: iter_time=9.155e-05, forward_time=0.141, loss_ctc=225.242, loss_interctc_layer6=219.565, loss_interctc_layer12=216.839, loss_interctc_layer15=217.290, loss_interctc_layer21=225.250, loss=220.837, backward_time=0.377, grad_norm=138.080, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.079, optim0_lr0=1.892e-05, train_time=1.480 [gpua049:0/64] 2024-01-14 21:30:57,266 (trainer:753) INFO: 1epoch:train:11401-11500batch: iter_time=9.745e-05, forward_time=0.148, loss_ctc=242.403, loss_interctc_layer6=239.604, loss_interctc_layer12=237.353, loss_interctc_layer15=237.572, loss_interctc_layer21=242.770, loss=239.940, backward_time=0.387, grad_norm=173.926, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.079, optim0_lr0=1.909e-05, train_time=1.827 [gpua049:0/64] 2024-01-14 21:33:16,284 (trainer:753) INFO: 1epoch:train:11501-11600batch: iter_time=9.532e-05, forward_time=0.142, loss_ctc=232.380, loss_interctc_layer6=226.472, loss_interctc_layer12=224.040, loss_interctc_layer15=222.701, loss_interctc_layer21=231.289, loss=227.377, backward_time=0.322, grad_norm=181.061, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.078, optim0_lr0=1.925e-05, train_time=1.390 [gpua049:0/64] 2024-01-14 21:36:28,315 (trainer:753) INFO: 1epoch:train:11601-11700batch: iter_time=8.658e-05, forward_time=0.168, loss_ctc=231.858, loss_interctc_layer6=231.896, loss_interctc_layer12=229.785, loss_interctc_layer15=230.196, loss_interctc_layer21=231.770, loss=231.101, backward_time=0.427, grad_norm=173.981, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.079, optim0_lr0=1.942e-05, train_time=1.919 [gpua049:0/64] 2024-01-14 21:39:31,951 (trainer:753) INFO: 1epoch:train:11701-11800batch: iter_time=9.274e-05, forward_time=0.207, loss_ctc=270.341, loss_interctc_layer6=262.229, loss_interctc_layer12=260.365, loss_interctc_layer15=259.588, loss_interctc_layer21=271.691, loss=264.843, backward_time=0.392, grad_norm=176.674, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.084, optim0_lr0=1.959e-05, train_time=1.837 [gpua049:0/64] 2024-01-14 21:42:39,084 (trainer:753) INFO: 1epoch:train:11801-11900batch: iter_time=8.808e-04, forward_time=0.154, loss_ctc=286.804, loss_interctc_layer6=283.340, loss_interctc_layer12=280.467, loss_interctc_layer15=280.474, loss_interctc_layer21=286.554, loss=283.528, backward_time=0.368, grad_norm=202.837, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.079, optim0_lr0=1.975e-05, train_time=1.872 [gpua049:0/64] 2024-01-14 21:45:21,621 (trainer:753) INFO: 1epoch:train:11901-12000batch: iter_time=2.430e-04, forward_time=0.145, loss_ctc=241.002, loss_interctc_layer6=238.590, loss_interctc_layer12=235.939, loss_interctc_layer15=235.689, loss_interctc_layer21=240.214, loss=238.287, backward_time=0.414, grad_norm=165.868, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.079, optim0_lr0=1.992e-05, train_time=1.625 [gpua049:0/64] 2024-01-14 21:47:46,644 (trainer:753) INFO: 1epoch:train:12001-12100batch: iter_time=9.455e-05, forward_time=0.195, loss_ctc=267.990, loss_interctc_layer6=268.119, loss_interctc_layer12=263.960, loss_interctc_layer15=264.113, loss_interctc_layer21=266.909, loss=266.218, backward_time=0.329, grad_norm=188.980, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.083, optim0_lr0=2.009e-05, train_time=1.449 [gpua049:0/64] 2024-01-14 21:50:22,453 (trainer:753) INFO: 1epoch:train:12101-12200batch: iter_time=9.550e-05, forward_time=0.154, loss_ctc=247.869, loss_interctc_layer6=245.138, loss_interctc_layer12=242.572, loss_interctc_layer15=242.462, loss_interctc_layer21=247.758, loss=245.160, backward_time=0.364, grad_norm=173.672, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.080, optim0_lr0=2.025e-05, train_time=1.559 [gpua049:0/64] 2024-01-14 21:53:20,478 (trainer:753) INFO: 1epoch:train:12201-12300batch: iter_time=9.007e-05, forward_time=0.145, loss_ctc=238.596, loss_interctc_layer6=240.876, loss_interctc_layer12=238.890, loss_interctc_layer15=238.136, loss_interctc_layer21=237.952, loss=238.890, backward_time=0.364, grad_norm=161.672, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.079, optim0_lr0=2.042e-05, train_time=1.779 [gpua049:0/64] 2024-01-14 21:56:13,880 (trainer:753) INFO: 1epoch:train:12301-12400batch: iter_time=8.557e-05, forward_time=0.154, loss_ctc=271.373, loss_interctc_layer6=265.356, loss_interctc_layer12=262.369, loss_interctc_layer15=262.260, loss_interctc_layer21=270.873, loss=266.446, backward_time=0.401, grad_norm=171.484, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.079, optim0_lr0=2.059e-05, train_time=1.735 [gpua049:0/64] 2024-01-14 21:58:54,635 (trainer:753) INFO: 1epoch:train:12401-12500batch: iter_time=8.729e-05, forward_time=0.147, loss_ctc=230.175, loss_interctc_layer6=228.148, loss_interctc_layer12=225.247, loss_interctc_layer15=225.467, loss_interctc_layer21=229.745, loss=227.756, backward_time=0.371, grad_norm=169.008, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.078, optim0_lr0=2.075e-05, train_time=1.607 [gpua049:0/64] 2024-01-14 21:59:14,665 (multiple_iter_factory:32) INFO: Building 10th iter-factory... [gpua049:0/64] 2024-01-14 21:59:35,593 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') [gpua049:0/64] 2024-01-14 21:59:39,329 (abs_task:1660) INFO: [train] dataset: ESPnetDataset( speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} preprocess: ) [gpua049:0/64] 2024-01-14 21:59:39,329 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, [gpua049:0/64] 2024-01-14 21:59:39,332 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 [gpua049:0/64] 2024-01-14 22:07:50,777 (trainer:753) INFO: 1epoch:train:12501-12600batch: iter_time=3.891, forward_time=0.232, loss_ctc=251.541, loss_interctc_layer6=230.959, loss_interctc_layer12=229.004, loss_interctc_layer15=228.902, loss_interctc_layer21=251.568, loss=238.395, backward_time=0.331, grad_norm=151.962, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.082, optim0_lr0=2.092e-05, train_time=5.361 [gpua049:0/64] 2024-01-14 22:10:28,443 (trainer:753) INFO: 1epoch:train:12601-12700batch: iter_time=8.256e-05, forward_time=0.142, loss_ctc=238.374, loss_interctc_layer6=231.131, loss_interctc_layer12=227.872, loss_interctc_layer15=228.013, loss_interctc_layer21=238.589, loss=232.796, backward_time=0.386, grad_norm=159.615, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.078, optim0_lr0=2.109e-05, train_time=1.577 [gpua049:0/64] 2024-01-14 22:13:36,188 (trainer:753) INFO: 1epoch:train:12701-12800batch: iter_time=8.514e-05, forward_time=0.291, loss_ctc=251.333, loss_interctc_layer6=237.188, loss_interctc_layer12=234.267, loss_interctc_layer15=234.546, loss_interctc_layer21=251.143, loss=241.696, backward_time=0.398, grad_norm=177.660, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.086, optim0_lr0=2.125e-05, train_time=1.876 [gpua049:0/64] 2024-01-14 22:16:54,042 (trainer:753) INFO: 1epoch:train:12801-12900batch: iter_time=9.629e-05, forward_time=0.142, loss_ctc=223.222, loss_interctc_layer6=208.933, loss_interctc_layer12=206.443, loss_interctc_layer15=205.928, loss_interctc_layer21=222.384, loss=213.382, backward_time=0.401, grad_norm=137.092, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.078, optim0_lr0=2.142e-05, train_time=1.980 [gpua049:0/64] 2024-01-14 22:19:50,158 (trainer:753) INFO: 1epoch:train:12901-13000batch: iter_time=9.791e-05, forward_time=0.142, loss_ctc=288.649, loss_interctc_layer6=274.703, loss_interctc_layer12=272.201, loss_interctc_layer15=272.288, loss_interctc_layer21=289.185, loss=279.405, backward_time=0.358, grad_norm=162.063, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.078, optim0_lr0=2.159e-05, train_time=1.761 [gpua049:0/64] 2024-01-14 22:22:27,865 (trainer:753) INFO: 1epoch:train:13001-13100batch: iter_time=9.249e-05, forward_time=0.141, loss_ctc=264.363, loss_interctc_layer6=246.822, loss_interctc_layer12=243.602, loss_interctc_layer15=244.225, loss_interctc_layer21=262.893, loss=252.381, backward_time=0.335, grad_norm=212.987, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.078, optim0_lr0=2.175e-05, train_time=1.577 [gpua049:0/64] 2024-01-14 22:25:19,724 (trainer:753) INFO: 1epoch:train:13101-13200batch: iter_time=2.082e-04, forward_time=0.262, loss_ctc=277.437, loss_interctc_layer6=268.826, loss_interctc_layer12=265.060, loss_interctc_layer15=265.126, loss_interctc_layer21=278.030, loss=270.896, backward_time=0.363, grad_norm=163.287, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.093, optim0_lr0=2.192e-05, train_time=1.718 [gpua049:0/64] 2024-01-14 22:28:06,340 (trainer:753) INFO: 1epoch:train:13201-13300batch: iter_time=9.682e-05, forward_time=0.141, loss_ctc=251.911, loss_interctc_layer6=244.290, loss_interctc_layer12=242.321, loss_interctc_layer15=241.916, loss_interctc_layer21=251.557, loss=246.399, backward_time=0.413, grad_norm=189.235, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.079, optim0_lr0=2.209e-05, train_time=1.667 [gpua049:0/64] 2024-01-14 22:30:47,287 (trainer:753) INFO: 1epoch:train:13301-13400batch: iter_time=9.723e-05, forward_time=0.141, loss_ctc=259.923, loss_interctc_layer6=249.055, loss_interctc_layer12=245.496, loss_interctc_layer15=244.915, loss_interctc_layer21=261.283, loss=252.134, backward_time=0.350, grad_norm=187.318, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.079, optim0_lr0=2.225e-05, train_time=1.609 [gpua049:0/64] 2024-01-14 22:33:47,428 (trainer:753) INFO: 1epoch:train:13401-13500batch: iter_time=9.854e-05, forward_time=0.145, loss_ctc=266.674, loss_interctc_layer6=257.067, loss_interctc_layer12=254.823, loss_interctc_layer15=254.178, loss_interctc_layer21=265.846, loss=259.718, backward_time=0.393, grad_norm=176.535, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.079, optim0_lr0=2.242e-05, train_time=1.801 [gpua049:0/64] 2024-01-14 22:36:32,757 (trainer:753) INFO: 1epoch:train:13501-13600batch: iter_time=5.941e-04, forward_time=0.183, loss_ctc=260.237, loss_interctc_layer6=246.872, loss_interctc_layer12=244.520, loss_interctc_layer15=244.245, loss_interctc_layer21=259.977, loss=251.170, backward_time=0.333, grad_norm=201.108, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.087, optim0_lr0=2.259e-05, train_time=1.652 [gpua049:0/64] 2024-01-14 22:39:25,572 (trainer:753) INFO: 1epoch:train:13601-13700batch: iter_time=9.401e-05, forward_time=0.200, loss_ctc=241.447, loss_interctc_layer6=231.662, loss_interctc_layer12=228.826, loss_interctc_layer15=228.625, loss_interctc_layer21=241.170, loss=234.346, backward_time=0.371, grad_norm=155.387, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.080, optim0_lr0=2.275e-05, train_time=1.728 [gpua049:0/64] 2024-01-14 22:40:59,943 (multiple_iter_factory:32) INFO: Building 11th iter-factory... [gpua049:0/64] 2024-01-14 22:41:20,312 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') [gpua049:0/64] 2024-01-14 22:41:24,200 (abs_task:1660) INFO: [train] dataset: ESPnetDataset( speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} preprocess: ) [gpua049:0/64] 2024-01-14 22:41:24,201 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, [gpua049:0/64] 2024-01-14 22:41:24,204 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 [gpua049:0/64] 2024-01-14 22:50:24,764 (trainer:753) INFO: 1epoch:train:13701-13800batch: iter_time=4.593, forward_time=0.196, loss_ctc=254.735, loss_interctc_layer6=239.518, loss_interctc_layer12=237.385, loss_interctc_layer15=236.643, loss_interctc_layer21=254.607, loss=244.578, backward_time=0.316, grad_norm=176.934, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.083, optim0_lr0=2.292e-05, train_time=6.592 [gpua049:0/64] 2024-01-14 22:52:26,845 (trainer:753) INFO: 1epoch:train:13801-13900batch: iter_time=8.891e-05, forward_time=0.142, loss_ctc=224.945, loss_interctc_layer6=217.445, loss_interctc_layer12=214.365, loss_interctc_layer15=213.402, loss_interctc_layer21=225.095, loss=219.050, backward_time=0.298, grad_norm=142.457, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.079, optim0_lr0=2.309e-05, train_time=1.221 [gpua049:0/64] 2024-01-14 22:54:55,817 (trainer:753) INFO: 1epoch:train:13901-14000batch: iter_time=6.422e-04, forward_time=0.275, loss_ctc=246.786, loss_interctc_layer6=235.449, loss_interctc_layer12=231.912, loss_interctc_layer15=232.355, loss_interctc_layer21=247.213, loss=238.743, backward_time=0.353, grad_norm=166.168, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.092, optim0_lr0=2.325e-05, train_time=1.487 [gpua049:0/64] 2024-01-14 22:57:58,819 (trainer:753) INFO: 1epoch:train:14001-14100batch: iter_time=9.342e-05, forward_time=0.144, loss_ctc=233.395, loss_interctc_layer6=219.668, loss_interctc_layer12=217.081, loss_interctc_layer15=217.129, loss_interctc_layer21=232.975, loss=224.050, backward_time=0.360, grad_norm=158.181, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.080, optim0_lr0=2.342e-05, train_time=1.832 [gpua049:0/64] 2024-01-14 23:01:10,300 (trainer:753) INFO: 1epoch:train:14101-14200batch: iter_time=8.831e-05, forward_time=0.244, loss_ctc=237.151, loss_interctc_layer6=228.768, loss_interctc_layer12=225.871, loss_interctc_layer15=225.795, loss_interctc_layer21=237.239, loss=230.965, backward_time=0.402, grad_norm=156.498, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.088, optim0_lr0=2.359e-05, train_time=1.915 [gpua049:0/64] 2024-01-14 23:03:24,499 (trainer:753) INFO: 1epoch:train:14201-14300batch: iter_time=8.726e-05, forward_time=0.142, loss_ctc=273.850, loss_interctc_layer6=258.352, loss_interctc_layer12=255.221, loss_interctc_layer15=254.840, loss_interctc_layer21=273.199, loss=263.092, backward_time=0.300, grad_norm=180.994, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.078, optim0_lr0=2.375e-05, train_time=1.339 [gpua049:0/64] 2024-01-14 23:06:36,317 (trainer:753) INFO: 1epoch:train:14301-14400batch: iter_time=9.120e-05, forward_time=0.143, loss_ctc=290.862, loss_interctc_layer6=278.153, loss_interctc_layer12=273.616, loss_interctc_layer15=273.473, loss_interctc_layer21=290.477, loss=281.316, backward_time=0.412, grad_norm=176.595, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.079, optim0_lr0=2.392e-05, train_time=1.920 [gpua049:0/64] 2024-01-14 23:09:03,201 (trainer:753) INFO: 1epoch:train:14401-14500batch: iter_time=2.967e-04, forward_time=0.245, loss_ctc=241.050, loss_interctc_layer6=234.063, loss_interctc_layer12=231.160, loss_interctc_layer15=231.333, loss_interctc_layer21=241.035, loss=235.728, backward_time=0.359, grad_norm=158.413, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.087, optim0_lr0=2.409e-05, train_time=1.468 [gpua049:0/64] 2024-01-14 23:11:39,198 (trainer:753) INFO: 1epoch:train:14501-14600batch: iter_time=9.259e-05, forward_time=0.143, loss_ctc=273.416, loss_interctc_layer6=263.281, loss_interctc_layer12=258.972, loss_interctc_layer15=259.138, loss_interctc_layer21=273.933, loss=265.748, backward_time=0.368, grad_norm=182.254, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.079, optim0_lr0=2.425e-05, train_time=1.557 [gpua049:0/64] 2024-01-14 23:15:05,948 (trainer:753) INFO: 1epoch:train:14601-14700batch: iter_time=4.984e-04, forward_time=0.201, loss_ctc=248.609, loss_interctc_layer6=240.511, loss_interctc_layer12=237.458, loss_interctc_layer15=237.523, loss_interctc_layer21=248.297, loss=242.480, backward_time=0.418, grad_norm=147.297, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.087, optim0_lr0=2.442e-05, train_time=2.070 [gpua049:0/64] 2024-01-14 23:17:34,523 (trainer:753) INFO: 1epoch:train:14701-14800batch: iter_time=8.165e-05, forward_time=0.165, loss_ctc=241.933, loss_interctc_layer6=234.986, loss_interctc_layer12=231.928, loss_interctc_layer15=231.619, loss_interctc_layer21=241.552, loss=236.404, backward_time=0.316, grad_norm=168.158, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.078, optim0_lr0=2.459e-05, train_time=1.485 [gpua049:0/64] 2024-01-14 23:20:08,652 (trainer:753) INFO: 1epoch:train:14801-14900batch: iter_time=9.092e-05, forward_time=0.144, loss_ctc=278.367, loss_interctc_layer6=261.841, loss_interctc_layer12=259.548, loss_interctc_layer15=258.912, loss_interctc_layer21=277.603, loss=267.254, backward_time=0.325, grad_norm=180.877, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.078, optim0_lr0=2.475e-05, train_time=1.539 [gpua049:0/64] 2024-01-14 23:22:46,146 (trainer:753) INFO: 1epoch:train:14901-15000batch: iter_time=4.434e-04, forward_time=0.285, loss_ctc=231.295, loss_interctc_layer6=225.160, loss_interctc_layer12=222.194, loss_interctc_layer15=221.980, loss_interctc_layer21=231.144, loss=226.355, backward_time=0.330, grad_norm=179.467, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.088, optim0_lr0=2.492e-05, train_time=1.577 [gpua049:0/64] 2024-01-14 23:52:33,922 (trainer:352) INFO: 1epoch results: [train] iter_time=0.282, forward_time=0.174, loss_ctc=301.595, loss_interctc_layer6=301.445, loss_interctc_layer12=293.631, loss_interctc_layer15=291.647, loss_interctc_layer21=303.209, loss=298.306, backward_time=0.397, grad_norm=514.625, clip=100.000, loss_scale=1.669e+06, optim_step_time=0.081, optim0_lr0=1.250e-05, train_time=2.108, time=8 hours, 47 minutes and 24.28 seconds, total_count=15000, gpu_max_cached_mem_GB=34.504, [valid] loss_ctc=182.971, cer_ctc=0.803, loss_interctc_layer6=184.670, cer_interctc_layer6=0.824, loss_interctc_layer12=179.684, cer_interctc_layer12=0.807, loss_interctc_layer15=177.589, cer_interctc_layer15=0.807, loss_interctc_layer21=182.988, cer_interctc_layer21=0.803, loss=181.580, time=29 minutes and 23.74 seconds, total_count=4671, gpu_max_cached_mem_GB=34.504 [gpua049:0/64] 2024-01-14 23:53:06,072 (trainer:407) INFO: The best model has been updated: valid.cer_ctc, valid.loss_ctc, valid.total_count [gpua049:0/64] 2024-01-14 23:53:06,072 (trainer:286) INFO: 2/45epoch started. Estimated time to finish: 2 weeks, 3 days and 42 minutes [gpua049:0/64] 2024-01-14 23:53:06,090 (multiple_iter_factory:32) INFO: Building 0th iter-factory... [gpua049:0/64] 2024-01-14 23:53:25,897 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') [gpua049:0/64] 2024-01-14 23:53:29,652 (abs_task:1660) INFO: [train] dataset: ESPnetDataset( speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} preprocess: ) [gpua049:0/64] 2024-01-14 23:53:29,652 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, [gpua049:0/64] 2024-01-14 23:53:29,656 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 [gpua049:0/64] 2024-01-14 23:59:44,268 (trainer:753) INFO: 2epoch:train:1-100batch: iter_time=2.491, forward_time=0.176, loss_ctc=223.316, loss_interctc_layer6=218.746, loss_interctc_layer12=216.523, loss_interctc_layer15=216.597, loss_interctc_layer21=223.265, loss=219.689, backward_time=0.310, grad_norm=162.857, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.087, optim0_lr0=2.509e-05, train_time=3.980 [gpua049:0/64] 2024-01-15 00:02:03,670 (trainer:753) INFO: 2epoch:train:101-200batch: iter_time=8.338e-05, forward_time=0.151, loss_ctc=232.306, loss_interctc_layer6=232.406, loss_interctc_layer12=229.437, loss_interctc_layer15=229.720, loss_interctc_layer21=232.633, loss=231.300, backward_time=0.335, grad_norm=131.316, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.083, optim0_lr0=2.525e-05, train_time=1.395 [gpua049:0/64] 2024-01-15 00:04:10,973 (trainer:753) INFO: 2epoch:train:201-300batch: iter_time=8.352e-05, forward_time=0.142, loss_ctc=264.472, loss_interctc_layer6=260.888, loss_interctc_layer12=257.653, loss_interctc_layer15=257.633, loss_interctc_layer21=264.740, loss=261.077, backward_time=0.299, grad_norm=174.038, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.082, optim0_lr0=2.542e-05, train_time=1.273 [gpua049:0/64] 2024-01-15 00:06:40,662 (trainer:753) INFO: 2epoch:train:301-400batch: iter_time=8.372e-05, forward_time=0.142, loss_ctc=234.075, loss_interctc_layer6=236.315, loss_interctc_layer12=233.966, loss_interctc_layer15=233.719, loss_interctc_layer21=233.798, loss=234.374, backward_time=0.346, grad_norm=163.880, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.082, optim0_lr0=2.559e-05, train_time=1.497 [gpua049:0/64] 2024-01-15 00:09:03,763 (trainer:753) INFO: 2epoch:train:401-500batch: iter_time=7.941e-05, forward_time=0.141, loss_ctc=229.194, loss_interctc_layer6=230.442, loss_interctc_layer12=228.095, loss_interctc_layer15=228.222, loss_interctc_layer21=228.897, loss=228.970, backward_time=0.319, grad_norm=173.767, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.082, optim0_lr0=2.575e-05, train_time=1.431 [gpua049:0/64] 2024-01-15 00:11:16,099 (trainer:753) INFO: 2epoch:train:501-600batch: iter_time=7.794e-05, forward_time=0.142, loss_ctc=223.095, loss_interctc_layer6=221.408, loss_interctc_layer12=218.992, loss_interctc_layer15=218.676, loss_interctc_layer21=222.806, loss=220.995, backward_time=0.317, grad_norm=143.027, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.083, optim0_lr0=2.592e-05, train_time=1.323 [gpua049:0/64] 2024-01-15 00:13:54,725 (trainer:753) INFO: 2epoch:train:601-700batch: iter_time=8.301e-05, forward_time=0.143, loss_ctc=221.792, loss_interctc_layer6=223.793, loss_interctc_layer12=219.690, loss_interctc_layer15=219.789, loss_interctc_layer21=221.481, loss=221.309, backward_time=0.329, grad_norm=140.521, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.083, optim0_lr0=2.609e-05, train_time=1.586 [gpua049:0/64] 2024-01-15 00:16:11,463 (trainer:753) INFO: 2epoch:train:701-800batch: iter_time=9.044e-05, forward_time=0.144, loss_ctc=257.140, loss_interctc_layer6=248.209, loss_interctc_layer12=246.296, loss_interctc_layer15=245.787, loss_interctc_layer21=257.138, loss=250.914, backward_time=0.315, grad_norm=171.777, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.083, optim0_lr0=2.625e-05, train_time=1.367 [gpua049:0/64] 2024-01-15 00:18:33,812 (trainer:753) INFO: 2epoch:train:801-900batch: iter_time=8.536e-05, forward_time=0.143, loss_ctc=229.657, loss_interctc_layer6=226.983, loss_interctc_layer12=224.451, loss_interctc_layer15=224.478, loss_interctc_layer21=228.961, loss=226.906, backward_time=0.336, grad_norm=127.644, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.083, optim0_lr0=2.642e-05, train_time=1.423 [gpua049:0/64] 2024-01-15 00:21:08,907 (trainer:753) INFO: 2epoch:train:901-1000batch: iter_time=8.244e-05, forward_time=0.152, loss_ctc=245.674, loss_interctc_layer6=239.075, loss_interctc_layer12=236.805, loss_interctc_layer15=237.025, loss_interctc_layer21=245.791, loss=240.874, backward_time=0.337, grad_norm=135.909, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.083, optim0_lr0=2.659e-05, train_time=1.551 [gpua049:0/64] 2024-01-15 00:23:54,679 (trainer:753) INFO: 2epoch:train:1001-1100batch: iter_time=8.154e-05, forward_time=0.194, loss_ctc=244.434, loss_interctc_layer6=242.501, loss_interctc_layer12=239.342, loss_interctc_layer15=239.361, loss_interctc_layer21=244.329, loss=241.994, backward_time=0.376, grad_norm=143.203, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.089, optim0_lr0=2.675e-05, train_time=1.657 [gpua049:0/64] 2024-01-15 00:26:18,522 (trainer:753) INFO: 2epoch:train:1101-1200batch: iter_time=9.063e-05, forward_time=0.151, loss_ctc=265.259, loss_interctc_layer6=261.328, loss_interctc_layer12=257.978, loss_interctc_layer15=258.052, loss_interctc_layer21=264.854, loss=261.494, backward_time=0.322, grad_norm=131.033, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.082, optim0_lr0=2.692e-05, train_time=1.439 [gpua049:0/64] 2024-01-15 00:27:39,503 (multiple_iter_factory:32) INFO: Building 1th iter-factory... [gpua049:0/64] 2024-01-15 00:27:59,009 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') [gpua049:0/64] 2024-01-15 00:28:02,597 (abs_task:1660) INFO: [train] dataset: ESPnetDataset( speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} preprocess: ) [gpua049:0/64] 2024-01-15 00:28:02,597 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, [gpua049:0/64] 2024-01-15 00:28:02,600 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 [gpua049:0/64] 2024-01-15 00:35:32,527 (trainer:753) INFO: 2epoch:train:1201-1300batch: iter_time=2.099, forward_time=0.142, loss_ctc=240.825, loss_interctc_layer6=236.516, loss_interctc_layer12=234.109, loss_interctc_layer15=233.566, loss_interctc_layer21=240.100, loss=237.023, backward_time=0.311, grad_norm=150.486, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.083, optim0_lr0=2.709e-05, train_time=5.540 [gpua049:0/64] 2024-01-15 00:37:35,952 (trainer:753) INFO: 2epoch:train:1301-1400batch: iter_time=7.801e-05, forward_time=0.142, loss_ctc=230.448, loss_interctc_layer6=220.883, loss_interctc_layer12=218.577, loss_interctc_layer15=218.877, loss_interctc_layer21=230.634, loss=223.884, backward_time=0.297, grad_norm=124.109, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.082, optim0_lr0=2.725e-05, train_time=1.234 [gpua049:0/64] 2024-01-15 00:39:58,519 (trainer:753) INFO: 2epoch:train:1401-1500batch: iter_time=8.745e-05, forward_time=0.142, loss_ctc=244.585, loss_interctc_layer6=239.116, loss_interctc_layer12=236.961, loss_interctc_layer15=236.453, loss_interctc_layer21=244.836, loss=240.390, backward_time=0.338, grad_norm=128.115, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.081, optim0_lr0=2.742e-05, train_time=1.425 [gpua049:0/64] 2024-01-15 00:42:15,237 (trainer:753) INFO: 2epoch:train:1501-1600batch: iter_time=8.639e-05, forward_time=0.144, loss_ctc=239.284, loss_interctc_layer6=225.310, loss_interctc_layer12=222.735, loss_interctc_layer15=223.217, loss_interctc_layer21=239.106, loss=229.931, backward_time=0.332, grad_norm=135.158, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.081, optim0_lr0=2.759e-05, train_time=1.367 [gpua049:0/64] 2024-01-15 00:44:27,140 (trainer:753) INFO: 2epoch:train:1601-1700batch: iter_time=7.634e-05, forward_time=0.143, loss_ctc=245.101, loss_interctc_layer6=236.663, loss_interctc_layer12=234.553, loss_interctc_layer15=233.936, loss_interctc_layer21=244.184, loss=238.887, backward_time=0.301, grad_norm=142.044, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.082, optim0_lr0=2.775e-05, train_time=1.319 [gpua049:0/64] 2024-01-15 00:46:47,966 (trainer:753) INFO: 2epoch:train:1701-1800batch: iter_time=8.568e-05, forward_time=0.142, loss_ctc=246.456, loss_interctc_layer6=230.757, loss_interctc_layer12=228.202, loss_interctc_layer15=228.437, loss_interctc_layer21=246.287, loss=236.028, backward_time=0.320, grad_norm=128.896, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.081, optim0_lr0=2.792e-05, train_time=1.408 [gpua049:0/64] 2024-01-15 00:49:00,537 (trainer:753) INFO: 2epoch:train:1801-1900batch: iter_time=8.514e-05, forward_time=0.142, loss_ctc=225.682, loss_interctc_layer6=218.887, loss_interctc_layer12=216.341, loss_interctc_layer15=215.975, loss_interctc_layer21=225.840, loss=220.545, backward_time=0.309, grad_norm=120.509, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.081, optim0_lr0=2.809e-05, train_time=1.325 [gpua049:0/64] 2024-01-15 00:51:34,549 (trainer:753) INFO: 2epoch:train:1901-2000batch: iter_time=8.287e-05, forward_time=0.143, loss_ctc=257.437, loss_interctc_layer6=237.691, loss_interctc_layer12=234.609, loss_interctc_layer15=234.888, loss_interctc_layer21=257.833, loss=244.492, backward_time=0.318, grad_norm=138.607, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.081, optim0_lr0=2.825e-05, train_time=1.540 [gpua049:0/64] 2024-01-15 00:53:51,655 (trainer:753) INFO: 2epoch:train:2001-2100batch: iter_time=7.638e-05, forward_time=0.142, loss_ctc=245.957, loss_interctc_layer6=237.414, loss_interctc_layer12=233.883, loss_interctc_layer15=233.885, loss_interctc_layer21=245.686, loss=239.365, backward_time=0.305, grad_norm=122.413, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.082, optim0_lr0=2.842e-05, train_time=1.371 [gpua049:0/64] 2024-01-15 00:56:11,041 (trainer:753) INFO: 2epoch:train:2101-2200batch: iter_time=7.416e-05, forward_time=0.142, loss_ctc=241.173, loss_interctc_layer6=227.054, loss_interctc_layer12=225.004, loss_interctc_layer15=223.902, loss_interctc_layer21=241.428, loss=231.712, backward_time=0.306, grad_norm=132.887, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.082, optim0_lr0=2.859e-05, train_time=1.394 [gpua049:0/64] 2024-01-15 00:58:26,337 (trainer:753) INFO: 2epoch:train:2201-2300batch: iter_time=7.811e-05, forward_time=0.142, loss_ctc=259.155, loss_interctc_layer6=250.921, loss_interctc_layer12=248.215, loss_interctc_layer15=247.947, loss_interctc_layer21=259.050, loss=253.058, backward_time=0.302, grad_norm=142.075, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.082, optim0_lr0=2.875e-05, train_time=1.353 [gpua049:0/64] 2024-01-15 01:00:54,932 (trainer:753) INFO: 2epoch:train:2301-2400batch: iter_time=7.741e-05, forward_time=0.143, loss_ctc=250.762, loss_interctc_layer6=238.291, loss_interctc_layer12=235.174, loss_interctc_layer15=235.313, loss_interctc_layer21=250.380, loss=241.984, backward_time=0.368, grad_norm=131.275, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.082, optim0_lr0=2.892e-05, train_time=1.486 [gpua049:0/64] 2024-01-15 01:03:27,333 (trainer:753) INFO: 2epoch:train:2401-2500batch: iter_time=7.924e-05, forward_time=0.142, loss_ctc=247.301, loss_interctc_layer6=241.906, loss_interctc_layer12=239.291, loss_interctc_layer15=238.923, loss_interctc_layer21=247.181, loss=242.921, backward_time=0.327, grad_norm=160.125, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.082, optim0_lr0=2.909e-05, train_time=1.524 [gpua049:0/64] 2024-01-15 01:03:28,626 (multiple_iter_factory:32) INFO: Building 2th iter-factory... [gpua049:0/64] 2024-01-15 01:03:48,282 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') [gpua049:0/64] 2024-01-15 01:03:51,955 (abs_task:1660) INFO: [train] dataset: ESPnetDataset( speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} preprocess: ) [gpua049:0/64] 2024-01-15 01:03:51,955 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, [gpua049:0/64] 2024-01-15 01:03:51,958 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 [gpua049:0/64] 2024-01-15 01:12:36,104 (trainer:753) INFO: 2epoch:train:2501-2600batch: iter_time=1.324, forward_time=0.146, loss_ctc=218.650, loss_interctc_layer6=212.442, loss_interctc_layer12=210.005, loss_interctc_layer15=209.806, loss_interctc_layer21=218.553, loss=213.891, backward_time=0.309, grad_norm=138.489, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.081, optim0_lr0=2.925e-05, train_time=5.487 [gpua049:0/64] 2024-01-15 01:15:08,336 (trainer:753) INFO: 2epoch:train:2601-2700batch: iter_time=8.098e-05, forward_time=0.143, loss_ctc=236.654, loss_interctc_layer6=228.175, loss_interctc_layer12=225.974, loss_interctc_layer15=225.673, loss_interctc_layer21=237.208, loss=230.737, backward_time=0.318, grad_norm=127.589, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.082, optim0_lr0=2.942e-05, train_time=1.522 [gpua049:0/64] 2024-01-15 01:17:56,484 (trainer:753) INFO: 2epoch:train:2701-2800batch: iter_time=8.107e-05, forward_time=0.143, loss_ctc=264.960, loss_interctc_layer6=254.264, loss_interctc_layer12=251.690, loss_interctc_layer15=251.732, loss_interctc_layer21=265.418, loss=257.613, backward_time=0.439, grad_norm=153.410, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.082, optim0_lr0=2.959e-05, train_time=1.681 [gpua049:0/64] 2024-01-15 01:20:28,197 (trainer:753) INFO: 2epoch:train:2801-2900batch: iter_time=8.128e-05, forward_time=0.142, loss_ctc=244.934, loss_interctc_layer6=234.787, loss_interctc_layer12=231.984, loss_interctc_layer15=232.016, loss_interctc_layer21=243.887, loss=237.522, backward_time=0.350, grad_norm=158.498, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.082, optim0_lr0=2.975e-05, train_time=1.517 [gpua049:0/64] 2024-01-15 01:22:57,552 (trainer:753) INFO: 2epoch:train:2901-3000batch: iter_time=8.169e-05, forward_time=0.142, loss_ctc=238.301, loss_interctc_layer6=225.133, loss_interctc_layer12=222.555, loss_interctc_layer15=222.679, loss_interctc_layer21=237.871, loss=229.308, backward_time=0.379, grad_norm=152.417, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.082, optim0_lr0=2.992e-05, train_time=1.493 [gpua049:0/64] 2024-01-15 01:25:55,052 (trainer:753) INFO: 2epoch:train:3001-3100batch: iter_time=8.118e-05, forward_time=0.142, loss_ctc=227.093, loss_interctc_layer6=218.445, loss_interctc_layer12=215.690, loss_interctc_layer15=215.685, loss_interctc_layer21=226.646, loss=220.712, backward_time=0.374, grad_norm=124.494, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.082, optim0_lr0=3.009e-05, train_time=1.775 [gpua049:0/64] 2024-01-15 01:28:37,177 (trainer:753) INFO: 2epoch:train:3101-3200batch: iter_time=8.247e-05, forward_time=0.165, loss_ctc=227.752, loss_interctc_layer6=218.231, loss_interctc_layer12=216.595, loss_interctc_layer15=216.085, loss_interctc_layer21=227.968, loss=221.326, backward_time=0.390, grad_norm=124.752, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.082, optim0_lr0=3.025e-05, train_time=1.621 [gpua049:0/64] 2024-01-15 01:30:47,561 (trainer:753) INFO: 2epoch:train:3201-3300batch: iter_time=7.745e-05, forward_time=0.144, loss_ctc=262.088, loss_interctc_layer6=243.332, loss_interctc_layer12=240.799, loss_interctc_layer15=240.714, loss_interctc_layer21=261.781, loss=249.743, backward_time=0.303, grad_norm=141.778, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.082, optim0_lr0=3.042e-05, train_time=1.304 [gpua049:0/64] 2024-01-15 01:33:18,890 (trainer:753) INFO: 2epoch:train:3301-3400batch: iter_time=7.990e-05, forward_time=0.166, loss_ctc=234.497, loss_interctc_layer6=223.151, loss_interctc_layer12=220.631, loss_interctc_layer15=220.972, loss_interctc_layer21=233.729, loss=226.596, backward_time=0.360, grad_norm=131.508, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.086, optim0_lr0=3.059e-05, train_time=1.512 [gpua049:0/64] 2024-01-15 01:35:40,782 (trainer:753) INFO: 2epoch:train:3401-3500batch: iter_time=8.326e-05, forward_time=0.176, loss_ctc=247.824, loss_interctc_layer6=235.954, loss_interctc_layer12=233.461, loss_interctc_layer15=233.343, loss_interctc_layer21=247.868, loss=239.690, backward_time=0.342, grad_norm=133.884, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.082, optim0_lr0=3.075e-05, train_time=1.420 [gpua049:0/64] 2024-01-15 01:38:23,733 (trainer:753) INFO: 2epoch:train:3501-3600batch: iter_time=7.474e-05, forward_time=0.153, loss_ctc=250.046, loss_interctc_layer6=238.451, loss_interctc_layer12=235.259, loss_interctc_layer15=235.032, loss_interctc_layer21=249.740, loss=241.706, backward_time=0.368, grad_norm=140.866, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.087, optim0_lr0=3.092e-05, train_time=1.629 [gpua049:0/64] 2024-01-15 01:41:04,149 (trainer:753) INFO: 2epoch:train:3601-3700batch: iter_time=7.546e-05, forward_time=0.143, loss_ctc=267.511, loss_interctc_layer6=256.229, loss_interctc_layer12=253.891, loss_interctc_layer15=253.507, loss_interctc_layer21=267.076, loss=259.643, backward_time=0.351, grad_norm=133.646, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.082, optim0_lr0=3.109e-05, train_time=1.605 [gpua049:0/64] 2024-01-15 01:42:25,815 (multiple_iter_factory:32) INFO: Building 3th iter-factory... [gpua049:0/64] 2024-01-15 01:42:45,830 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') [gpua049:0/64] 2024-01-15 01:42:49,527 (abs_task:1660) INFO: [train] dataset: ESPnetDataset( speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} preprocess: ) [gpua049:0/64] 2024-01-15 01:42:49,527 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, [gpua049:0/64] 2024-01-15 01:42:49,531 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 [gpua049:0/64] 2024-01-15 01:51:52,022 (trainer:753) INFO: 2epoch:train:3701-3800batch: iter_time=1.779, forward_time=0.146, loss_ctc=240.331, loss_interctc_layer6=233.198, loss_interctc_layer12=230.249, loss_interctc_layer15=230.190, loss_interctc_layer21=239.512, loss=234.696, backward_time=0.329, grad_norm=162.005, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.082, optim0_lr0=3.125e-05, train_time=6.479 [gpua049:0/64] 2024-01-15 01:53:56,033 (trainer:753) INFO: 2epoch:train:3801-3900batch: iter_time=7.505e-05, forward_time=0.143, loss_ctc=227.039, loss_interctc_layer6=217.908, loss_interctc_layer12=214.990, loss_interctc_layer15=215.046, loss_interctc_layer21=226.597, loss=220.316, backward_time=0.298, grad_norm=126.623, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.083, optim0_lr0=3.142e-05, train_time=1.240 [gpua049:0/64] 2024-01-15 01:56:21,320 (trainer:753) INFO: 2epoch:train:3901-4000batch: iter_time=7.256e-05, forward_time=0.143, loss_ctc=241.903, loss_interctc_layer6=237.853, loss_interctc_layer12=234.777, loss_interctc_layer15=234.872, loss_interctc_layer21=242.435, loss=238.368, backward_time=0.310, grad_norm=139.624, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.082, optim0_lr0=3.159e-05, train_time=1.453 [gpua049:0/64] 2024-01-15 01:59:03,394 (trainer:753) INFO: 2epoch:train:4001-4100batch: iter_time=7.412e-05, forward_time=0.144, loss_ctc=235.797, loss_interctc_layer6=221.782, loss_interctc_layer12=219.341, loss_interctc_layer15=219.448, loss_interctc_layer21=235.535, loss=226.380, backward_time=0.353, grad_norm=143.897, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.082, optim0_lr0=3.175e-05, train_time=1.621 [gpua049:0/64] 2024-01-15 02:01:05,672 (trainer:753) INFO: 2epoch:train:4101-4200batch: iter_time=7.289e-05, forward_time=0.143, loss_ctc=241.928, loss_interctc_layer6=234.580, loss_interctc_layer12=231.960, loss_interctc_layer15=232.015, loss_interctc_layer21=241.493, loss=236.395, backward_time=0.297, grad_norm=161.683, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.082, optim0_lr0=3.192e-05, train_time=1.223 [gpua049:0/64] 2024-01-15 02:03:34,939 (trainer:753) INFO: 2epoch:train:4201-4300batch: iter_time=7.480e-05, forward_time=0.142, loss_ctc=242.157, loss_interctc_layer6=227.833, loss_interctc_layer12=225.539, loss_interctc_layer15=225.401, loss_interctc_layer21=242.204, loss=232.627, backward_time=0.323, grad_norm=135.107, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.082, optim0_lr0=3.209e-05, train_time=1.492 [gpua049:0/64] 2024-01-15 02:06:17,853 (trainer:753) INFO: 2epoch:train:4301-4400batch: iter_time=7.592e-05, forward_time=0.142, loss_ctc=222.293, loss_interctc_layer6=215.880, loss_interctc_layer12=213.568, loss_interctc_layer15=213.351, loss_interctc_layer21=221.759, loss=217.370, backward_time=0.368, grad_norm=127.482, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.082, optim0_lr0=3.225e-05, train_time=1.629 [gpua049:0/64] 2024-01-15 02:08:36,556 (trainer:753) INFO: 2epoch:train:4401-4500batch: iter_time=7.398e-05, forward_time=0.143, loss_ctc=253.674, loss_interctc_layer6=234.173, loss_interctc_layer12=232.676, loss_interctc_layer15=232.585, loss_interctc_layer21=254.023, loss=241.426, backward_time=0.322, grad_norm=139.837, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.082, optim0_lr0=3.242e-05, train_time=1.387 [gpua049:0/64] 2024-01-15 02:10:51,903 (trainer:753) INFO: 2epoch:train:4501-4600batch: iter_time=7.549e-05, forward_time=0.143, loss_ctc=242.143, loss_interctc_layer6=233.105, loss_interctc_layer12=230.560, loss_interctc_layer15=230.306, loss_interctc_layer21=241.605, loss=235.544, backward_time=0.321, grad_norm=127.787, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.082, optim0_lr0=3.259e-05, train_time=1.353 [gpua049:0/64] 2024-01-15 02:13:01,918 (trainer:753) INFO: 2epoch:train:4601-4700batch: iter_time=7.387e-05, forward_time=0.144, loss_ctc=237.790, loss_interctc_layer6=224.624, loss_interctc_layer12=221.743, loss_interctc_layer15=221.380, loss_interctc_layer21=237.027, loss=228.513, backward_time=0.313, grad_norm=118.468, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.082, optim0_lr0=3.275e-05, train_time=1.300 [gpua049:0/64] 2024-01-15 02:15:22,541 (trainer:753) INFO: 2epoch:train:4701-4800batch: iter_time=7.374e-05, forward_time=0.143, loss_ctc=256.590, loss_interctc_layer6=248.241, loss_interctc_layer12=245.709, loss_interctc_layer15=245.966, loss_interctc_layer21=256.294, loss=250.560, backward_time=0.332, grad_norm=140.304, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.083, optim0_lr0=3.292e-05, train_time=1.406 [gpua049:0/64] 2024-01-15 02:17:28,047 (trainer:753) INFO: 2epoch:train:4801-4900batch: iter_time=7.424e-05, forward_time=0.143, loss_ctc=247.721, loss_interctc_layer6=236.024, loss_interctc_layer12=233.567, loss_interctc_layer15=233.587, loss_interctc_layer21=247.580, loss=239.696, backward_time=0.299, grad_norm=126.471, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.083, optim0_lr0=3.309e-05, train_time=1.255 [gpua049:0/64] 2024-01-15 02:19:31,776 (trainer:753) INFO: 2epoch:train:4901-5000batch: iter_time=7.481e-05, forward_time=0.143, loss_ctc=243.212, loss_interctc_layer6=238.627, loss_interctc_layer12=235.864, loss_interctc_layer15=235.734, loss_interctc_layer21=243.071, loss=239.302, backward_time=0.297, grad_norm=149.750, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.082, optim0_lr0=3.325e-05, train_time=1.237 [gpua049:0/64] 2024-01-15 02:19:44,851 (multiple_iter_factory:32) INFO: Building 4th iter-factory... [gpua049:0/64] 2024-01-15 02:20:05,150 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') [gpua049:0/64] 2024-01-15 02:20:08,803 (abs_task:1660) INFO: [train] dataset: ESPnetDataset( speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} preprocess: ) [gpua049:0/64] 2024-01-15 02:20:08,803 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, [gpua049:0/64] 2024-01-15 02:20:08,806 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 [gpua049:0/64] 2024-01-15 02:25:35,198 (trainer:753) INFO: 2epoch:train:5001-5100batch: iter_time=1.874, forward_time=0.145, loss_ctc=214.641, loss_interctc_layer6=209.694, loss_interctc_layer12=207.384, loss_interctc_layer15=207.675, loss_interctc_layer21=214.194, loss=210.717, backward_time=0.314, grad_norm=128.406, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.082, optim0_lr0=3.342e-05, train_time=3.634 [gpua049:0/64] 2024-01-15 02:28:00,053 (trainer:753) INFO: 2epoch:train:5101-5200batch: iter_time=8.012e-05, forward_time=0.142, loss_ctc=226.134, loss_interctc_layer6=225.596, loss_interctc_layer12=222.684, loss_interctc_layer15=222.603, loss_interctc_layer21=226.143, loss=224.632, backward_time=0.351, grad_norm=122.373, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.082, optim0_lr0=3.359e-05, train_time=1.448 [gpua049:0/64] 2024-01-15 02:30:44,948 (trainer:753) INFO: 2epoch:train:5201-5300batch: iter_time=7.891e-05, forward_time=0.143, loss_ctc=256.030, loss_interctc_layer6=250.656, loss_interctc_layer12=248.242, loss_interctc_layer15=248.467, loss_interctc_layer21=255.313, loss=251.742, backward_time=0.338, grad_norm=155.090, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.083, optim0_lr0=3.375e-05, train_time=1.649 [gpua049:0/64] 2024-01-15 02:33:11,877 (trainer:753) INFO: 2epoch:train:5301-5400batch: iter_time=7.667e-05, forward_time=0.143, loss_ctc=229.520, loss_interctc_layer6=231.053, loss_interctc_layer12=228.644, loss_interctc_layer15=228.891, loss_interctc_layer21=228.980, loss=229.418, backward_time=0.353, grad_norm=139.497, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.083, optim0_lr0=3.392e-05, train_time=1.469 [gpua049:0/64] 2024-01-15 02:35:44,436 (trainer:753) INFO: 2epoch:train:5401-5500batch: iter_time=7.582e-05, forward_time=0.142, loss_ctc=224.484, loss_interctc_layer6=225.067, loss_interctc_layer12=221.855, loss_interctc_layer15=222.157, loss_interctc_layer21=224.167, loss=223.546, backward_time=0.326, grad_norm=131.581, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.083, optim0_lr0=3.409e-05, train_time=1.525 [gpua049:0/64] 2024-01-15 02:38:20,535 (trainer:753) INFO: 2epoch:train:5501-5600batch: iter_time=7.607e-05, forward_time=0.143, loss_ctc=218.538, loss_interctc_layer6=216.993, loss_interctc_layer12=214.709, loss_interctc_layer15=214.341, loss_interctc_layer21=219.458, loss=216.808, backward_time=0.332, grad_norm=118.488, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.083, optim0_lr0=3.425e-05, train_time=1.561 [gpua049:0/64] 2024-01-15 02:40:26,466 (trainer:753) INFO: 2epoch:train:5601-5700batch: iter_time=7.824e-05, forward_time=0.143, loss_ctc=214.188, loss_interctc_layer6=214.796, loss_interctc_layer12=212.287, loss_interctc_layer15=212.011, loss_interctc_layer21=213.859, loss=213.428, backward_time=0.303, grad_norm=110.012, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.082, optim0_lr0=3.442e-05, train_time=1.259 [gpua049:0/64] 2024-01-15 02:43:11,889 (trainer:753) INFO: 2epoch:train:5701-5800batch: iter_time=7.809e-05, forward_time=0.143, loss_ctc=248.768, loss_interctc_layer6=240.415, loss_interctc_layer12=238.080, loss_interctc_layer15=238.954, loss_interctc_layer21=248.553, loss=242.954, backward_time=0.419, grad_norm=127.218, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.082, optim0_lr0=3.459e-05, train_time=1.654 [gpua049:0/64] 2024-01-15 02:45:59,907 (trainer:753) INFO: 2epoch:train:5801-5900batch: iter_time=7.747e-05, forward_time=0.142, loss_ctc=222.576, loss_interctc_layer6=220.282, loss_interctc_layer12=217.494, loss_interctc_layer15=217.401, loss_interctc_layer21=221.723, loss=219.895, backward_time=0.359, grad_norm=126.648, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.082, optim0_lr0=3.475e-05, train_time=1.680 [gpua049:0/64] 2024-01-15 02:48:15,289 (trainer:753) INFO: 2epoch:train:5901-6000batch: iter_time=7.669e-05, forward_time=0.142, loss_ctc=237.674, loss_interctc_layer6=231.297, loss_interctc_layer12=228.849, loss_interctc_layer15=228.620, loss_interctc_layer21=237.674, loss=232.823, backward_time=0.344, grad_norm=126.304, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.082, optim0_lr0=3.492e-05, train_time=1.354 [gpua049:0/64] 2024-01-15 02:50:57,143 (trainer:753) INFO: 2epoch:train:6001-6100batch: iter_time=7.775e-05, forward_time=0.143, loss_ctc=238.200, loss_interctc_layer6=235.253, loss_interctc_layer12=232.694, loss_interctc_layer15=232.798, loss_interctc_layer21=238.427, loss=235.474, backward_time=0.367, grad_norm=138.486, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.083, optim0_lr0=3.509e-05, train_time=1.618 [gpua049:0/64] 2024-01-15 02:53:07,168 (trainer:753) INFO: 2epoch:train:6101-6200batch: iter_time=7.651e-05, forward_time=0.143, loss_ctc=257.785, loss_interctc_layer6=254.150, loss_interctc_layer12=251.204, loss_interctc_layer15=251.027, loss_interctc_layer21=257.180, loss=254.269, backward_time=0.302, grad_norm=126.714, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.082, optim0_lr0=3.525e-05, train_time=1.300 [gpua049:0/64] 2024-01-15 02:54:21,893 (multiple_iter_factory:32) INFO: Building 5th iter-factory... [gpua049:0/64] 2024-01-15 02:54:42,052 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') [gpua049:0/64] 2024-01-15 02:54:46,007 (abs_task:1660) INFO: [train] dataset: ESPnetDataset( speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} preprocess: ) [gpua049:0/64] 2024-01-15 02:54:46,007 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, [gpua049:0/64] 2024-01-15 02:54:46,010 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 [gpua049:0/64] 2024-01-15 03:00:10,552 (trainer:753) INFO: 2epoch:train:6201-6300batch: iter_time=1.688, forward_time=0.169, loss_ctc=230.332, loss_interctc_layer6=228.096, loss_interctc_layer12=225.981, loss_interctc_layer15=225.736, loss_interctc_layer21=230.103, loss=228.050, backward_time=0.325, grad_norm=141.797, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.084, optim0_lr0=3.542e-05, train_time=4.234 [gpua049:0/64] 2024-01-15 03:02:15,887 (trainer:753) INFO: 2epoch:train:6301-6400batch: iter_time=7.448e-05, forward_time=0.143, loss_ctc=214.456, loss_interctc_layer6=213.440, loss_interctc_layer12=212.081, loss_interctc_layer15=211.498, loss_interctc_layer21=213.975, loss=213.090, backward_time=0.297, grad_norm=114.262, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.083, optim0_lr0=3.559e-05, train_time=1.253 [gpua049:0/64] 2024-01-15 03:04:28,668 (trainer:753) INFO: 2epoch:train:6401-6500batch: iter_time=7.509e-05, forward_time=0.143, loss_ctc=232.776, loss_interctc_layer6=232.873, loss_interctc_layer12=230.912, loss_interctc_layer15=230.831, loss_interctc_layer21=232.947, loss=232.068, backward_time=0.305, grad_norm=127.117, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.083, optim0_lr0=3.575e-05, train_time=1.328 [gpua049:0/64] 2024-01-15 03:06:43,469 (trainer:753) INFO: 2epoch:train:6501-6600batch: iter_time=7.959e-05, forward_time=0.142, loss_ctc=222.875, loss_interctc_layer6=218.285, loss_interctc_layer12=216.085, loss_interctc_layer15=216.367, loss_interctc_layer21=222.670, loss=219.256, backward_time=0.315, grad_norm=135.024, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.082, optim0_lr0=3.592e-05, train_time=1.348 [gpua049:0/64] 2024-01-15 03:08:58,735 (trainer:753) INFO: 2epoch:train:6601-6700batch: iter_time=8.037e-05, forward_time=0.142, loss_ctc=226.829, loss_interctc_layer6=230.643, loss_interctc_layer12=227.665, loss_interctc_layer15=227.630, loss_interctc_layer21=226.079, loss=227.769, backward_time=0.309, grad_norm=138.662, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.082, optim0_lr0=3.609e-05, train_time=1.352 [gpua049:0/64] 2024-01-15 03:11:14,918 (trainer:753) INFO: 2epoch:train:6701-6800batch: iter_time=7.366e-05, forward_time=0.144, loss_ctc=230.201, loss_interctc_layer6=225.549, loss_interctc_layer12=223.377, loss_interctc_layer15=223.714, loss_interctc_layer21=230.105, loss=226.589, backward_time=0.318, grad_norm=134.318, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.083, optim0_lr0=3.625e-05, train_time=1.362 [gpua049:0/64] 2024-01-15 03:13:25,705 (trainer:753) INFO: 2epoch:train:6801-6900batch: iter_time=7.615e-05, forward_time=0.142, loss_ctc=210.579, loss_interctc_layer6=213.636, loss_interctc_layer12=210.612, loss_interctc_layer15=210.279, loss_interctc_layer21=210.589, loss=211.139, backward_time=0.300, grad_norm=112.843, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.083, optim0_lr0=3.642e-05, train_time=1.308 [gpua049:0/64] 2024-01-15 03:15:40,650 (trainer:753) INFO: 2epoch:train:6901-7000batch: iter_time=7.667e-05, forward_time=0.142, loss_ctc=239.601, loss_interctc_layer6=231.092, loss_interctc_layer12=229.773, loss_interctc_layer15=229.181, loss_interctc_layer21=238.839, loss=233.697, backward_time=0.315, grad_norm=140.581, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.083, optim0_lr0=3.659e-05, train_time=1.349 [gpua049:0/64] 2024-01-15 03:18:16,788 (trainer:753) INFO: 2epoch:train:7001-7100batch: iter_time=7.589e-05, forward_time=0.142, loss_ctc=233.588, loss_interctc_layer6=229.719, loss_interctc_layer12=227.617, loss_interctc_layer15=227.343, loss_interctc_layer21=233.291, loss=230.312, backward_time=0.329, grad_norm=124.099, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.083, optim0_lr0=3.675e-05, train_time=1.561 [gpua049:0/64] 2024-01-15 03:20:35,447 (trainer:753) INFO: 2epoch:train:7101-7200batch: iter_time=7.613e-05, forward_time=0.142, loss_ctc=227.765, loss_interctc_layer6=221.629, loss_interctc_layer12=219.516, loss_interctc_layer15=219.330, loss_interctc_layer21=227.731, loss=223.194, backward_time=0.350, grad_norm=124.545, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.082, optim0_lr0=3.692e-05, train_time=1.386 [gpua049:0/64] 2024-01-15 03:22:56,425 (trainer:753) INFO: 2epoch:train:7201-7300batch: iter_time=7.972e-05, forward_time=0.141, loss_ctc=246.144, loss_interctc_layer6=245.064, loss_interctc_layer12=242.943, loss_interctc_layer15=243.003, loss_interctc_layer21=245.706, loss=244.572, backward_time=0.306, grad_norm=143.127, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.082, optim0_lr0=3.709e-05, train_time=1.410 [gpua049:0/64] 2024-01-15 03:25:17,267 (trainer:753) INFO: 2epoch:train:7301-7400batch: iter_time=7.828e-05, forward_time=0.143, loss_ctc=236.175, loss_interctc_layer6=232.455, loss_interctc_layer12=229.838, loss_interctc_layer15=229.854, loss_interctc_layer21=235.890, loss=232.842, backward_time=0.306, grad_norm=121.916, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.082, optim0_lr0=3.725e-05, train_time=1.408 [gpua049:0/64] 2024-01-15 03:27:31,845 (trainer:753) INFO: 2epoch:train:7401-7500batch: iter_time=7.628e-05, forward_time=0.146, loss_ctc=238.150, loss_interctc_layer6=236.757, loss_interctc_layer12=233.708, loss_interctc_layer15=234.275, loss_interctc_layer21=237.925, loss=236.163, backward_time=0.311, grad_norm=150.474, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.082, optim0_lr0=3.742e-05, train_time=1.346 [gpua049:0/64] 2024-01-15 03:27:45,322 (multiple_iter_factory:32) INFO: Building 6th iter-factory... [gpua049:0/64] 2024-01-15 03:28:05,658 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') [gpua049:0/64] 2024-01-15 03:28:09,366 (abs_task:1660) INFO: [train] dataset: ESPnetDataset( speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} preprocess: ) [gpua049:0/64] 2024-01-15 03:28:09,366 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, [gpua049:0/64] 2024-01-15 03:28:09,369 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 [gpua049:0/64] 2024-01-15 03:36:45,778 (trainer:753) INFO: 2epoch:train:7501-7600batch: iter_time=2.086, forward_time=0.142, loss_ctc=212.485, loss_interctc_layer6=207.305, loss_interctc_layer12=205.495, loss_interctc_layer15=205.973, loss_interctc_layer21=211.871, loss=208.626, backward_time=0.302, grad_norm=128.892, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.083, optim0_lr0=3.759e-05, train_time=5.539 [gpua049:0/64] 2024-01-15 03:38:49,394 (trainer:753) INFO: 2epoch:train:7601-7700batch: iter_time=7.792e-05, forward_time=0.142, loss_ctc=222.755, loss_interctc_layer6=223.408, loss_interctc_layer12=220.933, loss_interctc_layer15=220.903, loss_interctc_layer21=222.630, loss=222.126, backward_time=0.297, grad_norm=126.525, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.082, optim0_lr0=3.775e-05, train_time=1.236 [gpua049:0/64] 2024-01-15 03:41:16,761 (trainer:753) INFO: 2epoch:train:7701-7800batch: iter_time=7.788e-05, forward_time=0.141, loss_ctc=253.190, loss_interctc_layer6=247.683, loss_interctc_layer12=244.465, loss_interctc_layer15=244.538, loss_interctc_layer21=252.604, loss=248.496, backward_time=0.358, grad_norm=151.950, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.082, optim0_lr0=3.792e-05, train_time=1.473 [gpua049:0/64] 2024-01-15 03:43:29,562 (trainer:753) INFO: 2epoch:train:7801-7900batch: iter_time=8.170e-05, forward_time=0.141, loss_ctc=226.140, loss_interctc_layer6=227.927, loss_interctc_layer12=225.603, loss_interctc_layer15=225.792, loss_interctc_layer21=225.337, loss=226.160, backward_time=0.298, grad_norm=113.048, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.082, optim0_lr0=3.809e-05, train_time=1.328 [gpua049:0/64] 2024-01-15 03:45:54,406 (trainer:753) INFO: 2epoch:train:7901-8000batch: iter_time=7.907e-05, forward_time=0.141, loss_ctc=221.106, loss_interctc_layer6=222.172, loss_interctc_layer12=218.816, loss_interctc_layer15=218.644, loss_interctc_layer21=221.108, loss=220.369, backward_time=0.309, grad_norm=114.216, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.082, optim0_lr0=3.825e-05, train_time=1.448 [gpua049:0/64] 2024-01-15 03:48:22,156 (trainer:753) INFO: 2epoch:train:8001-8100batch: iter_time=8.242e-05, forward_time=0.141, loss_ctc=214.950, loss_interctc_layer6=213.422, loss_interctc_layer12=211.185, loss_interctc_layer15=211.429, loss_interctc_layer21=214.701, loss=213.138, backward_time=0.351, grad_norm=116.463, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.082, optim0_lr0=3.842e-05, train_time=1.477 [gpua049:0/64] 2024-01-15 03:50:28,492 (trainer:753) INFO: 2epoch:train:8101-8200batch: iter_time=7.976e-05, forward_time=0.141, loss_ctc=213.309, loss_interctc_layer6=214.456, loss_interctc_layer12=211.585, loss_interctc_layer15=211.253, loss_interctc_layer21=212.850, loss=212.691, backward_time=0.298, grad_norm=121.737, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.082, optim0_lr0=3.859e-05, train_time=1.263 [gpua049:0/64] 2024-01-15 03:52:39,853 (trainer:753) INFO: 2epoch:train:8201-8300batch: iter_time=8.130e-05, forward_time=0.142, loss_ctc=246.003, loss_interctc_layer6=239.542, loss_interctc_layer12=236.839, loss_interctc_layer15=236.104, loss_interctc_layer21=245.941, loss=240.886, backward_time=0.306, grad_norm=136.678, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.082, optim0_lr0=3.875e-05, train_time=1.313 [gpua049:0/64] 2024-01-15 03:54:53,990 (trainer:753) INFO: 2epoch:train:8301-8400batch: iter_time=7.984e-05, forward_time=0.141, loss_ctc=220.667, loss_interctc_layer6=219.641, loss_interctc_layer12=217.122, loss_interctc_layer15=216.984, loss_interctc_layer21=220.089, loss=218.901, backward_time=0.298, grad_norm=125.201, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.082, optim0_lr0=3.892e-05, train_time=1.341 [gpua049:0/64] 2024-01-15 03:57:26,371 (trainer:753) INFO: 2epoch:train:8401-8500batch: iter_time=8.051e-05, forward_time=0.143, loss_ctc=236.069, loss_interctc_layer6=229.940, loss_interctc_layer12=227.645, loss_interctc_layer15=227.849, loss_interctc_layer21=236.272, loss=231.555, backward_time=0.365, grad_norm=129.479, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.082, optim0_lr0=3.909e-05, train_time=1.524 [gpua049:0/64] 2024-01-15 03:59:59,633 (trainer:753) INFO: 2epoch:train:8501-8600batch: iter_time=7.659e-05, forward_time=0.142, loss_ctc=235.951, loss_interctc_layer6=233.788, loss_interctc_layer12=231.434, loss_interctc_layer15=230.956, loss_interctc_layer21=235.361, loss=233.498, backward_time=0.336, grad_norm=123.720, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.082, optim0_lr0=3.925e-05, train_time=1.532 [gpua049:0/64] 2024-01-15 04:02:31,078 (trainer:753) INFO: 2epoch:train:8601-8700batch: iter_time=8.183e-05, forward_time=0.142, loss_ctc=255.911, loss_interctc_layer6=253.235, loss_interctc_layer12=249.876, loss_interctc_layer15=249.887, loss_interctc_layer21=255.315, loss=252.845, backward_time=0.332, grad_norm=120.340, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.082, optim0_lr0=3.942e-05, train_time=1.514 [gpua049:0/64] 2024-01-15 04:03:37,062 (multiple_iter_factory:32) INFO: Building 7th iter-factory... [gpua049:0/64] 2024-01-15 04:03:57,349 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') [gpua049:0/64] 2024-01-15 04:04:01,044 (abs_task:1660) INFO: [train] dataset: ESPnetDataset( speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} preprocess: ) [gpua049:0/64] 2024-01-15 04:04:01,044 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, [gpua049:0/64] 2024-01-15 04:04:01,047 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 [gpua049:0/64] 2024-01-15 04:09:50,101 (trainer:753) INFO: 2epoch:train:8701-8800batch: iter_time=1.381, forward_time=0.185, loss_ctc=229.919, loss_interctc_layer6=227.428, loss_interctc_layer12=225.302, loss_interctc_layer15=225.525, loss_interctc_layer21=229.883, loss=227.611, backward_time=0.310, grad_norm=154.608, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.086, optim0_lr0=3.959e-05, train_time=4.390 [gpua049:0/64] 2024-01-15 04:11:53,153 (trainer:753) INFO: 2epoch:train:8801-8900batch: iter_time=7.771e-05, forward_time=0.142, loss_ctc=213.377, loss_interctc_layer6=212.665, loss_interctc_layer12=210.474, loss_interctc_layer15=210.464, loss_interctc_layer21=212.853, loss=211.967, backward_time=0.297, grad_norm=121.167, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.082, optim0_lr0=3.975e-05, train_time=1.230 [gpua049:0/64] 2024-01-15 04:13:59,601 (trainer:753) INFO: 2epoch:train:8901-9000batch: iter_time=7.762e-05, forward_time=0.142, loss_ctc=231.540, loss_interctc_layer6=232.327, loss_interctc_layer12=229.500, loss_interctc_layer15=229.440, loss_interctc_layer21=231.555, loss=230.872, backward_time=0.301, grad_norm=143.602, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.082, optim0_lr0=3.992e-05, train_time=1.264 [gpua049:0/64] 2024-01-15 04:16:03,157 (trainer:753) INFO: 2epoch:train:9001-9100batch: iter_time=7.656e-05, forward_time=0.142, loss_ctc=220.393, loss_interctc_layer6=216.822, loss_interctc_layer12=213.792, loss_interctc_layer15=213.847, loss_interctc_layer21=220.147, loss=217.000, backward_time=0.296, grad_norm=139.182, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.083, optim0_lr0=4.009e-05, train_time=1.235 [gpua049:0/64] 2024-01-15 04:18:09,676 (trainer:753) INFO: 2epoch:train:9101-9200batch: iter_time=7.631e-05, forward_time=0.142, loss_ctc=224.053, loss_interctc_layer6=227.984, loss_interctc_layer12=225.329, loss_interctc_layer15=225.661, loss_interctc_layer21=223.404, loss=225.286, backward_time=0.302, grad_norm=130.634, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.082, optim0_lr0=4.025e-05, train_time=1.265 [gpua049:0/64] 2024-01-15 04:20:25,021 (trainer:753) INFO: 2epoch:train:9201-9300batch: iter_time=7.866e-05, forward_time=0.142, loss_ctc=227.227, loss_interctc_layer6=223.491, loss_interctc_layer12=221.366, loss_interctc_layer15=221.658, loss_interctc_layer21=227.084, loss=224.165, backward_time=0.316, grad_norm=118.025, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.083, optim0_lr0=4.042e-05, train_time=1.353 [gpua049:0/64] 2024-01-15 04:22:57,328 (trainer:753) INFO: 2epoch:train:9301-9400batch: iter_time=7.681e-05, forward_time=0.143, loss_ctc=208.252, loss_interctc_layer6=211.337, loss_interctc_layer12=209.086, loss_interctc_layer15=208.629, loss_interctc_layer21=208.394, loss=209.140, backward_time=0.338, grad_norm=113.793, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.083, optim0_lr0=4.059e-05, train_time=1.523 [gpua049:0/64] 2024-01-15 04:25:10,717 (trainer:753) INFO: 2epoch:train:9401-9500batch: iter_time=7.827e-05, forward_time=0.142, loss_ctc=236.020, loss_interctc_layer6=228.539, loss_interctc_layer12=226.475, loss_interctc_layer15=225.763, loss_interctc_layer21=235.317, loss=230.423, backward_time=0.304, grad_norm=125.271, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.082, optim0_lr0=4.075e-05, train_time=1.334 [gpua049:0/64] 2024-01-15 04:27:14,478 (trainer:753) INFO: 2epoch:train:9501-9600batch: iter_time=7.506e-05, forward_time=0.142, loss_ctc=231.499, loss_interctc_layer6=227.838, loss_interctc_layer12=224.946, loss_interctc_layer15=225.064, loss_interctc_layer21=231.183, loss=228.106, backward_time=0.299, grad_norm=127.098, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.083, optim0_lr0=4.092e-05, train_time=1.237 [gpua049:0/64] 2024-01-15 04:29:48,720 (trainer:753) INFO: 2epoch:train:9601-9700batch: iter_time=7.395e-05, forward_time=0.142, loss_ctc=225.471, loss_interctc_layer6=219.698, loss_interctc_layer12=217.861, loss_interctc_layer15=217.580, loss_interctc_layer21=225.389, loss=221.200, backward_time=0.371, grad_norm=145.737, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.083, optim0_lr0=4.109e-05, train_time=1.542 [gpua049:0/64] 2024-01-15 04:32:22,683 (trainer:753) INFO: 2epoch:train:9701-9800batch: iter_time=7.811e-05, forward_time=0.144, loss_ctc=243.050, loss_interctc_layer6=242.721, loss_interctc_layer12=240.309, loss_interctc_layer15=240.228, loss_interctc_layer21=242.803, loss=241.822, backward_time=0.342, grad_norm=137.588, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.083, optim0_lr0=4.125e-05, train_time=1.539 [gpua049:0/64] 2024-01-15 04:34:52,425 (trainer:753) INFO: 2epoch:train:9801-9900batch: iter_time=7.957e-05, forward_time=0.146, loss_ctc=233.340, loss_interctc_layer6=229.937, loss_interctc_layer12=227.787, loss_interctc_layer15=227.557, loss_interctc_layer21=232.954, loss=230.315, backward_time=0.355, grad_norm=118.670, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.083, optim0_lr0=4.142e-05, train_time=1.497 [gpua049:0/64] 2024-01-15 04:37:12,293 (trainer:753) INFO: 2epoch:train:9901-10000batch: iter_time=8.106e-05, forward_time=0.142, loss_ctc=236.607, loss_interctc_layer6=234.373, loss_interctc_layer12=232.098, loss_interctc_layer15=231.708, loss_interctc_layer21=236.273, loss=234.212, backward_time=0.320, grad_norm=136.508, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.083, optim0_lr0=4.159e-05, train_time=1.398 [gpua049:0/64] 2024-01-15 04:37:23,575 (multiple_iter_factory:32) INFO: Building 8th iter-factory... [gpua049:0/64] 2024-01-15 04:37:43,435 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') [gpua049:0/64] 2024-01-15 04:37:47,128 (abs_task:1660) INFO: [train] dataset: ESPnetDataset( speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} preprocess: ) [gpua049:0/64] 2024-01-15 04:37:47,128 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, [gpua049:0/64] 2024-01-15 04:37:47,131 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 [gpua049:0/64] 2024-01-15 04:44:42,636 (trainer:753) INFO: 2epoch:train:10001-10100batch: iter_time=1.878, forward_time=0.142, loss_ctc=213.910, loss_interctc_layer6=206.152, loss_interctc_layer12=204.045, loss_interctc_layer15=204.394, loss_interctc_layer21=213.245, loss=208.349, backward_time=0.330, grad_norm=126.851, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.083, optim0_lr0=4.175e-05, train_time=4.503 [gpua049:0/64] 2024-01-15 04:46:49,235 (trainer:753) INFO: 2epoch:train:10101-10200batch: iter_time=8.001e-05, forward_time=0.143, loss_ctc=230.712, loss_interctc_layer6=221.506, loss_interctc_layer12=218.341, loss_interctc_layer15=218.291, loss_interctc_layer21=230.889, loss=223.948, backward_time=0.300, grad_norm=117.335, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.083, optim0_lr0=4.192e-05, train_time=1.266 [gpua049:0/64] 2024-01-15 04:49:18,496 (trainer:753) INFO: 2epoch:train:10201-10300batch: iter_time=8.064e-05, forward_time=0.142, loss_ctc=258.559, loss_interctc_layer6=247.226, loss_interctc_layer12=243.921, loss_interctc_layer15=243.847, loss_interctc_layer21=257.728, loss=250.256, backward_time=0.314, grad_norm=140.460, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.083, optim0_lr0=4.209e-05, train_time=1.492 [gpua049:0/64] 2024-01-15 04:51:47,936 (trainer:753) INFO: 2epoch:train:10301-10400batch: iter_time=8.070e-05, forward_time=0.156, loss_ctc=237.594, loss_interctc_layer6=226.770, loss_interctc_layer12=223.821, loss_interctc_layer15=224.061, loss_interctc_layer21=236.996, loss=229.848, backward_time=0.348, grad_norm=128.745, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.083, optim0_lr0=4.225e-05, train_time=1.494 [gpua049:0/64] 2024-01-15 04:54:25,883 (trainer:753) INFO: 2epoch:train:10401-10500batch: iter_time=7.948e-05, forward_time=0.176, loss_ctc=233.572, loss_interctc_layer6=219.194, loss_interctc_layer12=217.192, loss_interctc_layer15=216.585, loss_interctc_layer21=233.481, loss=224.005, backward_time=0.340, grad_norm=136.060, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.087, optim0_lr0=4.242e-05, train_time=1.578 [gpua049:0/64] 2024-01-15 04:56:40,382 (trainer:753) INFO: 2epoch:train:10501-10600batch: iter_time=8.012e-05, forward_time=0.142, loss_ctc=221.487, loss_interctc_layer6=212.438, loss_interctc_layer12=209.621, loss_interctc_layer15=209.442, loss_interctc_layer21=221.051, loss=214.808, backward_time=0.304, grad_norm=129.084, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.083, optim0_lr0=4.259e-05, train_time=1.346 [gpua049:0/64] 2024-01-15 04:59:15,715 (trainer:753) INFO: 2epoch:train:10601-10700batch: iter_time=7.845e-05, forward_time=0.142, loss_ctc=223.320, loss_interctc_layer6=211.043, loss_interctc_layer12=208.379, loss_interctc_layer15=208.535, loss_interctc_layer21=222.089, loss=214.673, backward_time=0.353, grad_norm=122.611, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.082, optim0_lr0=4.275e-05, train_time=1.553 [gpua049:0/64] 2024-01-15 05:01:36,343 (trainer:753) INFO: 2epoch:train:10701-10800batch: iter_time=7.869e-05, forward_time=0.142, loss_ctc=255.550, loss_interctc_layer6=237.916, loss_interctc_layer12=235.484, loss_interctc_layer15=234.930, loss_interctc_layer21=254.776, loss=243.731, backward_time=0.323, grad_norm=145.269, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.082, optim0_lr0=4.292e-05, train_time=1.406 [gpua049:0/64] 2024-01-15 05:04:02,660 (trainer:753) INFO: 2epoch:train:10801-10900batch: iter_time=7.879e-05, forward_time=0.145, loss_ctc=227.856, loss_interctc_layer6=216.592, loss_interctc_layer12=214.158, loss_interctc_layer15=214.503, loss_interctc_layer21=227.496, loss=220.121, backward_time=0.353, grad_norm=125.094, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.082, optim0_lr0=4.309e-05, train_time=1.463 [gpua049:0/64] 2024-01-15 05:06:22,148 (trainer:753) INFO: 2epoch:train:10901-11000batch: iter_time=7.815e-05, forward_time=0.143, loss_ctc=240.731, loss_interctc_layer6=227.493, loss_interctc_layer12=225.303, loss_interctc_layer15=225.580, loss_interctc_layer21=240.277, loss=231.877, backward_time=0.332, grad_norm=124.433, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.083, optim0_lr0=4.325e-05, train_time=1.395 [gpua049:0/64] 2024-01-15 05:08:32,260 (trainer:753) INFO: 2epoch:train:11001-11100batch: iter_time=7.786e-05, forward_time=0.143, loss_ctc=242.339, loss_interctc_layer6=230.454, loss_interctc_layer12=228.147, loss_interctc_layer15=227.932, loss_interctc_layer21=241.927, loss=234.160, backward_time=0.301, grad_norm=122.561, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.083, optim0_lr0=4.342e-05, train_time=1.301 [gpua049:0/64] 2024-01-15 05:11:08,508 (trainer:753) INFO: 2epoch:train:11101-11200batch: iter_time=7.899e-05, forward_time=0.144, loss_ctc=260.563, loss_interctc_layer6=250.319, loss_interctc_layer12=247.527, loss_interctc_layer15=247.302, loss_interctc_layer21=260.073, loss=253.157, backward_time=0.337, grad_norm=138.390, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.082, optim0_lr0=4.359e-05, train_time=1.562 [gpua049:0/64] 2024-01-15 05:12:26,374 (multiple_iter_factory:32) INFO: Building 9th iter-factory... [gpua049:0/64] 2024-01-15 05:12:46,623 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') [gpua049:0/64] 2024-01-15 05:12:50,325 (abs_task:1660) INFO: [train] dataset: ESPnetDataset( speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} preprocess: ) [gpua049:0/64] 2024-01-15 05:12:50,325 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, [gpua049:0/64] 2024-01-15 05:12:50,328 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 [gpua049:0/64] 2024-01-15 05:19:10,342 (trainer:753) INFO: 2epoch:train:11201-11300batch: iter_time=1.368, forward_time=0.145, loss_ctc=231.575, loss_interctc_layer6=224.807, loss_interctc_layer12=222.539, loss_interctc_layer15=222.945, loss_interctc_layer21=231.092, loss=226.592, backward_time=0.372, grad_norm=141.420, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.083, optim0_lr0=4.375e-05, train_time=4.818 [gpua049:0/64] 2024-01-15 05:21:27,512 (trainer:753) INFO: 2epoch:train:11301-11400batch: iter_time=7.422e-05, forward_time=0.145, loss_ctc=219.759, loss_interctc_layer6=211.208, loss_interctc_layer12=208.801, loss_interctc_layer15=208.772, loss_interctc_layer21=219.251, loss=213.558, backward_time=0.317, grad_norm=109.497, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.082, optim0_lr0=4.392e-05, train_time=1.371 [gpua049:0/64] 2024-01-15 05:24:03,103 (trainer:753) INFO: 2epoch:train:11401-11500batch: iter_time=7.737e-05, forward_time=0.143, loss_ctc=235.978, loss_interctc_layer6=231.213, loss_interctc_layer12=229.186, loss_interctc_layer15=228.869, loss_interctc_layer21=235.593, loss=232.168, backward_time=0.325, grad_norm=118.977, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.083, optim0_lr0=4.409e-05, train_time=1.556 [gpua049:0/64] 2024-01-15 05:26:13,889 (trainer:753) INFO: 2epoch:train:11501-11600batch: iter_time=7.980e-05, forward_time=0.142, loss_ctc=227.884, loss_interctc_layer6=214.025, loss_interctc_layer12=211.626, loss_interctc_layer15=211.699, loss_interctc_layer21=227.643, loss=218.575, backward_time=0.296, grad_norm=126.678, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.082, optim0_lr0=4.425e-05, train_time=1.308 [gpua049:0/64] 2024-01-15 05:28:43,366 (trainer:753) INFO: 2epoch:train:11601-11700batch: iter_time=7.920e-05, forward_time=0.143, loss_ctc=233.216, loss_interctc_layer6=225.065, loss_interctc_layer12=222.463, loss_interctc_layer15=222.571, loss_interctc_layer21=233.255, loss=227.314, backward_time=0.335, grad_norm=129.602, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.082, optim0_lr0=4.442e-05, train_time=1.495 [gpua049:0/64] 2024-01-15 05:31:14,163 (trainer:753) INFO: 2epoch:train:11701-11800batch: iter_time=7.830e-05, forward_time=0.142, loss_ctc=236.057, loss_interctc_layer6=220.912, loss_interctc_layer12=218.373, loss_interctc_layer15=218.324, loss_interctc_layer21=235.498, loss=225.833, backward_time=0.340, grad_norm=124.159, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.082, optim0_lr0=4.459e-05, train_time=1.508 [gpua049:0/64] 2024-01-15 05:33:38,527 (trainer:753) INFO: 2epoch:train:11801-11900batch: iter_time=7.904e-05, forward_time=0.142, loss_ctc=217.104, loss_interctc_layer6=208.847, loss_interctc_layer12=206.062, loss_interctc_layer15=205.922, loss_interctc_layer21=216.810, loss=210.949, backward_time=0.317, grad_norm=114.455, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.082, optim0_lr0=4.475e-05, train_time=1.443 [gpua049:0/64] 2024-01-15 05:35:46,430 (trainer:753) INFO: 2epoch:train:11901-12000batch: iter_time=7.865e-05, forward_time=0.143, loss_ctc=244.815, loss_interctc_layer6=225.498, loss_interctc_layer12=223.459, loss_interctc_layer15=222.791, loss_interctc_layer21=244.151, loss=232.143, backward_time=0.303, grad_norm=114.406, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.082, optim0_lr0=4.492e-05, train_time=1.279 [gpua049:0/64] 2024-01-15 05:37:55,795 (trainer:753) INFO: 2epoch:train:12001-12100batch: iter_time=7.805e-05, forward_time=0.142, loss_ctc=234.381, loss_interctc_layer6=224.553, loss_interctc_layer12=221.739, loss_interctc_layer15=221.963, loss_interctc_layer21=233.751, loss=227.277, backward_time=0.309, grad_norm=120.189, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.082, optim0_lr0=4.509e-05, train_time=1.293 [gpua049:0/64] 2024-01-15 05:40:15,567 (trainer:753) INFO: 2epoch:train:12101-12200batch: iter_time=7.742e-05, forward_time=0.142, loss_ctc=232.330, loss_interctc_layer6=218.091, loss_interctc_layer12=215.218, loss_interctc_layer15=215.487, loss_interctc_layer21=231.782, loss=222.582, backward_time=0.306, grad_norm=120.736, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.082, optim0_lr0=4.525e-05, train_time=1.397 [gpua049:0/64] 2024-01-15 05:42:56,750 (trainer:753) INFO: 2epoch:train:12201-12300batch: iter_time=8.020e-05, forward_time=0.165, loss_ctc=249.202, loss_interctc_layer6=239.811, loss_interctc_layer12=236.835, loss_interctc_layer15=236.580, loss_interctc_layer21=248.804, loss=242.246, backward_time=0.337, grad_norm=122.132, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.087, optim0_lr0=4.542e-05, train_time=1.611 [gpua049:0/64] 2024-01-15 05:45:31,313 (trainer:753) INFO: 2epoch:train:12301-12400batch: iter_time=7.814e-05, forward_time=0.147, loss_ctc=239.470, loss_interctc_layer6=227.637, loss_interctc_layer12=224.918, loss_interctc_layer15=224.812, loss_interctc_layer21=239.278, loss=231.223, backward_time=0.320, grad_norm=111.802, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.083, optim0_lr0=4.559e-05, train_time=1.544 [gpua049:0/64] 2024-01-15 05:48:00,375 (trainer:753) INFO: 2epoch:train:12401-12500batch: iter_time=7.718e-05, forward_time=0.142, loss_ctc=236.422, loss_interctc_layer6=231.119, loss_interctc_layer12=228.555, loss_interctc_layer15=228.457, loss_interctc_layer21=235.848, loss=232.080, backward_time=0.340, grad_norm=142.927, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.083, optim0_lr0=4.575e-05, train_time=1.492 [gpua049:0/64] 2024-01-15 05:48:20,406 (multiple_iter_factory:32) INFO: Building 10th iter-factory... [gpua049:0/64] 2024-01-15 05:48:40,635 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') [gpua049:0/64] 2024-01-15 05:48:44,326 (abs_task:1660) INFO: [train] dataset: ESPnetDataset( speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} preprocess: ) [gpua049:0/64] 2024-01-15 05:48:44,326 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, [gpua049:0/64] 2024-01-15 05:48:44,329 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 [gpua049:0/64] 2024-01-15 05:55:07,825 (trainer:753) INFO: 2epoch:train:12501-12600batch: iter_time=2.935, forward_time=0.145, loss_ctc=207.955, loss_interctc_layer6=201.786, loss_interctc_layer12=199.098, loss_interctc_layer15=198.956, loss_interctc_layer21=207.423, loss=203.044, backward_time=0.303, grad_norm=127.780, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.084, optim0_lr0=4.592e-05, train_time=4.274 [gpua049:0/64] 2024-01-15 05:57:32,517 (trainer:753) INFO: 2epoch:train:12601-12700batch: iter_time=7.584e-05, forward_time=0.144, loss_ctc=226.775, loss_interctc_layer6=218.318, loss_interctc_layer12=216.044, loss_interctc_layer15=215.630, loss_interctc_layer21=226.271, loss=220.608, backward_time=0.319, grad_norm=118.068, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.084, optim0_lr0=4.609e-05, train_time=1.447 [gpua049:0/64] 2024-01-15 05:59:58,589 (trainer:753) INFO: 2epoch:train:12701-12800batch: iter_time=8.225e-05, forward_time=0.144, loss_ctc=252.249, loss_interctc_layer6=241.917, loss_interctc_layer12=238.413, loss_interctc_layer15=238.298, loss_interctc_layer21=251.621, loss=244.500, backward_time=0.318, grad_norm=134.495, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.084, optim0_lr0=4.625e-05, train_time=1.461 [gpua049:0/64] 2024-01-15 06:02:04,320 (trainer:753) INFO: 2epoch:train:12801-12900batch: iter_time=7.850e-05, forward_time=0.143, loss_ctc=233.081, loss_interctc_layer6=223.226, loss_interctc_layer12=220.238, loss_interctc_layer15=220.118, loss_interctc_layer21=232.531, loss=225.839, backward_time=0.302, grad_norm=128.898, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.084, optim0_lr0=4.642e-05, train_time=1.257 [gpua049:0/64] 2024-01-15 06:04:14,295 (trainer:753) INFO: 2epoch:train:12901-13000batch: iter_time=7.754e-05, forward_time=0.143, loss_ctc=228.874, loss_interctc_layer6=215.404, loss_interctc_layer12=213.286, loss_interctc_layer15=213.470, loss_interctc_layer21=228.549, loss=219.917, backward_time=0.302, grad_norm=114.018, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.084, optim0_lr0=4.659e-05, train_time=1.300 [gpua049:0/64] 2024-01-15 06:06:33,085 (trainer:753) INFO: 2epoch:train:13001-13100batch: iter_time=7.845e-05, forward_time=0.143, loss_ctc=218.839, loss_interctc_layer6=209.774, loss_interctc_layer12=206.460, loss_interctc_layer15=206.328, loss_interctc_layer21=218.090, loss=211.898, backward_time=0.322, grad_norm=119.274, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.084, optim0_lr0=4.675e-05, train_time=1.388 [gpua049:0/64] 2024-01-15 06:09:07,200 (trainer:753) INFO: 2epoch:train:13101-13200batch: iter_time=7.973e-05, forward_time=0.183, loss_ctc=216.718, loss_interctc_layer6=207.119, loss_interctc_layer12=203.972, loss_interctc_layer15=203.784, loss_interctc_layer21=216.243, loss=209.567, backward_time=0.353, grad_norm=123.589, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.090, optim0_lr0=4.692e-05, train_time=1.541 [gpua049:0/64] 2024-01-15 06:11:36,053 (trainer:753) INFO: 2epoch:train:13201-13300batch: iter_time=8.500e-05, forward_time=0.167, loss_ctc=254.068, loss_interctc_layer6=234.374, loss_interctc_layer12=230.182, loss_interctc_layer15=230.947, loss_interctc_layer21=253.195, loss=240.553, backward_time=0.349, grad_norm=133.054, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.083, optim0_lr0=4.709e-05, train_time=1.488 [gpua049:0/64] 2024-01-15 06:13:57,743 (trainer:753) INFO: 2epoch:train:13301-13400batch: iter_time=8.689e-05, forward_time=0.161, loss_ctc=224.108, loss_interctc_layer6=212.261, loss_interctc_layer12=209.345, loss_interctc_layer15=208.840, loss_interctc_layer21=223.477, loss=215.606, backward_time=0.311, grad_norm=106.833, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.082, optim0_lr0=4.725e-05, train_time=1.416 [gpua049:0/64] 2024-01-15 06:16:24,255 (trainer:753) INFO: 2epoch:train:13401-13500batch: iter_time=8.237e-05, forward_time=0.144, loss_ctc=234.646, loss_interctc_layer6=223.611, loss_interctc_layer12=220.015, loss_interctc_layer15=219.840, loss_interctc_layer21=234.092, loss=226.441, backward_time=0.356, grad_norm=116.238, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.082, optim0_lr0=4.742e-05, train_time=1.466 [gpua049:0/64] 2024-01-15 06:18:41,170 (trainer:753) INFO: 2epoch:train:13501-13600batch: iter_time=8.089e-05, forward_time=0.142, loss_ctc=237.577, loss_interctc_layer6=225.657, loss_interctc_layer12=222.301, loss_interctc_layer15=222.209, loss_interctc_layer21=236.859, loss=228.920, backward_time=0.308, grad_norm=109.217, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.082, optim0_lr0=4.759e-05, train_time=1.369 [gpua049:0/64] 2024-01-15 06:21:14,002 (trainer:753) INFO: 2epoch:train:13601-13700batch: iter_time=8.447e-05, forward_time=0.143, loss_ctc=254.434, loss_interctc_layer6=243.216, loss_interctc_layer12=239.834, loss_interctc_layer15=239.396, loss_interctc_layer21=253.736, loss=246.123, backward_time=0.358, grad_norm=119.227, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.081, optim0_lr0=4.775e-05, train_time=1.528 [gpua049:0/64] 2024-01-15 06:22:40,885 (multiple_iter_factory:32) INFO: Building 11th iter-factory... [gpua049:0/64] 2024-01-15 06:23:01,328 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') [gpua049:0/64] 2024-01-15 06:23:04,973 (abs_task:1660) INFO: [train] dataset: ESPnetDataset( speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} preprocess: ) [gpua049:0/64] 2024-01-15 06:23:04,973 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, [gpua049:0/64] 2024-01-15 06:23:04,976 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 [gpua049:0/64] 2024-01-15 06:29:54,835 (trainer:753) INFO: 2epoch:train:13701-13800batch: iter_time=2.121, forward_time=0.172, loss_ctc=223.571, loss_interctc_layer6=219.705, loss_interctc_layer12=216.150, loss_interctc_layer15=216.786, loss_interctc_layer21=222.789, loss=219.800, backward_time=0.305, grad_norm=131.252, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.084, optim0_lr0=4.792e-05, train_time=5.208 [gpua049:0/64] 2024-01-15 06:32:17,076 (trainer:753) INFO: 2epoch:train:13801-13900batch: iter_time=7.293e-05, forward_time=0.142, loss_ctc=208.146, loss_interctc_layer6=207.165, loss_interctc_layer12=203.464, loss_interctc_layer15=202.918, loss_interctc_layer21=207.559, loss=205.851, backward_time=0.317, grad_norm=104.896, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.083, optim0_lr0=4.809e-05, train_time=1.422 [gpua049:0/64] 2024-01-15 06:34:42,875 (trainer:753) INFO: 2epoch:train:13901-14000batch: iter_time=7.465e-05, forward_time=0.145, loss_ctc=224.206, loss_interctc_layer6=224.134, loss_interctc_layer12=220.624, loss_interctc_layer15=220.550, loss_interctc_layer21=223.558, loss=222.614, backward_time=0.333, grad_norm=112.786, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.082, optim0_lr0=4.825e-05, train_time=1.458 [gpua049:0/64] 2024-01-15 06:37:22,253 (trainer:753) INFO: 2epoch:train:14001-14100batch: iter_time=7.880e-05, forward_time=0.142, loss_ctc=214.965, loss_interctc_layer6=209.609, loss_interctc_layer12=206.588, loss_interctc_layer15=206.334, loss_interctc_layer21=214.708, loss=210.441, backward_time=0.359, grad_norm=114.371, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.082, optim0_lr0=4.842e-05, train_time=1.594 [gpua049:0/64] 2024-01-15 06:39:55,867 (trainer:753) INFO: 2epoch:train:14101-14200batch: iter_time=7.756e-05, forward_time=0.142, loss_ctc=220.155, loss_interctc_layer6=221.319, loss_interctc_layer12=217.668, loss_interctc_layer15=217.264, loss_interctc_layer21=217.943, loss=218.870, backward_time=0.339, grad_norm=125.038, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.082, optim0_lr0=4.859e-05, train_time=1.536 [gpua049:0/64] 2024-01-15 06:42:16,798 (trainer:753) INFO: 2epoch:train:14201-14300batch: iter_time=8.106e-05, forward_time=0.141, loss_ctc=221.805, loss_interctc_layer6=215.866, loss_interctc_layer12=212.054, loss_interctc_layer15=211.521, loss_interctc_layer21=221.376, loss=216.524, backward_time=0.336, grad_norm=110.765, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.082, optim0_lr0=4.875e-05, train_time=1.409 [gpua049:0/64] 2024-01-15 06:44:47,831 (trainer:753) INFO: 2epoch:train:14301-14400batch: iter_time=7.844e-05, forward_time=0.142, loss_ctc=202.273, loss_interctc_layer6=204.124, loss_interctc_layer12=200.523, loss_interctc_layer15=199.913, loss_interctc_layer21=201.520, loss=201.671, backward_time=0.321, grad_norm=106.851, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.083, optim0_lr0=4.892e-05, train_time=1.511 [gpua049:0/64] 2024-01-15 06:47:04,611 (trainer:753) INFO: 2epoch:train:14401-14500batch: iter_time=8.070e-05, forward_time=0.146, loss_ctc=229.017, loss_interctc_layer6=218.218, loss_interctc_layer12=214.662, loss_interctc_layer15=214.857, loss_interctc_layer21=227.897, loss=220.930, backward_time=0.308, grad_norm=108.953, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.082, optim0_lr0=4.909e-05, train_time=1.368 [gpua049:0/64] 2024-01-15 06:49:38,845 (trainer:753) INFO: 2epoch:train:14501-14600batch: iter_time=8.180e-05, forward_time=0.142, loss_ctc=223.938, loss_interctc_layer6=218.724, loss_interctc_layer12=214.343, loss_interctc_layer15=214.507, loss_interctc_layer21=223.095, loss=218.922, backward_time=0.349, grad_norm=115.768, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.082, optim0_lr0=4.925e-05, train_time=1.542 [gpua049:0/64] 2024-01-15 06:52:19,677 (trainer:753) INFO: 2epoch:train:14601-14700batch: iter_time=7.872e-05, forward_time=0.142, loss_ctc=216.305, loss_interctc_layer6=209.613, loss_interctc_layer12=205.200, loss_interctc_layer15=204.806, loss_interctc_layer21=215.519, loss=210.289, backward_time=0.346, grad_norm=105.291, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.082, optim0_lr0=4.942e-05, train_time=1.608 [gpua049:0/64] 2024-01-15 06:54:36,574 (trainer:753) INFO: 2epoch:train:14701-14800batch: iter_time=7.680e-05, forward_time=0.143, loss_ctc=229.174, loss_interctc_layer6=228.151, loss_interctc_layer12=222.939, loss_interctc_layer15=222.571, loss_interctc_layer21=228.149, loss=226.197, backward_time=0.314, grad_norm=112.550, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.082, optim0_lr0=4.959e-05, train_time=1.369 [gpua049:0/64] 2024-01-15 06:57:19,492 (trainer:753) INFO: 2epoch:train:14801-14900batch: iter_time=7.853e-05, forward_time=0.247, loss_ctc=223.668, loss_interctc_layer6=219.207, loss_interctc_layer12=214.630, loss_interctc_layer15=214.115, loss_interctc_layer21=222.948, loss=218.914, backward_time=0.360, grad_norm=104.148, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.087, optim0_lr0=4.975e-05, train_time=1.628 [gpua049:0/64] 2024-01-15 06:59:44,756 (trainer:753) INFO: 2epoch:train:14901-15000batch: iter_time=7.409e-05, forward_time=0.143, loss_ctc=222.791, loss_interctc_layer6=221.872, loss_interctc_layer12=215.855, loss_interctc_layer15=215.363, loss_interctc_layer21=221.866, loss=219.549, backward_time=0.342, grad_norm=115.437, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.083, optim0_lr0=4.992e-05, train_time=1.453 [gpua049:0/64] 2024-01-15 07:28:00,665 (trainer:352) INFO: 2epoch results: [train] iter_time=0.154, forward_time=0.146, loss_ctc=234.190, loss_interctc_layer6=227.616, loss_interctc_layer12=224.901, loss_interctc_layer15=224.819, loss_interctc_layer21=233.832, loss=229.071, backward_time=0.327, grad_norm=130.456, clip=100.000, loss_scale=2.847e+08, optim_step_time=0.083, optim0_lr0=3.750e-05, train_time=1.706, time=7 hours, 7 minutes and 2.9 seconds, total_count=30000, gpu_max_cached_mem_GB=34.508, [valid] loss_ctc=170.194, cer_ctc=0.749, loss_interctc_layer6=161.818, cer_interctc_layer6=0.747, loss_interctc_layer12=157.887, cer_interctc_layer12=0.689, loss_interctc_layer15=158.066, cer_interctc_layer15=0.695, loss_interctc_layer21=167.963, cer_interctc_layer21=0.746, loss=163.185, time=27 minutes and 51.64 seconds, total_count=9342, gpu_max_cached_mem_GB=34.508 [gpua049:0/64] 2024-01-15 07:28:20,077 (trainer:407) INFO: The best model has been updated: valid.cer_ctc, valid.loss_ctc, valid.total_count [gpua049:0/64] 2024-01-15 07:28:20,159 (trainer:286) INFO: 3/45epoch started. Estimated time to finish: 2 weeks, 1 day and 2 hours [gpua049:0/64] 2024-01-15 07:28:21,000 (multiple_iter_factory:32) INFO: Building 0th iter-factory... [gpua049:0/64] 2024-01-15 07:28:41,154 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') [gpua049:0/64] 2024-01-15 07:28:44,843 (abs_task:1660) INFO: [train] dataset: ESPnetDataset( speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} preprocess: ) [gpua049:0/64] 2024-01-15 07:28:44,843 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, [gpua049:0/64] 2024-01-15 07:28:44,846 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 [gpua049:0/64] 2024-01-15 07:35:01,711 (trainer:753) INFO: 3epoch:train:1-100batch: iter_time=2.738, forward_time=0.168, loss_ctc=257.206, loss_interctc_layer6=233.101, loss_interctc_layer12=227.764, loss_interctc_layer15=227.192, loss_interctc_layer21=255.926, loss=240.238, backward_time=0.299, grad_norm=137.960, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.084, optim0_lr0=5.026e-05, train_time=4.007 [gpua049:0/64] 2024-01-15 07:37:05,216 (trainer:753) INFO: 3epoch:train:101-200batch: iter_time=7.838e-05, forward_time=0.143, loss_ctc=253.836, loss_interctc_layer6=244.003, loss_interctc_layer12=238.104, loss_interctc_layer15=237.546, loss_interctc_layer21=253.165, loss=245.331, backward_time=0.297, grad_norm=134.750, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.083, optim0_lr0=5.076e-05, train_time=1.235 [gpua049:0/64] 2024-01-15 07:39:15,390 (trainer:753) INFO: 3epoch:train:201-300batch: iter_time=7.703e-05, forward_time=0.142, loss_ctc=215.238, loss_interctc_layer6=205.653, loss_interctc_layer12=200.742, loss_interctc_layer15=200.209, loss_interctc_layer21=213.950, loss=207.158, backward_time=0.305, grad_norm=124.243, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.083, optim0_lr0=5.126e-05, train_time=1.302 [gpua049:0/64] 2024-01-15 07:41:30,895 (trainer:753) INFO: 3epoch:train:301-400batch: iter_time=8.027e-05, forward_time=0.142, loss_ctc=227.749, loss_interctc_layer6=214.573, loss_interctc_layer12=209.286, loss_interctc_layer15=208.589, loss_interctc_layer21=226.829, loss=217.405, backward_time=0.321, grad_norm=128.186, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.083, optim0_lr0=5.176e-05, train_time=1.355 [gpua049:0/64] 2024-01-15 07:43:50,554 (trainer:753) INFO: 3epoch:train:401-500batch: iter_time=7.916e-05, forward_time=0.144, loss_ctc=233.458, loss_interctc_layer6=219.531, loss_interctc_layer12=214.325, loss_interctc_layer15=214.053, loss_interctc_layer21=232.503, loss=222.774, backward_time=0.327, grad_norm=126.074, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.082, optim0_lr0=5.226e-05, train_time=1.394 [gpua049:0/64] 2024-01-15 07:45:59,777 (trainer:753) INFO: 3epoch:train:501-600batch: iter_time=7.672e-05, forward_time=0.142, loss_ctc=213.857, loss_interctc_layer6=202.213, loss_interctc_layer12=197.034, loss_interctc_layer15=196.421, loss_interctc_layer21=213.159, loss=204.537, backward_time=0.311, grad_norm=128.676, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.082, optim0_lr0=5.276e-05, train_time=1.295 [gpua049:0/64] 2024-01-15 07:48:16,349 (trainer:753) INFO: 3epoch:train:601-700batch: iter_time=2.264e-04, forward_time=0.142, loss_ctc=243.948, loss_interctc_layer6=216.554, loss_interctc_layer12=211.284, loss_interctc_layer15=210.884, loss_interctc_layer21=243.424, loss=225.219, backward_time=0.320, grad_norm=126.947, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.082, optim0_lr0=5.326e-05, train_time=1.365 [gpua049:0/64] 2024-01-15 07:50:54,702 (trainer:753) INFO: 3epoch:train:701-800batch: iter_time=7.940e-05, forward_time=0.174, loss_ctc=255.249, loss_interctc_layer6=231.724, loss_interctc_layer12=225.634, loss_interctc_layer15=224.904, loss_interctc_layer21=253.901, loss=238.282, backward_time=0.361, grad_norm=124.026, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.085, optim0_lr0=5.376e-05, train_time=1.583 [gpua049:0/64] 2024-01-15 07:53:21,216 (trainer:753) INFO: 3epoch:train:801-900batch: iter_time=7.707e-05, forward_time=0.142, loss_ctc=216.003, loss_interctc_layer6=204.916, loss_interctc_layer12=198.739, loss_interctc_layer15=198.438, loss_interctc_layer21=214.887, loss=206.597, backward_time=0.335, grad_norm=107.450, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.081, optim0_lr0=5.426e-05, train_time=1.465 [gpua049:0/64] 2024-01-15 07:55:56,632 (trainer:753) INFO: 3epoch:train:901-1000batch: iter_time=7.667e-05, forward_time=0.142, loss_ctc=244.167, loss_interctc_layer6=232.815, loss_interctc_layer12=225.424, loss_interctc_layer15=224.806, loss_interctc_layer21=242.762, loss=233.995, backward_time=0.347, grad_norm=138.151, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.081, optim0_lr0=5.476e-05, train_time=1.554 [gpua049:0/64] 2024-01-15 07:58:28,588 (trainer:753) INFO: 3epoch:train:1001-1100batch: iter_time=7.773e-05, forward_time=0.148, loss_ctc=231.484, loss_interctc_layer6=221.637, loss_interctc_layer12=215.490, loss_interctc_layer15=214.657, loss_interctc_layer21=230.383, loss=222.730, backward_time=0.327, grad_norm=122.252, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.081, optim0_lr0=5.526e-05, train_time=1.518 [gpua049:0/64] 2024-01-15 08:01:22,154 (trainer:753) INFO: 3epoch:train:1101-1200batch: iter_time=8.512e-05, forward_time=0.179, loss_ctc=217.716, loss_interctc_layer6=207.262, loss_interctc_layer12=201.693, loss_interctc_layer15=200.936, loss_interctc_layer21=216.727, loss=208.867, backward_time=0.378, grad_norm=118.313, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.087, optim0_lr0=5.576e-05, train_time=1.737 [gpua049:0/64] 2024-01-15 08:02:52,892 (multiple_iter_factory:32) INFO: Building 1th iter-factory... [gpua049:0/64] 2024-01-15 08:03:13,016 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') [gpua049:0/64] 2024-01-15 08:03:16,666 (abs_task:1660) INFO: [train] dataset: ESPnetDataset( speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} preprocess: ) [gpua049:0/64] 2024-01-15 08:03:16,666 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, [gpua049:0/64] 2024-01-15 08:03:16,669 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 [gpua049:0/64] 2024-01-15 08:09:16,407 (trainer:753) INFO: 3epoch:train:1201-1300batch: iter_time=2.845, forward_time=0.169, loss_ctc=224.251, loss_interctc_layer6=210.125, loss_interctc_layer12=204.996, loss_interctc_layer15=203.832, loss_interctc_layer21=223.041, loss=213.249, backward_time=0.325, grad_norm=117.682, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.085, optim0_lr0=5.626e-05, train_time=4.742 [gpua049:0/64] 2024-01-15 08:11:20,458 (trainer:753) INFO: 3epoch:train:1301-1400batch: iter_time=7.524e-05, forward_time=0.144, loss_ctc=251.406, loss_interctc_layer6=238.986, loss_interctc_layer12=230.393, loss_interctc_layer15=229.609, loss_interctc_layer21=250.202, loss=240.119, backward_time=0.297, grad_norm=130.975, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.084, optim0_lr0=5.676e-05, train_time=1.240 [gpua049:0/64] 2024-01-15 08:13:24,659 (trainer:753) INFO: 3epoch:train:1401-1500batch: iter_time=7.810e-05, forward_time=0.143, loss_ctc=228.486, loss_interctc_layer6=224.232, loss_interctc_layer12=216.216, loss_interctc_layer15=216.168, loss_interctc_layer21=227.275, loss=222.475, backward_time=0.298, grad_norm=139.306, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.084, optim0_lr0=5.726e-05, train_time=1.242 [gpua049:0/64] 2024-01-15 08:15:52,746 (trainer:753) INFO: 3epoch:train:1501-1600batch: iter_time=7.893e-05, forward_time=0.154, loss_ctc=187.576, loss_interctc_layer6=187.678, loss_interctc_layer12=181.760, loss_interctc_layer15=181.005, loss_interctc_layer21=185.698, loss=184.743, backward_time=0.330, grad_norm=119.338, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.084, optim0_lr0=5.776e-05, train_time=1.481 [gpua049:0/64] 2024-01-15 08:18:06,452 (trainer:753) INFO: 3epoch:train:1601-1700batch: iter_time=7.726e-05, forward_time=0.142, loss_ctc=205.336, loss_interctc_layer6=199.307, loss_interctc_layer12=193.024, loss_interctc_layer15=192.236, loss_interctc_layer21=204.788, loss=198.938, backward_time=0.310, grad_norm=105.614, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.083, optim0_lr0=5.826e-05, train_time=1.337 [gpua049:0/64] 2024-01-15 08:20:19,493 (trainer:753) INFO: 3epoch:train:1701-1800batch: iter_time=7.713e-05, forward_time=0.148, loss_ctc=209.892, loss_interctc_layer6=205.323, loss_interctc_layer12=198.669, loss_interctc_layer15=197.929, loss_interctc_layer21=208.057, loss=203.974, backward_time=0.304, grad_norm=130.557, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.083, optim0_lr0=5.876e-05, train_time=1.330 [gpua049:0/64] 2024-01-15 08:22:36,339 (trainer:753) INFO: 3epoch:train:1801-1900batch: iter_time=7.408e-05, forward_time=0.143, loss_ctc=216.571, loss_interctc_layer6=206.812, loss_interctc_layer12=200.220, loss_interctc_layer15=199.525, loss_interctc_layer21=214.263, loss=207.478, backward_time=0.315, grad_norm=125.291, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.083, optim0_lr0=5.926e-05, train_time=1.368 [gpua049:0/64] 2024-01-15 08:25:04,477 (trainer:753) INFO: 3epoch:train:1901-2000batch: iter_time=7.836e-05, forward_time=0.142, loss_ctc=232.878, loss_interctc_layer6=215.465, loss_interctc_layer12=208.559, loss_interctc_layer15=207.626, loss_interctc_layer21=231.931, loss=219.292, backward_time=0.312, grad_norm=144.444, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.082, optim0_lr0=5.976e-05, train_time=1.481 [gpua049:0/64] 2024-01-15 08:27:22,977 (trainer:753) INFO: 3epoch:train:2001-2100batch: iter_time=7.834e-05, forward_time=0.152, loss_ctc=215.562, loss_interctc_layer6=204.758, loss_interctc_layer12=196.538, loss_interctc_layer15=195.296, loss_interctc_layer21=214.782, loss=205.387, backward_time=0.320, grad_norm=109.153, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.082, optim0_lr0=6.026e-05, train_time=1.385 [gpua049:0/64] 2024-01-15 08:29:49,624 (trainer:753) INFO: 3epoch:train:2101-2200batch: iter_time=7.694e-05, forward_time=0.145, loss_ctc=195.624, loss_interctc_layer6=194.068, loss_interctc_layer12=186.926, loss_interctc_layer15=186.082, loss_interctc_layer21=194.203, loss=191.381, backward_time=0.328, grad_norm=108.058, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.083, optim0_lr0=6.076e-05, train_time=1.465 [gpua049:0/64] 2024-01-15 08:32:16,899 (trainer:753) INFO: 3epoch:train:2201-2300batch: iter_time=0.003, forward_time=0.182, loss_ctc=238.100, loss_interctc_layer6=226.729, loss_interctc_layer12=219.177, loss_interctc_layer15=218.257, loss_interctc_layer21=236.192, loss=227.691, backward_time=0.332, grad_norm=133.192, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.083, optim0_lr0=6.126e-05, train_time=1.474 [gpua049:0/64] 2024-01-15 08:34:49,157 (trainer:753) INFO: 3epoch:train:2301-2400batch: iter_time=7.666e-05, forward_time=0.196, loss_ctc=231.019, loss_interctc_layer6=221.556, loss_interctc_layer12=214.507, loss_interctc_layer15=213.643, loss_interctc_layer21=228.305, loss=221.806, backward_time=0.350, grad_norm=153.881, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.089, optim0_lr0=6.176e-05, train_time=1.522 [gpua049:0/64] 2024-01-15 08:36:55,263 (trainer:753) INFO: 3epoch:train:2401-2500batch: iter_time=7.585e-05, forward_time=0.142, loss_ctc=192.954, loss_interctc_layer6=185.784, loss_interctc_layer12=178.717, loss_interctc_layer15=177.906, loss_interctc_layer21=191.797, loss=185.431, backward_time=0.297, grad_norm=92.258, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.082, optim0_lr0=6.226e-05, train_time=1.261 [gpua049:0/64] 2024-01-15 08:37:15,293 (multiple_iter_factory:32) INFO: Building 2th iter-factory... [gpua049:0/64] 2024-01-15 08:37:35,813 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') [gpua049:0/64] 2024-01-15 08:37:39,492 (abs_task:1660) INFO: [train] dataset: ESPnetDataset( speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} preprocess: ) [gpua049:0/64] 2024-01-15 08:37:39,492 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, [gpua049:0/64] 2024-01-15 08:37:39,496 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 [gpua049:0/64] 2024-01-15 08:44:22,353 (trainer:753) INFO: 3epoch:train:2501-2600batch: iter_time=2.171, forward_time=0.159, loss_ctc=231.622, loss_interctc_layer6=213.641, loss_interctc_layer12=205.719, loss_interctc_layer15=205.305, loss_interctc_layer21=230.400, loss=217.338, backward_time=0.300, grad_norm=123.346, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.083, optim0_lr0=6.276e-05, train_time=4.471 [gpua049:0/64] 2024-01-15 08:46:27,588 (trainer:753) INFO: 3epoch:train:2601-2700batch: iter_time=7.684e-05, forward_time=0.144, loss_ctc=223.557, loss_interctc_layer6=221.565, loss_interctc_layer12=211.936, loss_interctc_layer15=211.026, loss_interctc_layer21=222.338, loss=218.084, backward_time=0.297, grad_norm=118.789, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.081, optim0_lr0=6.326e-05, train_time=1.252 [gpua049:0/64] 2024-01-15 08:48:32,365 (trainer:753) INFO: 3epoch:train:2701-2800batch: iter_time=7.813e-05, forward_time=0.141, loss_ctc=194.815, loss_interctc_layer6=190.689, loss_interctc_layer12=184.368, loss_interctc_layer15=182.818, loss_interctc_layer21=192.894, loss=189.117, backward_time=0.301, grad_norm=115.968, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.082, optim0_lr0=6.376e-05, train_time=1.247 [gpua049:0/64] 2024-01-15 08:50:43,752 (trainer:753) INFO: 3epoch:train:2801-2900batch: iter_time=8.003e-05, forward_time=0.142, loss_ctc=196.636, loss_interctc_layer6=192.842, loss_interctc_layer12=184.925, loss_interctc_layer15=183.655, loss_interctc_layer21=195.098, loss=190.631, backward_time=0.319, grad_norm=120.968, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.082, optim0_lr0=6.426e-05, train_time=1.314 [gpua049:0/64] 2024-01-15 08:52:56,759 (trainer:753) INFO: 3epoch:train:2901-3000batch: iter_time=7.903e-05, forward_time=0.142, loss_ctc=207.237, loss_interctc_layer6=200.060, loss_interctc_layer12=192.347, loss_interctc_layer15=191.446, loss_interctc_layer21=206.486, loss=199.515, backward_time=0.330, grad_norm=115.788, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.082, optim0_lr0=6.476e-05, train_time=1.330 [gpua049:0/64] 2024-01-15 08:55:06,723 (trainer:753) INFO: 3epoch:train:3001-3100batch: iter_time=7.886e-05, forward_time=0.142, loss_ctc=187.739, loss_interctc_layer6=182.550, loss_interctc_layer12=175.084, loss_interctc_layer15=173.727, loss_interctc_layer21=187.334, loss=181.287, backward_time=0.298, grad_norm=110.893, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.082, optim0_lr0=6.526e-05, train_time=1.299 [gpua049:0/64] 2024-01-15 08:57:14,574 (trainer:753) INFO: 3epoch:train:3101-3200batch: iter_time=7.786e-05, forward_time=0.142, loss_ctc=214.710, loss_interctc_layer6=196.438, loss_interctc_layer12=188.837, loss_interctc_layer15=187.642, loss_interctc_layer21=215.317, loss=200.589, backward_time=0.298, grad_norm=125.972, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.082, optim0_lr0=6.576e-05, train_time=1.278 [gpua049:0/64] 2024-01-15 08:59:28,714 (trainer:753) INFO: 3epoch:train:3201-3300batch: iter_time=7.554e-05, forward_time=0.155, loss_ctc=226.379, loss_interctc_layer6=208.196, loss_interctc_layer12=199.519, loss_interctc_layer15=198.421, loss_interctc_layer21=224.299, loss=211.363, backward_time=0.307, grad_norm=114.859, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.082, optim0_lr0=6.626e-05, train_time=1.341 [gpua049:0/64] 2024-01-15 09:01:52,939 (trainer:753) INFO: 3epoch:train:3301-3400batch: iter_time=7.772e-05, forward_time=0.143, loss_ctc=192.351, loss_interctc_layer6=185.742, loss_interctc_layer12=177.879, loss_interctc_layer15=177.071, loss_interctc_layer21=191.321, loss=184.873, backward_time=0.335, grad_norm=108.492, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.082, optim0_lr0=6.676e-05, train_time=1.442 [gpua049:0/64] 2024-01-15 09:04:09,239 (trainer:753) INFO: 3epoch:train:3401-3500batch: iter_time=7.826e-05, forward_time=0.143, loss_ctc=219.386, loss_interctc_layer6=210.708, loss_interctc_layer12=202.307, loss_interctc_layer15=200.749, loss_interctc_layer21=217.948, loss=210.219, backward_time=0.316, grad_norm=121.648, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.082, optim0_lr0=6.726e-05, train_time=1.361 [gpua049:0/64] 2024-01-15 09:06:33,489 (trainer:753) INFO: 3epoch:train:3501-3600batch: iter_time=7.864e-05, forward_time=0.162, loss_ctc=209.607, loss_interctc_layer6=203.385, loss_interctc_layer12=194.048, loss_interctc_layer15=192.976, loss_interctc_layer21=208.503, loss=201.704, backward_time=0.324, grad_norm=116.595, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.083, optim0_lr0=6.776e-05, train_time=1.444 [gpua049:0/64] 2024-01-15 09:08:42,884 (trainer:753) INFO: 3epoch:train:3601-3700batch: iter_time=7.905e-05, forward_time=0.146, loss_ctc=194.419, loss_interctc_layer6=187.938, loss_interctc_layer12=180.754, loss_interctc_layer15=179.706, loss_interctc_layer21=193.514, loss=187.266, backward_time=0.307, grad_norm=100.347, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.083, optim0_lr0=6.826e-05, train_time=1.294 [gpua049:0/64] 2024-01-15 09:09:46,466 (multiple_iter_factory:32) INFO: Building 3th iter-factory... [gpua049:0/64] 2024-01-15 09:10:06,491 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') [gpua049:0/64] 2024-01-15 09:10:10,341 (abs_task:1660) INFO: [train] dataset: ESPnetDataset( speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} preprocess: ) [gpua049:0/64] 2024-01-15 09:10:10,342 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, [gpua049:0/64] 2024-01-15 09:10:10,345 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 [gpua049:0/64] 2024-01-15 09:16:09,091 (trainer:753) INFO: 3epoch:train:3701-3800batch: iter_time=1.367, forward_time=0.167, loss_ctc=202.496, loss_interctc_layer6=191.438, loss_interctc_layer12=183.433, loss_interctc_layer15=182.311, loss_interctc_layer21=201.299, loss=192.196, backward_time=0.302, grad_norm=106.511, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.085, optim0_lr0=6.876e-05, train_time=4.462 [gpua049:0/64] 2024-01-15 09:18:14,354 (trainer:753) INFO: 3epoch:train:3801-3900batch: iter_time=7.617e-05, forward_time=0.143, loss_ctc=230.967, loss_interctc_layer6=214.592, loss_interctc_layer12=203.217, loss_interctc_layer15=202.666, loss_interctc_layer21=229.578, loss=216.204, backward_time=0.296, grad_norm=113.693, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.082, optim0_lr0=6.926e-05, train_time=1.252 [gpua049:0/64] 2024-01-15 09:20:30,010 (trainer:753) INFO: 3epoch:train:3901-4000batch: iter_time=8.150e-05, forward_time=0.142, loss_ctc=209.053, loss_interctc_layer6=202.519, loss_interctc_layer12=195.932, loss_interctc_layer15=194.659, loss_interctc_layer21=207.466, loss=201.926, backward_time=0.300, grad_norm=112.500, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.082, optim0_lr0=6.976e-05, train_time=1.356 [gpua049:0/64] 2024-01-15 09:22:44,394 (trainer:753) INFO: 3epoch:train:4001-4100batch: iter_time=8.420e-05, forward_time=0.142, loss_ctc=176.201, loss_interctc_layer6=173.656, loss_interctc_layer12=166.764, loss_interctc_layer15=166.409, loss_interctc_layer21=174.210, loss=171.448, backward_time=0.310, grad_norm=116.162, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.082, optim0_lr0=7.026e-05, train_time=1.344 [gpua049:0/64] 2024-01-15 09:24:56,324 (trainer:753) INFO: 3epoch:train:4101-4200batch: iter_time=8.252e-05, forward_time=0.144, loss_ctc=188.964, loss_interctc_layer6=179.682, loss_interctc_layer12=170.552, loss_interctc_layer15=169.083, loss_interctc_layer21=188.089, loss=179.274, backward_time=0.300, grad_norm=97.275, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.082, optim0_lr0=7.076e-05, train_time=1.319 [gpua049:0/64] 2024-01-15 09:27:16,616 (trainer:753) INFO: 3epoch:train:4201-4300batch: iter_time=8.013e-05, forward_time=0.143, loss_ctc=193.086, loss_interctc_layer6=187.107, loss_interctc_layer12=179.076, loss_interctc_layer15=177.636, loss_interctc_layer21=191.771, loss=185.735, backward_time=0.332, grad_norm=121.836, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.083, optim0_lr0=7.126e-05, train_time=1.403 [gpua049:0/64] 2024-01-15 09:29:58,758 (trainer:753) INFO: 3epoch:train:4301-4400batch: iter_time=7.824e-05, forward_time=0.143, loss_ctc=200.733, loss_interctc_layer6=188.173, loss_interctc_layer12=179.926, loss_interctc_layer15=178.432, loss_interctc_layer21=199.488, loss=189.350, backward_time=0.334, grad_norm=97.797, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.083, optim0_lr0=7.176e-05, train_time=1.621 [gpua049:0/64] 2024-01-15 09:32:13,196 (trainer:753) INFO: 3epoch:train:4401-4500batch: iter_time=7.877e-05, forward_time=0.142, loss_ctc=217.968, loss_interctc_layer6=194.771, loss_interctc_layer12=185.549, loss_interctc_layer15=184.474, loss_interctc_layer21=217.259, loss=200.004, backward_time=0.312, grad_norm=131.420, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.083, optim0_lr0=7.226e-05, train_time=1.344 [gpua049:0/64] 2024-01-15 09:34:22,305 (trainer:753) INFO: 3epoch:train:4501-4600batch: iter_time=7.789e-05, forward_time=0.143, loss_ctc=197.679, loss_interctc_layer6=182.928, loss_interctc_layer12=173.343, loss_interctc_layer15=171.832, loss_interctc_layer21=196.475, loss=184.451, backward_time=0.303, grad_norm=101.797, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.083, optim0_lr0=7.276e-05, train_time=1.291 [gpua049:0/64] 2024-01-15 09:36:42,240 (trainer:753) INFO: 3epoch:train:4601-4700batch: iter_time=8.122e-05, forward_time=0.142, loss_ctc=179.016, loss_interctc_layer6=175.594, loss_interctc_layer12=167.417, loss_interctc_layer15=166.298, loss_interctc_layer21=177.552, loss=173.176, backward_time=0.325, grad_norm=114.834, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.082, optim0_lr0=7.326e-05, train_time=1.399 [gpua049:0/64] 2024-01-15 09:38:53,125 (trainer:753) INFO: 3epoch:train:4701-4800batch: iter_time=7.918e-05, forward_time=0.142, loss_ctc=220.522, loss_interctc_layer6=207.709, loss_interctc_layer12=197.661, loss_interctc_layer15=197.129, loss_interctc_layer21=219.084, loss=208.421, backward_time=0.307, grad_norm=124.089, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.083, optim0_lr0=7.376e-05, train_time=1.309 [gpua049:0/64] 2024-01-15 09:41:06,242 (trainer:753) INFO: 3epoch:train:4801-4900batch: iter_time=7.999e-05, forward_time=0.143, loss_ctc=209.399, loss_interctc_layer6=202.827, loss_interctc_layer12=192.865, loss_interctc_layer15=191.242, loss_interctc_layer21=208.988, loss=201.064, backward_time=0.312, grad_norm=126.095, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.083, optim0_lr0=7.426e-05, train_time=1.331 [gpua049:0/64] 2024-01-15 09:43:13,259 (trainer:753) INFO: 3epoch:train:4901-5000batch: iter_time=7.641e-05, forward_time=0.142, loss_ctc=181.075, loss_interctc_layer6=169.889, loss_interctc_layer12=161.320, loss_interctc_layer15=159.950, loss_interctc_layer21=179.715, loss=170.390, backward_time=0.297, grad_norm=86.472, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.083, optim0_lr0=7.476e-05, train_time=1.270 [gpua049:0/64] 2024-01-15 09:43:30,485 (multiple_iter_factory:32) INFO: Building 4th iter-factory... [gpua049:0/64] 2024-01-15 09:43:50,689 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') [gpua049:0/64] 2024-01-15 09:43:54,360 (abs_task:1660) INFO: [train] dataset: ESPnetDataset( speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} preprocess: ) [gpua049:0/64] 2024-01-15 09:43:54,360 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, [gpua049:0/64] 2024-01-15 09:43:54,364 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 [gpua049:0/64] 2024-01-15 09:49:31,609 (trainer:753) INFO: 3epoch:train:5001-5100batch: iter_time=2.408, forward_time=0.143, loss_ctc=232.077, loss_interctc_layer6=196.285, loss_interctc_layer12=185.913, loss_interctc_layer15=183.904, loss_interctc_layer21=230.631, loss=205.762, backward_time=0.303, grad_norm=120.059, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.082, optim0_lr0=7.526e-05, train_time=3.783 [gpua049:0/64] 2024-01-15 09:51:34,233 (trainer:753) INFO: 3epoch:train:5101-5200batch: iter_time=7.965e-05, forward_time=0.145, loss_ctc=219.501, loss_interctc_layer6=201.413, loss_interctc_layer12=190.800, loss_interctc_layer15=189.018, loss_interctc_layer21=216.383, loss=203.423, backward_time=0.297, grad_norm=113.868, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.083, optim0_lr0=7.576e-05, train_time=1.226 [gpua049:0/64] 2024-01-15 09:53:45,355 (trainer:753) INFO: 3epoch:train:5201-5300batch: iter_time=8.052e-05, forward_time=0.141, loss_ctc=186.581, loss_interctc_layer6=173.164, loss_interctc_layer12=165.601, loss_interctc_layer15=164.677, loss_interctc_layer21=185.219, loss=175.048, backward_time=0.312, grad_norm=109.756, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.082, optim0_lr0=7.626e-05, train_time=1.311 [gpua049:0/64] 2024-01-15 09:55:53,566 (trainer:753) INFO: 3epoch:train:5301-5400batch: iter_time=7.627e-05, forward_time=0.160, loss_ctc=195.652, loss_interctc_layer6=174.756, loss_interctc_layer12=165.271, loss_interctc_layer15=163.300, loss_interctc_layer21=194.964, loss=178.789, backward_time=0.306, grad_norm=112.090, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.084, optim0_lr0=7.676e-05, train_time=1.282 [gpua049:0/64] 2024-01-15 09:58:12,292 (trainer:753) INFO: 3epoch:train:5401-5500batch: iter_time=7.635e-05, forward_time=0.143, loss_ctc=204.913, loss_interctc_layer6=184.994, loss_interctc_layer12=175.935, loss_interctc_layer15=174.262, loss_interctc_layer21=203.788, loss=188.778, backward_time=0.314, grad_norm=136.861, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.083, optim0_lr0=7.726e-05, train_time=1.387 [gpua049:0/64] 2024-01-15 10:00:38,201 (trainer:753) INFO: 3epoch:train:5501-5600batch: iter_time=8.336e-05, forward_time=0.142, loss_ctc=181.894, loss_interctc_layer6=166.082, loss_interctc_layer12=156.877, loss_interctc_layer15=154.872, loss_interctc_layer21=181.212, loss=168.188, backward_time=0.344, grad_norm=106.223, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.083, optim0_lr0=7.776e-05, train_time=1.459 [gpua049:0/64] 2024-01-15 10:02:57,627 (trainer:753) INFO: 3epoch:train:5601-5700batch: iter_time=7.715e-05, forward_time=0.143, loss_ctc=220.762, loss_interctc_layer6=180.197, loss_interctc_layer12=170.970, loss_interctc_layer15=169.751, loss_interctc_layer21=220.029, loss=192.342, backward_time=0.317, grad_norm=127.285, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.083, optim0_lr0=7.826e-05, train_time=1.393 [gpua049:0/64] 2024-01-15 10:05:29,652 (trainer:753) INFO: 3epoch:train:5701-5800batch: iter_time=8.112e-05, forward_time=0.190, loss_ctc=223.365, loss_interctc_layer6=189.768, loss_interctc_layer12=179.055, loss_interctc_layer15=177.490, loss_interctc_layer21=220.717, loss=198.079, backward_time=0.336, grad_norm=114.284, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.087, optim0_lr0=7.876e-05, train_time=1.521 [gpua049:0/64] 2024-01-15 10:08:09,515 (trainer:753) INFO: 3epoch:train:5801-5900batch: iter_time=8.008e-05, forward_time=0.185, loss_ctc=184.112, loss_interctc_layer6=168.093, loss_interctc_layer12=159.041, loss_interctc_layer15=157.281, loss_interctc_layer21=183.017, loss=170.309, backward_time=0.321, grad_norm=109.322, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.084, optim0_lr0=7.926e-05, train_time=1.598 [gpua049:0/64] 2024-01-15 10:10:30,883 (trainer:753) INFO: 3epoch:train:5901-6000batch: iter_time=7.968e-05, forward_time=0.143, loss_ctc=211.219, loss_interctc_layer6=194.534, loss_interctc_layer12=184.057, loss_interctc_layer15=182.615, loss_interctc_layer21=209.074, loss=196.300, backward_time=0.305, grad_norm=127.998, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.082, optim0_lr0=7.976e-05, train_time=1.413 [gpua049:0/64] 2024-01-15 10:12:58,805 (trainer:753) INFO: 3epoch:train:6001-6100batch: iter_time=7.659e-05, forward_time=0.144, loss_ctc=199.251, loss_interctc_layer6=187.606, loss_interctc_layer12=175.436, loss_interctc_layer15=173.961, loss_interctc_layer21=197.673, loss=186.785, backward_time=0.359, grad_norm=110.522, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.082, optim0_lr0=8.026e-05, train_time=1.479 [gpua049:0/64] 2024-01-15 10:15:04,346 (trainer:753) INFO: 3epoch:train:6101-6200batch: iter_time=7.799e-05, forward_time=0.143, loss_ctc=185.403, loss_interctc_layer6=170.401, loss_interctc_layer12=161.500, loss_interctc_layer15=160.718, loss_interctc_layer21=183.444, loss=172.293, backward_time=0.310, grad_norm=101.624, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.082, optim0_lr0=8.076e-05, train_time=1.255 [gpua049:0/64] 2024-01-15 10:16:17,832 (multiple_iter_factory:32) INFO: Building 5th iter-factory... [gpua049:0/64] 2024-01-15 10:16:37,763 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') [gpua049:0/64] 2024-01-15 10:16:41,500 (abs_task:1660) INFO: [train] dataset: ESPnetDataset( speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} preprocess: ) [gpua049:0/64] 2024-01-15 10:16:41,500 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, [gpua049:0/64] 2024-01-15 10:16:41,503 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 [gpua049:0/64] 2024-01-15 10:23:51,600 (trainer:753) INFO: 3epoch:train:6201-6300batch: iter_time=1.402, forward_time=0.143, loss_ctc=202.402, loss_interctc_layer6=177.801, loss_interctc_layer12=167.473, loss_interctc_layer15=165.748, loss_interctc_layer21=201.425, loss=182.970, backward_time=0.317, grad_norm=119.742, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.082, optim0_lr0=8.126e-05, train_time=5.272 [gpua049:0/64] 2024-01-15 10:25:57,487 (trainer:753) INFO: 3epoch:train:6301-6400batch: iter_time=7.827e-05, forward_time=0.143, loss_ctc=231.385, loss_interctc_layer6=197.423, loss_interctc_layer12=185.242, loss_interctc_layer15=183.403, loss_interctc_layer21=230.287, loss=205.548, backward_time=0.299, grad_norm=116.626, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.081, optim0_lr0=8.176e-05, train_time=1.259 [gpua049:0/64] 2024-01-15 10:28:03,821 (trainer:753) INFO: 3epoch:train:6401-6500batch: iter_time=7.730e-05, forward_time=0.142, loss_ctc=199.276, loss_interctc_layer6=186.055, loss_interctc_layer12=175.981, loss_interctc_layer15=175.071, loss_interctc_layer21=197.975, loss=186.872, backward_time=0.300, grad_norm=109.802, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.081, optim0_lr0=8.226e-05, train_time=1.263 [gpua049:0/64] 2024-01-15 10:30:09,688 (trainer:753) INFO: 3epoch:train:6501-6600batch: iter_time=7.653e-05, forward_time=0.143, loss_ctc=167.495, loss_interctc_layer6=157.390, loss_interctc_layer12=148.852, loss_interctc_layer15=147.531, loss_interctc_layer21=166.126, loss=157.479, backward_time=0.316, grad_norm=106.500, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.082, optim0_lr0=8.276e-05, train_time=1.258 [gpua049:0/64] 2024-01-15 10:32:33,042 (trainer:753) INFO: 3epoch:train:6601-6700batch: iter_time=7.892e-05, forward_time=0.142, loss_ctc=186.871, loss_interctc_layer6=164.362, loss_interctc_layer12=153.737, loss_interctc_layer15=151.671, loss_interctc_layer21=185.622, loss=168.453, backward_time=0.306, grad_norm=101.857, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.082, optim0_lr0=8.326e-05, train_time=1.433 [gpua049:0/64] 2024-01-15 10:34:59,992 (trainer:753) INFO: 3epoch:train:6701-6800batch: iter_time=7.909e-05, forward_time=0.142, loss_ctc=185.894, loss_interctc_layer6=173.680, loss_interctc_layer12=163.285, loss_interctc_layer15=161.905, loss_interctc_layer21=185.488, loss=174.051, backward_time=0.346, grad_norm=126.131, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.081, optim0_lr0=8.376e-05, train_time=1.469 [gpua049:0/64] 2024-01-15 10:37:16,221 (trainer:753) INFO: 3epoch:train:6801-6900batch: iter_time=7.761e-05, forward_time=0.143, loss_ctc=201.057, loss_interctc_layer6=172.980, loss_interctc_layer12=162.930, loss_interctc_layer15=162.366, loss_interctc_layer21=200.409, loss=179.949, backward_time=0.305, grad_norm=122.384, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.081, optim0_lr0=8.426e-05, train_time=1.362 [gpua049:0/64] 2024-01-15 10:39:43,856 (trainer:753) INFO: 3epoch:train:6901-7000batch: iter_time=7.888e-05, forward_time=0.152, loss_ctc=222.184, loss_interctc_layer6=180.108, loss_interctc_layer12=168.643, loss_interctc_layer15=167.342, loss_interctc_layer21=221.450, loss=191.945, backward_time=0.335, grad_norm=132.015, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.083, optim0_lr0=8.476e-05, train_time=1.476 [gpua049:0/64] 2024-01-15 10:42:05,096 (trainer:753) INFO: 3epoch:train:7001-7100batch: iter_time=7.849e-05, forward_time=0.142, loss_ctc=188.274, loss_interctc_layer6=166.654, loss_interctc_layer12=155.795, loss_interctc_layer15=153.500, loss_interctc_layer21=186.640, loss=170.173, backward_time=0.305, grad_norm=95.431, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.081, optim0_lr0=8.526e-05, train_time=1.412 [gpua049:0/64] 2024-01-15 10:44:12,954 (trainer:753) INFO: 3epoch:train:7101-7200batch: iter_time=7.824e-05, forward_time=0.142, loss_ctc=170.338, loss_interctc_layer6=160.603, loss_interctc_layer12=151.058, loss_interctc_layer15=149.715, loss_interctc_layer21=169.258, loss=160.195, backward_time=0.317, grad_norm=98.684, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.082, optim0_lr0=8.576e-05, train_time=1.278 [gpua049:0/64] 2024-01-15 10:46:47,468 (trainer:753) INFO: 3epoch:train:7201-7300batch: iter_time=7.892e-05, forward_time=0.143, loss_ctc=211.104, loss_interctc_layer6=191.804, loss_interctc_layer12=180.302, loss_interctc_layer15=178.499, loss_interctc_layer21=209.580, loss=194.258, backward_time=0.320, grad_norm=117.903, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.082, optim0_lr0=8.626e-05, train_time=1.545 [gpua049:0/64] 2024-01-15 10:49:06,880 (trainer:753) INFO: 3epoch:train:7301-7400batch: iter_time=7.637e-05, forward_time=0.143, loss_ctc=199.374, loss_interctc_layer6=187.412, loss_interctc_layer12=174.554, loss_interctc_layer15=172.568, loss_interctc_layer21=198.147, loss=186.411, backward_time=0.318, grad_norm=128.732, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.082, optim0_lr0=8.676e-05, train_time=1.394 [gpua049:0/64] 2024-01-15 10:51:17,572 (trainer:753) INFO: 3epoch:train:7401-7500batch: iter_time=7.541e-05, forward_time=0.143, loss_ctc=180.115, loss_interctc_layer6=157.149, loss_interctc_layer12=147.829, loss_interctc_layer15=146.774, loss_interctc_layer21=179.112, loss=162.196, backward_time=0.301, grad_norm=108.656, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.082, optim0_lr0=8.726e-05, train_time=1.307 [gpua049:0/64] 2024-01-15 10:51:37,603 (multiple_iter_factory:32) INFO: Building 6th iter-factory... [gpua049:0/64] 2024-01-15 10:51:57,739 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') [gpua049:0/64] 2024-01-15 10:52:01,712 (abs_task:1660) INFO: [train] dataset: ESPnetDataset( speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} preprocess: ) [gpua049:0/64] 2024-01-15 10:52:01,712 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, [gpua049:0/64] 2024-01-15 10:52:01,715 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 [gpua049:0/64] 2024-01-15 10:58:04,212 (trainer:753) INFO: 3epoch:train:7501-7600batch: iter_time=2.657, forward_time=0.165, loss_ctc=217.578, loss_interctc_layer6=181.243, loss_interctc_layer12=169.579, loss_interctc_layer15=167.860, loss_interctc_layer21=215.928, loss=190.438, backward_time=0.311, grad_norm=132.724, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.085, optim0_lr0=8.776e-05, train_time=4.065 [gpua049:0/64] 2024-01-15 11:00:10,933 (trainer:753) INFO: 3epoch:train:7601-7700batch: iter_time=8.021e-05, forward_time=0.144, loss_ctc=199.817, loss_interctc_layer6=185.856, loss_interctc_layer12=173.804, loss_interctc_layer15=171.655, loss_interctc_layer21=199.227, loss=186.072, backward_time=0.298, grad_norm=117.644, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.084, optim0_lr0=8.826e-05, train_time=1.268 [gpua049:0/64] 2024-01-15 11:02:16,460 (trainer:753) INFO: 3epoch:train:7701-7800batch: iter_time=7.784e-05, forward_time=0.142, loss_ctc=176.310, loss_interctc_layer6=162.781, loss_interctc_layer12=154.128, loss_interctc_layer15=153.486, loss_interctc_layer21=175.100, loss=164.361, backward_time=0.302, grad_norm=123.598, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.082, optim0_lr0=8.876e-05, train_time=1.255 [gpua049:0/64] 2024-01-15 11:04:26,278 (trainer:753) INFO: 3epoch:train:7801-7900batch: iter_time=7.732e-05, forward_time=0.143, loss_ctc=181.866, loss_interctc_layer6=162.060, loss_interctc_layer12=150.417, loss_interctc_layer15=149.216, loss_interctc_layer21=180.398, loss=164.791, backward_time=0.300, grad_norm=113.052, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.083, optim0_lr0=8.926e-05, train_time=1.298 [gpua049:0/64] 2024-01-15 11:07:00,837 (trainer:753) INFO: 3epoch:train:7901-8000batch: iter_time=7.649e-05, forward_time=0.145, loss_ctc=188.252, loss_interctc_layer6=170.197, loss_interctc_layer12=158.964, loss_interctc_layer15=157.339, loss_interctc_layer21=187.780, loss=172.507, backward_time=0.313, grad_norm=107.449, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.082, optim0_lr0=8.976e-05, train_time=1.545 [gpua049:0/64] 2024-01-15 11:09:15,227 (trainer:753) INFO: 3epoch:train:8001-8100batch: iter_time=7.202e-05, forward_time=0.143, loss_ctc=168.910, loss_interctc_layer6=153.431, loss_interctc_layer12=143.282, loss_interctc_layer15=141.365, loss_interctc_layer21=167.008, loss=154.799, backward_time=0.307, grad_norm=105.383, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.083, optim0_lr0=9.026e-05, train_time=1.344 [gpua049:0/64] 2024-01-15 11:11:31,047 (trainer:753) INFO: 3epoch:train:8101-8200batch: iter_time=7.771e-05, forward_time=0.142, loss_ctc=207.568, loss_interctc_layer6=166.612, loss_interctc_layer12=156.320, loss_interctc_layer15=154.659, loss_interctc_layer21=206.256, loss=178.283, backward_time=0.310, grad_norm=107.124, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.082, optim0_lr0=9.076e-05, train_time=1.358 [gpua049:0/64] 2024-01-15 11:13:43,946 (trainer:753) INFO: 3epoch:train:8201-8300batch: iter_time=7.693e-05, forward_time=0.149, loss_ctc=209.099, loss_interctc_layer6=176.155, loss_interctc_layer12=163.764, loss_interctc_layer15=162.139, loss_interctc_layer21=207.514, loss=183.734, backward_time=0.300, grad_norm=112.630, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.083, optim0_lr0=9.126e-05, train_time=1.329 [gpua049:0/64] 2024-01-15 11:15:58,325 (trainer:753) INFO: 3epoch:train:8301-8400batch: iter_time=8.251e-05, forward_time=0.142, loss_ctc=170.241, loss_interctc_layer6=155.809, loss_interctc_layer12=144.809, loss_interctc_layer15=143.122, loss_interctc_layer21=169.236, loss=156.643, backward_time=0.309, grad_norm=102.771, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.082, optim0_lr0=9.176e-05, train_time=1.344 [gpua049:0/64] 2024-01-15 11:18:41,259 (trainer:753) INFO: 3epoch:train:8401-8500batch: iter_time=8.037e-05, forward_time=0.143, loss_ctc=196.478, loss_interctc_layer6=181.134, loss_interctc_layer12=169.317, loss_interctc_layer15=167.628, loss_interctc_layer21=194.686, loss=181.848, backward_time=0.328, grad_norm=119.105, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.082, optim0_lr0=9.226e-05, train_time=1.629 [gpua049:0/64] 2024-01-15 11:21:07,836 (trainer:753) INFO: 3epoch:train:8501-8600batch: iter_time=8.045e-05, forward_time=0.144, loss_ctc=185.307, loss_interctc_layer6=171.920, loss_interctc_layer12=160.302, loss_interctc_layer15=158.030, loss_interctc_layer21=183.296, loss=171.771, backward_time=0.317, grad_norm=101.278, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.082, optim0_lr0=9.276e-05, train_time=1.466 [gpua049:0/64] 2024-01-15 11:23:27,109 (trainer:753) INFO: 3epoch:train:8601-8700batch: iter_time=7.713e-05, forward_time=0.145, loss_ctc=173.060, loss_interctc_layer6=159.677, loss_interctc_layer12=149.697, loss_interctc_layer15=147.820, loss_interctc_layer21=171.486, loss=160.348, backward_time=0.359, grad_norm=102.811, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.083, optim0_lr0=9.326e-05, train_time=1.393 [gpua049:0/64] 2024-01-15 11:24:42,262 (multiple_iter_factory:32) INFO: Building 7th iter-factory... [gpua049:0/64] 2024-01-15 11:25:02,020 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') [gpua049:0/64] 2024-01-15 11:25:05,725 (abs_task:1660) INFO: [train] dataset: ESPnetDataset( speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} preprocess: ) [gpua049:0/64] 2024-01-15 11:25:05,726 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, [gpua049:0/64] 2024-01-15 11:25:05,729 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 [gpua049:0/64] 2024-01-15 11:31:01,967 (trainer:753) INFO: 3epoch:train:8701-8800batch: iter_time=1.410, forward_time=0.166, loss_ctc=190.815, loss_interctc_layer6=165.809, loss_interctc_layer12=155.999, loss_interctc_layer15=154.993, loss_interctc_layer21=189.443, loss=171.412, backward_time=0.319, grad_norm=118.142, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.084, optim0_lr0=9.376e-05, train_time=4.548 [gpua049:0/64] 2024-01-15 11:33:11,079 (trainer:753) INFO: 3epoch:train:8801-8900batch: iter_time=7.239e-05, forward_time=0.143, loss_ctc=218.149, loss_interctc_layer6=181.811, loss_interctc_layer12=168.416, loss_interctc_layer15=165.824, loss_interctc_layer21=214.365, loss=189.713, backward_time=0.297, grad_norm=130.038, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.082, optim0_lr0=9.426e-05, train_time=1.291 [gpua049:0/64] 2024-01-15 11:35:22,750 (trainer:753) INFO: 3epoch:train:8901-9000batch: iter_time=7.951e-05, forward_time=0.143, loss_ctc=190.772, loss_interctc_layer6=174.541, loss_interctc_layer12=163.089, loss_interctc_layer15=160.741, loss_interctc_layer21=188.824, loss=175.593, backward_time=0.299, grad_norm=123.716, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.082, optim0_lr0=9.476e-05, train_time=1.316 [gpua049:0/64] 2024-01-15 11:37:49,911 (trainer:753) INFO: 3epoch:train:9001-9100batch: iter_time=7.966e-05, forward_time=0.142, loss_ctc=158.204, loss_interctc_layer6=147.876, loss_interctc_layer12=138.733, loss_interctc_layer15=137.412, loss_interctc_layer21=156.117, loss=147.668, backward_time=0.324, grad_norm=110.763, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.082, optim0_lr0=9.526e-05, train_time=1.471 [gpua049:0/64] 2024-01-15 11:40:16,375 (trainer:753) INFO: 3epoch:train:9101-9200batch: iter_time=7.189e-05, forward_time=0.142, loss_ctc=175.599, loss_interctc_layer6=152.126, loss_interctc_layer12=141.469, loss_interctc_layer15=139.653, loss_interctc_layer21=174.855, loss=156.740, backward_time=0.364, grad_norm=100.233, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.082, optim0_lr0=9.576e-05, train_time=1.464 [gpua049:0/64] 2024-01-15 11:42:53,918 (trainer:753) INFO: 3epoch:train:9201-9300batch: iter_time=7.642e-05, forward_time=0.142, loss_ctc=174.808, loss_interctc_layer6=162.739, loss_interctc_layer12=152.469, loss_interctc_layer15=150.789, loss_interctc_layer21=173.496, loss=162.860, backward_time=0.359, grad_norm=109.026, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.082, optim0_lr0=9.626e-05, train_time=1.575 [gpua049:0/64] 2024-01-15 11:45:47,690 (trainer:753) INFO: 3epoch:train:9301-9400batch: iter_time=7.723e-05, forward_time=0.143, loss_ctc=189.579, loss_interctc_layer6=160.611, loss_interctc_layer12=149.700, loss_interctc_layer15=147.742, loss_interctc_layer21=188.283, loss=167.183, backward_time=0.383, grad_norm=100.828, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.082, optim0_lr0=9.676e-05, train_time=1.738 [gpua049:0/64] 2024-01-15 11:48:10,989 (trainer:753) INFO: 3epoch:train:9401-9500batch: iter_time=7.858e-05, forward_time=0.142, loss_ctc=210.134, loss_interctc_layer6=167.287, loss_interctc_layer12=155.531, loss_interctc_layer15=153.429, loss_interctc_layer21=209.427, loss=179.162, backward_time=0.325, grad_norm=116.841, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.082, optim0_lr0=9.726e-05, train_time=1.433 [gpua049:0/64] 2024-01-15 11:50:47,987 (trainer:753) INFO: 3epoch:train:9501-9600batch: iter_time=7.645e-05, forward_time=0.142, loss_ctc=176.550, loss_interctc_layer6=155.800, loss_interctc_layer12=143.175, loss_interctc_layer15=141.161, loss_interctc_layer21=174.972, loss=158.331, backward_time=0.335, grad_norm=87.879, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.082, optim0_lr0=9.776e-05, train_time=1.570 [gpua049:0/64] 2024-01-15 11:53:12,587 (trainer:753) INFO: 3epoch:train:9601-9700batch: iter_time=7.805e-05, forward_time=0.142, loss_ctc=157.754, loss_interctc_layer6=149.076, loss_interctc_layer12=138.459, loss_interctc_layer15=136.701, loss_interctc_layer21=156.028, loss=147.603, backward_time=0.321, grad_norm=96.008, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.082, optim0_lr0=9.826e-05, train_time=1.446 [gpua049:0/64] 2024-01-15 11:55:48,562 (trainer:753) INFO: 3epoch:train:9701-9800batch: iter_time=7.591e-05, forward_time=0.142, loss_ctc=197.198, loss_interctc_layer6=179.516, loss_interctc_layer12=165.505, loss_interctc_layer15=163.370, loss_interctc_layer21=195.701, loss=180.258, backward_time=0.340, grad_norm=104.987, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.082, optim0_lr0=9.876e-05, train_time=1.560 [gpua049:0/64] 2024-01-15 11:58:18,965 (trainer:753) INFO: 3epoch:train:9801-9900batch: iter_time=7.804e-05, forward_time=0.142, loss_ctc=185.043, loss_interctc_layer6=172.349, loss_interctc_layer12=159.478, loss_interctc_layer15=157.232, loss_interctc_layer21=183.357, loss=171.492, backward_time=0.330, grad_norm=110.294, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.082, optim0_lr0=9.926e-05, train_time=1.504 [gpua049:0/64] 2024-01-15 12:00:36,191 (trainer:753) INFO: 3epoch:train:9901-10000batch: iter_time=7.755e-05, forward_time=0.142, loss_ctc=170.126, loss_interctc_layer6=148.063, loss_interctc_layer12=136.861, loss_interctc_layer15=135.193, loss_interctc_layer21=167.855, loss=151.620, backward_time=0.322, grad_norm=96.495, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.082, optim0_lr0=9.976e-05, train_time=1.372 [gpua049:0/64] 2024-01-15 12:00:50,010 (multiple_iter_factory:32) INFO: Building 8th iter-factory... [gpua049:0/64] 2024-01-15 12:01:10,565 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') [gpua049:0/64] 2024-01-15 12:01:14,264 (abs_task:1660) INFO: [train] dataset: ESPnetDataset( speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} preprocess: ) [gpua049:0/64] 2024-01-15 12:01:14,264 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, [gpua049:0/64] 2024-01-15 12:01:14,267 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 [gpua049:0/64] 2024-01-15 12:10:16,862 (trainer:753) INFO: 3epoch:train:10001-10100batch: iter_time=2.253, forward_time=0.149, loss_ctc=197.296, loss_interctc_layer6=168.641, loss_interctc_layer12=156.856, loss_interctc_layer15=154.767, loss_interctc_layer21=195.951, loss=174.702, backward_time=0.302, grad_norm=111.777, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.082, optim0_lr0=1.003e-04, train_time=5.806 [gpua049:0/64] 2024-01-15 12:12:45,160 (trainer:753) INFO: 3epoch:train:10101-10200batch: iter_time=7.653e-05, forward_time=0.143, loss_ctc=181.313, loss_interctc_layer6=172.925, loss_interctc_layer12=159.935, loss_interctc_layer15=157.594, loss_interctc_layer21=179.412, loss=170.236, backward_time=0.325, grad_norm=105.146, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.082, optim0_lr0=1.008e-04, train_time=1.483 [gpua049:0/64] 2024-01-15 12:14:52,775 (trainer:753) INFO: 3epoch:train:10201-10300batch: iter_time=7.523e-05, forward_time=0.144, loss_ctc=157.315, loss_interctc_layer6=151.706, loss_interctc_layer12=142.171, loss_interctc_layer15=141.004, loss_interctc_layer21=157.190, loss=149.877, backward_time=0.305, grad_norm=115.433, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.083, optim0_lr0=1.013e-04, train_time=1.276 [gpua049:0/64] 2024-01-15 12:17:26,901 (trainer:753) INFO: 3epoch:train:10301-10400batch: iter_time=7.666e-05, forward_time=0.178, loss_ctc=159.036, loss_interctc_layer6=151.477, loss_interctc_layer12=139.865, loss_interctc_layer15=137.957, loss_interctc_layer21=156.681, loss=149.003, backward_time=0.382, grad_norm=105.416, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.091, optim0_lr0=1.018e-04, train_time=1.541 [gpua049:0/64] 2024-01-15 12:20:08,547 (trainer:753) INFO: 3epoch:train:10401-10500batch: iter_time=7.847e-05, forward_time=0.142, loss_ctc=172.610, loss_interctc_layer6=160.625, loss_interctc_layer12=149.300, loss_interctc_layer15=147.198, loss_interctc_layer21=171.686, loss=160.284, backward_time=0.384, grad_norm=124.802, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.082, optim0_lr0=1.023e-04, train_time=1.616 [gpua049:0/64] 2024-01-15 12:23:00,793 (trainer:753) INFO: 3epoch:train:10501-10600batch: iter_time=7.829e-05, forward_time=0.197, loss_ctc=154.345, loss_interctc_layer6=144.029, loss_interctc_layer12=133.167, loss_interctc_layer15=131.231, loss_interctc_layer21=152.241, loss=143.003, backward_time=0.364, grad_norm=105.420, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.087, optim0_lr0=1.028e-04, train_time=1.722 [gpua049:0/64] 2024-01-15 12:25:33,068 (trainer:753) INFO: 3epoch:train:10601-10700batch: iter_time=7.800e-05, forward_time=0.142, loss_ctc=185.123, loss_interctc_layer6=156.646, loss_interctc_layer12=145.480, loss_interctc_layer15=143.526, loss_interctc_layer21=185.521, loss=163.259, backward_time=0.345, grad_norm=89.589, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.082, optim0_lr0=1.033e-04, train_time=1.522 [gpua049:0/64] 2024-01-15 12:27:49,003 (trainer:753) INFO: 3epoch:train:10701-10800batch: iter_time=8.014e-05, forward_time=0.142, loss_ctc=190.031, loss_interctc_layer6=164.419, loss_interctc_layer12=151.945, loss_interctc_layer15=149.623, loss_interctc_layer21=188.268, loss=168.857, backward_time=0.306, grad_norm=86.445, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.082, optim0_lr0=1.038e-04, train_time=1.360 [gpua049:0/64] 2024-01-15 12:30:29,571 (trainer:753) INFO: 3epoch:train:10801-10900batch: iter_time=7.855e-05, forward_time=0.142, loss_ctc=155.260, loss_interctc_layer6=144.793, loss_interctc_layer12=133.370, loss_interctc_layer15=131.435, loss_interctc_layer21=153.849, loss=143.741, backward_time=0.347, grad_norm=93.777, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.082, optim0_lr0=1.043e-04, train_time=1.605 [gpua049:0/64] 2024-01-15 12:33:14,110 (trainer:753) INFO: 3epoch:train:10901-11000batch: iter_time=8.027e-05, forward_time=0.145, loss_ctc=180.837, loss_interctc_layer6=169.348, loss_interctc_layer12=156.601, loss_interctc_layer15=154.425, loss_interctc_layer21=179.283, loss=168.099, backward_time=0.338, grad_norm=115.740, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.082, optim0_lr0=1.048e-04, train_time=1.645 [gpua049:0/64] 2024-01-15 12:35:36,098 (trainer:753) INFO: 3epoch:train:11001-11100batch: iter_time=7.911e-05, forward_time=0.143, loss_ctc=171.517, loss_interctc_layer6=162.419, loss_interctc_layer12=150.041, loss_interctc_layer15=148.297, loss_interctc_layer21=170.447, loss=160.544, backward_time=0.314, grad_norm=104.530, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.083, optim0_lr0=1.053e-04, train_time=1.420 [gpua049:0/64] 2024-01-15 12:38:45,824 (trainer:753) INFO: 3epoch:train:11101-11200batch: iter_time=7.856e-05, forward_time=0.143, loss_ctc=158.402, loss_interctc_layer6=148.241, loss_interctc_layer12=137.173, loss_interctc_layer15=134.980, loss_interctc_layer21=157.480, loss=147.255, backward_time=0.402, grad_norm=84.085, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.083, optim0_lr0=1.058e-04, train_time=1.897 [gpua049:0/64] 2024-01-15 12:39:58,888 (multiple_iter_factory:32) INFO: Building 9th iter-factory... [gpua049:0/64] 2024-01-15 12:40:18,852 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') [gpua049:0/64] 2024-01-15 12:40:22,586 (abs_task:1660) INFO: [train] dataset: ESPnetDataset( speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} preprocess: ) [gpua049:0/64] 2024-01-15 12:40:22,586 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, [gpua049:0/64] 2024-01-15 12:40:22,589 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 [gpua049:0/64] 2024-01-15 12:50:32,524 (trainer:753) INFO: 3epoch:train:11201-11300batch: iter_time=1.449, forward_time=0.157, loss_ctc=178.432, loss_interctc_layer6=157.568, loss_interctc_layer12=146.205, loss_interctc_layer15=144.516, loss_interctc_layer21=176.519, loss=160.648, backward_time=0.302, grad_norm=109.700, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.083, optim0_lr0=1.063e-04, train_time=7.067 [gpua049:0/64] 2024-01-15 12:52:40,868 (trainer:753) INFO: 3epoch:train:11301-11400batch: iter_time=7.861e-05, forward_time=0.154, loss_ctc=206.968, loss_interctc_layer6=170.391, loss_interctc_layer12=157.187, loss_interctc_layer15=154.257, loss_interctc_layer21=205.506, loss=178.862, backward_time=0.322, grad_norm=118.708, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.087, optim0_lr0=1.068e-04, train_time=1.283 [gpua049:0/64] 2024-01-15 12:54:45,767 (trainer:753) INFO: 3epoch:train:11401-11500batch: iter_time=7.611e-05, forward_time=0.163, loss_ctc=177.961, loss_interctc_layer6=163.755, loss_interctc_layer12=151.798, loss_interctc_layer15=149.959, loss_interctc_layer21=176.828, loss=164.060, backward_time=0.297, grad_norm=110.980, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.083, optim0_lr0=1.073e-04, train_time=1.249 [gpua049:0/64] 2024-01-15 12:57:15,684 (trainer:753) INFO: 3epoch:train:11501-11600batch: iter_time=7.648e-05, forward_time=0.142, loss_ctc=148.751, loss_interctc_layer6=138.899, loss_interctc_layer12=128.537, loss_interctc_layer15=127.056, loss_interctc_layer21=146.864, loss=138.022, backward_time=0.326, grad_norm=108.295, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.083, optim0_lr0=1.078e-04, train_time=1.499 [gpua049:0/64] 2024-01-15 12:59:53,741 (trainer:753) INFO: 3epoch:train:11601-11700batch: iter_time=7.774e-05, forward_time=0.212, loss_ctc=165.882, loss_interctc_layer6=143.730, loss_interctc_layer12=132.069, loss_interctc_layer15=129.454, loss_interctc_layer21=164.377, loss=147.102, backward_time=0.336, grad_norm=90.047, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.088, optim0_lr0=1.083e-04, train_time=1.578 [gpua049:0/64] 2024-01-15 13:02:23,302 (trainer:753) INFO: 3epoch:train:11701-11800batch: iter_time=7.582e-05, forward_time=0.143, loss_ctc=164.530, loss_interctc_layer6=153.737, loss_interctc_layer12=141.098, loss_interctc_layer15=139.621, loss_interctc_layer21=163.957, loss=152.588, backward_time=0.371, grad_norm=107.035, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.083, optim0_lr0=1.088e-04, train_time=1.498 [gpua049:0/64] 2024-01-15 13:04:31,803 (trainer:753) INFO: 3epoch:train:11801-11900batch: iter_time=7.636e-05, forward_time=0.143, loss_ctc=179.799, loss_interctc_layer6=151.988, loss_interctc_layer12=140.208, loss_interctc_layer15=137.136, loss_interctc_layer21=177.124, loss=157.251, backward_time=0.298, grad_norm=92.056, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.083, optim0_lr0=1.093e-04, train_time=1.285 [gpua049:0/64] 2024-01-15 13:06:56,204 (trainer:753) INFO: 3epoch:train:11901-12000batch: iter_time=7.916e-05, forward_time=0.143, loss_ctc=201.384, loss_interctc_layer6=157.660, loss_interctc_layer12=145.959, loss_interctc_layer15=143.367, loss_interctc_layer21=200.046, loss=169.683, backward_time=0.373, grad_norm=95.915, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.083, optim0_lr0=1.098e-04, train_time=1.444 [gpua049:0/64] 2024-01-15 13:09:36,636 (trainer:753) INFO: 3epoch:train:12001-12100batch: iter_time=8.100e-05, forward_time=0.143, loss_ctc=167.301, loss_interctc_layer6=145.408, loss_interctc_layer12=133.254, loss_interctc_layer15=130.504, loss_interctc_layer21=166.189, loss=148.531, backward_time=0.350, grad_norm=91.707, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.082, optim0_lr0=1.103e-04, train_time=1.604 [gpua049:0/64] 2024-01-15 13:12:30,771 (trainer:753) INFO: 3epoch:train:12101-12200batch: iter_time=7.854e-05, forward_time=0.142, loss_ctc=148.793, loss_interctc_layer6=140.361, loss_interctc_layer12=129.217, loss_interctc_layer15=127.309, loss_interctc_layer21=146.949, loss=138.526, backward_time=0.367, grad_norm=98.899, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.082, optim0_lr0=1.108e-04, train_time=1.741 [gpua049:0/64] 2024-01-15 13:15:02,099 (trainer:753) INFO: 3epoch:train:12201-12300batch: iter_time=7.552e-05, forward_time=0.143, loss_ctc=188.989, loss_interctc_layer6=172.138, loss_interctc_layer12=158.271, loss_interctc_layer15=155.013, loss_interctc_layer21=187.025, loss=172.287, backward_time=0.339, grad_norm=98.841, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.082, optim0_lr0=1.113e-04, train_time=1.513 [gpua049:0/64] 2024-01-15 13:17:35,788 (trainer:753) INFO: 3epoch:train:12301-12400batch: iter_time=7.830e-05, forward_time=0.156, loss_ctc=176.044, loss_interctc_layer6=163.815, loss_interctc_layer12=150.476, loss_interctc_layer15=148.222, loss_interctc_layer21=174.263, loss=162.564, backward_time=0.355, grad_norm=107.067, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.085, optim0_lr0=1.118e-04, train_time=1.537 [gpua049:0/64] 2024-01-15 13:19:51,664 (trainer:753) INFO: 3epoch:train:12401-12500batch: iter_time=7.599e-05, forward_time=0.163, loss_ctc=161.956, loss_interctc_layer6=139.996, loss_interctc_layer12=128.897, loss_interctc_layer15=126.288, loss_interctc_layer21=160.372, loss=143.502, backward_time=0.347, grad_norm=97.496, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.086, optim0_lr0=1.123e-04, train_time=1.359 [gpua049:0/64] 2024-01-15 13:20:05,882 (multiple_iter_factory:32) INFO: Building 10th iter-factory... [gpua049:0/64] 2024-01-15 13:20:26,368 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') [gpua049:0/64] 2024-01-15 13:20:30,198 (abs_task:1660) INFO: [train] dataset: ESPnetDataset( speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} preprocess: ) [gpua049:0/64] 2024-01-15 13:20:30,198 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, [gpua049:0/64] 2024-01-15 13:20:30,201 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 [gpua049:0/64] 2024-01-15 13:26:55,717 (trainer:753) INFO: 3epoch:train:12501-12600batch: iter_time=2.171, forward_time=0.205, loss_ctc=189.560, loss_interctc_layer6=159.176, loss_interctc_layer12=147.305, loss_interctc_layer15=145.792, loss_interctc_layer21=188.147, loss=165.996, backward_time=0.318, grad_norm=111.466, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.096, optim0_lr0=1.128e-04, train_time=4.240 [gpua049:0/64] 2024-01-15 13:29:30,113 (trainer:753) INFO: 3epoch:train:12601-12700batch: iter_time=7.811e-05, forward_time=0.143, loss_ctc=171.315, loss_interctc_layer6=163.217, loss_interctc_layer12=149.708, loss_interctc_layer15=147.308, loss_interctc_layer21=169.890, loss=160.288, backward_time=0.408, grad_norm=112.895, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.082, optim0_lr0=1.133e-04, train_time=1.544 [gpua049:0/64] 2024-01-15 13:31:49,701 (trainer:753) INFO: 3epoch:train:12701-12800batch: iter_time=7.553e-05, forward_time=0.144, loss_ctc=151.070, loss_interctc_layer6=144.359, loss_interctc_layer12=133.976, loss_interctc_layer15=132.672, loss_interctc_layer21=149.478, loss=142.311, backward_time=0.301, grad_norm=112.833, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.082, optim0_lr0=1.138e-04, train_time=1.396 [gpua049:0/64] 2024-01-15 13:34:11,774 (trainer:753) INFO: 3epoch:train:12801-12900batch: iter_time=7.883e-05, forward_time=0.142, loss_ctc=151.236, loss_interctc_layer6=142.616, loss_interctc_layer12=131.022, loss_interctc_layer15=128.902, loss_interctc_layer21=149.862, loss=140.728, backward_time=0.352, grad_norm=104.550, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.081, optim0_lr0=1.143e-04, train_time=1.421 [gpua049:0/64] 2024-01-15 13:36:27,845 (trainer:753) INFO: 3epoch:train:12901-13000batch: iter_time=7.830e-05, forward_time=0.141, loss_ctc=164.949, loss_interctc_layer6=153.072, loss_interctc_layer12=140.375, loss_interctc_layer15=138.735, loss_interctc_layer21=163.037, loss=152.034, backward_time=0.312, grad_norm=104.550, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.081, optim0_lr0=1.148e-04, train_time=1.360 [gpua049:0/64] 2024-01-15 13:39:01,234 (trainer:753) INFO: 3epoch:train:13001-13100batch: iter_time=7.756e-05, forward_time=0.142, loss_ctc=145.545, loss_interctc_layer6=136.904, loss_interctc_layer12=125.247, loss_interctc_layer15=122.872, loss_interctc_layer21=144.427, loss=134.999, backward_time=0.386, grad_norm=93.378, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.082, optim0_lr0=1.153e-04, train_time=1.534 [gpua049:0/64] 2024-01-15 13:41:37,453 (trainer:753) INFO: 3epoch:train:13101-13200batch: iter_time=7.989e-05, forward_time=0.142, loss_ctc=179.700, loss_interctc_layer6=150.888, loss_interctc_layer12=139.630, loss_interctc_layer15=136.004, loss_interctc_layer21=178.715, loss=156.987, backward_time=0.383, grad_norm=103.436, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.082, optim0_lr0=1.158e-04, train_time=1.562 [gpua049:0/64] 2024-01-15 13:44:37,296 (trainer:753) INFO: 3epoch:train:13201-13300batch: iter_time=7.906e-05, forward_time=0.142, loss_ctc=182.967, loss_interctc_layer6=156.442, loss_interctc_layer12=143.577, loss_interctc_layer15=141.570, loss_interctc_layer21=181.694, loss=161.250, backward_time=0.379, grad_norm=85.553, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.082, optim0_lr0=1.163e-04, train_time=1.798 [gpua049:0/64] 2024-01-15 13:47:10,250 (trainer:753) INFO: 3epoch:train:13301-13400batch: iter_time=8.734e-05, forward_time=0.160, loss_ctc=145.379, loss_interctc_layer6=136.461, loss_interctc_layer12=124.697, loss_interctc_layer15=122.262, loss_interctc_layer21=144.450, loss=134.650, backward_time=0.347, grad_norm=93.990, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.082, optim0_lr0=1.168e-04, train_time=1.529 [gpua049:0/64] 2024-01-15 13:49:40,377 (trainer:753) INFO: 3epoch:train:13401-13500batch: iter_time=7.844e-05, forward_time=0.185, loss_ctc=174.004, loss_interctc_layer6=161.605, loss_interctc_layer12=149.034, loss_interctc_layer15=146.706, loss_interctc_layer21=171.400, loss=160.550, backward_time=0.314, grad_norm=112.472, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.083, optim0_lr0=1.173e-04, train_time=1.501 [gpua049:0/64] 2024-01-15 13:51:56,077 (trainer:753) INFO: 3epoch:train:13501-13600batch: iter_time=7.419e-05, forward_time=0.172, loss_ctc=164.575, loss_interctc_layer6=155.060, loss_interctc_layer12=142.203, loss_interctc_layer15=140.188, loss_interctc_layer21=162.752, loss=152.956, backward_time=0.319, grad_norm=95.629, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.093, optim0_lr0=1.178e-04, train_time=1.355 [gpua049:0/64] 2024-01-15 13:54:07,135 (trainer:753) INFO: 3epoch:train:13601-13700batch: iter_time=7.800e-05, forward_time=0.143, loss_ctc=151.593, loss_interctc_layer6=142.023, loss_interctc_layer12=129.923, loss_interctc_layer15=127.199, loss_interctc_layer21=149.670, loss=140.082, backward_time=0.300, grad_norm=96.882, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.082, optim0_lr0=1.183e-04, train_time=1.313 [gpua049:0/64] 2024-01-15 13:55:37,089 (multiple_iter_factory:32) INFO: Building 11th iter-factory... [gpua049:0/64] 2024-01-15 13:55:57,629 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') [gpua049:0/64] 2024-01-15 13:56:01,332 (abs_task:1660) INFO: [train] dataset: ESPnetDataset( speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} preprocess: ) [gpua049:0/64] 2024-01-15 13:56:01,332 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, [gpua049:0/64] 2024-01-15 13:56:01,335 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 [gpua049:0/64] 2024-01-15 14:05:11,713 (trainer:753) INFO: 3epoch:train:13701-13800batch: iter_time=3.142, forward_time=0.142, loss_ctc=164.096, loss_interctc_layer6=148.441, loss_interctc_layer12=136.250, loss_interctc_layer15=135.068, loss_interctc_layer21=163.872, loss=149.545, backward_time=0.332, grad_norm=102.498, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.082, optim0_lr0=1.188e-04, train_time=6.646 [gpua049:0/64] 2024-01-15 14:07:15,378 (trainer:753) INFO: 3epoch:train:13801-13900batch: iter_time=7.968e-05, forward_time=0.143, loss_ctc=188.786, loss_interctc_layer6=162.527, loss_interctc_layer12=148.892, loss_interctc_layer15=145.964, loss_interctc_layer21=186.720, loss=166.578, backward_time=0.297, grad_norm=106.669, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.082, optim0_lr0=1.193e-04, train_time=1.236 [gpua049:0/64] 2024-01-15 14:09:21,991 (trainer:753) INFO: 3epoch:train:13901-14000batch: iter_time=7.805e-05, forward_time=0.143, loss_ctc=165.681, loss_interctc_layer6=156.680, loss_interctc_layer12=143.248, loss_interctc_layer15=141.932, loss_interctc_layer21=163.459, loss=154.200, backward_time=0.301, grad_norm=106.841, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.082, optim0_lr0=1.198e-04, train_time=1.266 [gpua049:0/64] 2024-01-15 14:12:11,996 (trainer:753) INFO: 3epoch:train:14001-14100batch: iter_time=7.831e-05, forward_time=0.154, loss_ctc=134.772, loss_interctc_layer6=132.564, loss_interctc_layer12=123.343, loss_interctc_layer15=121.661, loss_interctc_layer21=133.832, loss=129.234, backward_time=0.339, grad_norm=105.558, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.082, optim0_lr0=1.203e-04, train_time=1.700 [gpua049:0/64] 2024-01-15 14:14:50,207 (trainer:753) INFO: 3epoch:train:14101-14200batch: iter_time=7.627e-05, forward_time=0.155, loss_ctc=150.977, loss_interctc_layer6=138.093, loss_interctc_layer12=124.728, loss_interctc_layer15=122.779, loss_interctc_layer21=150.156, loss=137.347, backward_time=0.327, grad_norm=89.166, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.082, optim0_lr0=1.208e-04, train_time=1.582 [gpua049:0/64] 2024-01-15 14:17:28,005 (trainer:753) INFO: 3epoch:train:14201-14300batch: iter_time=7.579e-05, forward_time=0.226, loss_ctc=151.055, loss_interctc_layer6=145.936, loss_interctc_layer12=133.173, loss_interctc_layer15=131.266, loss_interctc_layer21=149.899, loss=142.266, backward_time=0.377, grad_norm=100.120, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.092, optim0_lr0=1.213e-04, train_time=1.578 [gpua049:0/64] 2024-01-15 14:20:15,555 (trainer:753) INFO: 3epoch:train:14301-14400batch: iter_time=7.529e-05, forward_time=0.142, loss_ctc=164.279, loss_interctc_layer6=145.306, loss_interctc_layer12=134.257, loss_interctc_layer15=131.260, loss_interctc_layer21=162.925, loss=147.605, backward_time=0.334, grad_norm=105.412, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.083, optim0_lr0=1.218e-04, train_time=1.675 [gpua049:0/64] 2024-01-15 14:23:20,397 (trainer:753) INFO: 3epoch:train:14401-14500batch: iter_time=7.719e-05, forward_time=0.142, loss_ctc=183.094, loss_interctc_layer6=150.571, loss_interctc_layer12=138.184, loss_interctc_layer15=136.400, loss_interctc_layer21=183.053, loss=158.260, backward_time=0.362, grad_norm=101.635, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.083, optim0_lr0=1.223e-04, train_time=1.845 [gpua049:0/64] 2024-01-15 14:25:46,882 (trainer:753) INFO: 3epoch:train:14501-14600batch: iter_time=7.862e-05, forward_time=0.143, loss_ctc=157.899, loss_interctc_layer6=139.110, loss_interctc_layer12=126.688, loss_interctc_layer15=124.279, loss_interctc_layer21=155.890, loss=140.773, backward_time=0.331, grad_norm=88.978, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.082, optim0_lr0=1.228e-04, train_time=1.468 [gpua049:0/64] 2024-01-15 14:28:11,714 (trainer:753) INFO: 3epoch:train:14601-14700batch: iter_time=7.789e-05, forward_time=0.142, loss_ctc=137.277, loss_interctc_layer6=134.778, loss_interctc_layer12=123.405, loss_interctc_layer15=121.077, loss_interctc_layer21=135.833, loss=130.474, backward_time=0.303, grad_norm=110.776, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.082, optim0_lr0=1.233e-04, train_time=1.448 [gpua049:0/64] 2024-01-15 14:30:29,536 (trainer:753) INFO: 3epoch:train:14701-14800batch: iter_time=7.801e-05, forward_time=0.157, loss_ctc=176.580, loss_interctc_layer6=161.819, loss_interctc_layer12=148.520, loss_interctc_layer15=146.437, loss_interctc_layer21=175.943, loss=161.860, backward_time=0.309, grad_norm=89.237, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.084, optim0_lr0=1.238e-04, train_time=1.378 [gpua049:0/64] 2024-01-15 14:32:56,118 (trainer:753) INFO: 3epoch:train:14801-14900batch: iter_time=7.362e-05, forward_time=0.143, loss_ctc=166.240, loss_interctc_layer6=157.006, loss_interctc_layer12=144.608, loss_interctc_layer15=140.832, loss_interctc_layer21=164.551, loss=154.647, backward_time=0.348, grad_norm=105.517, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.083, optim0_lr0=1.243e-04, train_time=1.466 [gpua049:0/64] 2024-01-15 14:35:34,625 (trainer:753) INFO: 3epoch:train:14901-15000batch: iter_time=7.637e-05, forward_time=0.142, loss_ctc=148.952, loss_interctc_layer6=133.153, loss_interctc_layer12=122.116, loss_interctc_layer15=120.212, loss_interctc_layer21=147.003, loss=134.287, backward_time=0.360, grad_norm=94.327, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.083, optim0_lr0=1.248e-04, train_time=1.585 [gpua049:0/64] 2024-01-15 15:05:08,466 (trainer:352) INFO: 3epoch results: [train] iter_time=0.174, forward_time=0.149, loss_ctc=191.975, loss_interctc_layer6=176.181, loss_interctc_layer12=166.150, loss_interctc_layer15=164.567, loss_interctc_layer21=190.623, loss=177.899, backward_time=0.325, grad_norm=111.329, clip=100.000, loss_scale=5.469e+10, optim_step_time=0.083, optim0_lr0=8.751e-05, train_time=1.709, time=7 hours, 7 minutes and 38.64 seconds, total_count=45000, gpu_max_cached_mem_GB=34.508, [valid] loss_ctc=126.191, cer_ctc=0.556, loss_interctc_layer6=114.313, cer_interctc_layer6=0.524, loss_interctc_layer12=107.647, cer_interctc_layer12=0.462, loss_interctc_layer15=103.751, cer_interctc_layer15=0.430, loss_interctc_layer21=124.472, cer_interctc_layer21=0.535, loss=115.275, time=29 minutes and 9.34 seconds, total_count=14013, gpu_max_cached_mem_GB=34.508 [gpua049:0/64] 2024-01-15 15:05:27,552 (trainer:407) INFO: The best model has been updated: valid.cer_ctc, valid.loss_ctc, valid.total_count [gpua049:0/64] 2024-01-15 15:05:27,556 (trainer:286) INFO: 4/45epoch started. Estimated time to finish: 2 weeks, 6 hours and 55 minutes [gpua049:0/64] 2024-01-15 15:05:27,570 (multiple_iter_factory:32) INFO: Building 0th iter-factory... [gpua049:0/64] 2024-01-15 15:05:47,516 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') [gpua049:0/64] 2024-01-15 15:05:51,255 (abs_task:1660) INFO: [train] dataset: ESPnetDataset( speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} preprocess: ) [gpua049:0/64] 2024-01-15 15:05:51,255 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, [gpua049:0/64] 2024-01-15 15:05:51,258 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 [gpua049:0/64] 2024-01-15 15:12:33,436 (trainer:753) INFO: 4epoch:train:1-100batch: iter_time=2.974, forward_time=0.171, loss_ctc=153.006, loss_interctc_layer6=142.683, loss_interctc_layer12=131.429, loss_interctc_layer15=129.667, loss_interctc_layer21=151.642, loss=141.685, backward_time=0.304, grad_norm=97.991, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.085, optim0_lr0=1.253e-04, train_time=4.258 [gpua049:0/64] 2024-01-15 15:14:39,176 (trainer:753) INFO: 4epoch:train:101-200batch: iter_time=8.313e-05, forward_time=0.142, loss_ctc=169.808, loss_interctc_layer6=136.947, loss_interctc_layer12=125.747, loss_interctc_layer15=123.595, loss_interctc_layer21=170.632, loss=145.346, backward_time=0.298, grad_norm=112.323, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.082, optim0_lr0=1.258e-04, train_time=1.257 [gpua049:0/64] 2024-01-15 15:16:44,844 (trainer:753) INFO: 4epoch:train:201-300batch: iter_time=8.331e-05, forward_time=0.144, loss_ctc=161.212, loss_interctc_layer6=147.110, loss_interctc_layer12=133.937, loss_interctc_layer15=131.641, loss_interctc_layer21=159.428, loss=146.665, backward_time=0.306, grad_norm=92.579, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.082, optim0_lr0=1.263e-04, train_time=1.256 [gpua049:0/64] 2024-01-15 15:18:49,645 (trainer:753) INFO: 4epoch:train:301-400batch: iter_time=8.668e-05, forward_time=0.154, loss_ctc=171.570, loss_interctc_layer6=150.080, loss_interctc_layer12=137.885, loss_interctc_layer15=135.564, loss_interctc_layer21=169.468, loss=152.913, backward_time=0.301, grad_norm=102.573, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.082, optim0_lr0=1.268e-04, train_time=1.245 [gpua049:0/64] 2024-01-15 15:21:09,568 (trainer:753) INFO: 4epoch:train:401-500batch: iter_time=8.765e-05, forward_time=0.142, loss_ctc=147.804, loss_interctc_layer6=126.530, loss_interctc_layer12=117.277, loss_interctc_layer15=115.059, loss_interctc_layer21=146.978, loss=130.730, backward_time=0.336, grad_norm=101.099, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.081, optim0_lr0=1.273e-04, train_time=1.402 [gpua049:0/64] 2024-01-15 15:23:53,618 (trainer:753) INFO: 4epoch:train:501-600batch: iter_time=8.112e-05, forward_time=0.184, loss_ctc=142.553, loss_interctc_layer6=128.987, loss_interctc_layer12=118.357, loss_interctc_layer15=116.486, loss_interctc_layer21=140.997, loss=129.476, backward_time=0.379, grad_norm=86.984, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.089, optim0_lr0=1.278e-04, train_time=1.640 [gpua049:0/64] 2024-01-15 15:26:33,875 (trainer:753) INFO: 4epoch:train:601-700batch: iter_time=8.360e-05, forward_time=0.166, loss_ctc=164.405, loss_interctc_layer6=143.929, loss_interctc_layer12=132.375, loss_interctc_layer15=130.577, loss_interctc_layer21=162.243, loss=146.706, backward_time=0.380, grad_norm=99.736, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.088, optim0_lr0=1.283e-04, train_time=1.602 [gpua049:0/64] 2024-01-15 15:28:50,929 (trainer:753) INFO: 4epoch:train:701-800batch: iter_time=8.389e-05, forward_time=0.143, loss_ctc=135.436, loss_interctc_layer6=128.308, loss_interctc_layer12=117.300, loss_interctc_layer15=114.453, loss_interctc_layer21=136.083, loss=126.316, backward_time=0.309, grad_norm=94.440, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.082, optim0_lr0=1.288e-04, train_time=1.370 [gpua049:0/64] 2024-01-15 15:31:16,150 (trainer:753) INFO: 4epoch:train:801-900batch: iter_time=8.262e-05, forward_time=0.149, loss_ctc=141.584, loss_interctc_layer6=127.373, loss_interctc_layer12=116.428, loss_interctc_layer15=114.224, loss_interctc_layer21=140.996, loss=128.121, backward_time=0.338, grad_norm=89.920, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.083, optim0_lr0=1.293e-04, train_time=1.449 [gpua049:0/64] 2024-01-15 15:34:11,698 (trainer:753) INFO: 4epoch:train:901-1000batch: iter_time=8.297e-05, forward_time=0.185, loss_ctc=159.758, loss_interctc_layer6=143.927, loss_interctc_layer12=132.437, loss_interctc_layer15=130.812, loss_interctc_layer21=158.822, loss=145.151, backward_time=0.386, grad_norm=111.968, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.086, optim0_lr0=1.298e-04, train_time=1.758 [gpua049:0/64] 2024-01-15 15:36:39,454 (trainer:753) INFO: 4epoch:train:1001-1100batch: iter_time=7.791e-05, forward_time=0.153, loss_ctc=155.747, loss_interctc_layer6=135.946, loss_interctc_layer12=125.318, loss_interctc_layer15=123.556, loss_interctc_layer21=153.953, loss=138.904, backward_time=0.351, grad_norm=85.550, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.088, optim0_lr0=1.303e-04, train_time=1.477 [gpua049:0/64] 2024-01-15 15:39:01,620 (trainer:753) INFO: 4epoch:train:1101-1200batch: iter_time=7.989e-05, forward_time=0.150, loss_ctc=161.507, loss_interctc_layer6=139.114, loss_interctc_layer12=127.540, loss_interctc_layer15=124.728, loss_interctc_layer21=159.988, loss=142.575, backward_time=0.332, grad_norm=88.270, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.083, optim0_lr0=1.308e-04, train_time=1.421 [gpua049:0/64] 2024-01-15 15:40:41,625 (multiple_iter_factory:32) INFO: Building 1th iter-factory... [gpua049:0/64] 2024-01-15 15:41:01,803 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') [gpua049:0/64] 2024-01-15 15:41:05,525 (abs_task:1660) INFO: [train] dataset: ESPnetDataset( speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} preprocess: ) [gpua049:0/64] 2024-01-15 15:41:05,525 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, [gpua049:0/64] 2024-01-15 15:41:05,528 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 [gpua049:0/64] 2024-01-15 15:47:44,125 (trainer:753) INFO: 4epoch:train:1201-1300batch: iter_time=3.134, forward_time=0.143, loss_ctc=146.248, loss_interctc_layer6=138.643, loss_interctc_layer12=126.197, loss_interctc_layer15=123.887, loss_interctc_layer21=144.769, loss=135.949, backward_time=0.350, grad_norm=114.368, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.083, optim0_lr0=1.313e-04, train_time=5.225 [gpua049:0/64] 2024-01-15 15:49:47,990 (trainer:753) INFO: 4epoch:train:1301-1400batch: iter_time=7.738e-05, forward_time=0.150, loss_ctc=150.771, loss_interctc_layer6=137.712, loss_interctc_layer12=124.453, loss_interctc_layer15=121.638, loss_interctc_layer21=148.327, loss=136.580, backward_time=0.298, grad_norm=96.010, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.083, optim0_lr0=1.318e-04, train_time=1.238 [gpua049:0/64] 2024-01-15 15:52:03,532 (trainer:753) INFO: 4epoch:train:1401-1500batch: iter_time=7.809e-05, forward_time=0.151, loss_ctc=155.221, loss_interctc_layer6=144.318, loss_interctc_layer12=132.527, loss_interctc_layer15=129.981, loss_interctc_layer21=152.985, loss=143.007, backward_time=0.320, grad_norm=95.094, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.083, optim0_lr0=1.323e-04, train_time=1.355 [gpua049:0/64] 2024-01-15 15:54:29,181 (trainer:753) INFO: 4epoch:train:1501-1600batch: iter_time=6.360e-04, forward_time=0.153, loss_ctc=142.461, loss_interctc_layer6=128.911, loss_interctc_layer12=117.328, loss_interctc_layer15=114.740, loss_interctc_layer21=140.704, loss=128.829, backward_time=0.318, grad_norm=92.225, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.083, optim0_lr0=1.328e-04, train_time=1.455 [gpua049:0/64] 2024-01-15 15:56:59,944 (trainer:753) INFO: 4epoch:train:1601-1700batch: iter_time=0.003, forward_time=0.201, loss_ctc=167.226, loss_interctc_layer6=153.093, loss_interctc_layer12=141.810, loss_interctc_layer15=139.227, loss_interctc_layer21=165.717, loss=153.414, backward_time=0.319, grad_norm=119.685, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.086, optim0_lr0=1.333e-04, train_time=1.509 [gpua049:0/64] 2024-01-15 15:59:29,559 (trainer:753) INFO: 4epoch:train:1701-1800batch: iter_time=8.079e-05, forward_time=0.166, loss_ctc=126.687, loss_interctc_layer6=116.400, loss_interctc_layer12=107.147, loss_interctc_layer15=105.395, loss_interctc_layer21=125.832, loss=116.292, backward_time=0.332, grad_norm=83.599, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.081, optim0_lr0=1.338e-04, train_time=1.496 [gpua049:0/64] 2024-01-15 16:01:53,477 (trainer:753) INFO: 4epoch:train:1801-1900batch: iter_time=0.006, forward_time=0.172, loss_ctc=160.237, loss_interctc_layer6=140.582, loss_interctc_layer12=129.526, loss_interctc_layer15=127.736, loss_interctc_layer21=158.187, loss=143.253, backward_time=0.318, grad_norm=93.115, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.084, optim0_lr0=1.343e-04, train_time=1.439 [gpua049:0/64] 2024-01-15 16:04:19,478 (trainer:753) INFO: 4epoch:train:1901-2000batch: iter_time=7.677e-05, forward_time=0.154, loss_ctc=129.377, loss_interctc_layer6=125.869, loss_interctc_layer12=115.024, loss_interctc_layer15=112.976, loss_interctc_layer21=127.947, loss=122.239, backward_time=0.334, grad_norm=99.631, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.082, optim0_lr0=1.348e-04, train_time=1.460 [gpua049:0/64] 2024-01-15 16:06:37,971 (trainer:753) INFO: 4epoch:train:2001-2100batch: iter_time=7.786e-05, forward_time=0.141, loss_ctc=134.323, loss_interctc_layer6=125.889, loss_interctc_layer12=114.019, loss_interctc_layer15=111.418, loss_interctc_layer21=133.266, loss=123.783, backward_time=0.319, grad_norm=80.344, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.082, optim0_lr0=1.353e-04, train_time=1.385 [gpua049:0/64] 2024-01-15 16:08:59,550 (trainer:753) INFO: 4epoch:train:2101-2200batch: iter_time=7.760e-05, forward_time=0.142, loss_ctc=155.407, loss_interctc_layer6=144.472, loss_interctc_layer12=132.460, loss_interctc_layer15=130.422, loss_interctc_layer21=153.184, loss=143.189, backward_time=0.347, grad_norm=125.542, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.081, optim0_lr0=1.358e-04, train_time=1.416 [gpua049:0/64] 2024-01-15 16:11:12,201 (trainer:753) INFO: 4epoch:train:2201-2300batch: iter_time=7.903e-05, forward_time=0.141, loss_ctc=139.421, loss_interctc_layer6=128.950, loss_interctc_layer12=116.599, loss_interctc_layer15=114.602, loss_interctc_layer21=137.941, loss=127.503, backward_time=0.329, grad_norm=96.190, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.082, optim0_lr0=1.363e-04, train_time=1.326 [gpua049:0/64] 2024-01-15 16:13:24,167 (trainer:753) INFO: 4epoch:train:2301-2400batch: iter_time=7.393e-05, forward_time=0.143, loss_ctc=156.168, loss_interctc_layer6=138.777, loss_interctc_layer12=127.013, loss_interctc_layer15=123.791, loss_interctc_layer21=154.327, loss=140.015, backward_time=0.300, grad_norm=94.680, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.082, optim0_lr0=1.368e-04, train_time=1.319 [gpua049:0/64] 2024-01-15 16:15:48,320 (trainer:753) INFO: 4epoch:train:2401-2500batch: iter_time=7.605e-05, forward_time=0.142, loss_ctc=147.872, loss_interctc_layer6=136.850, loss_interctc_layer12=124.526, loss_interctc_layer15=122.004, loss_interctc_layer21=146.010, loss=135.452, backward_time=0.341, grad_norm=82.933, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.081, optim0_lr0=1.373e-04, train_time=1.441 [gpua049:0/64] 2024-01-15 16:16:03,931 (multiple_iter_factory:32) INFO: Building 2th iter-factory... [gpua049:0/64] 2024-01-15 16:16:23,936 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') [gpua049:0/64] 2024-01-15 16:16:27,629 (abs_task:1660) INFO: [train] dataset: ESPnetDataset( speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} preprocess: ) [gpua049:0/64] 2024-01-15 16:16:27,629 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, [gpua049:0/64] 2024-01-15 16:16:27,632 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 [gpua049:0/64] 2024-01-15 16:22:34,448 (trainer:753) INFO: 4epoch:train:2501-2600batch: iter_time=2.439, forward_time=0.149, loss_ctc=147.549, loss_interctc_layer6=137.779, loss_interctc_layer12=125.826, loss_interctc_layer15=123.125, loss_interctc_layer21=146.609, loss=136.178, backward_time=0.308, grad_norm=98.063, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.082, optim0_lr0=1.378e-04, train_time=4.061 [gpua049:0/64] 2024-01-15 16:24:44,004 (trainer:753) INFO: 4epoch:train:2601-2700batch: iter_time=7.517e-05, forward_time=0.151, loss_ctc=163.213, loss_interctc_layer6=132.347, loss_interctc_layer12=121.530, loss_interctc_layer15=118.408, loss_interctc_layer21=161.152, loss=139.330, backward_time=0.298, grad_norm=85.518, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.083, optim0_lr0=1.383e-04, train_time=1.293 [gpua049:0/64] 2024-01-15 16:27:06,262 (trainer:753) INFO: 4epoch:train:2701-2800batch: iter_time=7.947e-05, forward_time=0.163, loss_ctc=153.214, loss_interctc_layer6=140.424, loss_interctc_layer12=128.584, loss_interctc_layer15=125.360, loss_interctc_layer21=152.740, loss=140.064, backward_time=0.339, grad_norm=97.997, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.087, optim0_lr0=1.388e-04, train_time=1.424 [gpua049:0/64] 2024-01-15 16:29:24,077 (trainer:753) INFO: 4epoch:train:2801-2900batch: iter_time=0.002, forward_time=0.171, loss_ctc=164.120, loss_interctc_layer6=144.662, loss_interctc_layer12=132.240, loss_interctc_layer15=129.709, loss_interctc_layer21=162.023, loss=146.551, backward_time=0.308, grad_norm=100.440, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.082, optim0_lr0=1.393e-04, train_time=1.378 [gpua049:0/64] 2024-01-15 16:31:39,911 (trainer:753) INFO: 4epoch:train:2901-3000batch: iter_time=7.501e-05, forward_time=0.144, loss_ctc=141.375, loss_interctc_layer6=121.302, loss_interctc_layer12=111.527, loss_interctc_layer15=109.485, loss_interctc_layer21=140.441, loss=124.826, backward_time=0.310, grad_norm=78.317, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.083, optim0_lr0=1.398e-04, train_time=1.358 [gpua049:0/64] 2024-01-15 16:33:57,753 (trainer:753) INFO: 4epoch:train:3001-3100batch: iter_time=7.556e-05, forward_time=0.161, loss_ctc=138.133, loss_interctc_layer6=124.348, loss_interctc_layer12=113.951, loss_interctc_layer15=111.846, loss_interctc_layer21=136.972, loss=125.050, backward_time=0.314, grad_norm=90.367, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.086, optim0_lr0=1.403e-04, train_time=1.378 [gpua049:0/64] 2024-01-15 16:36:14,763 (trainer:753) INFO: 4epoch:train:3101-3200batch: iter_time=7.561e-05, forward_time=0.151, loss_ctc=159.342, loss_interctc_layer6=139.244, loss_interctc_layer12=127.446, loss_interctc_layer15=125.372, loss_interctc_layer21=157.609, loss=141.802, backward_time=0.313, grad_norm=108.730, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.083, optim0_lr0=1.408e-04, train_time=1.370 [gpua049:0/64] 2024-01-15 16:38:29,965 (trainer:753) INFO: 4epoch:train:3201-3300batch: iter_time=7.620e-05, forward_time=0.142, loss_ctc=130.520, loss_interctc_layer6=123.131, loss_interctc_layer12=111.122, loss_interctc_layer15=108.754, loss_interctc_layer21=128.778, loss=120.461, backward_time=0.304, grad_norm=92.575, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.083, optim0_lr0=1.413e-04, train_time=1.352 [gpua049:0/64] 2024-01-15 16:40:38,708 (trainer:753) INFO: 4epoch:train:3301-3400batch: iter_time=7.579e-05, forward_time=0.143, loss_ctc=133.524, loss_interctc_layer6=122.472, loss_interctc_layer12=111.138, loss_interctc_layer15=108.523, loss_interctc_layer21=131.626, loss=121.457, backward_time=0.305, grad_norm=84.368, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.083, optim0_lr0=1.418e-04, train_time=1.287 [gpua049:0/64] 2024-01-15 16:43:00,833 (trainer:753) INFO: 4epoch:train:3401-3500batch: iter_time=7.739e-05, forward_time=0.143, loss_ctc=149.726, loss_interctc_layer6=137.646, loss_interctc_layer12=126.298, loss_interctc_layer15=123.478, loss_interctc_layer21=148.004, loss=137.031, backward_time=0.349, grad_norm=104.472, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.082, optim0_lr0=1.423e-04, train_time=1.421 [gpua049:0/64] 2024-01-15 16:45:45,098 (trainer:753) INFO: 4epoch:train:3501-3600batch: iter_time=7.708e-05, forward_time=0.142, loss_ctc=151.896, loss_interctc_layer6=132.586, loss_interctc_layer12=120.909, loss_interctc_layer15=118.299, loss_interctc_layer21=150.336, loss=134.805, backward_time=0.349, grad_norm=93.649, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.083, optim0_lr0=1.428e-04, train_time=1.642 [gpua049:0/64] 2024-01-15 16:48:14,635 (trainer:753) INFO: 4epoch:train:3601-3700batch: iter_time=7.805e-05, forward_time=0.143, loss_ctc=155.027, loss_interctc_layer6=133.500, loss_interctc_layer12=121.516, loss_interctc_layer15=117.943, loss_interctc_layer21=153.188, loss=136.235, backward_time=0.350, grad_norm=91.098, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.083, optim0_lr0=1.433e-04, train_time=1.495 [gpua049:0/64] 2024-01-15 16:49:19,759 (multiple_iter_factory:32) INFO: Building 3th iter-factory... [gpua049:0/64] 2024-01-15 16:49:39,529 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') [gpua049:0/64] 2024-01-15 16:49:43,221 (abs_task:1660) INFO: [train] dataset: ESPnetDataset( speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} preprocess: ) [gpua049:0/64] 2024-01-15 16:49:43,221 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, [gpua049:0/64] 2024-01-15 16:49:43,224 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 [gpua049:0/64] 2024-01-15 16:56:01,998 (trainer:753) INFO: 4epoch:train:3701-3800batch: iter_time=1.360, forward_time=0.144, loss_ctc=146.837, loss_interctc_layer6=135.091, loss_interctc_layer12=123.203, loss_interctc_layer15=120.006, loss_interctc_layer21=145.278, loss=134.083, backward_time=0.308, grad_norm=92.042, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.083, optim0_lr0=1.438e-04, train_time=4.673 [gpua049:0/64] 2024-01-15 16:58:07,766 (trainer:753) INFO: 4epoch:train:3801-3900batch: iter_time=7.803e-05, forward_time=0.142, loss_ctc=150.396, loss_interctc_layer6=132.292, loss_interctc_layer12=119.317, loss_interctc_layer15=116.230, loss_interctc_layer21=149.041, loss=133.455, backward_time=0.298, grad_norm=85.563, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.083, optim0_lr0=1.443e-04, train_time=1.257 [gpua049:0/64] 2024-01-15 17:00:31,211 (trainer:753) INFO: 4epoch:train:3901-4000batch: iter_time=7.376e-05, forward_time=0.143, loss_ctc=161.409, loss_interctc_layer6=140.352, loss_interctc_layer12=128.198, loss_interctc_layer15=126.004, loss_interctc_layer21=160.467, loss=143.286, backward_time=0.311, grad_norm=102.791, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.083, optim0_lr0=1.448e-04, train_time=1.434 [gpua049:0/64] 2024-01-15 17:02:37,453 (trainer:753) INFO: 4epoch:train:4001-4100batch: iter_time=7.661e-05, forward_time=0.142, loss_ctc=138.542, loss_interctc_layer6=124.670, loss_interctc_layer12=113.499, loss_interctc_layer15=110.524, loss_interctc_layer21=137.434, loss=124.934, backward_time=0.297, grad_norm=83.841, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.083, optim0_lr0=1.453e-04, train_time=1.262 [gpua049:0/64] 2024-01-15 17:04:40,111 (trainer:753) INFO: 4epoch:train:4101-4200batch: iter_time=7.678e-05, forward_time=0.143, loss_ctc=167.556, loss_interctc_layer6=147.601, loss_interctc_layer12=135.589, loss_interctc_layer15=133.115, loss_interctc_layer21=166.367, loss=150.045, backward_time=0.300, grad_norm=109.229, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.083, optim0_lr0=1.458e-04, train_time=1.226 [gpua049:0/64] 2024-01-15 17:07:08,121 (trainer:753) INFO: 4epoch:train:4201-4300batch: iter_time=7.729e-05, forward_time=0.141, loss_ctc=125.394, loss_interctc_layer6=113.943, loss_interctc_layer12=104.390, loss_interctc_layer15=101.979, loss_interctc_layer21=124.717, loss=114.085, backward_time=0.322, grad_norm=76.750, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.083, optim0_lr0=1.463e-04, train_time=1.480 [gpua049:0/64] 2024-01-15 17:09:35,648 (trainer:753) INFO: 4epoch:train:4301-4400batch: iter_time=7.671e-05, forward_time=0.144, loss_ctc=155.510, loss_interctc_layer6=135.942, loss_interctc_layer12=124.965, loss_interctc_layer15=121.708, loss_interctc_layer21=154.466, loss=138.518, backward_time=0.364, grad_norm=85.683, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.083, optim0_lr0=1.468e-04, train_time=1.475 [gpua049:0/64] 2024-01-15 17:11:46,601 (trainer:753) INFO: 4epoch:train:4401-4500batch: iter_time=7.810e-05, forward_time=0.142, loss_ctc=132.546, loss_interctc_layer6=121.104, loss_interctc_layer12=109.858, loss_interctc_layer15=106.524, loss_interctc_layer21=131.217, loss=120.250, backward_time=0.309, grad_norm=83.485, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.082, optim0_lr0=1.473e-04, train_time=1.309 [gpua049:0/64] 2024-01-15 17:14:34,856 (trainer:753) INFO: 4epoch:train:4501-4600batch: iter_time=7.971e-05, forward_time=0.142, loss_ctc=131.357, loss_interctc_layer6=120.530, loss_interctc_layer12=109.818, loss_interctc_layer15=106.420, loss_interctc_layer21=130.168, loss=119.659, backward_time=0.349, grad_norm=79.492, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.082, optim0_lr0=1.478e-04, train_time=1.682 [gpua049:0/64] 2024-01-15 17:17:11,322 (trainer:753) INFO: 4epoch:train:4601-4700batch: iter_time=7.718e-05, forward_time=0.143, loss_ctc=147.745, loss_interctc_layer6=137.843, loss_interctc_layer12=125.810, loss_interctc_layer15=122.822, loss_interctc_layer21=146.564, loss=136.157, backward_time=0.337, grad_norm=99.179, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.083, optim0_lr0=1.483e-04, train_time=1.564 [gpua049:0/64] 2024-01-15 17:19:34,774 (trainer:753) INFO: 4epoch:train:4701-4800batch: iter_time=7.694e-05, forward_time=0.142, loss_ctc=135.140, loss_interctc_layer6=123.303, loss_interctc_layer12=112.521, loss_interctc_layer15=108.878, loss_interctc_layer21=135.522, loss=123.073, backward_time=0.350, grad_norm=78.419, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.083, optim0_lr0=1.488e-04, train_time=1.434 [gpua049:0/64] 2024-01-15 17:22:22,520 (trainer:753) INFO: 4epoch:train:4801-4900batch: iter_time=7.856e-05, forward_time=0.154, loss_ctc=150.384, loss_interctc_layer6=133.499, loss_interctc_layer12=120.850, loss_interctc_layer15=117.102, loss_interctc_layer21=148.878, loss=134.143, backward_time=0.341, grad_norm=86.010, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.082, optim0_lr0=1.493e-04, train_time=1.677 [gpua049:0/64] 2024-01-15 17:24:43,442 (trainer:753) INFO: 4epoch:train:4901-5000batch: iter_time=7.419e-05, forward_time=0.159, loss_ctc=155.334, loss_interctc_layer6=132.866, loss_interctc_layer12=120.575, loss_interctc_layer15=116.959, loss_interctc_layer21=154.380, loss=136.023, backward_time=0.323, grad_norm=81.917, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.085, optim0_lr0=1.498e-04, train_time=1.408 [gpua049:0/64] 2024-01-15 17:25:03,517 (multiple_iter_factory:32) INFO: Building 4th iter-factory... [gpua049:0/64] 2024-01-15 17:25:23,945 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') [gpua049:0/64] 2024-01-15 17:25:27,597 (abs_task:1660) INFO: [train] dataset: ESPnetDataset( speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} preprocess: ) [gpua049:0/64] 2024-01-15 17:25:27,597 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, [gpua049:0/64] 2024-01-15 17:25:27,600 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 [gpua049:0/64] 2024-01-15 17:32:47,729 (trainer:753) INFO: 4epoch:train:5001-5100batch: iter_time=3.543, forward_time=0.170, loss_ctc=136.018, loss_interctc_layer6=133.822, loss_interctc_layer12=121.322, loss_interctc_layer15=119.242, loss_interctc_layer21=134.215, loss=128.924, backward_time=0.302, grad_norm=108.938, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.085, optim0_lr0=1.503e-04, train_time=4.844 [gpua049:0/64] 2024-01-15 17:34:59,028 (trainer:753) INFO: 4epoch:train:5101-5200batch: iter_time=7.312e-05, forward_time=0.142, loss_ctc=141.982, loss_interctc_layer6=127.329, loss_interctc_layer12=116.125, loss_interctc_layer15=113.452, loss_interctc_layer21=140.321, loss=127.842, backward_time=0.303, grad_norm=96.717, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.083, optim0_lr0=1.508e-04, train_time=1.313 [gpua049:0/64] 2024-01-15 17:37:12,265 (trainer:753) INFO: 4epoch:train:5201-5300batch: iter_time=7.892e-05, forward_time=0.167, loss_ctc=146.829, loss_interctc_layer6=138.273, loss_interctc_layer12=125.309, loss_interctc_layer15=122.028, loss_interctc_layer21=145.038, loss=135.496, backward_time=0.320, grad_norm=93.718, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.088, optim0_lr0=1.513e-04, train_time=1.332 [gpua049:0/64] 2024-01-15 17:39:15,523 (trainer:753) INFO: 4epoch:train:5301-5400batch: iter_time=7.852e-05, forward_time=0.143, loss_ctc=156.212, loss_interctc_layer6=140.944, loss_interctc_layer12=128.372, loss_interctc_layer15=125.298, loss_interctc_layer21=154.995, loss=141.164, backward_time=0.296, grad_norm=97.753, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.081, optim0_lr0=1.518e-04, train_time=1.232 [gpua049:0/64] 2024-01-15 17:41:37,968 (trainer:753) INFO: 4epoch:train:5401-5500batch: iter_time=7.824e-05, forward_time=0.149, loss_ctc=133.144, loss_interctc_layer6=118.730, loss_interctc_layer12=107.367, loss_interctc_layer15=104.574, loss_interctc_layer21=131.812, loss=119.125, backward_time=0.374, grad_norm=79.505, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.083, optim0_lr0=1.523e-04, train_time=1.424 [gpua049:0/64] 2024-01-15 17:43:50,290 (trainer:753) INFO: 4epoch:train:5501-5600batch: iter_time=7.679e-05, forward_time=0.141, loss_ctc=131.701, loss_interctc_layer6=119.929, loss_interctc_layer12=109.389, loss_interctc_layer15=106.937, loss_interctc_layer21=130.752, loss=119.742, backward_time=0.311, grad_norm=78.242, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.082, optim0_lr0=1.528e-04, train_time=1.323 [gpua049:0/64] 2024-01-15 17:46:10,662 (trainer:753) INFO: 4epoch:train:5601-5700batch: iter_time=7.762e-05, forward_time=0.142, loss_ctc=147.147, loss_interctc_layer6=134.840, loss_interctc_layer12=123.097, loss_interctc_layer15=121.000, loss_interctc_layer21=146.017, loss=134.420, backward_time=0.332, grad_norm=101.443, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.082, optim0_lr0=1.533e-04, train_time=1.403 [gpua049:0/64] 2024-01-15 17:48:41,555 (trainer:753) INFO: 4epoch:train:5701-5800batch: iter_time=7.759e-05, forward_time=0.141, loss_ctc=119.509, loss_interctc_layer6=119.444, loss_interctc_layer12=107.418, loss_interctc_layer15=104.063, loss_interctc_layer21=118.783, loss=113.844, backward_time=0.353, grad_norm=89.629, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.082, optim0_lr0=1.538e-04, train_time=1.509 [gpua049:0/64] 2024-01-15 17:51:06,320 (trainer:753) INFO: 4epoch:train:5801-5900batch: iter_time=7.931e-05, forward_time=0.141, loss_ctc=126.175, loss_interctc_layer6=119.253, loss_interctc_layer12=107.836, loss_interctc_layer15=104.922, loss_interctc_layer21=126.211, loss=116.879, backward_time=0.309, grad_norm=77.873, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.082, optim0_lr0=1.543e-04, train_time=1.447 [gpua049:0/64] 2024-01-15 17:53:42,098 (trainer:753) INFO: 4epoch:train:5901-6000batch: iter_time=7.661e-05, forward_time=0.156, loss_ctc=143.488, loss_interctc_layer6=135.332, loss_interctc_layer12=122.720, loss_interctc_layer15=119.035, loss_interctc_layer21=142.839, loss=132.683, backward_time=0.340, grad_norm=157.731, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.082, optim0_lr0=1.548e-04, train_time=1.558 [gpua049:0/64] 2024-01-15 17:56:06,449 (trainer:753) INFO: 4epoch:train:6001-6100batch: iter_time=8.084e-05, forward_time=0.168, loss_ctc=143.685, loss_interctc_layer6=127.305, loss_interctc_layer12=116.098, loss_interctc_layer15=113.118, loss_interctc_layer21=142.058, loss=128.453, backward_time=0.320, grad_norm=77.586, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.084, optim0_lr0=1.553e-04, train_time=1.441 [gpua049:0/64] 2024-01-15 17:58:20,216 (trainer:753) INFO: 4epoch:train:6101-6200batch: iter_time=8.243e-05, forward_time=0.142, loss_ctc=140.219, loss_interctc_layer6=130.674, loss_interctc_layer12=116.847, loss_interctc_layer15=113.643, loss_interctc_layer21=139.001, loss=128.077, backward_time=0.304, grad_norm=86.232, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.081, optim0_lr0=1.558e-04, train_time=1.339 [gpua049:0/64] 2024-01-15 18:00:10,954 (multiple_iter_factory:32) INFO: Building 5th iter-factory... [gpua049:0/64] 2024-01-15 18:00:31,149 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') [gpua049:0/64] 2024-01-15 18:00:34,822 (abs_task:1660) INFO: [train] dataset: ESPnetDataset( speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} preprocess: ) [gpua049:0/64] 2024-01-15 18:00:34,822 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, [gpua049:0/64] 2024-01-15 18:00:34,825 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 [gpua049:0/64] 2024-01-15 18:06:35,941 (trainer:753) INFO: 4epoch:train:6201-6300batch: iter_time=3.242, forward_time=0.246, loss_ctc=132.796, loss_interctc_layer6=131.170, loss_interctc_layer12=118.268, loss_interctc_layer15=115.848, loss_interctc_layer21=131.219, loss=125.860, backward_time=0.347, grad_norm=90.585, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.087, optim0_lr0=1.563e-04, train_time=4.957 [gpua049:0/64] 2024-01-15 18:08:41,989 (trainer:753) INFO: 4epoch:train:6301-6400batch: iter_time=7.737e-05, forward_time=0.143, loss_ctc=140.017, loss_interctc_layer6=130.830, loss_interctc_layer12=117.156, loss_interctc_layer15=113.610, loss_interctc_layer21=138.139, loss=127.950, backward_time=0.298, grad_norm=93.713, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.082, optim0_lr0=1.568e-04, train_time=1.260 [gpua049:0/64] 2024-01-15 18:10:45,381 (trainer:753) INFO: 4epoch:train:6401-6500batch: iter_time=7.578e-05, forward_time=0.143, loss_ctc=143.743, loss_interctc_layer6=135.517, loss_interctc_layer12=122.934, loss_interctc_layer15=119.987, loss_interctc_layer21=140.668, loss=132.570, backward_time=0.298, grad_norm=91.339, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.083, optim0_lr0=1.573e-04, train_time=1.234 [gpua049:0/64] 2024-01-15 18:12:54,986 (trainer:753) INFO: 4epoch:train:6501-6600batch: iter_time=7.824e-05, forward_time=0.141, loss_ctc=131.662, loss_interctc_layer6=121.246, loss_interctc_layer12=108.982, loss_interctc_layer15=105.796, loss_interctc_layer21=130.584, loss=119.654, backward_time=0.306, grad_norm=74.442, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.082, optim0_lr0=1.578e-04, train_time=1.296 [gpua049:0/64] 2024-01-15 18:15:21,339 (trainer:753) INFO: 4epoch:train:6601-6700batch: iter_time=7.695e-05, forward_time=0.145, loss_ctc=156.642, loss_interctc_layer6=145.028, loss_interctc_layer12=131.695, loss_interctc_layer15=129.341, loss_interctc_layer21=154.760, loss=143.493, backward_time=0.345, grad_norm=103.446, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.083, optim0_lr0=1.583e-04, train_time=1.463 [gpua049:0/64] 2024-01-15 18:17:29,957 (trainer:753) INFO: 4epoch:train:6701-6800batch: iter_time=7.701e-05, forward_time=0.142, loss_ctc=122.277, loss_interctc_layer6=111.197, loss_interctc_layer12=102.548, loss_interctc_layer15=99.146, loss_interctc_layer21=121.129, loss=111.260, backward_time=0.298, grad_norm=79.130, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.083, optim0_lr0=1.588e-04, train_time=1.286 [gpua049:0/64] 2024-01-15 18:19:50,986 (trainer:753) INFO: 4epoch:train:6801-6900batch: iter_time=7.824e-05, forward_time=0.143, loss_ctc=149.675, loss_interctc_layer6=132.589, loss_interctc_layer12=121.021, loss_interctc_layer15=117.945, loss_interctc_layer21=148.549, loss=133.956, backward_time=0.317, grad_norm=94.307, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.083, optim0_lr0=1.593e-04, train_time=1.410 [gpua049:0/64] 2024-01-15 18:22:01,424 (trainer:753) INFO: 4epoch:train:6901-7000batch: iter_time=7.805e-05, forward_time=0.141, loss_ctc=120.618, loss_interctc_layer6=118.302, loss_interctc_layer12=107.430, loss_interctc_layer15=105.512, loss_interctc_layer21=120.522, loss=114.477, backward_time=0.300, grad_norm=83.795, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.082, optim0_lr0=1.598e-04, train_time=1.304 [gpua049:0/64] 2024-01-15 18:24:26,797 (trainer:753) INFO: 4epoch:train:7001-7100batch: iter_time=7.447e-05, forward_time=0.144, loss_ctc=125.798, loss_interctc_layer6=118.933, loss_interctc_layer12=107.345, loss_interctc_layer15=103.819, loss_interctc_layer21=124.865, loss=116.152, backward_time=0.366, grad_norm=82.692, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.082, optim0_lr0=1.603e-04, train_time=1.454 [gpua049:0/64] 2024-01-15 18:26:33,628 (trainer:753) INFO: 4epoch:train:7101-7200batch: iter_time=7.709e-05, forward_time=0.142, loss_ctc=141.281, loss_interctc_layer6=134.107, loss_interctc_layer12=122.043, loss_interctc_layer15=119.263, loss_interctc_layer21=140.651, loss=131.469, backward_time=0.312, grad_norm=93.734, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.082, optim0_lr0=1.608e-04, train_time=1.268 [gpua049:0/64] 2024-01-15 18:28:38,258 (trainer:753) INFO: 4epoch:train:7201-7300batch: iter_time=7.786e-05, forward_time=0.141, loss_ctc=128.627, loss_interctc_layer6=119.856, loss_interctc_layer12=108.649, loss_interctc_layer15=105.711, loss_interctc_layer21=128.865, loss=118.341, backward_time=0.295, grad_norm=94.230, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.082, optim0_lr0=1.613e-04, train_time=1.246 [gpua049:0/64] 2024-01-15 18:31:32,164 (trainer:753) INFO: 4epoch:train:7301-7400batch: iter_time=7.892e-05, forward_time=0.143, loss_ctc=144.338, loss_interctc_layer6=131.325, loss_interctc_layer12=119.357, loss_interctc_layer15=115.055, loss_interctc_layer21=142.995, loss=130.614, backward_time=0.374, grad_norm=86.096, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.082, optim0_lr0=1.618e-04, train_time=1.739 [gpua049:0/64] 2024-01-15 18:33:58,844 (trainer:753) INFO: 4epoch:train:7401-7500batch: iter_time=7.646e-05, forward_time=0.143, loss_ctc=138.580, loss_interctc_layer6=129.540, loss_interctc_layer12=116.324, loss_interctc_layer15=112.881, loss_interctc_layer21=137.554, loss=126.976, backward_time=0.342, grad_norm=81.456, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.082, optim0_lr0=1.623e-04, train_time=1.467 [gpua049:0/64] 2024-01-15 18:34:01,967 (multiple_iter_factory:32) INFO: Building 6th iter-factory... [gpua049:0/64] 2024-01-15 18:34:22,494 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') [gpua049:0/64] 2024-01-15 18:34:26,222 (abs_task:1660) INFO: [train] dataset: ESPnetDataset( speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} preprocess: ) [gpua049:0/64] 2024-01-15 18:34:26,222 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, [gpua049:0/64] 2024-01-15 18:34:26,225 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 [gpua049:0/64] 2024-01-15 18:42:09,590 (trainer:753) INFO: 4epoch:train:7501-7600batch: iter_time=1.384, forward_time=0.143, loss_ctc=129.704, loss_interctc_layer6=130.345, loss_interctc_layer12=117.707, loss_interctc_layer15=114.041, loss_interctc_layer21=128.445, loss=124.048, backward_time=0.311, grad_norm=94.544, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.084, optim0_lr0=1.628e-04, train_time=4.907 [gpua049:0/64] 2024-01-15 18:44:13,009 (trainer:753) INFO: 4epoch:train:7601-7700batch: iter_time=7.287e-05, forward_time=0.143, loss_ctc=134.702, loss_interctc_layer6=125.965, loss_interctc_layer12=113.061, loss_interctc_layer15=109.868, loss_interctc_layer21=133.582, loss=123.436, backward_time=0.297, grad_norm=77.654, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.084, optim0_lr0=1.633e-04, train_time=1.234 [gpua049:0/64] 2024-01-15 18:46:19,979 (trainer:753) INFO: 4epoch:train:7701-7800batch: iter_time=7.365e-05, forward_time=0.143, loss_ctc=139.539, loss_interctc_layer6=134.207, loss_interctc_layer12=120.913, loss_interctc_layer15=117.760, loss_interctc_layer21=138.305, loss=130.145, backward_time=0.298, grad_norm=83.835, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.084, optim0_lr0=1.638e-04, train_time=1.269 [gpua049:0/64] 2024-01-15 18:49:04,729 (trainer:753) INFO: 4epoch:train:7801-7900batch: iter_time=7.564e-05, forward_time=0.143, loss_ctc=148.735, loss_interctc_layer6=136.828, loss_interctc_layer12=123.724, loss_interctc_layer15=120.857, loss_interctc_layer21=148.807, loss=135.790, backward_time=0.334, grad_norm=98.363, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.084, optim0_lr0=1.643e-04, train_time=1.647 [gpua049:0/64] 2024-01-15 18:51:10,780 (trainer:753) INFO: 4epoch:train:7901-8000batch: iter_time=7.679e-05, forward_time=0.142, loss_ctc=126.214, loss_interctc_layer6=115.294, loss_interctc_layer12=104.738, loss_interctc_layer15=102.794, loss_interctc_layer21=125.540, loss=114.916, backward_time=0.308, grad_norm=85.986, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.084, optim0_lr0=1.648e-04, train_time=1.260 [gpua049:0/64] 2024-01-15 18:54:21,516 (trainer:753) INFO: 4epoch:train:8001-8100batch: iter_time=7.504e-05, forward_time=0.142, loss_ctc=127.592, loss_interctc_layer6=118.523, loss_interctc_layer12=107.368, loss_interctc_layer15=104.316, loss_interctc_layer21=126.730, loss=116.906, backward_time=0.391, grad_norm=77.166, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.084, optim0_lr0=1.653e-04, train_time=1.907 [gpua049:0/64] 2024-01-15 18:56:37,110 (trainer:753) INFO: 4epoch:train:8101-8200batch: iter_time=7.394e-05, forward_time=0.145, loss_ctc=143.397, loss_interctc_layer6=131.698, loss_interctc_layer12=120.603, loss_interctc_layer15=117.326, loss_interctc_layer21=142.710, loss=131.147, backward_time=0.337, grad_norm=89.242, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.084, optim0_lr0=1.658e-04, train_time=1.356 [gpua049:0/64] 2024-01-15 18:59:01,554 (trainer:753) INFO: 4epoch:train:8201-8300batch: iter_time=7.705e-05, forward_time=0.141, loss_ctc=116.907, loss_interctc_layer6=116.535, loss_interctc_layer12=104.871, loss_interctc_layer15=101.613, loss_interctc_layer21=116.397, loss=111.265, backward_time=0.304, grad_norm=80.855, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.082, optim0_lr0=1.663e-04, train_time=1.444 [gpua049:0/64] 2024-01-15 19:01:17,735 (trainer:753) INFO: 4epoch:train:8301-8400batch: iter_time=7.780e-05, forward_time=0.163, loss_ctc=120.661, loss_interctc_layer6=115.271, loss_interctc_layer12=103.373, loss_interctc_layer15=100.370, loss_interctc_layer21=119.728, loss=111.881, backward_time=0.311, grad_norm=70.766, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.085, optim0_lr0=1.668e-04, train_time=1.362 [gpua049:0/64] 2024-01-15 19:04:04,398 (trainer:753) INFO: 4epoch:train:8401-8500batch: iter_time=7.970e-05, forward_time=0.142, loss_ctc=133.327, loss_interctc_layer6=128.889, loss_interctc_layer12=116.264, loss_interctc_layer15=113.060, loss_interctc_layer21=132.024, loss=124.713, backward_time=0.382, grad_norm=88.788, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.082, optim0_lr0=1.673e-04, train_time=1.663 [gpua049:0/64] 2024-01-15 19:06:44,129 (trainer:753) INFO: 4epoch:train:8501-8600batch: iter_time=7.766e-05, forward_time=0.142, loss_ctc=141.387, loss_interctc_layer6=125.811, loss_interctc_layer12=113.567, loss_interctc_layer15=109.961, loss_interctc_layer21=139.454, loss=126.036, backward_time=0.384, grad_norm=85.121, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.085, optim0_lr0=1.678e-04, train_time=1.601 [gpua049:0/64] 2024-01-15 19:09:13,894 (trainer:753) INFO: 4epoch:train:8601-8700batch: iter_time=7.314e-05, forward_time=0.189, loss_ctc=137.107, loss_interctc_layer6=126.107, loss_interctc_layer12=113.378, loss_interctc_layer15=109.220, loss_interctc_layer21=136.303, loss=124.423, backward_time=0.324, grad_norm=78.554, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.083, optim0_lr0=1.683e-04, train_time=1.497 [gpua049:0/64] 2024-01-15 19:11:19,758 (multiple_iter_factory:32) INFO: Building 7th iter-factory... [gpua049:0/64] 2024-01-15 19:11:39,623 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') [gpua049:0/64] 2024-01-15 19:11:43,281 (abs_task:1660) INFO: [train] dataset: ESPnetDataset( speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} preprocess: ) [gpua049:0/64] 2024-01-15 19:11:43,281 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, [gpua049:0/64] 2024-01-15 19:11:43,285 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 [gpua049:0/64] 2024-01-15 19:18:07,200 (trainer:753) INFO: 4epoch:train:8701-8800batch: iter_time=3.492, forward_time=0.154, loss_ctc=131.572, loss_interctc_layer6=127.458, loss_interctc_layer12=115.023, loss_interctc_layer15=111.503, loss_interctc_layer21=130.525, loss=123.216, backward_time=0.380, grad_norm=83.976, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.082, optim0_lr0=1.688e-04, train_time=5.333 [gpua049:0/64] 2024-01-15 19:20:21,776 (trainer:753) INFO: 4epoch:train:8801-8900batch: iter_time=7.613e-05, forward_time=0.142, loss_ctc=145.040, loss_interctc_layer6=125.821, loss_interctc_layer12=113.429, loss_interctc_layer15=109.461, loss_interctc_layer21=144.644, loss=127.679, backward_time=0.337, grad_norm=87.800, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.082, optim0_lr0=1.693e-04, train_time=1.346 [gpua049:0/64] 2024-01-15 19:22:48,705 (trainer:753) INFO: 4epoch:train:8901-9000batch: iter_time=7.933e-05, forward_time=0.142, loss_ctc=154.260, loss_interctc_layer6=132.682, loss_interctc_layer12=121.003, loss_interctc_layer15=116.981, loss_interctc_layer21=153.331, loss=135.651, backward_time=0.307, grad_norm=104.459, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.082, optim0_lr0=1.698e-04, train_time=1.469 [gpua049:0/64] 2024-01-15 19:24:59,208 (trainer:753) INFO: 4epoch:train:9001-9100batch: iter_time=7.608e-05, forward_time=0.142, loss_ctc=132.984, loss_interctc_layer6=118.614, loss_interctc_layer12=107.543, loss_interctc_layer15=104.048, loss_interctc_layer21=132.062, loss=119.050, backward_time=0.310, grad_norm=89.112, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.082, optim0_lr0=1.703e-04, train_time=1.305 [gpua049:0/64] 2024-01-15 19:27:35,036 (trainer:753) INFO: 4epoch:train:9101-9200batch: iter_time=7.638e-05, forward_time=0.143, loss_ctc=160.069, loss_interctc_layer6=141.051, loss_interctc_layer12=128.926, loss_interctc_layer15=125.065, loss_interctc_layer21=158.018, loss=142.626, backward_time=0.341, grad_norm=101.652, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.082, optim0_lr0=1.708e-04, train_time=1.558 [gpua049:0/64] 2024-01-15 19:30:17,773 (trainer:753) INFO: 4epoch:train:9201-9300batch: iter_time=7.966e-05, forward_time=0.141, loss_ctc=119.464, loss_interctc_layer6=107.172, loss_interctc_layer12=97.302, loss_interctc_layer15=94.739, loss_interctc_layer21=118.420, loss=107.419, backward_time=0.353, grad_norm=73.649, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.081, optim0_lr0=1.713e-04, train_time=1.627 [gpua049:0/64] 2024-01-15 19:32:41,869 (trainer:753) INFO: 4epoch:train:9301-9400batch: iter_time=7.956e-05, forward_time=0.144, loss_ctc=149.938, loss_interctc_layer6=131.142, loss_interctc_layer12=119.071, loss_interctc_layer15=115.618, loss_interctc_layer21=149.335, loss=133.021, backward_time=0.319, grad_norm=87.603, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.082, optim0_lr0=1.718e-04, train_time=1.441 [gpua049:0/64] 2024-01-15 19:35:19,933 (trainer:753) INFO: 4epoch:train:9401-9500batch: iter_time=8.072e-05, forward_time=0.142, loss_ctc=128.244, loss_interctc_layer6=114.771, loss_interctc_layer12=103.758, loss_interctc_layer15=101.322, loss_interctc_layer21=126.060, loss=114.831, backward_time=0.344, grad_norm=89.458, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.082, optim0_lr0=1.723e-04, train_time=1.580 [gpua049:0/64] 2024-01-15 19:37:52,849 (trainer:753) INFO: 4epoch:train:9501-9600batch: iter_time=7.855e-05, forward_time=0.142, loss_ctc=123.591, loss_interctc_layer6=115.878, loss_interctc_layer12=103.880, loss_interctc_layer15=100.588, loss_interctc_layer21=123.887, loss=113.565, backward_time=0.384, grad_norm=75.568, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.082, optim0_lr0=1.728e-04, train_time=1.529 [gpua049:0/64] 2024-01-15 19:40:09,769 (trainer:753) INFO: 4epoch:train:9601-9700batch: iter_time=7.753e-05, forward_time=0.142, loss_ctc=141.635, loss_interctc_layer6=130.605, loss_interctc_layer12=118.337, loss_interctc_layer15=115.180, loss_interctc_layer21=140.374, loss=129.226, backward_time=0.311, grad_norm=101.466, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.082, optim0_lr0=1.733e-04, train_time=1.369 [gpua049:0/64] 2024-01-15 19:42:32,485 (trainer:753) INFO: 4epoch:train:9701-9800batch: iter_time=7.709e-05, forward_time=0.142, loss_ctc=132.546, loss_interctc_layer6=118.852, loss_interctc_layer12=106.457, loss_interctc_layer15=103.677, loss_interctc_layer21=130.348, loss=118.376, backward_time=0.307, grad_norm=90.861, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.082, optim0_lr0=1.738e-04, train_time=1.427 [gpua049:0/64] 2024-01-15 19:45:09,176 (trainer:753) INFO: 4epoch:train:9801-9900batch: iter_time=7.814e-05, forward_time=0.142, loss_ctc=144.058, loss_interctc_layer6=127.636, loss_interctc_layer12=114.170, loss_interctc_layer15=110.620, loss_interctc_layer21=142.579, loss=127.813, backward_time=0.398, grad_norm=79.957, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.081, optim0_lr0=1.743e-04, train_time=1.567 [gpua049:0/64] 2024-01-15 19:48:42,018 (trainer:753) INFO: 4epoch:train:9901-10000batch: iter_time=7.624e-05, forward_time=0.143, loss_ctc=148.922, loss_interctc_layer6=125.918, loss_interctc_layer12=113.945, loss_interctc_layer15=110.154, loss_interctc_layer21=148.525, loss=129.493, backward_time=0.478, grad_norm=83.310, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.082, optim0_lr0=1.748e-04, train_time=2.128 [gpua049:0/64] 2024-01-15 19:48:46,233 (multiple_iter_factory:32) INFO: Building 8th iter-factory... [gpua049:0/64] 2024-01-15 19:49:07,403 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') [gpua049:0/64] 2024-01-15 19:49:11,377 (abs_task:1660) INFO: [train] dataset: ESPnetDataset( speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} preprocess: ) [gpua049:0/64] 2024-01-15 19:49:11,377 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, [gpua049:0/64] 2024-01-15 19:49:11,381 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 [gpua049:0/64] 2024-01-15 19:57:40,526 (trainer:753) INFO: 4epoch:train:10001-10100batch: iter_time=1.396, forward_time=0.157, loss_ctc=128.288, loss_interctc_layer6=127.780, loss_interctc_layer12=115.964, loss_interctc_layer15=112.161, loss_interctc_layer21=126.763, loss=122.191, backward_time=0.312, grad_norm=87.334, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.082, optim0_lr0=1.753e-04, train_time=5.385 [gpua049:0/64] 2024-01-15 19:59:57,047 (trainer:753) INFO: 4epoch:train:10101-10200batch: iter_time=7.417e-05, forward_time=0.142, loss_ctc=132.180, loss_interctc_layer6=122.938, loss_interctc_layer12=111.031, loss_interctc_layer15=107.189, loss_interctc_layer21=131.273, loss=120.922, backward_time=0.311, grad_norm=73.112, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.082, optim0_lr0=1.758e-04, train_time=1.365 [gpua049:0/64] 2024-01-15 20:01:59,187 (trainer:753) INFO: 4epoch:train:10201-10300batch: iter_time=7.922e-05, forward_time=0.143, loss_ctc=135.562, loss_interctc_layer6=130.907, loss_interctc_layer12=117.189, loss_interctc_layer15=113.577, loss_interctc_layer21=134.933, loss=126.434, backward_time=0.298, grad_norm=84.757, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.081, optim0_lr0=1.763e-04, train_time=1.221 [gpua049:0/64] 2024-01-15 20:04:16,666 (trainer:753) INFO: 4epoch:train:10301-10400batch: iter_time=8.442e-05, forward_time=0.159, loss_ctc=145.685, loss_interctc_layer6=133.884, loss_interctc_layer12=121.543, loss_interctc_layer15=118.158, loss_interctc_layer21=144.788, loss=132.812, backward_time=0.325, grad_norm=88.033, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.084, optim0_lr0=1.768e-04, train_time=1.374 [gpua049:0/64] 2024-01-15 20:07:03,175 (trainer:753) INFO: 4epoch:train:10401-10500batch: iter_time=7.877e-05, forward_time=0.141, loss_ctc=123.483, loss_interctc_layer6=112.415, loss_interctc_layer12=102.099, loss_interctc_layer15=99.351, loss_interctc_layer21=122.444, loss=111.958, backward_time=0.344, grad_norm=84.471, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.082, optim0_lr0=1.773e-04, train_time=1.665 [gpua049:0/64] 2024-01-15 20:10:37,053 (trainer:753) INFO: 4epoch:train:10501-10600batch: iter_time=7.753e-05, forward_time=0.141, loss_ctc=124.983, loss_interctc_layer6=115.453, loss_interctc_layer12=104.924, loss_interctc_layer15=101.430, loss_interctc_layer21=124.022, loss=114.162, backward_time=0.539, grad_norm=81.311, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.081, optim0_lr0=1.778e-04, train_time=2.139 [gpua049:0/64] 2024-01-15 20:12:58,202 (trainer:753) INFO: 4epoch:train:10601-10700batch: iter_time=7.450e-05, forward_time=0.179, loss_ctc=139.123, loss_interctc_layer6=130.615, loss_interctc_layer12=118.772, loss_interctc_layer15=114.297, loss_interctc_layer21=138.336, loss=128.229, backward_time=0.322, grad_norm=88.135, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.097, optim0_lr0=1.783e-04, train_time=1.411 [gpua049:0/64] 2024-01-15 20:15:21,441 (trainer:753) INFO: 4epoch:train:10701-10800batch: iter_time=7.547e-05, forward_time=0.220, loss_ctc=114.063, loss_interctc_layer6=114.036, loss_interctc_layer12=103.668, loss_interctc_layer15=99.575, loss_interctc_layer21=113.529, loss=108.974, backward_time=0.322, grad_norm=82.488, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.084, optim0_lr0=1.788e-04, train_time=1.432 [gpua049:0/64] 2024-01-15 20:17:37,432 (trainer:753) INFO: 4epoch:train:10801-10900batch: iter_time=5.118e-04, forward_time=0.151, loss_ctc=119.368, loss_interctc_layer6=113.929, loss_interctc_layer12=103.056, loss_interctc_layer15=99.417, loss_interctc_layer21=118.215, loss=110.797, backward_time=0.309, grad_norm=76.022, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.082, optim0_lr0=1.793e-04, train_time=1.360 [gpua049:0/64] 2024-01-15 20:20:14,007 (trainer:753) INFO: 4epoch:train:10901-11000batch: iter_time=7.527e-05, forward_time=0.142, loss_ctc=132.311, loss_interctc_layer6=127.343, loss_interctc_layer12=115.218, loss_interctc_layer15=111.406, loss_interctc_layer21=131.240, loss=123.503, backward_time=0.353, grad_norm=102.822, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.081, optim0_lr0=1.798e-04, train_time=1.566 [gpua049:0/64] 2024-01-15 20:22:23,605 (trainer:753) INFO: 4epoch:train:11001-11100batch: iter_time=7.598e-05, forward_time=0.142, loss_ctc=137.522, loss_interctc_layer6=122.771, loss_interctc_layer12=110.892, loss_interctc_layer15=107.737, loss_interctc_layer21=136.730, loss=123.131, backward_time=0.314, grad_norm=99.898, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.081, optim0_lr0=1.803e-04, train_time=1.296 [gpua049:0/64] 2024-01-15 20:25:19,131 (trainer:753) INFO: 4epoch:train:11101-11200batch: iter_time=7.539e-05, forward_time=0.142, loss_ctc=133.550, loss_interctc_layer6=125.132, loss_interctc_layer12=111.822, loss_interctc_layer15=107.922, loss_interctc_layer21=132.601, loss=122.205, backward_time=0.407, grad_norm=91.277, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.081, optim0_lr0=1.808e-04, train_time=1.755 [gpua049:0/64] 2024-01-15 20:26:45,483 (multiple_iter_factory:32) INFO: Building 9th iter-factory... [gpua049:0/64] 2024-01-15 20:27:05,670 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') [gpua049:0/64] 2024-01-15 20:27:09,386 (abs_task:1660) INFO: [train] dataset: ESPnetDataset( speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} preprocess: ) [gpua049:0/64] 2024-01-15 20:27:09,386 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, [gpua049:0/64] 2024-01-15 20:27:09,389 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 [gpua049:0/64] 2024-01-15 20:32:50,798 (trainer:753) INFO: 4epoch:train:11201-11300batch: iter_time=2.992, forward_time=0.143, loss_ctc=125.083, loss_interctc_layer6=125.872, loss_interctc_layer12=113.556, loss_interctc_layer15=110.246, loss_interctc_layer21=124.125, loss=119.776, backward_time=0.307, grad_norm=93.865, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.082, optim0_lr0=1.813e-04, train_time=4.516 [gpua049:0/64] 2024-01-15 20:35:20,990 (trainer:753) INFO: 4epoch:train:11301-11400batch: iter_time=7.667e-05, forward_time=0.142, loss_ctc=130.221, loss_interctc_layer6=123.895, loss_interctc_layer12=110.588, loss_interctc_layer15=106.585, loss_interctc_layer21=130.095, loss=120.277, backward_time=0.317, grad_norm=89.041, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.082, optim0_lr0=1.818e-04, train_time=1.502 [gpua049:0/64] 2024-01-15 20:37:32,820 (trainer:753) INFO: 4epoch:train:11401-11500batch: iter_time=7.742e-05, forward_time=0.142, loss_ctc=133.510, loss_interctc_layer6=130.404, loss_interctc_layer12=117.340, loss_interctc_layer15=112.823, loss_interctc_layer21=131.845, loss=125.184, backward_time=0.330, grad_norm=94.435, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.082, optim0_lr0=1.823e-04, train_time=1.318 [gpua049:0/64] 2024-01-15 20:39:48,188 (trainer:753) INFO: 4epoch:train:11501-11600batch: iter_time=8.001e-05, forward_time=0.142, loss_ctc=126.254, loss_interctc_layer6=116.723, loss_interctc_layer12=105.370, loss_interctc_layer15=102.203, loss_interctc_layer21=126.131, loss=115.336, backward_time=0.311, grad_norm=77.107, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.082, optim0_lr0=1.828e-04, train_time=1.353 [gpua049:0/64] 2024-01-15 20:41:57,044 (trainer:753) INFO: 4epoch:train:11601-11700batch: iter_time=7.826e-05, forward_time=0.154, loss_ctc=148.223, loss_interctc_layer6=138.661, loss_interctc_layer12=127.025, loss_interctc_layer15=122.739, loss_interctc_layer21=146.567, loss=136.643, backward_time=0.312, grad_norm=93.336, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.082, optim0_lr0=1.833e-04, train_time=1.288 [gpua049:0/64] 2024-01-15 20:44:18,150 (trainer:753) INFO: 4epoch:train:11701-11800batch: iter_time=7.976e-05, forward_time=0.141, loss_ctc=114.098, loss_interctc_layer6=106.224, loss_interctc_layer12=96.881, loss_interctc_layer15=93.697, loss_interctc_layer21=113.955, loss=104.971, backward_time=0.323, grad_norm=68.734, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.081, optim0_lr0=1.838e-04, train_time=1.411 [gpua049:0/64] 2024-01-15 20:46:26,089 (trainer:753) INFO: 4epoch:train:11801-11900batch: iter_time=5.016e-04, forward_time=0.159, loss_ctc=144.950, loss_interctc_layer6=128.312, loss_interctc_layer12=116.722, loss_interctc_layer15=113.239, loss_interctc_layer21=142.992, loss=129.243, backward_time=0.309, grad_norm=83.556, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.083, optim0_lr0=1.843e-04, train_time=1.277 [gpua049:0/64] 2024-01-15 20:48:39,902 (trainer:753) INFO: 4epoch:train:11901-12000batch: iter_time=7.568e-05, forward_time=0.149, loss_ctc=113.678, loss_interctc_layer6=114.067, loss_interctc_layer12=103.659, loss_interctc_layer15=100.122, loss_interctc_layer21=113.218, loss=108.949, backward_time=0.307, grad_norm=73.021, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.082, optim0_lr0=1.848e-04, train_time=1.340 [gpua049:0/64] 2024-01-15 20:50:56,024 (trainer:753) INFO: 4epoch:train:12001-12100batch: iter_time=7.813e-05, forward_time=0.142, loss_ctc=118.476, loss_interctc_layer6=113.476, loss_interctc_layer12=101.378, loss_interctc_layer15=97.833, loss_interctc_layer21=118.039, loss=109.840, backward_time=0.302, grad_norm=64.789, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.082, optim0_lr0=1.853e-04, train_time=1.361 [gpua049:0/64] 2024-01-15 20:53:29,874 (trainer:753) INFO: 4epoch:train:12101-12200batch: iter_time=7.911e-05, forward_time=0.143, loss_ctc=134.116, loss_interctc_layer6=126.610, loss_interctc_layer12=114.848, loss_interctc_layer15=111.602, loss_interctc_layer21=133.513, loss=124.138, backward_time=0.322, grad_norm=89.463, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.082, optim0_lr0=1.858e-04, train_time=1.538 [gpua049:0/64] 2024-01-15 20:56:15,198 (trainer:753) INFO: 4epoch:train:12201-12300batch: iter_time=7.776e-05, forward_time=0.143, loss_ctc=123.815, loss_interctc_layer6=115.877, loss_interctc_layer12=104.072, loss_interctc_layer15=100.595, loss_interctc_layer21=123.142, loss=113.500, backward_time=0.316, grad_norm=89.164, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.082, optim0_lr0=1.863e-04, train_time=1.653 [gpua049:0/64] 2024-01-15 20:58:32,363 (trainer:753) INFO: 4epoch:train:12301-12400batch: iter_time=7.737e-05, forward_time=0.170, loss_ctc=136.068, loss_interctc_layer6=125.089, loss_interctc_layer12=111.280, loss_interctc_layer15=107.473, loss_interctc_layer21=135.064, loss=122.995, backward_time=0.306, grad_norm=77.610, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.084, optim0_lr0=1.868e-04, train_time=1.371 [gpua049:0/64] 2024-01-15 21:01:15,344 (trainer:753) INFO: 4epoch:train:12401-12500batch: iter_time=7.493e-05, forward_time=0.143, loss_ctc=130.533, loss_interctc_layer6=123.905, loss_interctc_layer12=110.150, loss_interctc_layer15=106.345, loss_interctc_layer21=129.423, loss=120.071, backward_time=0.315, grad_norm=81.800, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.081, optim0_lr0=1.873e-04, train_time=1.629 [gpua049:0/64] 2024-01-15 21:01:18,512 (multiple_iter_factory:32) INFO: Building 10th iter-factory... [gpua049:0/64] 2024-01-15 21:01:38,676 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') [gpua049:0/64] 2024-01-15 21:01:42,353 (abs_task:1660) INFO: [train] dataset: ESPnetDataset( speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} preprocess: ) [gpua049:0/64] 2024-01-15 21:01:42,353 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, [gpua049:0/64] 2024-01-15 21:01:42,356 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 [gpua049:0/64] 2024-01-15 21:08:12,334 (trainer:753) INFO: 4epoch:train:12501-12600batch: iter_time=1.409, forward_time=0.159, loss_ctc=131.150, loss_interctc_layer6=125.066, loss_interctc_layer12=112.608, loss_interctc_layer15=108.420, loss_interctc_layer21=131.088, loss=121.667, backward_time=0.301, grad_norm=123.147, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.084, optim0_lr0=1.878e-04, train_time=4.170 [gpua049:0/64] 2024-01-15 21:10:20,987 (trainer:753) INFO: 4epoch:train:12601-12700batch: iter_time=7.352e-05, forward_time=0.142, loss_ctc=148.958, loss_interctc_layer6=121.571, loss_interctc_layer12=108.799, loss_interctc_layer15=105.300, loss_interctc_layer21=146.314, loss=126.188, backward_time=0.305, grad_norm=86.677, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.083, optim0_lr0=1.883e-04, train_time=1.286 [gpua049:0/64] 2024-01-15 21:12:29,953 (trainer:753) INFO: 4epoch:train:12701-12800batch: iter_time=7.413e-05, forward_time=0.142, loss_ctc=138.473, loss_interctc_layer6=129.067, loss_interctc_layer12=115.915, loss_interctc_layer15=111.413, loss_interctc_layer21=137.301, loss=126.434, backward_time=0.302, grad_norm=90.253, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.082, optim0_lr0=1.888e-04, train_time=1.289 [gpua049:0/64] 2024-01-15 21:15:01,861 (trainer:753) INFO: 4epoch:train:12801-12900batch: iter_time=7.765e-05, forward_time=0.142, loss_ctc=148.854, loss_interctc_layer6=131.905, loss_interctc_layer12=119.112, loss_interctc_layer15=114.750, loss_interctc_layer21=147.894, loss=132.503, backward_time=0.362, grad_norm=87.745, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.082, optim0_lr0=1.893e-04, train_time=1.519 [gpua049:0/64] 2024-01-15 21:17:05,779 (trainer:753) INFO: 4epoch:train:12901-13000batch: iter_time=7.769e-05, forward_time=0.142, loss_ctc=129.302, loss_interctc_layer6=113.880, loss_interctc_layer12=102.147, loss_interctc_layer15=97.751, loss_interctc_layer21=128.002, loss=114.217, backward_time=0.301, grad_norm=75.360, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.083, optim0_lr0=1.898e-04, train_time=1.239 [gpua049:0/64] 2024-01-15 21:19:18,201 (trainer:753) INFO: 4epoch:train:13001-13100batch: iter_time=7.754e-05, forward_time=0.141, loss_ctc=124.689, loss_interctc_layer6=114.224, loss_interctc_layer12=103.573, loss_interctc_layer15=99.299, loss_interctc_layer21=123.160, loss=112.989, backward_time=0.304, grad_norm=78.182, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.082, optim0_lr0=1.903e-04, train_time=1.324 [gpua049:0/64] 2024-01-15 21:22:24,337 (trainer:753) INFO: 4epoch:train:13101-13200batch: iter_time=7.943e-05, forward_time=0.142, loss_ctc=144.212, loss_interctc_layer6=127.717, loss_interctc_layer12=115.378, loss_interctc_layer15=111.829, loss_interctc_layer21=143.653, loss=128.558, backward_time=0.417, grad_norm=80.407, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.083, optim0_lr0=1.908e-04, train_time=1.861 [gpua049:0/64] 2024-01-15 21:25:20,408 (trainer:753) INFO: 4epoch:train:13201-13300batch: iter_time=7.952e-05, forward_time=0.142, loss_ctc=116.490, loss_interctc_layer6=112.906, loss_interctc_layer12=100.712, loss_interctc_layer15=97.483, loss_interctc_layer21=115.832, loss=108.685, backward_time=0.376, grad_norm=85.002, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.083, optim0_lr0=1.913e-04, train_time=1.760 [gpua049:0/64] 2024-01-15 21:28:02,077 (trainer:753) INFO: 4epoch:train:13301-13400batch: iter_time=7.539e-05, forward_time=0.141, loss_ctc=119.939, loss_interctc_layer6=111.367, loss_interctc_layer12=99.260, loss_interctc_layer15=95.579, loss_interctc_layer21=119.303, loss=109.090, backward_time=0.354, grad_norm=69.236, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.082, optim0_lr0=1.918e-04, train_time=1.616 [gpua049:0/64] 2024-01-15 21:30:27,296 (trainer:753) INFO: 4epoch:train:13401-13500batch: iter_time=7.782e-05, forward_time=0.143, loss_ctc=133.368, loss_interctc_layer6=124.708, loss_interctc_layer12=111.990, loss_interctc_layer15=108.836, loss_interctc_layer21=133.743, loss=122.529, backward_time=0.308, grad_norm=96.294, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.083, optim0_lr0=1.923e-04, train_time=1.452 [gpua049:0/64] 2024-01-15 21:33:11,466 (trainer:753) INFO: 4epoch:train:13501-13600batch: iter_time=7.748e-05, forward_time=0.142, loss_ctc=136.257, loss_interctc_layer6=120.913, loss_interctc_layer12=108.389, loss_interctc_layer15=104.625, loss_interctc_layer21=135.258, loss=121.088, backward_time=0.354, grad_norm=74.972, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.082, optim0_lr0=1.928e-04, train_time=1.641 [gpua049:0/64] 2024-01-15 21:35:22,188 (trainer:753) INFO: 4epoch:train:13601-13700batch: iter_time=7.655e-05, forward_time=0.149, loss_ctc=140.618, loss_interctc_layer6=121.915, loss_interctc_layer12=108.765, loss_interctc_layer15=104.113, loss_interctc_layer21=139.980, loss=123.078, backward_time=0.310, grad_norm=88.419, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.082, optim0_lr0=1.933e-04, train_time=1.307 [gpua049:0/64] 2024-01-15 21:36:52,067 (multiple_iter_factory:32) INFO: Building 11th iter-factory... [gpua049:0/64] 2024-01-15 21:37:12,160 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') [gpua049:0/64] 2024-01-15 21:37:15,874 (abs_task:1660) INFO: [train] dataset: ESPnetDataset( speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} preprocess: ) [gpua049:0/64] 2024-01-15 21:37:15,874 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, [gpua049:0/64] 2024-01-15 21:37:15,878 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 [gpua049:0/64] 2024-01-15 21:42:42,094 (trainer:753) INFO: 4epoch:train:13701-13800batch: iter_time=2.496, forward_time=0.165, loss_ctc=132.199, loss_interctc_layer6=122.627, loss_interctc_layer12=111.548, loss_interctc_layer15=107.379, loss_interctc_layer21=131.092, loss=120.969, backward_time=0.330, grad_norm=88.695, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.086, optim0_lr0=1.938e-04, train_time=4.398 [gpua049:0/64] 2024-01-15 21:45:05,328 (trainer:753) INFO: 4epoch:train:13801-13900batch: iter_time=7.971e-05, forward_time=0.145, loss_ctc=138.525, loss_interctc_layer6=121.661, loss_interctc_layer12=108.594, loss_interctc_layer15=104.519, loss_interctc_layer21=137.114, loss=122.083, backward_time=0.311, grad_norm=84.232, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.082, optim0_lr0=1.943e-04, train_time=1.433 [gpua049:0/64] 2024-01-15 21:47:11,259 (trainer:753) INFO: 4epoch:train:13901-14000batch: iter_time=7.803e-05, forward_time=0.143, loss_ctc=144.790, loss_interctc_layer6=128.463, loss_interctc_layer12=115.706, loss_interctc_layer15=111.086, loss_interctc_layer21=144.475, loss=128.904, backward_time=0.296, grad_norm=95.279, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.082, optim0_lr0=1.948e-04, train_time=1.259 [gpua049:0/64] 2024-01-15 21:49:38,405 (trainer:753) INFO: 4epoch:train:14001-14100batch: iter_time=7.922e-05, forward_time=0.142, loss_ctc=127.104, loss_interctc_layer6=115.560, loss_interctc_layer12=104.031, loss_interctc_layer15=100.325, loss_interctc_layer21=127.450, loss=114.894, backward_time=0.334, grad_norm=81.144, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.082, optim0_lr0=1.953e-04, train_time=1.471 [gpua049:0/64] 2024-01-15 21:52:17,477 (trainer:753) INFO: 4epoch:train:14101-14200batch: iter_time=7.778e-05, forward_time=0.143, loss_ctc=152.095, loss_interctc_layer6=135.687, loss_interctc_layer12=123.388, loss_interctc_layer15=119.408, loss_interctc_layer21=151.505, loss=136.417, backward_time=0.327, grad_norm=91.885, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.082, optim0_lr0=1.958e-04, train_time=1.590 [gpua049:0/64] 2024-01-15 21:54:36,108 (trainer:753) INFO: 4epoch:train:14201-14300batch: iter_time=7.420e-05, forward_time=0.142, loss_ctc=113.963, loss_interctc_layer6=104.252, loss_interctc_layer12=94.286, loss_interctc_layer15=91.644, loss_interctc_layer21=113.136, loss=103.456, backward_time=0.315, grad_norm=72.749, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.082, optim0_lr0=1.963e-04, train_time=1.386 [gpua049:0/64] 2024-01-15 21:57:22,181 (trainer:753) INFO: 4epoch:train:14301-14400batch: iter_time=7.555e-05, forward_time=0.142, loss_ctc=144.088, loss_interctc_layer6=125.728, loss_interctc_layer12=114.773, loss_interctc_layer15=110.589, loss_interctc_layer21=142.290, loss=127.494, backward_time=0.373, grad_norm=84.092, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.081, optim0_lr0=1.968e-04, train_time=1.661 [gpua049:0/64] 2024-01-15 21:59:55,226 (trainer:753) INFO: 4epoch:train:14401-14500batch: iter_time=7.786e-05, forward_time=0.145, loss_ctc=123.542, loss_interctc_layer6=112.471, loss_interctc_layer12=100.370, loss_interctc_layer15=96.936, loss_interctc_layer21=122.046, loss=111.073, backward_time=0.360, grad_norm=100.407, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.082, optim0_lr0=1.973e-04, train_time=1.530 [gpua049:0/64] 2024-01-15 22:02:43,004 (trainer:753) INFO: 4epoch:train:14501-14600batch: iter_time=8.152e-05, forward_time=0.181, loss_ctc=121.155, loss_interctc_layer6=110.855, loss_interctc_layer12=99.293, loss_interctc_layer15=95.378, loss_interctc_layer21=119.965, loss=109.329, backward_time=0.409, grad_norm=81.535, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.092, optim0_lr0=1.978e-04, train_time=1.678 [gpua049:0/64] 2024-01-15 22:05:59,310 (trainer:753) INFO: 4epoch:train:14601-14700batch: iter_time=7.683e-05, forward_time=0.142, loss_ctc=134.525, loss_interctc_layer6=125.514, loss_interctc_layer12=113.507, loss_interctc_layer15=110.034, loss_interctc_layer21=134.364, loss=123.589, backward_time=0.461, grad_norm=99.791, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.082, optim0_lr0=1.983e-04, train_time=1.963 [gpua049:0/64] 2024-01-15 22:08:36,543 (trainer:753) INFO: 4epoch:train:14701-14800batch: iter_time=7.764e-05, forward_time=0.142, loss_ctc=125.959, loss_interctc_layer6=113.687, loss_interctc_layer12=102.696, loss_interctc_layer15=98.977, loss_interctc_layer21=125.167, loss=113.297, backward_time=0.337, grad_norm=85.140, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.082, optim0_lr0=1.988e-04, train_time=1.572 [gpua049:0/64] 2024-01-15 22:10:47,158 (trainer:753) INFO: 4epoch:train:14801-14900batch: iter_time=7.354e-05, forward_time=0.143, loss_ctc=136.352, loss_interctc_layer6=123.003, loss_interctc_layer12=109.717, loss_interctc_layer15=105.034, loss_interctc_layer21=135.949, loss=122.011, backward_time=0.314, grad_norm=79.008, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.083, optim0_lr0=1.993e-04, train_time=1.306 [gpua049:0/64] 2024-01-15 22:13:41,311 (trainer:753) INFO: 4epoch:train:14901-15000batch: iter_time=7.331e-05, forward_time=0.143, loss_ctc=142.315, loss_interctc_layer6=122.229, loss_interctc_layer12=109.743, loss_interctc_layer15=105.675, loss_interctc_layer21=141.537, loss=124.300, backward_time=0.381, grad_norm=72.083, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.083, optim0_lr0=1.998e-04, train_time=1.741 [gpua049:0/64] 2024-01-15 22:44:13,992 (trainer:352) INFO: 4epoch results: [train] iter_time=0.199, forward_time=0.149, loss_ctc=139.396, loss_interctc_layer6=127.412, loss_interctc_layer12=115.578, loss_interctc_layer15=112.415, loss_interctc_layer21=138.291, loss=126.618, backward_time=0.331, grad_norm=89.522, clip=100.000, loss_scale=9.328e+12, optim_step_time=0.083, optim0_lr0=1.625e-04, train_time=1.713, time=7 hours, 8 minutes and 37.94 seconds, total_count=60000, gpu_max_cached_mem_GB=34.508, [valid] loss_ctc=106.842, cer_ctc=0.462, loss_interctc_layer6=94.987, cer_interctc_layer6=0.405, loss_interctc_layer12=85.760, cer_interctc_layer12=0.360, loss_interctc_layer15=83.050, cer_interctc_layer15=0.335, loss_interctc_layer21=105.762, cer_interctc_layer21=0.453, loss=95.280, time=30 minutes and 8.41 seconds, total_count=18684, gpu_max_cached_mem_GB=34.508 [gpua049:0/64] 2024-01-15 22:44:33,664 (trainer:407) INFO: The best model has been updated: valid.cer_ctc, valid.loss_ctc, valid.total_count [gpua049:0/64] 2024-01-15 22:44:33,742 (trainer:286) INFO: 5/45epoch started. Estimated time to finish: 1 week, 6 days and 17 hours [gpua049:0/64] 2024-01-15 22:44:33,792 (multiple_iter_factory:32) INFO: Building 0th iter-factory... [gpua049:0/64] 2024-01-15 22:44:53,446 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') [gpua049:0/64] 2024-01-15 22:44:57,124 (abs_task:1660) INFO: [train] dataset: ESPnetDataset( speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} preprocess: ) [gpua049:0/64] 2024-01-15 22:44:57,124 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, [gpua049:0/64] 2024-01-15 22:44:57,127 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 [gpua049:0/64] 2024-01-15 22:51:04,072 (trainer:753) INFO: 5epoch:train:1-100batch: iter_time=2.208, forward_time=0.177, loss_ctc=125.746, loss_interctc_layer6=118.560, loss_interctc_layer12=108.909, loss_interctc_layer15=105.772, loss_interctc_layer21=124.440, loss=116.686, backward_time=0.310, grad_norm=87.294, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.084, optim0_lr0=1.999e-04, train_time=3.902 [gpua049:0/64] 2024-01-15 22:53:09,084 (trainer:753) INFO: 5epoch:train:101-200batch: iter_time=7.870e-05, forward_time=0.142, loss_ctc=154.261, loss_interctc_layer6=129.764, loss_interctc_layer12=117.602, loss_interctc_layer15=113.929, loss_interctc_layer21=152.849, loss=133.681, backward_time=0.296, grad_norm=105.945, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.082, optim0_lr0=1.997e-04, train_time=1.250 [gpua049:0/64] 2024-01-15 22:55:45,964 (trainer:753) INFO: 5epoch:train:201-300batch: iter_time=7.656e-05, forward_time=0.144, loss_ctc=141.181, loss_interctc_layer6=124.970, loss_interctc_layer12=113.310, loss_interctc_layer15=109.372, loss_interctc_layer21=140.622, loss=125.891, backward_time=0.314, grad_norm=86.176, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.082, optim0_lr0=1.996e-04, train_time=1.569 [gpua049:0/64] 2024-01-15 22:58:06,193 (trainer:753) INFO: 5epoch:train:301-400batch: iter_time=7.674e-05, forward_time=0.142, loss_ctc=163.210, loss_interctc_layer6=138.535, loss_interctc_layer12=125.593, loss_interctc_layer15=122.032, loss_interctc_layer21=163.114, loss=142.497, backward_time=0.306, grad_norm=108.084, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.082, optim0_lr0=1.994e-04, train_time=1.402 [gpua049:0/64] 2024-01-15 23:00:20,116 (trainer:753) INFO: 5epoch:train:401-500batch: iter_time=7.930e-05, forward_time=0.142, loss_ctc=139.258, loss_interctc_layer6=126.779, loss_interctc_layer12=114.756, loss_interctc_layer15=109.954, loss_interctc_layer21=138.009, loss=125.751, backward_time=0.298, grad_norm=104.282, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.081, optim0_lr0=1.993e-04, train_time=1.339 [gpua049:0/64] 2024-01-15 23:03:16,531 (trainer:753) INFO: 5epoch:train:501-600batch: iter_time=0.004, forward_time=0.275, loss_ctc=127.504, loss_interctc_layer6=111.507, loss_interctc_layer12=99.993, loss_interctc_layer15=96.454, loss_interctc_layer21=126.755, loss=112.442, backward_time=0.403, grad_norm=105.135, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.089, optim0_lr0=1.991e-04, train_time=1.763 [gpua049:0/64] 2024-01-15 23:06:06,690 (trainer:753) INFO: 5epoch:train:601-700batch: iter_time=7.863e-05, forward_time=0.143, loss_ctc=149.623, loss_interctc_layer6=138.119, loss_interctc_layer12=127.532, loss_interctc_layer15=123.450, loss_interctc_layer21=148.636, loss=137.472, backward_time=0.368, grad_norm=105.330, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.083, optim0_lr0=1.989e-04, train_time=1.702 [gpua049:0/64] 2024-01-15 23:08:50,178 (trainer:753) INFO: 5epoch:train:701-800batch: iter_time=7.769e-05, forward_time=0.143, loss_ctc=141.954, loss_interctc_layer6=128.836, loss_interctc_layer12=117.059, loss_interctc_layer15=112.775, loss_interctc_layer21=141.644, loss=128.453, backward_time=0.353, grad_norm=93.980, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.083, optim0_lr0=1.988e-04, train_time=1.635 [gpua049:0/64] 2024-01-15 23:11:28,749 (trainer:753) INFO: 5epoch:train:801-900batch: iter_time=7.421e-05, forward_time=0.141, loss_ctc=114.408, loss_interctc_layer6=112.399, loss_interctc_layer12=101.760, loss_interctc_layer15=98.973, loss_interctc_layer21=113.404, loss=108.189, backward_time=0.329, grad_norm=78.967, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.082, optim0_lr0=1.986e-04, train_time=1.585 [gpua049:0/64] 2024-01-15 23:14:03,532 (trainer:753) INFO: 5epoch:train:901-1000batch: iter_time=8.081e-05, forward_time=0.142, loss_ctc=141.472, loss_interctc_layer6=131.147, loss_interctc_layer12=119.449, loss_interctc_layer15=115.933, loss_interctc_layer21=140.539, loss=129.708, backward_time=0.327, grad_norm=101.578, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.082, optim0_lr0=1.984e-04, train_time=1.548 [gpua049:0/64] 2024-01-15 23:16:23,882 (trainer:753) INFO: 5epoch:train:1001-1100batch: iter_time=7.885e-05, forward_time=0.142, loss_ctc=142.566, loss_interctc_layer6=130.340, loss_interctc_layer12=117.611, loss_interctc_layer15=113.696, loss_interctc_layer21=142.121, loss=129.267, backward_time=0.322, grad_norm=91.608, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.082, optim0_lr0=1.983e-04, train_time=1.403 [gpua049:0/64] 2024-01-15 23:19:02,835 (trainer:753) INFO: 5epoch:train:1101-1200batch: iter_time=8.024e-05, forward_time=0.141, loss_ctc=107.274, loss_interctc_layer6=109.969, loss_interctc_layer12=98.404, loss_interctc_layer15=94.346, loss_interctc_layer21=106.437, loss=103.286, backward_time=0.320, grad_norm=68.497, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.082, optim0_lr0=1.981e-04, train_time=1.589 [gpua049:0/64] 2024-01-15 23:20:49,422 (multiple_iter_factory:32) INFO: Building 1th iter-factory... [gpua049:0/64] 2024-01-15 23:21:09,426 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') [gpua049:0/64] 2024-01-15 23:21:13,222 (abs_task:1660) INFO: [train] dataset: ESPnetDataset( speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} preprocess: ) [gpua049:0/64] 2024-01-15 23:21:13,222 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, [gpua049:0/64] 2024-01-15 23:21:13,225 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 [gpua049:0/64] 2024-01-15 23:26:12,984 (trainer:753) INFO: 5epoch:train:1201-1300batch: iter_time=2.688, forward_time=0.142, loss_ctc=114.456, loss_interctc_layer6=114.819, loss_interctc_layer12=103.410, loss_interctc_layer15=99.560, loss_interctc_layer21=114.053, loss=109.260, backward_time=0.342, grad_norm=71.019, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.082, optim0_lr0=1.979e-04, train_time=4.301 [gpua049:0/64] 2024-01-15 23:28:16,385 (trainer:753) INFO: 5epoch:train:1301-1400batch: iter_time=7.562e-05, forward_time=0.141, loss_ctc=135.432, loss_interctc_layer6=118.312, loss_interctc_layer12=107.544, loss_interctc_layer15=103.893, loss_interctc_layer21=133.705, loss=119.777, backward_time=0.297, grad_norm=81.866, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.082, optim0_lr0=1.978e-04, train_time=1.234 [gpua049:0/64] 2024-01-15 23:30:23,127 (trainer:753) INFO: 5epoch:train:1401-1500batch: iter_time=7.546e-05, forward_time=0.142, loss_ctc=133.987, loss_interctc_layer6=123.029, loss_interctc_layer12=110.961, loss_interctc_layer15=106.962, loss_interctc_layer21=132.953, loss=121.578, backward_time=0.303, grad_norm=88.273, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.083, optim0_lr0=1.976e-04, train_time=1.267 [gpua049:0/64] 2024-01-15 23:32:32,679 (trainer:753) INFO: 5epoch:train:1501-1600batch: iter_time=7.618e-05, forward_time=0.142, loss_ctc=151.076, loss_interctc_layer6=135.111, loss_interctc_layer12=122.775, loss_interctc_layer15=118.191, loss_interctc_layer21=149.654, loss=135.361, backward_time=0.300, grad_norm=113.424, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.082, optim0_lr0=1.975e-04, train_time=1.295 [gpua049:0/64] 2024-01-15 23:34:38,198 (trainer:753) INFO: 5epoch:train:1601-1700batch: iter_time=7.410e-05, forward_time=0.144, loss_ctc=142.631, loss_interctc_layer6=126.978, loss_interctc_layer12=114.447, loss_interctc_layer15=109.830, loss_interctc_layer21=142.066, loss=127.190, backward_time=0.297, grad_norm=78.732, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.082, optim0_lr0=1.973e-04, train_time=1.255 [gpua049:0/64] 2024-01-15 23:36:55,533 (trainer:753) INFO: 5epoch:train:1701-1800batch: iter_time=7.389e-05, forward_time=0.142, loss_ctc=117.467, loss_interctc_layer6=112.228, loss_interctc_layer12=100.333, loss_interctc_layer15=96.062, loss_interctc_layer21=116.520, loss=108.522, backward_time=0.303, grad_norm=65.896, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.082, optim0_lr0=1.971e-04, train_time=1.373 [gpua049:0/64] 2024-01-15 23:39:11,087 (trainer:753) INFO: 5epoch:train:1801-1900batch: iter_time=7.783e-05, forward_time=0.143, loss_ctc=126.667, loss_interctc_layer6=124.954, loss_interctc_layer12=114.404, loss_interctc_layer15=110.864, loss_interctc_layer21=125.757, loss=120.529, backward_time=0.330, grad_norm=81.259, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.082, optim0_lr0=1.970e-04, train_time=1.355 [gpua049:0/64] 2024-01-15 23:41:44,485 (trainer:753) INFO: 5epoch:train:1901-2000batch: iter_time=7.642e-05, forward_time=0.143, loss_ctc=146.670, loss_interctc_layer6=131.138, loss_interctc_layer12=119.067, loss_interctc_layer15=115.139, loss_interctc_layer21=145.234, loss=131.450, backward_time=0.336, grad_norm=111.204, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.082, optim0_lr0=1.968e-04, train_time=1.534 [gpua049:0/64] 2024-01-15 23:43:59,656 (trainer:753) INFO: 5epoch:train:2001-2100batch: iter_time=7.389e-05, forward_time=0.143, loss_ctc=111.803, loss_interctc_layer6=114.720, loss_interctc_layer12=102.115, loss_interctc_layer15=97.472, loss_interctc_layer21=111.337, loss=107.490, backward_time=0.319, grad_norm=85.212, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.082, optim0_lr0=1.967e-04, train_time=1.351 [gpua049:0/64] 2024-01-15 23:46:33,514 (trainer:753) INFO: 5epoch:train:2101-2200batch: iter_time=7.763e-05, forward_time=0.142, loss_ctc=125.936, loss_interctc_layer6=119.500, loss_interctc_layer12=110.681, loss_interctc_layer15=106.111, loss_interctc_layer21=125.146, loss=117.475, backward_time=0.325, grad_norm=80.625, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.082, optim0_lr0=1.965e-04, train_time=1.538 [gpua049:0/64] 2024-01-15 23:48:50,360 (trainer:753) INFO: 5epoch:train:2201-2300batch: iter_time=7.486e-05, forward_time=0.142, loss_ctc=125.587, loss_interctc_layer6=123.579, loss_interctc_layer12=111.827, loss_interctc_layer15=107.059, loss_interctc_layer21=123.608, loss=118.332, backward_time=0.316, grad_norm=86.359, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.082, optim0_lr0=1.963e-04, train_time=1.368 [gpua049:0/64] 2024-01-15 23:50:59,507 (trainer:753) INFO: 5epoch:train:2301-2400batch: iter_time=7.294e-05, forward_time=0.143, loss_ctc=127.455, loss_interctc_layer6=125.056, loss_interctc_layer12=111.320, loss_interctc_layer15=106.742, loss_interctc_layer21=125.746, loss=119.264, backward_time=0.315, grad_norm=87.582, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.082, optim0_lr0=1.962e-04, train_time=1.291 [gpua049:0/64] 2024-01-15 23:53:11,067 (trainer:753) INFO: 5epoch:train:2401-2500batch: iter_time=7.504e-05, forward_time=0.142, loss_ctc=102.966, loss_interctc_layer6=102.704, loss_interctc_layer12=91.564, loss_interctc_layer15=87.778, loss_interctc_layer21=103.405, loss=97.683, backward_time=0.311, grad_norm=66.900, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.082, optim0_lr0=1.960e-04, train_time=1.315 [gpua049:0/64] 2024-01-15 23:53:12,377 (multiple_iter_factory:32) INFO: Building 2th iter-factory... [gpua049:0/64] 2024-01-15 23:53:32,349 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') [gpua049:0/64] 2024-01-15 23:53:35,966 (abs_task:1660) INFO: [train] dataset: ESPnetDataset( speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} preprocess: ) [gpua049:0/64] 2024-01-15 23:53:35,966 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, [gpua049:0/64] 2024-01-15 23:53:35,970 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 [gpua049:0/64] 2024-01-16 00:00:45,329 (trainer:753) INFO: 5epoch:train:2501-2600batch: iter_time=1.364, forward_time=0.183, loss_ctc=116.158, loss_interctc_layer6=113.125, loss_interctc_layer12=102.252, loss_interctc_layer15=98.179, loss_interctc_layer21=115.724, loss=109.088, backward_time=0.310, grad_norm=76.223, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.086, optim0_lr0=1.959e-04, train_time=4.541 [gpua049:0/64] 2024-01-16 00:02:48,750 (trainer:753) INFO: 5epoch:train:2601-2700batch: iter_time=7.832e-05, forward_time=0.142, loss_ctc=138.731, loss_interctc_layer6=125.196, loss_interctc_layer12=112.146, loss_interctc_layer15=107.412, loss_interctc_layer21=138.213, loss=124.340, backward_time=0.296, grad_norm=83.975, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.082, optim0_lr0=1.957e-04, train_time=1.235 [gpua049:0/64] 2024-01-16 00:04:53,565 (trainer:753) INFO: 5epoch:train:2701-2800batch: iter_time=7.943e-05, forward_time=0.141, loss_ctc=128.810, loss_interctc_layer6=120.693, loss_interctc_layer12=107.753, loss_interctc_layer15=103.212, loss_interctc_layer21=126.726, loss=117.439, backward_time=0.300, grad_norm=87.683, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.081, optim0_lr0=1.956e-04, train_time=1.248 [gpua049:0/64] 2024-01-16 00:07:02,849 (trainer:753) INFO: 5epoch:train:2801-2900batch: iter_time=7.840e-05, forward_time=0.142, loss_ctc=152.154, loss_interctc_layer6=133.067, loss_interctc_layer12=119.208, loss_interctc_layer15=114.474, loss_interctc_layer21=152.107, loss=134.202, backward_time=0.313, grad_norm=86.034, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.081, optim0_lr0=1.954e-04, train_time=1.293 [gpua049:0/64] 2024-01-16 00:09:15,044 (trainer:753) INFO: 5epoch:train:2901-3000batch: iter_time=7.943e-05, forward_time=0.142, loss_ctc=128.496, loss_interctc_layer6=124.181, loss_interctc_layer12=109.922, loss_interctc_layer15=105.058, loss_interctc_layer21=127.639, loss=119.059, backward_time=0.298, grad_norm=87.767, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.082, optim0_lr0=1.953e-04, train_time=1.322 [gpua049:0/64] 2024-01-16 00:11:24,399 (trainer:753) INFO: 5epoch:train:3001-3100batch: iter_time=7.659e-05, forward_time=0.142, loss_ctc=115.639, loss_interctc_layer6=108.591, loss_interctc_layer12=96.290, loss_interctc_layer15=92.447, loss_interctc_layer21=115.066, loss=105.607, backward_time=0.297, grad_norm=67.241, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.082, optim0_lr0=1.951e-04, train_time=1.293 [gpua049:0/64] 2024-01-16 00:14:00,553 (trainer:753) INFO: 5epoch:train:3101-3200batch: iter_time=7.520e-05, forward_time=0.142, loss_ctc=134.004, loss_interctc_layer6=130.844, loss_interctc_layer12=119.493, loss_interctc_layer15=115.196, loss_interctc_layer21=132.212, loss=126.350, backward_time=0.326, grad_norm=100.537, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.082, optim0_lr0=1.949e-04, train_time=1.561 [gpua049:0/64] 2024-01-16 00:16:20,622 (trainer:753) INFO: 5epoch:train:3201-3300batch: iter_time=7.826e-05, forward_time=0.142, loss_ctc=126.024, loss_interctc_layer6=122.611, loss_interctc_layer12=109.506, loss_interctc_layer15=106.496, loss_interctc_layer21=125.451, loss=118.018, backward_time=0.316, grad_norm=79.906, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.082, optim0_lr0=1.948e-04, train_time=1.401 [gpua049:0/64] 2024-01-16 00:18:34,172 (trainer:753) INFO: 5epoch:train:3301-3400batch: iter_time=7.890e-05, forward_time=0.140, loss_ctc=105.582, loss_interctc_layer6=107.610, loss_interctc_layer12=96.578, loss_interctc_layer15=92.992, loss_interctc_layer21=105.049, loss=101.562, backward_time=0.310, grad_norm=86.850, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.082, optim0_lr0=1.946e-04, train_time=1.335 [gpua049:0/64] 2024-01-16 00:21:14,925 (trainer:753) INFO: 5epoch:train:3401-3500batch: iter_time=7.708e-05, forward_time=0.142, loss_ctc=129.547, loss_interctc_layer6=127.575, loss_interctc_layer12=113.643, loss_interctc_layer15=110.633, loss_interctc_layer21=128.972, loss=122.074, backward_time=0.332, grad_norm=85.200, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.082, optim0_lr0=1.945e-04, train_time=1.607 [gpua049:0/64] 2024-01-16 00:23:30,464 (trainer:753) INFO: 5epoch:train:3501-3600batch: iter_time=7.679e-05, forward_time=0.142, loss_ctc=131.098, loss_interctc_layer6=124.722, loss_interctc_layer12=111.286, loss_interctc_layer15=106.848, loss_interctc_layer21=129.982, loss=120.787, backward_time=0.308, grad_norm=73.866, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.083, optim0_lr0=1.943e-04, train_time=1.355 [gpua049:0/64] 2024-01-16 00:25:46,327 (trainer:753) INFO: 5epoch:train:3601-3700batch: iter_time=7.590e-05, forward_time=0.142, loss_ctc=99.788, loss_interctc_layer6=106.419, loss_interctc_layer12=94.137, loss_interctc_layer15=89.851, loss_interctc_layer21=99.843, loss=98.007, backward_time=0.305, grad_norm=63.353, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.082, optim0_lr0=1.942e-04, train_time=1.358 [gpua049:0/64] 2024-01-16 00:27:11,506 (multiple_iter_factory:32) INFO: Building 3th iter-factory... [gpua049:0/64] 2024-01-16 00:27:31,498 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') [gpua049:0/64] 2024-01-16 00:27:35,098 (abs_task:1660) INFO: [train] dataset: ESPnetDataset( speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} preprocess: ) [gpua049:0/64] 2024-01-16 00:27:35,098 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, [gpua049:0/64] 2024-01-16 00:27:35,101 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 [gpua049:0/64] 2024-01-16 00:33:05,093 (trainer:753) INFO: 5epoch:train:3701-3800batch: iter_time=3.039, forward_time=0.144, loss_ctc=108.045, loss_interctc_layer6=111.733, loss_interctc_layer12=99.472, loss_interctc_layer15=95.761, loss_interctc_layer21=107.464, loss=104.495, backward_time=0.302, grad_norm=74.601, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.082, optim0_lr0=1.940e-04, train_time=4.387 [gpua049:0/64] 2024-01-16 00:35:11,284 (trainer:753) INFO: 5epoch:train:3801-3900batch: iter_time=7.096e-05, forward_time=0.142, loss_ctc=125.819, loss_interctc_layer6=113.975, loss_interctc_layer12=102.414, loss_interctc_layer15=98.203, loss_interctc_layer21=125.345, loss=113.151, backward_time=0.296, grad_norm=73.381, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.082, optim0_lr0=1.939e-04, train_time=1.262 [gpua049:0/64] 2024-01-16 00:37:22,359 (trainer:753) INFO: 5epoch:train:3901-4000batch: iter_time=7.699e-05, forward_time=0.142, loss_ctc=126.088, loss_interctc_layer6=118.275, loss_interctc_layer12=106.087, loss_interctc_layer15=101.603, loss_interctc_layer21=125.745, loss=115.560, backward_time=0.296, grad_norm=88.682, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.082, optim0_lr0=1.937e-04, train_time=1.311 [gpua049:0/64] 2024-01-16 00:39:43,895 (trainer:753) INFO: 5epoch:train:4001-4100batch: iter_time=7.883e-05, forward_time=0.143, loss_ctc=143.557, loss_interctc_layer6=133.392, loss_interctc_layer12=119.037, loss_interctc_layer15=113.433, loss_interctc_layer21=143.804, loss=130.645, backward_time=0.319, grad_norm=107.954, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.082, optim0_lr0=1.936e-04, train_time=1.415 [gpua049:0/64] 2024-01-16 00:41:53,477 (trainer:753) INFO: 5epoch:train:4101-4200batch: iter_time=7.578e-05, forward_time=0.143, loss_ctc=136.086, loss_interctc_layer6=123.741, loss_interctc_layer12=109.755, loss_interctc_layer15=104.690, loss_interctc_layer21=135.765, loss=122.007, backward_time=0.304, grad_norm=80.368, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.082, optim0_lr0=1.934e-04, train_time=1.296 [gpua049:0/64] 2024-01-16 00:44:02,782 (trainer:753) INFO: 5epoch:train:4201-4300batch: iter_time=7.634e-05, forward_time=0.142, loss_ctc=114.060, loss_interctc_layer6=110.670, loss_interctc_layer12=98.170, loss_interctc_layer15=93.885, loss_interctc_layer21=114.286, loss=106.214, backward_time=0.303, grad_norm=69.055, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.082, optim0_lr0=1.933e-04, train_time=1.293 [gpua049:0/64] 2024-01-16 00:46:18,139 (trainer:753) INFO: 5epoch:train:4301-4400batch: iter_time=7.995e-05, forward_time=0.142, loss_ctc=120.097, loss_interctc_layer6=120.291, loss_interctc_layer12=108.863, loss_interctc_layer15=104.532, loss_interctc_layer21=119.281, loss=114.613, backward_time=0.316, grad_norm=70.205, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.082, optim0_lr0=1.931e-04, train_time=1.353 [gpua049:0/64] 2024-01-16 00:48:48,722 (trainer:753) INFO: 5epoch:train:4401-4500batch: iter_time=8.065e-05, forward_time=0.247, loss_ctc=141.551, loss_interctc_layer6=129.293, loss_interctc_layer12=116.535, loss_interctc_layer15=109.578, loss_interctc_layer21=141.496, loss=127.691, backward_time=0.323, grad_norm=94.235, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.100, optim0_lr0=1.930e-04, train_time=1.505 [gpua049:0/64] 2024-01-16 00:51:19,737 (trainer:753) INFO: 5epoch:train:4501-4600batch: iter_time=7.576e-05, forward_time=0.142, loss_ctc=105.802, loss_interctc_layer6=111.130, loss_interctc_layer12=97.866, loss_interctc_layer15=93.074, loss_interctc_layer21=105.126, loss=102.600, backward_time=0.320, grad_norm=74.136, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.082, optim0_lr0=1.928e-04, train_time=1.510 [gpua049:0/64] 2024-01-16 00:54:00,932 (trainer:753) INFO: 5epoch:train:4601-4700batch: iter_time=7.676e-05, forward_time=0.141, loss_ctc=120.766, loss_interctc_layer6=116.125, loss_interctc_layer12=104.683, loss_interctc_layer15=101.880, loss_interctc_layer21=120.024, loss=112.696, backward_time=0.341, grad_norm=76.216, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.082, optim0_lr0=1.927e-04, train_time=1.612 [gpua049:0/64] 2024-01-16 00:56:33,987 (trainer:753) INFO: 5epoch:train:4701-4800batch: iter_time=7.261e-05, forward_time=0.143, loss_ctc=119.388, loss_interctc_layer6=118.926, loss_interctc_layer12=106.887, loss_interctc_layer15=101.991, loss_interctc_layer21=118.589, loss=113.156, backward_time=0.373, grad_norm=73.955, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.082, optim0_lr0=1.925e-04, train_time=1.530 [gpua049:0/64] 2024-01-16 00:59:11,588 (trainer:753) INFO: 5epoch:train:4801-4900batch: iter_time=7.744e-05, forward_time=0.143, loss_ctc=118.969, loss_interctc_layer6=121.396, loss_interctc_layer12=108.524, loss_interctc_layer15=103.428, loss_interctc_layer21=118.089, loss=114.081, backward_time=0.316, grad_norm=86.400, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.082, optim0_lr0=1.924e-04, train_time=1.576 [gpua049:0/64] 2024-01-16 01:01:33,327 (trainer:753) INFO: 5epoch:train:4901-5000batch: iter_time=7.565e-05, forward_time=0.141, loss_ctc=97.350, loss_interctc_layer6=99.214, loss_interctc_layer12=87.380, loss_interctc_layer15=82.661, loss_interctc_layer21=96.899, loss=92.701, backward_time=0.305, grad_norm=66.089, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.082, optim0_lr0=1.922e-04, train_time=1.417 [gpua049:0/64] 2024-01-16 01:01:34,704 (multiple_iter_factory:32) INFO: Building 4th iter-factory... [gpua049:0/64] 2024-01-16 01:01:54,553 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') [gpua049:0/64] 2024-01-16 01:01:58,145 (abs_task:1660) INFO: [train] dataset: ESPnetDataset( speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} preprocess: ) [gpua049:0/64] 2024-01-16 01:01:58,145 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, [gpua049:0/64] 2024-01-16 01:01:58,148 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 [gpua049:0/64] 2024-01-16 01:09:15,850 (trainer:753) INFO: 5epoch:train:5001-5100batch: iter_time=1.350, forward_time=0.141, loss_ctc=115.578, loss_interctc_layer6=110.983, loss_interctc_layer12=99.267, loss_interctc_layer15=95.479, loss_interctc_layer21=115.885, loss=107.438, backward_time=0.317, grad_norm=91.863, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.082, optim0_lr0=1.921e-04, train_time=4.625 [gpua049:0/64] 2024-01-16 01:11:28,263 (trainer:753) INFO: 5epoch:train:5101-5200batch: iter_time=7.305e-05, forward_time=0.142, loss_ctc=141.469, loss_interctc_layer6=122.140, loss_interctc_layer12=108.889, loss_interctc_layer15=104.498, loss_interctc_layer21=141.446, loss=123.688, backward_time=0.301, grad_norm=77.390, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.082, optim0_lr0=1.919e-04, train_time=1.324 [gpua049:0/64] 2024-01-16 01:13:34,341 (trainer:753) INFO: 5epoch:train:5201-5300batch: iter_time=7.831e-05, forward_time=0.142, loss_ctc=129.557, loss_interctc_layer6=116.631, loss_interctc_layer12=103.388, loss_interctc_layer15=98.721, loss_interctc_layer21=130.170, loss=115.693, backward_time=0.295, grad_norm=72.565, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.082, optim0_lr0=1.918e-04, train_time=1.261 [gpua049:0/64] 2024-01-16 01:15:38,247 (trainer:753) INFO: 5epoch:train:5301-5400batch: iter_time=7.893e-05, forward_time=0.143, loss_ctc=153.407, loss_interctc_layer6=129.716, loss_interctc_layer12=115.322, loss_interctc_layer15=110.924, loss_interctc_layer21=152.208, loss=132.315, backward_time=0.296, grad_norm=95.412, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.082, optim0_lr0=1.916e-04, train_time=1.239 [gpua049:0/64] 2024-01-16 01:17:43,607 (trainer:753) INFO: 5epoch:train:5401-5500batch: iter_time=7.547e-05, forward_time=0.143, loss_ctc=130.805, loss_interctc_layer6=120.877, loss_interctc_layer12=107.037, loss_interctc_layer15=101.624, loss_interctc_layer21=129.740, loss=118.017, backward_time=0.296, grad_norm=77.772, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.082, optim0_lr0=1.915e-04, train_time=1.253 [gpua049:0/64] 2024-01-16 01:20:20,488 (trainer:753) INFO: 5epoch:train:5501-5600batch: iter_time=7.949e-05, forward_time=0.142, loss_ctc=118.173, loss_interctc_layer6=105.603, loss_interctc_layer12=93.628, loss_interctc_layer15=89.677, loss_interctc_layer21=117.978, loss=105.012, backward_time=0.376, grad_norm=64.385, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.082, optim0_lr0=1.913e-04, train_time=1.569 [gpua049:0/64] 2024-01-16 01:22:48,304 (trainer:753) INFO: 5epoch:train:5601-5700batch: iter_time=7.708e-05, forward_time=0.142, loss_ctc=134.921, loss_interctc_layer6=126.845, loss_interctc_layer12=114.121, loss_interctc_layer15=109.656, loss_interctc_layer21=134.043, loss=123.917, backward_time=0.310, grad_norm=98.814, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.082, optim0_lr0=1.912e-04, train_time=1.478 [gpua049:0/64] 2024-01-16 01:25:08,078 (trainer:753) INFO: 5epoch:train:5701-5800batch: iter_time=7.481e-05, forward_time=0.143, loss_ctc=130.167, loss_interctc_layer6=120.669, loss_interctc_layer12=107.382, loss_interctc_layer15=102.370, loss_interctc_layer21=129.456, loss=118.009, backward_time=0.322, grad_norm=83.390, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.082, optim0_lr0=1.911e-04, train_time=1.398 [gpua049:0/64] 2024-01-16 01:27:17,728 (trainer:753) INFO: 5epoch:train:5801-5900batch: iter_time=7.641e-05, forward_time=0.141, loss_ctc=105.559, loss_interctc_layer6=106.010, loss_interctc_layer12=94.758, loss_interctc_layer15=90.704, loss_interctc_layer21=105.286, loss=100.463, backward_time=0.303, grad_norm=71.652, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.082, optim0_lr0=1.909e-04, train_time=1.296 [gpua049:0/64] 2024-01-16 01:29:27,994 (trainer:753) INFO: 5epoch:train:5901-6000batch: iter_time=7.452e-05, forward_time=0.143, loss_ctc=132.365, loss_interctc_layer6=123.704, loss_interctc_layer12=111.551, loss_interctc_layer15=105.672, loss_interctc_layer21=131.472, loss=120.953, backward_time=0.313, grad_norm=93.448, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.082, optim0_lr0=1.908e-04, train_time=1.302 [gpua049:0/64] 2024-01-16 01:32:01,453 (trainer:753) INFO: 5epoch:train:6001-6100batch: iter_time=7.658e-05, forward_time=0.143, loss_ctc=133.466, loss_interctc_layer6=123.187, loss_interctc_layer12=109.823, loss_interctc_layer15=105.085, loss_interctc_layer21=132.945, loss=120.901, backward_time=0.335, grad_norm=80.131, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.082, optim0_lr0=1.906e-04, train_time=1.534 [gpua049:0/64] 2024-01-16 01:34:09,871 (trainer:753) INFO: 5epoch:train:6101-6200batch: iter_time=7.546e-05, forward_time=0.142, loss_ctc=98.959, loss_interctc_layer6=104.072, loss_interctc_layer12=91.573, loss_interctc_layer15=86.867, loss_interctc_layer21=98.720, loss=96.038, backward_time=0.303, grad_norm=91.852, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.082, optim0_lr0=1.905e-04, train_time=1.284 [gpua049:0/64] 2024-01-16 01:35:29,579 (multiple_iter_factory:32) INFO: Building 5th iter-factory... [gpua049:0/64] 2024-01-16 01:35:49,566 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') [gpua049:0/64] 2024-01-16 01:35:53,181 (abs_task:1660) INFO: [train] dataset: ESPnetDataset( speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} preprocess: ) [gpua049:0/64] 2024-01-16 01:35:53,181 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, [gpua049:0/64] 2024-01-16 01:35:53,187 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 [gpua049:0/64] 2024-01-16 01:40:03,468 (trainer:753) INFO: 5epoch:train:6201-6300batch: iter_time=2.035, forward_time=0.142, loss_ctc=106.179, loss_interctc_layer6=108.893, loss_interctc_layer12=96.577, loss_interctc_layer15=92.263, loss_interctc_layer21=106.082, loss=101.999, backward_time=0.311, grad_norm=80.443, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.083, optim0_lr0=1.903e-04, train_time=3.536 [gpua049:0/64] 2024-01-16 01:42:08,772 (trainer:753) INFO: 5epoch:train:6301-6400batch: iter_time=7.554e-05, forward_time=0.151, loss_ctc=122.613, loss_interctc_layer6=111.223, loss_interctc_layer12=98.534, loss_interctc_layer15=94.366, loss_interctc_layer21=122.471, loss=109.841, backward_time=0.299, grad_norm=71.285, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.083, optim0_lr0=1.902e-04, train_time=1.253 [gpua049:0/64] 2024-01-16 01:44:25,330 (trainer:753) INFO: 5epoch:train:6401-6500batch: iter_time=3.654e-04, forward_time=0.227, loss_ctc=124.578, loss_interctc_layer6=117.256, loss_interctc_layer12=103.690, loss_interctc_layer15=98.930, loss_interctc_layer21=124.188, loss=113.728, backward_time=0.309, grad_norm=78.192, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.095, optim0_lr0=1.900e-04, train_time=1.364 [gpua049:0/64] 2024-01-16 01:46:29,587 (trainer:753) INFO: 5epoch:train:6501-6600batch: iter_time=7.647e-05, forward_time=0.143, loss_ctc=140.972, loss_interctc_layer6=129.104, loss_interctc_layer12=116.325, loss_interctc_layer15=109.647, loss_interctc_layer21=140.880, loss=127.386, backward_time=0.306, grad_norm=94.843, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.082, optim0_lr0=1.899e-04, train_time=1.243 [gpua049:0/64] 2024-01-16 01:49:09,848 (trainer:753) INFO: 5epoch:train:6601-6700batch: iter_time=7.707e-05, forward_time=0.142, loss_ctc=131.959, loss_interctc_layer6=120.581, loss_interctc_layer12=106.470, loss_interctc_layer15=101.880, loss_interctc_layer21=130.713, loss=118.321, backward_time=0.331, grad_norm=73.603, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.082, optim0_lr0=1.898e-04, train_time=1.603 [gpua049:0/64] 2024-01-16 01:51:41,723 (trainer:753) INFO: 5epoch:train:6701-6800batch: iter_time=7.663e-05, forward_time=0.142, loss_ctc=111.628, loss_interctc_layer6=109.369, loss_interctc_layer12=96.395, loss_interctc_layer15=91.591, loss_interctc_layer21=111.606, loss=104.118, backward_time=0.304, grad_norm=71.960, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.082, optim0_lr0=1.896e-04, train_time=1.519 [gpua049:0/64] 2024-01-16 01:54:01,842 (trainer:753) INFO: 5epoch:train:6801-6900batch: iter_time=7.552e-05, forward_time=0.144, loss_ctc=115.468, loss_interctc_layer6=116.815, loss_interctc_layer12=105.155, loss_interctc_layer15=100.418, loss_interctc_layer21=115.267, loss=110.625, backward_time=0.363, grad_norm=94.916, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.082, optim0_lr0=1.895e-04, train_time=1.401 [gpua049:0/64] 2024-01-16 01:56:10,800 (trainer:753) INFO: 5epoch:train:6901-7000batch: iter_time=7.508e-05, forward_time=0.142, loss_ctc=133.980, loss_interctc_layer6=125.433, loss_interctc_layer12=110.972, loss_interctc_layer15=106.195, loss_interctc_layer21=133.925, loss=122.101, backward_time=0.310, grad_norm=82.596, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.082, optim0_lr0=1.893e-04, train_time=1.289 [gpua049:0/64] 2024-01-16 01:58:28,273 (trainer:753) INFO: 5epoch:train:7001-7100batch: iter_time=7.700e-05, forward_time=0.142, loss_ctc=103.108, loss_interctc_layer6=108.713, loss_interctc_layer12=95.697, loss_interctc_layer15=90.476, loss_interctc_layer21=102.814, loss=100.162, backward_time=0.327, grad_norm=75.419, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.082, optim0_lr0=1.892e-04, train_time=1.375 [gpua049:0/64] 2024-01-16 02:00:44,336 (trainer:753) INFO: 5epoch:train:7101-7200batch: iter_time=7.627e-05, forward_time=0.141, loss_ctc=117.593, loss_interctc_layer6=113.959, loss_interctc_layer12=103.010, loss_interctc_layer15=99.587, loss_interctc_layer21=118.293, loss=110.488, backward_time=0.311, grad_norm=95.807, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.082, optim0_lr0=1.891e-04, train_time=1.360 [gpua049:0/64] 2024-01-16 02:02:57,313 (trainer:753) INFO: 5epoch:train:7201-7300batch: iter_time=7.741e-05, forward_time=0.143, loss_ctc=115.531, loss_interctc_layer6=117.118, loss_interctc_layer12=103.918, loss_interctc_layer15=99.584, loss_interctc_layer21=114.948, loss=110.220, backward_time=0.312, grad_norm=79.055, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.082, optim0_lr0=1.889e-04, train_time=1.330 [gpua049:0/64] 2024-01-16 02:05:18,076 (trainer:753) INFO: 5epoch:train:7301-7400batch: iter_time=7.659e-05, forward_time=0.143, loss_ctc=115.728, loss_interctc_layer6=119.568, loss_interctc_layer12=106.159, loss_interctc_layer15=100.899, loss_interctc_layer21=115.603, loss=111.591, backward_time=0.319, grad_norm=62.031, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.082, optim0_lr0=1.888e-04, train_time=1.407 [gpua049:0/64] 2024-01-16 02:07:30,584 (trainer:753) INFO: 5epoch:train:7401-7500batch: iter_time=7.567e-05, forward_time=0.142, loss_ctc=95.163, loss_interctc_layer6=96.807, loss_interctc_layer12=85.411, loss_interctc_layer15=80.362, loss_interctc_layer21=94.575, loss=90.464, backward_time=0.317, grad_norm=70.565, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.082, optim0_lr0=1.886e-04, train_time=1.325 [gpua049:0/64] 2024-01-16 02:07:32,118 (multiple_iter_factory:32) INFO: Building 6th iter-factory... [gpua049:0/64] 2024-01-16 02:07:52,245 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') [gpua049:0/64] 2024-01-16 02:07:55,863 (abs_task:1660) INFO: [train] dataset: ESPnetDataset( speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} preprocess: ) [gpua049:0/64] 2024-01-16 02:07:55,863 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, [gpua049:0/64] 2024-01-16 02:07:55,866 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 [gpua049:0/64] 2024-01-16 02:15:05,188 (trainer:753) INFO: 5epoch:train:7501-7600batch: iter_time=1.347, forward_time=0.142, loss_ctc=109.145, loss_interctc_layer6=108.203, loss_interctc_layer12=96.685, loss_interctc_layer15=92.030, loss_interctc_layer21=109.564, loss=103.125, backward_time=0.303, grad_norm=72.867, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.082, optim0_lr0=1.885e-04, train_time=4.546 [gpua049:0/64] 2024-01-16 02:17:16,573 (trainer:753) INFO: 5epoch:train:7601-7700batch: iter_time=7.749e-05, forward_time=0.143, loss_ctc=130.272, loss_interctc_layer6=119.469, loss_interctc_layer12=105.924, loss_interctc_layer15=101.026, loss_interctc_layer21=130.988, loss=117.536, backward_time=0.301, grad_norm=88.719, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.082, optim0_lr0=1.884e-04, train_time=1.314 [gpua049:0/64] 2024-01-16 02:19:28,949 (trainer:753) INFO: 5epoch:train:7701-7800batch: iter_time=7.889e-05, forward_time=0.142, loss_ctc=118.818, loss_interctc_layer6=114.935, loss_interctc_layer12=102.022, loss_interctc_layer15=96.273, loss_interctc_layer21=119.236, loss=110.257, backward_time=0.297, grad_norm=77.820, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.081, optim0_lr0=1.882e-04, train_time=1.324 [gpua049:0/64] 2024-01-16 02:21:53,297 (trainer:753) INFO: 5epoch:train:7801-7900batch: iter_time=7.677e-05, forward_time=0.143, loss_ctc=144.477, loss_interctc_layer6=127.957, loss_interctc_layer12=115.867, loss_interctc_layer15=109.800, loss_interctc_layer21=145.351, loss=128.691, backward_time=0.345, grad_norm=88.907, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.082, optim0_lr0=1.881e-04, train_time=1.443 [gpua049:0/64] 2024-01-16 02:24:02,672 (trainer:753) INFO: 5epoch:train:7901-8000batch: iter_time=7.530e-05, forward_time=0.145, loss_ctc=121.623, loss_interctc_layer6=119.546, loss_interctc_layer12=105.132, loss_interctc_layer15=99.802, loss_interctc_layer21=120.744, loss=113.370, backward_time=0.306, grad_norm=88.494, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.081, optim0_lr0=1.879e-04, train_time=1.294 [gpua049:0/64] 2024-01-16 02:26:23,584 (trainer:753) INFO: 5epoch:train:8001-8100batch: iter_time=7.670e-05, forward_time=0.142, loss_ctc=108.727, loss_interctc_layer6=103.861, loss_interctc_layer12=91.795, loss_interctc_layer15=86.912, loss_interctc_layer21=108.896, loss=100.038, backward_time=0.332, grad_norm=60.760, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.081, optim0_lr0=1.878e-04, train_time=1.409 [gpua049:0/64] 2024-01-16 02:28:31,597 (trainer:753) INFO: 5epoch:train:8101-8200batch: iter_time=7.531e-05, forward_time=0.142, loss_ctc=124.375, loss_interctc_layer6=124.077, loss_interctc_layer12=111.543, loss_interctc_layer15=106.149, loss_interctc_layer21=124.462, loss=118.121, backward_time=0.301, grad_norm=89.607, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.081, optim0_lr0=1.877e-04, train_time=1.280 [gpua049:0/64] 2024-01-16 02:30:54,030 (trainer:753) INFO: 5epoch:train:8201-8300batch: iter_time=7.589e-05, forward_time=0.143, loss_ctc=120.000, loss_interctc_layer6=119.671, loss_interctc_layer12=107.826, loss_interctc_layer15=100.861, loss_interctc_layer21=119.909, loss=113.654, backward_time=0.305, grad_norm=85.095, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.082, optim0_lr0=1.875e-04, train_time=1.424 [gpua049:0/64] 2024-01-16 02:33:25,004 (trainer:753) INFO: 5epoch:train:8301-8400batch: iter_time=7.616e-05, forward_time=0.141, loss_ctc=97.789, loss_interctc_layer6=102.692, loss_interctc_layer12=91.183, loss_interctc_layer15=86.815, loss_interctc_layer21=97.830, loss=95.262, backward_time=0.315, grad_norm=63.060, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.082, optim0_lr0=1.874e-04, train_time=1.510 [gpua049:0/64] 2024-01-16 02:35:48,474 (trainer:753) INFO: 5epoch:train:8401-8500batch: iter_time=7.717e-05, forward_time=0.142, loss_ctc=123.065, loss_interctc_layer6=121.857, loss_interctc_layer12=108.789, loss_interctc_layer15=103.721, loss_interctc_layer21=122.247, loss=115.936, backward_time=0.336, grad_norm=86.569, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.082, optim0_lr0=1.872e-04, train_time=1.434 [gpua049:0/64] 2024-01-16 02:38:10,570 (trainer:753) INFO: 5epoch:train:8501-8600batch: iter_time=7.676e-05, forward_time=0.142, loss_ctc=124.442, loss_interctc_layer6=121.146, loss_interctc_layer12=107.571, loss_interctc_layer15=102.570, loss_interctc_layer21=124.851, loss=116.116, backward_time=0.311, grad_norm=81.891, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.081, optim0_lr0=1.871e-04, train_time=1.421 [gpua049:0/64] 2024-01-16 02:40:32,230 (trainer:753) INFO: 5epoch:train:8601-8700batch: iter_time=7.480e-05, forward_time=0.142, loss_ctc=93.733, loss_interctc_layer6=101.775, loss_interctc_layer12=89.313, loss_interctc_layer15=84.250, loss_interctc_layer21=93.312, loss=92.476, backward_time=0.300, grad_norm=61.532, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.082, optim0_lr0=1.870e-04, train_time=1.416 [gpua049:0/64] 2024-01-16 02:41:54,223 (multiple_iter_factory:32) INFO: Building 7th iter-factory... [gpua049:0/64] 2024-01-16 02:42:14,146 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') [gpua049:0/64] 2024-01-16 02:42:17,727 (abs_task:1660) INFO: [train] dataset: ESPnetDataset( speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} preprocess: ) [gpua049:0/64] 2024-01-16 02:42:17,727 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, [gpua049:0/64] 2024-01-16 02:42:17,730 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 [gpua049:0/64] 2024-01-16 02:47:30,922 (trainer:753) INFO: 5epoch:train:8701-8800batch: iter_time=2.885, forward_time=0.171, loss_ctc=101.658, loss_interctc_layer6=106.566, loss_interctc_layer12=93.965, loss_interctc_layer15=89.004, loss_interctc_layer21=101.242, loss=98.487, backward_time=0.299, grad_norm=63.878, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.084, optim0_lr0=1.868e-04, train_time=4.183 [gpua049:0/64] 2024-01-16 02:49:34,588 (trainer:753) INFO: 5epoch:train:8801-8900batch: iter_time=7.967e-05, forward_time=0.141, loss_ctc=125.074, loss_interctc_layer6=108.604, loss_interctc_layer12=96.069, loss_interctc_layer15=91.583, loss_interctc_layer21=125.899, loss=109.446, backward_time=0.295, grad_norm=69.372, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.082, optim0_lr0=1.867e-04, train_time=1.240 [gpua049:0/64] 2024-01-16 02:51:41,691 (trainer:753) INFO: 5epoch:train:8901-9000batch: iter_time=8.024e-05, forward_time=0.142, loss_ctc=127.618, loss_interctc_layer6=114.033, loss_interctc_layer12=101.036, loss_interctc_layer15=96.453, loss_interctc_layer21=127.487, loss=113.325, backward_time=0.302, grad_norm=81.461, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.082, optim0_lr0=1.866e-04, train_time=1.271 [gpua049:0/64] 2024-01-16 02:54:01,405 (trainer:753) INFO: 5epoch:train:9001-9100batch: iter_time=7.910e-05, forward_time=0.145, loss_ctc=144.283, loss_interctc_layer6=128.392, loss_interctc_layer12=114.595, loss_interctc_layer15=108.944, loss_interctc_layer21=146.114, loss=128.466, backward_time=0.337, grad_norm=87.354, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.082, optim0_lr0=1.864e-04, train_time=1.397 [gpua049:0/64] 2024-01-16 02:56:50,761 (trainer:753) INFO: 5epoch:train:9101-9200batch: iter_time=8.018e-05, forward_time=0.142, loss_ctc=135.212, loss_interctc_layer6=119.375, loss_interctc_layer12=105.376, loss_interctc_layer15=99.817, loss_interctc_layer21=134.900, loss=118.936, backward_time=0.429, grad_norm=63.058, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.082, optim0_lr0=1.863e-04, train_time=1.693 [gpua049:0/64] 2024-01-16 02:59:18,175 (trainer:753) INFO: 5epoch:train:9201-9300batch: iter_time=8.090e-05, forward_time=0.142, loss_ctc=111.812, loss_interctc_layer6=106.617, loss_interctc_layer12=92.914, loss_interctc_layer15=87.850, loss_interctc_layer21=111.420, loss=102.122, backward_time=0.309, grad_norm=67.355, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.082, optim0_lr0=1.862e-04, train_time=1.474 [gpua049:0/64] 2024-01-16 03:01:44,788 (trainer:753) INFO: 5epoch:train:9301-9400batch: iter_time=7.969e-05, forward_time=0.143, loss_ctc=116.828, loss_interctc_layer6=115.250, loss_interctc_layer12=102.381, loss_interctc_layer15=97.304, loss_interctc_layer21=116.972, loss=109.747, backward_time=0.331, grad_norm=98.465, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.082, optim0_lr0=1.860e-04, train_time=1.466 [gpua049:0/64] 2024-01-16 03:04:03,001 (trainer:753) INFO: 5epoch:train:9401-9500batch: iter_time=7.834e-05, forward_time=0.142, loss_ctc=139.838, loss_interctc_layer6=124.375, loss_interctc_layer12=109.648, loss_interctc_layer15=103.529, loss_interctc_layer21=139.398, loss=123.358, backward_time=0.307, grad_norm=105.020, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.082, optim0_lr0=1.859e-04, train_time=1.382 [gpua049:0/64] 2024-01-16 03:06:08,658 (trainer:753) INFO: 5epoch:train:9501-9600batch: iter_time=7.591e-05, forward_time=0.142, loss_ctc=105.994, loss_interctc_layer6=107.270, loss_interctc_layer12=94.077, loss_interctc_layer15=88.788, loss_interctc_layer21=105.962, loss=100.418, backward_time=0.309, grad_norm=83.232, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.082, optim0_lr0=1.858e-04, train_time=1.256 [gpua049:0/64] 2024-01-16 03:08:44,412 (trainer:753) INFO: 5epoch:train:9601-9700batch: iter_time=8.024e-05, forward_time=0.142, loss_ctc=118.970, loss_interctc_layer6=112.709, loss_interctc_layer12=100.906, loss_interctc_layer15=96.389, loss_interctc_layer21=118.788, loss=109.552, backward_time=0.380, grad_norm=87.328, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.082, optim0_lr0=1.856e-04, train_time=1.557 [gpua049:0/64] 2024-01-16 03:11:10,987 (trainer:753) INFO: 5epoch:train:9701-9800batch: iter_time=7.648e-05, forward_time=0.143, loss_ctc=117.615, loss_interctc_layer6=115.743, loss_interctc_layer12=101.991, loss_interctc_layer15=97.256, loss_interctc_layer21=117.484, loss=110.018, backward_time=0.375, grad_norm=70.203, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.082, optim0_lr0=1.855e-04, train_time=1.466 [gpua049:0/64] 2024-01-16 03:13:36,783 (trainer:753) INFO: 5epoch:train:9801-9900batch: iter_time=7.171e-05, forward_time=0.144, loss_ctc=116.992, loss_interctc_layer6=118.018, loss_interctc_layer12=104.117, loss_interctc_layer15=98.912, loss_interctc_layer21=116.721, loss=110.952, backward_time=0.317, grad_norm=81.229, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.082, optim0_lr0=1.854e-04, train_time=1.458 [gpua049:0/64] 2024-01-16 03:16:19,172 (trainer:753) INFO: 5epoch:train:9901-10000batch: iter_time=7.368e-05, forward_time=0.143, loss_ctc=95.411, loss_interctc_layer6=95.545, loss_interctc_layer12=82.967, loss_interctc_layer15=78.050, loss_interctc_layer21=95.637, loss=89.522, backward_time=0.358, grad_norm=80.681, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.082, optim0_lr0=1.852e-04, train_time=1.624 [gpua049:0/64] 2024-01-16 03:16:20,876 (multiple_iter_factory:32) INFO: Building 8th iter-factory... [gpua049:0/64] 2024-01-16 03:16:40,782 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') [gpua049:0/64] 2024-01-16 03:16:44,422 (abs_task:1660) INFO: [train] dataset: ESPnetDataset( speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} preprocess: ) [gpua049:0/64] 2024-01-16 03:16:44,422 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, [gpua049:0/64] 2024-01-16 03:16:44,425 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 [gpua049:0/64] 2024-01-16 03:23:57,909 (trainer:753) INFO: 5epoch:train:10001-10100batch: iter_time=1.464, forward_time=0.179, loss_ctc=107.979, loss_interctc_layer6=106.173, loss_interctc_layer12=94.205, loss_interctc_layer15=89.650, loss_interctc_layer21=108.038, loss=101.209, backward_time=0.305, grad_norm=79.767, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.084, optim0_lr0=1.851e-04, train_time=4.587 [gpua049:0/64] 2024-01-16 03:26:04,379 (trainer:753) INFO: 5epoch:train:10101-10200batch: iter_time=7.895e-05, forward_time=0.143, loss_ctc=133.126, loss_interctc_layer6=118.928, loss_interctc_layer12=104.300, loss_interctc_layer15=98.924, loss_interctc_layer21=133.158, loss=117.687, backward_time=0.300, grad_norm=77.242, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.082, optim0_lr0=1.850e-04, train_time=1.265 [gpua049:0/64] 2024-01-16 03:28:06,332 (trainer:753) INFO: 5epoch:train:10201-10300batch: iter_time=7.562e-05, forward_time=0.145, loss_ctc=121.746, loss_interctc_layer6=113.290, loss_interctc_layer12=99.601, loss_interctc_layer15=94.142, loss_interctc_layer21=122.146, loss=110.185, backward_time=0.297, grad_norm=65.151, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.082, optim0_lr0=1.848e-04, train_time=1.219 [gpua049:0/64] 2024-01-16 03:30:31,466 (trainer:753) INFO: 5epoch:train:10301-10400batch: iter_time=7.886e-05, forward_time=0.144, loss_ctc=143.304, loss_interctc_layer6=125.050, loss_interctc_layer12=110.915, loss_interctc_layer15=105.601, loss_interctc_layer21=143.834, loss=125.741, backward_time=0.311, grad_norm=84.126, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.082, optim0_lr0=1.847e-04, train_time=1.451 [gpua049:0/64] 2024-01-16 03:33:04,907 (trainer:753) INFO: 5epoch:train:10401-10500batch: iter_time=7.916e-05, forward_time=0.143, loss_ctc=123.478, loss_interctc_layer6=117.182, loss_interctc_layer12=102.577, loss_interctc_layer15=96.966, loss_interctc_layer21=123.486, loss=112.738, backward_time=0.414, grad_norm=83.827, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.081, optim0_lr0=1.846e-04, train_time=1.534 [gpua049:0/64] 2024-01-16 03:35:28,501 (trainer:753) INFO: 5epoch:train:10501-10600batch: iter_time=7.997e-05, forward_time=0.143, loss_ctc=111.892, loss_interctc_layer6=101.379, loss_interctc_layer12=88.926, loss_interctc_layer15=84.219, loss_interctc_layer21=112.308, loss=99.745, backward_time=0.329, grad_norm=69.441, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.081, optim0_lr0=1.844e-04, train_time=1.436 [gpua049:0/64] 2024-01-16 03:37:49,758 (trainer:753) INFO: 5epoch:train:10601-10700batch: iter_time=8.398e-05, forward_time=0.142, loss_ctc=126.538, loss_interctc_layer6=122.554, loss_interctc_layer12=108.103, loss_interctc_layer15=103.061, loss_interctc_layer21=126.227, loss=117.297, backward_time=0.313, grad_norm=104.156, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.081, optim0_lr0=1.843e-04, train_time=1.412 [gpua049:0/64] 2024-01-16 03:40:12,779 (trainer:753) INFO: 5epoch:train:10701-10800batch: iter_time=7.717e-05, forward_time=0.144, loss_ctc=125.348, loss_interctc_layer6=119.064, loss_interctc_layer12=103.340, loss_interctc_layer15=97.565, loss_interctc_layer21=125.107, loss=114.085, backward_time=0.314, grad_norm=85.798, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.082, optim0_lr0=1.842e-04, train_time=1.430 [gpua049:0/64] 2024-01-16 03:43:11,945 (trainer:753) INFO: 5epoch:train:10801-10900batch: iter_time=7.824e-05, forward_time=0.142, loss_ctc=99.146, loss_interctc_layer6=101.875, loss_interctc_layer12=90.666, loss_interctc_layer15=85.696, loss_interctc_layer21=99.431, loss=95.363, backward_time=0.350, grad_norm=68.935, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.082, optim0_lr0=1.840e-04, train_time=1.791 [gpua049:0/64] 2024-01-16 03:45:32,951 (trainer:753) INFO: 5epoch:train:10901-11000batch: iter_time=7.714e-05, forward_time=0.146, loss_ctc=124.897, loss_interctc_layer6=118.970, loss_interctc_layer12=105.543, loss_interctc_layer15=101.288, loss_interctc_layer21=124.706, loss=115.081, backward_time=0.312, grad_norm=84.253, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.082, optim0_lr0=1.839e-04, train_time=1.410 [gpua049:0/64] 2024-01-16 03:47:59,653 (trainer:753) INFO: 5epoch:train:11001-11100batch: iter_time=7.541e-05, forward_time=0.143, loss_ctc=126.835, loss_interctc_layer6=119.100, loss_interctc_layer12=105.102, loss_interctc_layer15=99.850, loss_interctc_layer21=127.135, loss=115.604, backward_time=0.328, grad_norm=87.187, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.082, optim0_lr0=1.838e-04, train_time=1.467 [gpua049:0/64] 2024-01-16 03:50:05,750 (trainer:753) INFO: 5epoch:train:11101-11200batch: iter_time=7.772e-05, forward_time=0.143, loss_ctc=94.046, loss_interctc_layer6=101.463, loss_interctc_layer12=88.191, loss_interctc_layer15=83.192, loss_interctc_layer21=94.166, loss=92.212, backward_time=0.300, grad_norm=78.627, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.082, optim0_lr0=1.837e-04, train_time=1.261 [gpua049:0/64] 2024-01-16 03:51:31,307 (multiple_iter_factory:32) INFO: Building 9th iter-factory... [gpua049:0/64] 2024-01-16 03:51:51,459 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') [gpua049:0/64] 2024-01-16 03:51:55,049 (abs_task:1660) INFO: [train] dataset: ESPnetDataset( speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} preprocess: ) [gpua049:0/64] 2024-01-16 03:51:55,050 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, [gpua049:0/64] 2024-01-16 03:51:55,053 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 [gpua049:0/64] 2024-01-16 03:56:49,843 (trainer:753) INFO: 5epoch:train:11201-11300batch: iter_time=2.687, forward_time=0.142, loss_ctc=101.173, loss_interctc_layer6=104.968, loss_interctc_layer12=92.190, loss_interctc_layer15=87.219, loss_interctc_layer21=100.920, loss=97.294, backward_time=0.305, grad_norm=75.908, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.082, optim0_lr0=1.835e-04, train_time=4.041 [gpua049:0/64] 2024-01-16 03:58:51,844 (trainer:753) INFO: 5epoch:train:11301-11400batch: iter_time=7.851e-05, forward_time=0.142, loss_ctc=115.909, loss_interctc_layer6=107.020, loss_interctc_layer12=93.688, loss_interctc_layer15=89.158, loss_interctc_layer21=116.303, loss=104.416, backward_time=0.297, grad_norm=85.430, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.082, optim0_lr0=1.834e-04, train_time=1.220 [gpua049:0/64] 2024-01-16 04:00:56,452 (trainer:753) INFO: 5epoch:train:11401-11500batch: iter_time=7.782e-05, forward_time=0.142, loss_ctc=117.772, loss_interctc_layer6=112.304, loss_interctc_layer12=100.234, loss_interctc_layer15=94.069, loss_interctc_layer21=118.058, loss=108.487, backward_time=0.297, grad_norm=72.340, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.082, optim0_lr0=1.833e-04, train_time=1.246 [gpua049:0/64] 2024-01-16 04:03:23,897 (trainer:753) INFO: 5epoch:train:11501-11600batch: iter_time=7.528e-05, forward_time=0.143, loss_ctc=132.285, loss_interctc_layer6=124.555, loss_interctc_layer12=109.524, loss_interctc_layer15=103.019, loss_interctc_layer21=133.057, loss=120.488, backward_time=0.324, grad_norm=82.793, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.082, optim0_lr0=1.831e-04, train_time=1.474 [gpua049:0/64] 2024-01-16 04:05:57,536 (trainer:753) INFO: 5epoch:train:11601-11700batch: iter_time=7.930e-05, forward_time=0.143, loss_ctc=127.327, loss_interctc_layer6=117.543, loss_interctc_layer12=103.503, loss_interctc_layer15=97.365, loss_interctc_layer21=127.237, loss=114.595, backward_time=0.318, grad_norm=56.392, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.082, optim0_lr0=1.830e-04, train_time=1.536 [gpua049:0/64] 2024-01-16 04:08:30,646 (trainer:753) INFO: 5epoch:train:11701-11800batch: iter_time=7.988e-05, forward_time=0.143, loss_ctc=104.677, loss_interctc_layer6=104.980, loss_interctc_layer12=91.452, loss_interctc_layer15=86.430, loss_interctc_layer21=105.248, loss=98.557, backward_time=0.327, grad_norm=61.789, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.082, optim0_lr0=1.829e-04, train_time=1.531 [gpua049:0/64] 2024-01-16 04:10:59,509 (trainer:753) INFO: 5epoch:train:11801-11900batch: iter_time=7.829e-05, forward_time=0.142, loss_ctc=108.934, loss_interctc_layer6=113.699, loss_interctc_layer12=100.420, loss_interctc_layer15=94.958, loss_interctc_layer21=109.294, loss=105.461, backward_time=0.333, grad_norm=69.679, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.082, optim0_lr0=1.828e-04, train_time=1.488 [gpua049:0/64] 2024-01-16 04:13:23,764 (trainer:753) INFO: 5epoch:train:11901-12000batch: iter_time=7.778e-05, forward_time=0.185, loss_ctc=130.600, loss_interctc_layer6=121.003, loss_interctc_layer12=106.594, loss_interctc_layer15=100.610, loss_interctc_layer21=131.219, loss=118.005, backward_time=0.309, grad_norm=81.431, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.088, optim0_lr0=1.826e-04, train_time=1.442 [gpua049:0/64] 2024-01-16 04:15:43,448 (trainer:753) INFO: 5epoch:train:12001-12100batch: iter_time=0.002, forward_time=0.197, loss_ctc=98.757, loss_interctc_layer6=105.489, loss_interctc_layer12=92.016, loss_interctc_layer15=86.685, loss_interctc_layer21=98.984, loss=96.386, backward_time=0.316, grad_norm=65.061, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.086, optim0_lr0=1.825e-04, train_time=1.396 [gpua049:0/64] 2024-01-16 04:18:17,807 (trainer:753) INFO: 5epoch:train:12101-12200batch: iter_time=7.816e-05, forward_time=0.142, loss_ctc=111.446, loss_interctc_layer6=108.852, loss_interctc_layer12=97.052, loss_interctc_layer15=93.240, loss_interctc_layer21=112.095, loss=104.537, backward_time=0.355, grad_norm=76.749, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.082, optim0_lr0=1.824e-04, train_time=1.544 [gpua049:0/64] 2024-01-16 04:20:51,218 (trainer:753) INFO: 5epoch:train:12201-12300batch: iter_time=7.959e-05, forward_time=0.143, loss_ctc=109.921, loss_interctc_layer6=112.204, loss_interctc_layer12=98.619, loss_interctc_layer15=93.988, loss_interctc_layer21=110.329, loss=105.012, backward_time=0.316, grad_norm=69.530, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.082, optim0_lr0=1.823e-04, train_time=1.534 [gpua049:0/64] 2024-01-16 04:23:27,135 (trainer:753) INFO: 5epoch:train:12301-12400batch: iter_time=7.766e-05, forward_time=0.143, loss_ctc=110.735, loss_interctc_layer6=115.898, loss_interctc_layer12=101.915, loss_interctc_layer15=96.716, loss_interctc_layer21=110.897, loss=107.232, backward_time=0.365, grad_norm=69.054, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.082, optim0_lr0=1.821e-04, train_time=1.559 [gpua049:0/64] 2024-01-16 04:25:56,332 (trainer:753) INFO: 5epoch:train:12401-12500batch: iter_time=7.738e-05, forward_time=0.142, loss_ctc=90.166, loss_interctc_layer6=93.747, loss_interctc_layer12=81.591, loss_interctc_layer15=76.593, loss_interctc_layer21=90.515, loss=86.522, backward_time=0.308, grad_norm=64.787, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.082, optim0_lr0=1.820e-04, train_time=1.492 [gpua049:0/64] 2024-01-16 04:25:59,299 (multiple_iter_factory:32) INFO: Building 10th iter-factory... [gpua049:0/64] 2024-01-16 04:26:19,093 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') [gpua049:0/64] 2024-01-16 04:26:22,709 (abs_task:1660) INFO: [train] dataset: ESPnetDataset( speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} preprocess: ) [gpua049:0/64] 2024-01-16 04:26:22,709 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, [gpua049:0/64] 2024-01-16 04:26:22,713 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 [gpua049:0/64] 2024-01-16 04:32:19,105 (trainer:753) INFO: 5epoch:train:12501-12600batch: iter_time=1.417, forward_time=0.142, loss_ctc=106.262, loss_interctc_layer6=103.974, loss_interctc_layer12=92.177, loss_interctc_layer15=87.420, loss_interctc_layer21=106.614, loss=99.289, backward_time=0.304, grad_norm=68.032, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.082, optim0_lr0=1.819e-04, train_time=3.828 [gpua049:0/64] 2024-01-16 04:34:24,449 (trainer:753) INFO: 5epoch:train:12601-12700batch: iter_time=7.862e-05, forward_time=0.142, loss_ctc=132.048, loss_interctc_layer6=116.081, loss_interctc_layer12=101.642, loss_interctc_layer15=96.646, loss_interctc_layer21=132.793, loss=115.842, backward_time=0.295, grad_norm=81.297, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.082, optim0_lr0=1.818e-04, train_time=1.253 [gpua049:0/64] 2024-01-16 04:36:41,223 (trainer:753) INFO: 5epoch:train:12701-12800batch: iter_time=7.589e-05, forward_time=0.143, loss_ctc=120.817, loss_interctc_layer6=110.751, loss_interctc_layer12=96.902, loss_interctc_layer15=91.631, loss_interctc_layer21=121.499, loss=108.320, backward_time=0.332, grad_norm=66.537, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.082, optim0_lr0=1.816e-04, train_time=1.368 [gpua049:0/64] 2024-01-16 04:39:27,005 (trainer:753) INFO: 5epoch:train:12801-12900batch: iter_time=7.921e-05, forward_time=0.143, loss_ctc=143.729, loss_interctc_layer6=123.648, loss_interctc_layer12=110.165, loss_interctc_layer15=104.632, loss_interctc_layer21=143.268, loss=125.089, backward_time=0.341, grad_norm=93.789, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.082, optim0_lr0=1.815e-04, train_time=1.658 [gpua049:0/64] 2024-01-16 04:41:42,916 (trainer:753) INFO: 5epoch:train:12901-13000batch: iter_time=8.018e-05, forward_time=0.143, loss_ctc=120.722, loss_interctc_layer6=115.893, loss_interctc_layer12=100.937, loss_interctc_layer15=95.268, loss_interctc_layer21=121.069, loss=110.778, backward_time=0.318, grad_norm=78.347, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.082, optim0_lr0=1.814e-04, train_time=1.359 [gpua049:0/64] 2024-01-16 04:43:54,659 (trainer:753) INFO: 5epoch:train:13001-13100batch: iter_time=7.676e-05, forward_time=0.143, loss_ctc=110.561, loss_interctc_layer6=99.671, loss_interctc_layer12=87.286, loss_interctc_layer15=82.324, loss_interctc_layer21=110.982, loss=98.165, backward_time=0.304, grad_norm=67.410, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.082, optim0_lr0=1.813e-04, train_time=1.317 [gpua049:0/64] 2024-01-16 04:46:20,754 (trainer:753) INFO: 5epoch:train:13101-13200batch: iter_time=7.794e-05, forward_time=0.142, loss_ctc=122.506, loss_interctc_layer6=118.818, loss_interctc_layer12=104.763, loss_interctc_layer15=99.122, loss_interctc_layer21=123.254, loss=113.692, backward_time=0.310, grad_norm=86.595, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.082, optim0_lr0=1.811e-04, train_time=1.461 [gpua049:0/64] 2024-01-16 04:48:56,946 (trainer:753) INFO: 5epoch:train:13201-13300batch: iter_time=7.644e-05, forward_time=0.144, loss_ctc=122.200, loss_interctc_layer6=115.367, loss_interctc_layer12=100.883, loss_interctc_layer15=94.899, loss_interctc_layer21=122.596, loss=111.189, backward_time=0.331, grad_norm=73.094, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.082, optim0_lr0=1.810e-04, train_time=1.562 [gpua049:0/64] 2024-01-16 04:51:27,841 (trainer:753) INFO: 5epoch:train:13301-13400batch: iter_time=7.611e-05, forward_time=0.142, loss_ctc=96.210, loss_interctc_layer6=100.023, loss_interctc_layer12=87.877, loss_interctc_layer15=83.446, loss_interctc_layer21=96.549, loss=92.821, backward_time=0.321, grad_norm=65.247, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.082, optim0_lr0=1.809e-04, train_time=1.509 [gpua049:0/64] 2024-01-16 04:53:53,729 (trainer:753) INFO: 5epoch:train:13401-13500batch: iter_time=8.140e-05, forward_time=0.143, loss_ctc=120.665, loss_interctc_layer6=115.894, loss_interctc_layer12=102.830, loss_interctc_layer15=97.318, loss_interctc_layer21=120.835, loss=111.508, backward_time=0.332, grad_norm=68.596, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.082, optim0_lr0=1.808e-04, train_time=1.459 [gpua049:0/64] 2024-01-16 04:56:17,405 (trainer:753) INFO: 5epoch:train:13501-13600batch: iter_time=7.709e-05, forward_time=0.147, loss_ctc=124.427, loss_interctc_layer6=117.417, loss_interctc_layer12=103.528, loss_interctc_layer15=97.492, loss_interctc_layer21=124.144, loss=113.402, backward_time=0.382, grad_norm=63.996, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.082, optim0_lr0=1.806e-04, train_time=1.437 [gpua049:0/64] 2024-01-16 04:58:38,058 (trainer:753) INFO: 5epoch:train:13601-13700batch: iter_time=8.001e-05, forward_time=0.142, loss_ctc=90.652, loss_interctc_layer6=98.300, loss_interctc_layer12=85.549, loss_interctc_layer15=80.260, loss_interctc_layer21=91.024, loss=89.157, backward_time=0.318, grad_norm=65.537, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.082, optim0_lr0=1.805e-04, train_time=1.406 [gpua049:0/64] 2024-01-16 05:00:07,372 (multiple_iter_factory:32) INFO: Building 11th iter-factory... [gpua049:0/64] 2024-01-16 05:00:27,835 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') [gpua049:0/64] 2024-01-16 05:00:31,539 (abs_task:1660) INFO: [train] dataset: ESPnetDataset( speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} preprocess: ) [gpua049:0/64] 2024-01-16 05:00:31,540 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, [gpua049:0/64] 2024-01-16 05:00:31,756 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 [gpua049:0/64] 2024-01-16 05:06:00,129 (trainer:753) INFO: 5epoch:train:13701-13800batch: iter_time=2.974, forward_time=0.205, loss_ctc=97.529, loss_interctc_layer6=103.183, loss_interctc_layer12=90.256, loss_interctc_layer15=85.434, loss_interctc_layer21=98.019, loss=94.884, backward_time=0.328, grad_norm=66.075, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.086, optim0_lr0=1.804e-04, train_time=4.420 [gpua049:0/64] 2024-01-16 05:08:05,218 (trainer:753) INFO: 5epoch:train:13801-13900batch: iter_time=8.998e-05, forward_time=0.142, loss_ctc=118.866, loss_interctc_layer6=105.070, loss_interctc_layer12=92.401, loss_interctc_layer15=87.725, loss_interctc_layer21=119.362, loss=104.685, backward_time=0.301, grad_norm=80.137, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.082, optim0_lr0=1.803e-04, train_time=1.251 [gpua049:0/64] 2024-01-16 05:10:19,726 (trainer:753) INFO: 5epoch:train:13901-14000batch: iter_time=7.749e-05, forward_time=0.143, loss_ctc=121.879, loss_interctc_layer6=110.980, loss_interctc_layer12=97.303, loss_interctc_layer15=92.024, loss_interctc_layer21=122.447, loss=108.927, backward_time=0.305, grad_norm=85.304, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.082, optim0_lr0=1.801e-04, train_time=1.345 [gpua049:0/64] 2024-01-16 05:12:40,522 (trainer:753) INFO: 5epoch:train:14001-14100batch: iter_time=7.696e-05, forward_time=0.153, loss_ctc=134.635, loss_interctc_layer6=122.180, loss_interctc_layer12=107.848, loss_interctc_layer15=100.917, loss_interctc_layer21=135.075, loss=120.131, backward_time=0.348, grad_norm=85.887, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.082, optim0_lr0=1.800e-04, train_time=1.408 [gpua049:0/64] 2024-01-16 05:14:54,178 (trainer:753) INFO: 5epoch:train:14101-14200batch: iter_time=7.612e-05, forward_time=0.176, loss_ctc=128.700, loss_interctc_layer6=116.111, loss_interctc_layer12=101.804, loss_interctc_layer15=95.716, loss_interctc_layer21=128.704, loss=114.207, backward_time=0.316, grad_norm=69.827, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.089, optim0_lr0=1.799e-04, train_time=1.336 [gpua049:0/64] 2024-01-16 05:17:26,779 (trainer:753) INFO: 5epoch:train:14201-14300batch: iter_time=7.777e-05, forward_time=0.142, loss_ctc=107.306, loss_interctc_layer6=103.302, loss_interctc_layer12=90.055, loss_interctc_layer15=84.488, loss_interctc_layer21=107.966, loss=98.623, backward_time=0.358, grad_norm=63.381, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.081, optim0_lr0=1.798e-04, train_time=1.524 [gpua049:0/64] 2024-01-16 05:20:12,546 (trainer:753) INFO: 5epoch:train:14301-14400batch: iter_time=8.015e-05, forward_time=0.142, loss_ctc=111.401, loss_interctc_layer6=111.734, loss_interctc_layer12=98.740, loss_interctc_layer15=93.590, loss_interctc_layer21=112.348, loss=105.563, backward_time=0.424, grad_norm=66.263, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.081, optim0_lr0=1.797e-04, train_time=1.659 [gpua049:0/64] 2024-01-16 05:22:44,185 (trainer:753) INFO: 5epoch:train:14401-14500batch: iter_time=7.981e-05, forward_time=0.142, loss_ctc=134.016, loss_interctc_layer6=120.214, loss_interctc_layer12=105.774, loss_interctc_layer15=99.054, loss_interctc_layer21=134.411, loss=118.694, backward_time=0.320, grad_norm=84.912, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.081, optim0_lr0=1.795e-04, train_time=1.516 [gpua049:0/64] 2024-01-16 05:25:40,571 (trainer:753) INFO: 5epoch:train:14501-14600batch: iter_time=7.782e-05, forward_time=0.143, loss_ctc=100.712, loss_interctc_layer6=103.326, loss_interctc_layer12=90.048, loss_interctc_layer15=84.605, loss_interctc_layer21=101.272, loss=95.993, backward_time=0.383, grad_norm=72.572, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.082, optim0_lr0=1.794e-04, train_time=1.764 [gpua049:0/64] 2024-01-16 05:28:26,723 (trainer:753) INFO: 5epoch:train:14601-14700batch: iter_time=7.794e-05, forward_time=0.142, loss_ctc=111.672, loss_interctc_layer6=108.226, loss_interctc_layer12=95.958, loss_interctc_layer15=91.631, loss_interctc_layer21=113.105, loss=104.118, backward_time=0.329, grad_norm=69.160, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.081, optim0_lr0=1.793e-04, train_time=1.661 [gpua049:0/64] 2024-01-16 05:30:35,830 (trainer:753) INFO: 5epoch:train:14701-14800batch: iter_time=7.840e-05, forward_time=0.142, loss_ctc=110.684, loss_interctc_layer6=110.824, loss_interctc_layer12=96.851, loss_interctc_layer15=92.095, loss_interctc_layer21=111.187, loss=104.328, backward_time=0.303, grad_norm=72.229, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.081, optim0_lr0=1.792e-04, train_time=1.291 [gpua049:0/64] 2024-01-16 05:32:48,611 (trainer:753) INFO: 5epoch:train:14801-14900batch: iter_time=7.875e-05, forward_time=0.143, loss_ctc=112.625, loss_interctc_layer6=114.694, loss_interctc_layer12=100.773, loss_interctc_layer15=95.116, loss_interctc_layer21=112.235, loss=107.089, backward_time=0.322, grad_norm=77.196, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.081, optim0_lr0=1.791e-04, train_time=1.328 [gpua049:0/64] 2024-01-16 05:34:59,946 (trainer:753) INFO: 5epoch:train:14901-15000batch: iter_time=7.810e-05, forward_time=0.141, loss_ctc=90.615, loss_interctc_layer6=92.919, loss_interctc_layer12=80.161, loss_interctc_layer15=75.399, loss_interctc_layer21=91.223, loss=86.063, backward_time=0.305, grad_norm=67.763, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.081, optim0_lr0=1.789e-04, train_time=1.313 [gpua049:0/64] 2024-01-16 06:05:37,817 (trainer:352) INFO: 5epoch results: [train] iter_time=0.170, forward_time=0.147, loss_ctc=121.442, loss_interctc_layer6=115.831, loss_interctc_layer12=103.012, loss_interctc_layer15=98.199, loss_interctc_layer21=121.258, loss=111.949, backward_time=0.321, grad_norm=80.007, clip=100.000, loss_scale=1.792e+15, optim_step_time=0.082, optim0_lr0=1.889e-04, train_time=1.642, time=6 hours, 50 minutes and 49.94 seconds, total_count=75000, gpu_max_cached_mem_GB=34.508, [valid] loss_ctc=91.142, cer_ctc=0.386, loss_interctc_layer6=82.783, cer_interctc_layer6=0.361, loss_interctc_layer12=72.572, cer_interctc_layer12=0.298, loss_interctc_layer15=70.431, cer_interctc_layer15=0.277, loss_interctc_layer21=91.407, cer_interctc_layer21=0.392, loss=81.667, time=30 minutes and 14.09 seconds, total_count=23355, gpu_max_cached_mem_GB=34.508 [gpua049:0/64] 2024-01-16 06:05:56,601 (trainer:407) INFO: The best model has been updated: valid.cer_ctc, valid.loss_ctc, valid.total_count [gpua049:0/64] 2024-01-16 06:05:56,639 (trainer:286) INFO: 6/45epoch started. Estimated time to finish: 1 week, 6 days and 4 hours [gpua049:0/64] 2024-01-16 06:05:56,655 (multiple_iter_factory:32) INFO: Building 0th iter-factory... [gpua049:0/64] 2024-01-16 06:06:17,091 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') [gpua049:0/64] 2024-01-16 06:06:20,828 (abs_task:1660) INFO: [train] dataset: ESPnetDataset( speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} preprocess: ) [gpua049:0/64] 2024-01-16 06:06:20,828 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, [gpua049:0/64] 2024-01-16 06:06:20,831 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 [gpua049:0/64] 2024-01-16 06:12:45,437 (trainer:753) INFO: 6epoch:train:1-100batch: iter_time=2.149, forward_time=0.167, loss_ctc=136.940, loss_interctc_layer6=131.511, loss_interctc_layer12=115.396, loss_interctc_layer15=110.457, loss_interctc_layer21=136.189, loss=126.099, backward_time=0.304, grad_norm=81.269, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.083, optim0_lr0=1.788e-04, train_time=4.087 [gpua049:0/64] 2024-01-16 06:14:48,381 (trainer:753) INFO: 6epoch:train:101-200batch: iter_time=7.709e-05, forward_time=0.143, loss_ctc=109.752, loss_interctc_layer6=112.094, loss_interctc_layer12=99.009, loss_interctc_layer15=93.448, loss_interctc_layer21=110.362, loss=104.933, backward_time=0.297, grad_norm=67.220, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.082, optim0_lr0=1.787e-04, train_time=1.229 [gpua049:0/64] 2024-01-16 06:17:02,009 (trainer:753) INFO: 6epoch:train:201-300batch: iter_time=7.664e-05, forward_time=0.142, loss_ctc=117.746, loss_interctc_layer6=119.377, loss_interctc_layer12=106.393, loss_interctc_layer15=102.315, loss_interctc_layer21=118.323, loss=112.831, backward_time=0.311, grad_norm=80.279, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.082, optim0_lr0=1.786e-04, train_time=1.336 [gpua049:0/64] 2024-01-16 06:19:20,666 (trainer:753) INFO: 6epoch:train:301-400batch: iter_time=7.531e-05, forward_time=0.142, loss_ctc=119.857, loss_interctc_layer6=118.052, loss_interctc_layer12=103.050, loss_interctc_layer15=97.244, loss_interctc_layer21=119.773, loss=111.595, backward_time=0.323, grad_norm=69.459, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.082, optim0_lr0=1.785e-04, train_time=1.386 [gpua049:0/64] 2024-01-16 06:21:56,309 (trainer:753) INFO: 6epoch:train:401-500batch: iter_time=7.736e-05, forward_time=0.141, loss_ctc=112.468, loss_interctc_layer6=108.975, loss_interctc_layer12=96.890, loss_interctc_layer15=92.242, loss_interctc_layer21=112.930, loss=104.701, backward_time=0.323, grad_norm=114.515, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.081, optim0_lr0=1.783e-04, train_time=1.556 [gpua049:0/64] 2024-01-16 06:24:18,301 (trainer:753) INFO: 6epoch:train:501-600batch: iter_time=7.689e-05, forward_time=0.164, loss_ctc=126.632, loss_interctc_layer6=128.089, loss_interctc_layer12=114.640, loss_interctc_layer15=109.475, loss_interctc_layer21=127.705, loss=121.308, backward_time=0.313, grad_norm=86.957, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.082, optim0_lr0=1.782e-04, train_time=1.420 [gpua049:0/64] 2024-01-16 06:26:45,029 (trainer:753) INFO: 6epoch:train:601-700batch: iter_time=7.736e-05, forward_time=0.141, loss_ctc=99.046, loss_interctc_layer6=92.973, loss_interctc_layer12=81.721, loss_interctc_layer15=78.405, loss_interctc_layer21=98.410, loss=90.111, backward_time=0.335, grad_norm=65.324, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.081, optim0_lr0=1.781e-04, train_time=1.467 [gpua049:0/64] 2024-01-16 06:29:26,889 (trainer:753) INFO: 6epoch:train:701-800batch: iter_time=7.617e-05, forward_time=0.157, loss_ctc=92.380, loss_interctc_layer6=95.367, loss_interctc_layer12=83.756, loss_interctc_layer15=79.490, loss_interctc_layer21=92.267, loss=88.652, backward_time=0.328, grad_norm=62.487, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.082, optim0_lr0=1.780e-04, train_time=1.618 [gpua049:0/64] 2024-01-16 06:31:58,971 (trainer:753) INFO: 6epoch:train:801-900batch: iter_time=7.679e-05, forward_time=0.151, loss_ctc=158.485, loss_interctc_layer6=136.891, loss_interctc_layer12=123.894, loss_interctc_layer15=117.921, loss_interctc_layer21=156.643, loss=138.767, backward_time=0.314, grad_norm=98.571, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.082, optim0_lr0=1.779e-04, train_time=1.521 [gpua049:0/64] 2024-01-16 06:34:15,678 (trainer:753) INFO: 6epoch:train:901-1000batch: iter_time=7.574e-05, forward_time=0.144, loss_ctc=101.424, loss_interctc_layer6=100.288, loss_interctc_layer12=87.866, loss_interctc_layer15=83.055, loss_interctc_layer21=101.455, loss=94.818, backward_time=0.316, grad_norm=62.971, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.082, optim0_lr0=1.778e-04, train_time=1.364 [gpua049:0/64] 2024-01-16 06:36:36,834 (trainer:753) INFO: 6epoch:train:1001-1100batch: iter_time=7.440e-05, forward_time=0.171, loss_ctc=98.225, loss_interctc_layer6=102.749, loss_interctc_layer12=89.874, loss_interctc_layer15=85.188, loss_interctc_layer21=98.441, loss=94.895, backward_time=0.348, grad_norm=65.167, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.088, optim0_lr0=1.776e-04, train_time=1.414 [gpua049:0/64] 2024-01-16 06:38:59,625 (trainer:753) INFO: 6epoch:train:1101-1200batch: iter_time=7.331e-05, forward_time=0.145, loss_ctc=123.750, loss_interctc_layer6=115.513, loss_interctc_layer12=102.057, loss_interctc_layer15=97.042, loss_interctc_layer21=124.503, loss=112.573, backward_time=0.319, grad_norm=113.660, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.083, optim0_lr0=1.775e-04, train_time=1.428 [gpua049:0/64] 2024-01-16 06:40:28,880 (multiple_iter_factory:32) INFO: Building 1th iter-factory... [gpua049:0/64] 2024-01-16 06:40:48,917 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') [gpua049:0/64] 2024-01-16 06:40:52,780 (abs_task:1660) INFO: [train] dataset: ESPnetDataset( speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} preprocess: ) [gpua049:0/64] 2024-01-16 06:40:52,780 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, [gpua049:0/64] 2024-01-16 06:40:52,784 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 [gpua049:0/64] 2024-01-16 06:45:54,986 (trainer:753) INFO: 6epoch:train:1201-1300batch: iter_time=1.858, forward_time=0.143, loss_ctc=132.950, loss_interctc_layer6=126.670, loss_interctc_layer12=112.133, loss_interctc_layer15=106.354, loss_interctc_layer21=132.919, loss=122.205, backward_time=0.319, grad_norm=92.460, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.082, optim0_lr0=1.774e-04, train_time=4.154 [gpua049:0/64] 2024-01-16 06:47:58,078 (trainer:753) INFO: 6epoch:train:1301-1400batch: iter_time=7.712e-05, forward_time=0.143, loss_ctc=131.234, loss_interctc_layer6=122.950, loss_interctc_layer12=108.931, loss_interctc_layer15=102.738, loss_interctc_layer21=132.404, loss=119.651, backward_time=0.297, grad_norm=151.243, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.083, optim0_lr0=1.773e-04, train_time=1.231 [gpua049:0/64] 2024-01-16 06:50:05,707 (trainer:753) INFO: 6epoch:train:1401-1500batch: iter_time=7.428e-05, forward_time=0.143, loss_ctc=123.690, loss_interctc_layer6=121.469, loss_interctc_layer12=107.504, loss_interctc_layer15=101.999, loss_interctc_layer21=123.837, loss=115.700, backward_time=0.300, grad_norm=83.472, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.083, optim0_lr0=1.772e-04, train_time=1.276 [gpua049:0/64] 2024-01-16 06:52:30,772 (trainer:753) INFO: 6epoch:train:1501-1600batch: iter_time=7.701e-05, forward_time=0.143, loss_ctc=111.972, loss_interctc_layer6=105.331, loss_interctc_layer12=92.337, loss_interctc_layer15=87.252, loss_interctc_layer21=112.029, loss=101.784, backward_time=0.310, grad_norm=62.823, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.083, optim0_lr0=1.771e-04, train_time=1.450 [gpua049:0/64] 2024-01-16 06:54:59,429 (trainer:753) INFO: 6epoch:train:1601-1700batch: iter_time=7.940e-05, forward_time=0.143, loss_ctc=117.898, loss_interctc_layer6=118.965, loss_interctc_layer12=104.590, loss_interctc_layer15=98.932, loss_interctc_layer21=118.221, loss=111.721, backward_time=0.322, grad_norm=82.556, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.083, optim0_lr0=1.769e-04, train_time=1.486 [gpua049:0/64] 2024-01-16 06:57:36,676 (trainer:753) INFO: 6epoch:train:1701-1800batch: iter_time=7.984e-05, forward_time=0.143, loss_ctc=127.632, loss_interctc_layer6=117.988, loss_interctc_layer12=103.566, loss_interctc_layer15=97.307, loss_interctc_layer21=128.300, loss=114.959, backward_time=0.331, grad_norm=100.165, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.083, optim0_lr0=1.768e-04, train_time=1.572 [gpua049:0/64] 2024-01-16 06:59:49,946 (trainer:753) INFO: 6epoch:train:1801-1900batch: iter_time=8.115e-05, forward_time=0.142, loss_ctc=110.288, loss_interctc_layer6=110.588, loss_interctc_layer12=98.941, loss_interctc_layer15=95.043, loss_interctc_layer21=110.794, loss=105.131, backward_time=0.317, grad_norm=76.355, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.083, optim0_lr0=1.767e-04, train_time=1.332 [gpua049:0/64] 2024-01-16 07:02:07,562 (trainer:753) INFO: 6epoch:train:1901-2000batch: iter_time=7.601e-05, forward_time=0.141, loss_ctc=97.189, loss_interctc_layer6=88.113, loss_interctc_layer12=77.469, loss_interctc_layer15=72.951, loss_interctc_layer21=96.996, loss=86.544, backward_time=0.315, grad_norm=56.961, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.082, optim0_lr0=1.766e-04, train_time=1.376 [gpua049:0/64] 2024-01-16 07:04:18,634 (trainer:753) INFO: 6epoch:train:2001-2100batch: iter_time=7.118e-05, forward_time=0.142, loss_ctc=105.956, loss_interctc_layer6=103.213, loss_interctc_layer12=90.248, loss_interctc_layer15=84.820, loss_interctc_layer21=106.030, loss=98.054, backward_time=0.309, grad_norm=65.736, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.082, optim0_lr0=1.765e-04, train_time=1.310 [gpua049:0/64] 2024-01-16 07:06:38,539 (trainer:753) INFO: 6epoch:train:2101-2200batch: iter_time=7.544e-05, forward_time=0.142, loss_ctc=148.569, loss_interctc_layer6=124.484, loss_interctc_layer12=112.887, loss_interctc_layer15=106.246, loss_interctc_layer21=147.152, loss=127.868, backward_time=0.300, grad_norm=96.317, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.081, optim0_lr0=1.764e-04, train_time=1.399 [gpua049:0/64] 2024-01-16 07:09:13,790 (trainer:753) INFO: 6epoch:train:2201-2300batch: iter_time=7.608e-05, forward_time=0.142, loss_ctc=104.405, loss_interctc_layer6=104.320, loss_interctc_layer12=90.840, loss_interctc_layer15=85.482, loss_interctc_layer21=105.291, loss=98.068, backward_time=0.341, grad_norm=70.020, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.082, optim0_lr0=1.763e-04, train_time=1.552 [gpua049:0/64] 2024-01-16 07:11:28,446 (trainer:753) INFO: 6epoch:train:2301-2400batch: iter_time=7.394e-05, forward_time=0.143, loss_ctc=113.306, loss_interctc_layer6=109.544, loss_interctc_layer12=96.418, loss_interctc_layer15=91.246, loss_interctc_layer21=114.037, loss=104.910, backward_time=0.315, grad_norm=70.586, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.081, optim0_lr0=1.761e-04, train_time=1.346 [gpua049:0/64] 2024-01-16 07:13:35,274 (trainer:753) INFO: 6epoch:train:2401-2500batch: iter_time=7.184e-05, forward_time=0.143, loss_ctc=124.606, loss_interctc_layer6=111.968, loss_interctc_layer12=98.515, loss_interctc_layer15=92.943, loss_interctc_layer21=124.745, loss=110.555, backward_time=0.314, grad_norm=68.040, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.082, optim0_lr0=1.760e-04, train_time=1.268 [gpua049:0/64] 2024-01-16 07:13:36,574 (multiple_iter_factory:32) INFO: Building 2th iter-factory... [gpua049:0/64] 2024-01-16 07:13:56,686 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') [gpua049:0/64] 2024-01-16 07:14:00,599 (abs_task:1660) INFO: [train] dataset: ESPnetDataset( speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} preprocess: ) [gpua049:0/64] 2024-01-16 07:14:00,599 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, [gpua049:0/64] 2024-01-16 07:14:00,602 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 [gpua049:0/64] 2024-01-16 07:20:48,643 (trainer:753) INFO: 6epoch:train:2501-2600batch: iter_time=1.360, forward_time=0.161, loss_ctc=132.013, loss_interctc_layer6=128.502, loss_interctc_layer12=112.723, loss_interctc_layer15=106.740, loss_interctc_layer21=132.403, loss=122.476, backward_time=0.308, grad_norm=103.601, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.082, optim0_lr0=1.759e-04, train_time=4.333 [gpua049:0/64] 2024-01-16 07:23:00,991 (trainer:753) INFO: 6epoch:train:2601-2700batch: iter_time=7.549e-05, forward_time=0.144, loss_ctc=114.533, loss_interctc_layer6=111.656, loss_interctc_layer12=97.897, loss_interctc_layer15=92.213, loss_interctc_layer21=115.399, loss=106.340, backward_time=0.308, grad_norm=97.375, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.082, optim0_lr0=1.758e-04, train_time=1.323 [gpua049:0/64] 2024-01-16 07:25:13,381 (trainer:753) INFO: 6epoch:train:2701-2800batch: iter_time=7.410e-05, forward_time=0.144, loss_ctc=117.332, loss_interctc_layer6=115.938, loss_interctc_layer12=102.756, loss_interctc_layer15=97.143, loss_interctc_layer21=117.905, loss=110.215, backward_time=0.327, grad_norm=69.555, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.082, optim0_lr0=1.757e-04, train_time=1.324 [gpua049:0/64] 2024-01-16 07:27:32,220 (trainer:753) INFO: 6epoch:train:2801-2900batch: iter_time=7.580e-05, forward_time=0.143, loss_ctc=116.055, loss_interctc_layer6=113.812, loss_interctc_layer12=98.601, loss_interctc_layer15=92.800, loss_interctc_layer21=116.594, loss=107.572, backward_time=0.329, grad_norm=73.527, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.082, optim0_lr0=1.756e-04, train_time=1.388 srun: Job step aborted: Waiting up to 32 seconds for job step to finish. slurmstepd: error: *** STEP 2858185.0 ON gpua049 CANCELLED AT 2024-01-16T10:01:52 ***