diff --git "a/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/train.19.log" "b/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/train.19.log" new file mode 100644--- /dev/null +++ "b/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/train.19.log" @@ -0,0 +1,3292 @@ +# Running on gpub003.delta.ncsa.illinois.edu +# Started at Sun Jan 28 15:35:57 CST 2024 +# SLURMD_NODENAME=gpub003 +# SLURM_CLUSTER_NAME=delta +# SLURM_CONF=/var/spool/slurmd/conf-cache/slurm.conf +# SLURM_CPUS_ON_NODE=64 +# SLURM_CPUS_PER_TASK=64 +# SLURM_EXPORT_ENV=PATH +# SLURM_GET_USER_ENV=1 +# SLURM_GPUS_ON_NODE=4 +# SLURM_GTIDS=0 +# SLURM_JOBID=2903446 +# SLURM_JOB_ACCOUNT=bbjs-delta-gpu +# SLURM_JOB_CPUS_PER_NODE='64(x16)' +# SLURM_JOB_END_TIME=1706650542 +# SLURM_JOB_GID=202 +# SLURM_JOB_GPUS=0,1,2,3 +# SLURM_JOB_ID=2903446 +# SLURM_JOB_NAME=exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/train.log +# SLURM_JOB_NODELIST='gpub[003,006-007,015-016,022,026,028,053-054,056,062,083,085,088-089]' +# SLURM_JOB_NUM_NODES=16 +# SLURM_JOB_PARTITION=gpuA40x4 +# SLURM_JOB_QOS=bbjs-delta-gpu +# SLURM_JOB_START_TIME=1706477742 +# SLURM_JOB_UID=68077 +# SLURM_JOB_USER=peng6 +# SLURM_LOCALID=0 +# SLURM_MEM_PER_NODE=240000 +# SLURM_MPI_TYPE=pmi2 +# SLURM_NNODES=16 +# SLURM_NODEID=0 +# SLURM_NODELIST='gpub[003,006-007,015-016,022,026,028,053-054,056,062,083,085,088-089]' +# SLURM_NODE_ALIASES='(null)' +# SLURM_OPEN_MODE=a +# SLURM_PRIO_PROCESS=0 +# SLURM_PROCID=0 +# SLURM_SUBMIT_DIR=/scratch/bbjs/peng6/espnet-owsm-ctc/egs2/owsm_v3.1_ctc/s2t1 +# SLURM_SUBMIT_HOST=dt-login03.delta.ncsa.illinois.edu +# SLURM_TASKS_PER_NODE='1(x16)' +# SLURM_TASK_PID=2602597 +# SLURM_TOPOLOGY_ADDR=ss00.ss09.gpub003 +# SLURM_TOPOLOGY_ADDR_PATTERN=switch.switch.node +# SLURM_WORKING_CLUSTER=delta:dt-sched:6817:9984:109 +# srun --export=ALL python3 -m espnet2.bin.s2t_train --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file 
+/scratch/bbjs/peng6/espnet-owsm-ctc/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-owsm-ctc/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev_v3/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000 --config conf/train_s2t_multitask-ctc_ebf27_conv2d8_size1024.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev_v3/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev_v3/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-owsm-ctc/egs2/owsm_v3.1_ctc/s2t1/exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/.dist_init_0a29800e-7fd3-4a06-a607-e630d460b78c
+[gpub003:0/64] 2024-01-28 15:40:13,226 (distributed_c10d:319) INFO: Added key: store_based_barrier_key:1 to store for rank: 0
+[gpub003:0/64] 2024-01-28 15:40:13,520 (distributed_c10d:353) INFO: Rank 0: Completed store-based barrier for key:store_based_barrier_key:1 with 64 nodes.
+[gpub003:0/64] 2024-01-28 15:40:13,552 (s2t:420) INFO: Vocabulary size: 50002
+[gpub003:0/64] 2024-01-28 15:40:27,395 (abs_task:1270) INFO: pytorch.version=1.13.1, cuda.available=True, cudnn.version=8500, cudnn.benchmark=False, cudnn.deterministic=True
+[gpub003:0/64] 2024-01-28 15:40:27,406 (abs_task:1271) INFO: Model structure:
+ESPnetS2TCTCModel( + (frontend): DefaultFrontend( + (stft): Stft(n_fft=512, win_length=400, hop_length=160, center=True, normalized=False, onesided=True) + (frontend): Frontend() + (logmel): LogMel(sr=16000, n_fft=512, n_mels=80, fmin=0, fmax=8000.0, htk=False) + ) + (specaug): SpecAug( + (freq_mask): MaskAlongAxis(mask_width_range=[0, 27], num_mask=2, axis=freq) + (time_mask): MaskAlongAxisVariableMaxWidth(mask_width_ratio_range=[0.0, 0.05], num_mask=10, axis=time) + ) + (normalize): GlobalMVN(stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz, norm_means=True, norm_vars=True) + (encoder): EBranchformerCTCEncoder( + (embed): Conv2dSubsampling8( + (conv): Sequential( + (0): Conv2d(1, 1024, kernel_size=(3, 3), stride=(2, 2)) + (1): ReLU() + (2): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(2, 2)) + (3): ReLU() + (4): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(2, 2)) + (5): ReLU() + ) + (out): Linear(in_features=9216, out_features=1024, bias=True) + (pos_enc): PositionalEncoding( + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + (encoders): MultiSequential( + (0): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): 
Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (1): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (2): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): 
Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (cross_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (norm_cross_attn): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (3): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), 
stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (4): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (5): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, 
elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (cross_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (norm_cross_attn): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (6): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (7): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), 
groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (8): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (cross_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (norm_cross_attn): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): 
Linear(in_features=2048, out_features=1024, bias=True) + ) + (9): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (10): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), 
eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (11): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (cross_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (norm_cross_attn): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (12): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, 
inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (13): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (14): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + 
(0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (cross_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (norm_cross_attn): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (15): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, 
elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (16): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (17): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, 
bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (cross_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (norm_cross_attn): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (18): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (19): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): 
GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (20): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (cross_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (norm_cross_attn): LayerNorm((1024,), 
eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (21): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (22): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + 
(activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (23): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (cross_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (norm_cross_attn): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (24): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + 
(norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (25): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + (26): EBranchformerEncoderLayer( + (attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): 
Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (cgmlp): ConvolutionalGatingMLP( + (channel_proj1): Sequential( + (0): Linear(in_features=1024, out_features=4096, bias=True) + (1): GELU(approximate='none') + ) + (csgu): ConvolutionalSpatialGatingUnit( + (norm): LayerNorm((2048,), eps=1e-12, elementwise_affine=True) + (conv): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (act): Identity() + (dropout): Dropout(p=0.1, inplace=False) + ) + (channel_proj2): Linear(in_features=2048, out_features=1024, bias=True) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (feed_forward_macaron): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): Swish() + ) + (norm_ff): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_ff_macaron): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mha): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_mlp): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm_final): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (cross_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Identity() + ) + (norm_cross_attn): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + (depthwise_conv_fusion): Conv1d(2048, 2048, kernel_size=(31,), stride=(1,), padding=(15,), groups=2048) + (merge_proj): Linear(in_features=2048, out_features=1024, bias=True) + ) + ) + (after_norm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (conditioning_layer): Linear(in_features=50002, out_features=1024, bias=True) + ) + (prompt_encoder): TransformerEncoder( + (encoders): MultiSequential( + (0): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=512, out_features=512, bias=True) + (linear_k): Linear(in_features=512, out_features=512, bias=True) + (linear_v): Linear(in_features=512, out_features=512, bias=True) + (linear_out): Linear(in_features=512, out_features=512, bias=True) + (dropout): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=512, out_features=2048, bias=True) + (w_2): Linear(in_features=2048, out_features=512, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((512,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((512,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (1): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=512, out_features=512, bias=True) + (linear_k): Linear(in_features=512, out_features=512, bias=True) + (linear_v): Linear(in_features=512, out_features=512, bias=True) + (linear_out): Linear(in_features=512, out_features=512, bias=True) + (dropout): Identity() + ) + (feed_forward): PositionwiseFeedForward( + 
(w_1): Linear(in_features=512, out_features=2048, bias=True) + (w_2): Linear(in_features=2048, out_features=512, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((512,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((512,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (2): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=512, out_features=512, bias=True) + (linear_k): Linear(in_features=512, out_features=512, bias=True) + (linear_v): Linear(in_features=512, out_features=512, bias=True) + (linear_out): Linear(in_features=512, out_features=512, bias=True) + (dropout): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=512, out_features=2048, bias=True) + (w_2): Linear(in_features=2048, out_features=512, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((512,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((512,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (3): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=512, out_features=512, bias=True) + (linear_k): Linear(in_features=512, out_features=512, bias=True) + (linear_v): Linear(in_features=512, out_features=512, bias=True) + (linear_out): Linear(in_features=512, out_features=512, bias=True) + (dropout): Identity() + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=512, out_features=2048, bias=True) + (w_2): Linear(in_features=2048, out_features=512, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((512,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((512,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + (after_norm): LayerNorm((512,), eps=1e-12, elementwise_affine=True) + ) + (embed): Embedding(50002, 512) + (pos_enc): PositionalEncoding( + (dropout): Dropout(p=0.0, inplace=False) + ) + (embed_proj): Linear(in_features=512, out_features=1024, bias=True) + (prompt_proj): Linear(in_features=512, out_features=1024, bias=True) + (ctc): CTC( + (ctc_lo): Linear(in_features=1024, out_features=50002, bias=True) + (ctc_loss): CTCLoss() + ) +) + +Model summary: + Class Name: ESPnetS2TCTCModel + Total Number of model parameters: 1.01 B + Number of trainable parameters: 1.01 B (100.0%) + Size: 4.02 GB + Type: torch.float32 +[gpub003:0/64] 2024-01-28 15:40:27,406 (abs_task:1274) INFO: Optimizer: +AdamW ( +Parameter Group 0 + amsgrad: False + betas: [0.9, 0.98] + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 0.0002 + lr: 1.6666666666666667e-09 + maximize: False + weight_decay: 0.0 +) +[gpub003:0/64] 2024-01-28 15:40:27,406 (abs_task:1275) INFO: Scheduler: PiecewiseLinearWarmupLR(warmup_steps_list=[0, 30000, 60000], warmup_lr_list=[0.0, 5e-05, 0.0002]) +[gpub003:0/64] 2024-01-28 15:40:27,448 (abs_task:1284) INFO: Saving the configuration in exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/config.yaml +[gpub003:0/64] 2024-01-28 15:40:32,994 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub003:0/64] 2024-01-28 15:40:33,912 (abs_task:1660) INFO: [valid] dataset: +ESPnetDataset( + speech: {"path": "dump/raw/dev_v3/wav.scp", "type": "kaldi_ark"} + text_prev: {"path": "dump/raw/dev_v3/text.prev", "type": "text"} + text_ctc: {"path": 
"dump/raw/dev_v3/text.ctc", "type": "text"} + text: {"path": "dump/raw/dev_v3/text", "type": "text"} + preprocess: ) +[gpub003:0/64] 2024-01-28 15:40:33,913 (abs_task:1661) INFO: [valid] Batch sampler: UnsortedBatchSampler(N-batch=4671, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/valid/speech_shape, +[gpub003:0/64] 2024-01-28 15:40:33,913 (abs_task:1662) INFO: [valid] mini-batch sizes summary: N-batch=4671, mean=256.0, min=256, max=257 +[gpub003:0/64] 2024-01-28 15:41:02,112 (trainer:167) INFO: The training was resumed using exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/checkpoint.pth +gpub003:2602694:2602694 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.103<0> +gpub003:2602694:2602694 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub003:2602694:2602694 [0] NCCL INFO cudaDriverVersion 12020 +NCCL version 2.14.3+cuda11.7 +[gpub003:0/64] 2024-01-28 15:41:07,822 (trainer:298) INFO: 15/45epoch started +[gpub003:0/64] 2024-01-28 15:41:07,862 (multiple_iter_factory:32) INFO: Building 0th iter-factory... +[gpub003:0/64] 2024-01-28 15:41:26,548 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub003:0/64] 2024-01-28 15:41:29,993 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub003:0/64] 2024-01-28 15:41:29,993 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub003:0/64] 2024-01-28 15:41:29,996 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +gpub083:292603:292603 [3] NCCL INFO cudaDriverVersion 12020 +gpub083:292603:292603 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.183<0> +gpub083:292603:292603 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub083:292603:292660 [3] NCCL INFO NET/IB : No device found. 
+gpub083:292603:292660 [3] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.183<0> [1]hsn0:141.142.145.183<0> +gpub083:292603:292660 [3] NCCL INFO Using network Socket +gpub083:292603:292660 [3] NCCL INFO Setting affinity for GPU 3 to ffffffff,ffffffff +gpub083:292603:292660 [3] NCCL INFO Trees [0] -1/-1/-1->51->50 [1] -1/-1/-1->51->50 +gpub083:292603:292660 [3] NCCL INFO Channel 00/0 : 51[c7000] -> 52[7000] [send] via NET/Socket/1 +gpub083:292603:292660 [3] NCCL INFO Channel 01/0 : 51[c7000] -> 52[7000] [send] via NET/Socket/1 +gpub083:292603:292660 [3] NCCL INFO Connected all rings +gpub083:292603:292660 [3] NCCL INFO Channel 00/0 : 51[c7000] -> 50[85000] via P2P/IPC +gpub083:292603:292660 [3] NCCL INFO Channel 01/0 : 51[c7000] -> 50[85000] via P2P/IPC +gpub083:292603:292660 [3] NCCL INFO Connected all trees +gpub083:292603:292660 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub083:292603:292660 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub083:292603:292660 [3] NCCL INFO comm 0x562f3105ce40 rank 51 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub006:180547:180547 [3] NCCL INFO cudaDriverVersion 12020 +gpub006:180547:180547 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.106<0> +gpub006:180547:180547 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub006:180547:180601 [3] NCCL INFO NET/IB : No device found. +gpub006:180547:180601 [3] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.106<0> [1]hsn0:141.142.145.106<0> +gpub006:180547:180601 [3] NCCL INFO Using network Socket +gpub006:180547:180601 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub006:180547:180601 [3] NCCL INFO Trees [0] -1/-1/-1->7->6 [1] -1/-1/-1->7->6 +gpub006:180547:180601 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [send] via NET/Socket/1 +gpub006:180547:180601 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [send] via NET/Socket/1 +gpub006:180547:180601 [3] NCCL INFO Connected all rings +gpub006:180547:180601 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 6[85000] via P2P/IPC +gpub007:822786:822786 [3] NCCL INFO cudaDriverVersion 12020 +gpub007:822786:822786 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.107<0> +gpub007:822786:822786 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub007:822786:822843 [3] NCCL INFO NET/IB : No device found. +gpub007:822786:822843 [3] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.107<0> [1]hsn0:141.142.145.107<0> +gpub007:822786:822843 [3] NCCL INFO Using network Socket +gpub007:822786:822843 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub007:822786:822843 [3] NCCL INFO Trees [0] -1/-1/-1->11->10 [1] -1/-1/-1->11->10 +gpub007:822786:822843 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [send] via NET/Socket/1 +gpub007:822786:822843 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [send] via NET/Socket/1 +gpub007:822786:822843 [3] NCCL INFO Connected all rings +gpub007:822786:822843 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 10[85000] via P2P/IPC +gpub015:743040:743040 [3] NCCL INFO cudaDriverVersion 12020 +gpub015:743040:743040 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.115<0> +gpub015:743040:743040 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub015:743040:743092 [3] NCCL INFO NET/IB : No device found. 
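Near the end of the encoder dump above there is (conditioning_layer): Linear(in_features=50002, out_features=1024), a projection from the 50k-token CTC vocabulary back to the model width. In ESPnet-style encoders such a layer is commonly used for intermediate / self-conditioned CTC; the log does not show where it is applied, so the following is only a hedged sketch with placeholder module names.

```python
import torch
import torch.nn as nn

# Hedged sketch of self-conditioning on an intermediate CTC prediction.
# ctc_lo and conditioning_layer are placeholders matching the dumped shapes.
def self_condition(x: torch.Tensor,
                   ctc_lo: nn.Linear,               # Linear(1024 -> 50002), as in (ctc) above
                   conditioning_layer: nn.Linear    # Linear(50002 -> 1024), as dumped above
                   ) -> torch.Tensor:
    posteriors = torch.softmax(ctc_lo(x), dim=-1)   # (batch, time, vocab)
    return x + conditioning_layer(posteriors)       # condition the remaining layers on the CTC prediction
```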
+gpub015:743040:743092 [3] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.115<0> [1]hsn0:141.142.145.115<0> +gpub015:743040:743092 [3] NCCL INFO Using network Socket +gpub015:743040:743092 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub015:743040:743092 [3] NCCL INFO Trees [0] -1/-1/-1->15->14 [1] -1/-1/-1->15->14 +gpub015:743040:743092 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 16[7000] [send] via NET/Socket/1 +gpub015:743040:743092 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 16[7000] [send] via NET/Socket/1 +gpub015:743040:743092 [3] NCCL INFO Connected all rings +gpub015:743040:743092 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 14[85000] via P2P/IPC +gpub016:765654:765654 [2] NCCL INFO cudaDriverVersion 12020 +gpub016:765654:765654 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.116<0> +gpub016:765654:765654 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub016:765654:765717 [2] NCCL INFO NET/IB : No device found. +gpub016:765654:765717 [2] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.116<0> [1]hsn0:141.142.145.116<0> +gpub016:765654:765717 [2] NCCL INFO Using network Socket +gpub016:765654:765717 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub016:765654:765717 [2] NCCL INFO Trees [0] 19/-1/-1->18->17 [1] 19/-1/-1->18->17 +gpub016:765654:765717 [2] NCCL INFO Channel 00/0 : 18[85000] -> 19[c7000] via P2P/IPC +gpub016:765654:765717 [2] NCCL INFO Channel 01/0 : 18[85000] -> 19[c7000] via P2P/IPC +gpub016:765654:765717 [2] NCCL INFO Connected all rings +gpub016:765654:765717 [2] NCCL INFO Channel 00/0 : 18[85000] -> 17[46000] via P2P/IPC +gpub026:836550:836550 [0] NCCL INFO cudaDriverVersion 12020 +gpub026:836550:836550 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.126<0> +gpub026:836550:836550 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub026:836550:836618 [0] NCCL INFO NET/IB : No device found. +gpub026:836550:836618 [0] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.126<0> [1]hsn0:141.142.145.126<0> +gpub026:836550:836618 [0] NCCL INFO Using network Socket +gpub026:836550:836618 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub026:836550:836618 [0] NCCL INFO Trees [0] 25/28/-1->24->16 [1] 25/-1/-1->24->21 +gpub026:836550:836618 [0] NCCL INFO Channel 00/0 : 23[c7000] -> 24[7000] [receive] via NET/Socket/1 +gpub026:836550:836618 [0] NCCL INFO Channel 01/0 : 23[c7000] -> 24[7000] [receive] via NET/Socket/1 +gpub026:836550:836618 [0] NCCL INFO Channel 00/0 : 24[7000] -> 25[46000] via P2P/IPC +gpub026:836550:836618 [0] NCCL INFO Channel 01/0 : 24[7000] -> 25[46000] via P2P/IPC +gpub083:292602:292602 [2] NCCL INFO cudaDriverVersion 12020 +gpub083:292602:292602 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.183<0> +gpub083:292602:292602 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub083:292602:292662 [2] NCCL INFO NET/IB : No device found. 
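The optimizer dump above shows AdamW with initial_lr 0.0002 but a current lr of 1.6666666666666667e-09, under PiecewiseLinearWarmupLR(warmup_steps_list=[0, 30000, 60000], warmup_lr_list=[0.0, 5e-05, 0.0002]). A minimal sketch of such a schedule, assuming plain linear interpolation between breakpoints (this is an illustration, not the ESPnet class):

```python
# Minimal sketch of a piecewise-linear warmup matching the logged breakpoints.
def piecewise_linear_warmup_lr(step, steps=(0, 30000, 60000), lrs=(0.0, 5e-05, 2e-04)):
    """Return the learning rate at `step`, holding the last value after the final breakpoint."""
    if step >= steps[-1]:
        return lrs[-1]
    for (s0, lr0), (s1, lr1) in zip(zip(steps, lrs), zip(steps[1:], lrs[1:])):
        if s0 <= step < s1:
            return lr0 + (lr1 - lr0) * (step - s0) / (s1 - s0)
    return lrs[0]

# step 1     -> 5e-05 / 30000 ~= 1.6667e-09, matching the "lr" in the optimizer dump above
# step 30000 -> 5e-05
# step 60000 -> 2e-04 (the configured peak)
```

That the printed lr equals exactly one warmup step above zero suggests the optimizer state shown here was dumped at the start of warmup, before the checkpoint mentioned above was restored.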
+gpub083:292602:292662 [2] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.183<0> [1]hsn0:141.142.145.183<0> +gpub083:292602:292662 [2] NCCL INFO Using network Socket +gpub083:292602:292662 [2] NCCL INFO Setting affinity for GPU 2 to ffffffff,ffffffff +gpub083:292602:292662 [2] NCCL INFO Trees [0] 51/-1/-1->50->49 [1] 51/-1/-1->50->49 +gpub083:292602:292662 [2] NCCL INFO Channel 00/0 : 50[85000] -> 51[c7000] via P2P/IPC +gpub083:292602:292662 [2] NCCL INFO Channel 01/0 : 50[85000] -> 51[c7000] via P2P/IPC +gpub083:292602:292662 [2] NCCL INFO Connected all rings +gpub083:292602:292662 [2] NCCL INFO Channel 00/0 : 50[85000] -> 49[46000] via P2P/IPC +gpub056:1489721:1489721 [3] NCCL INFO cudaDriverVersion 12020 +gpub056:1489721:1489721 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.156<0> +gpub056:1489721:1489721 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub056:1489721:1489782 [3] NCCL INFO NET/IB : No device found. +gpub056:1489721:1489782 [3] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.156<0> [1]hsn0:141.142.145.156<0> [2]eth0:fe80::c8c2:d8a:9a8b:ad5a%eth0<0> +gpub056:1489721:1489782 [3] NCCL INFO Using network Socket +gpub056:1489721:1489782 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub056:1489721:1489782 [3] NCCL INFO Trees [0] -1/-1/-1->43->42 [1] -1/-1/-1->43->42 +gpub056:1489721:1489782 [3] NCCL INFO Channel 00/0 : 43[c7000] -> 44[7000] [send] via NET/Socket/1 +gpub056:1489721:1489782 [3] NCCL INFO Channel 01/0 : 43[c7000] -> 44[7000] [send] via NET/Socket/1 +gpub056:1489721:1489782 [3] NCCL INFO Connected all rings +gpub022:1029030:1029030 [2] NCCL INFO cudaDriverVersion 12020 +gpub022:1029030:1029030 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.122<0> +gpub022:1029030:1029030 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub022:1029030:1029081 [2] NCCL INFO NET/IB : No device found. +gpub022:1029030:1029081 [2] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.122<0> [1]hsn0:141.142.145.122<0> +gpub022:1029030:1029081 [2] NCCL INFO Using network Socket +gpub022:1029030:1029081 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub022:1029030:1029081 [2] NCCL INFO Trees [0] 23/-1/-1->22->21 [1] 23/-1/-1->22->21 +gpub022:1029030:1029081 [2] NCCL INFO Channel 00/0 : 22[85000] -> 23[c7000] via P2P/IPC +gpub022:1029030:1029081 [2] NCCL INFO Channel 01/0 : 22[85000] -> 23[c7000] via P2P/IPC +gpub022:1029030:1029081 [2] NCCL INFO Connected all rings +gpub022:1029030:1029081 [2] NCCL INFO Channel 00/0 : 22[85000] -> 21[46000] via P2P/IPC +gpub062:525771:525771 [2] NCCL INFO cudaDriverVersion 12020 +gpub062:525771:525771 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.162<0> +gpub062:525771:525771 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub062:525771:525836 [2] NCCL INFO NET/IB : No device found. 
+gpub062:525771:525836 [2] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.162<0> [1]hsn0:141.142.145.162<0> [2]eth0:fe80::221:e9:de39:a135%eth0<0> +gpub062:525771:525836 [2] NCCL INFO Using network Socket +gpub062:525771:525836 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub062:525771:525836 [2] NCCL INFO Trees [0] 47/-1/-1->46->45 [1] 47/-1/-1->46->45 +gpub062:525771:525836 [2] NCCL INFO Channel 00/0 : 46[85000] -> 47[c7000] via P2P/IPC +gpub062:525771:525836 [2] NCCL INFO Channel 01/0 : 46[85000] -> 47[c7000] via P2P/IPC +gpub062:525771:525836 [2] NCCL INFO Connected all rings +gpub062:525771:525836 [2] NCCL INFO Channel 00/0 : 46[85000] -> 45[46000] via P2P/IPC +gpub085:793247:793247 [2] NCCL INFO cudaDriverVersion 12020 +gpub085:793247:793247 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.185<0> +gpub085:793247:793247 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub085:793247:793310 [2] NCCL INFO NET/IB : No device found. +gpub085:793247:793310 [2] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.185<0> [1]hsn0:141.142.145.185<0> [2]eth0:fe80::1703:8949:2817:7411%eth0<0> +gpub085:793247:793310 [2] NCCL INFO Using network Socket +gpub085:793247:793310 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub085:793247:793310 [2] NCCL INFO Trees [0] 55/-1/-1->54->53 [1] 55/-1/-1->54->53 +gpub085:793247:793310 [2] NCCL INFO Channel 00/0 : 54[85000] -> 55[c7000] via P2P/IPC +gpub085:793247:793310 [2] NCCL INFO Channel 01/0 : 54[85000] -> 55[c7000] via P2P/IPC +gpub085:793247:793310 [2] NCCL INFO Connected all rings +gpub085:793247:793310 [2] NCCL INFO Channel 00/0 : 54[85000] -> 53[46000] via P2P/IPC +gpub089:160269:160269 [1] NCCL INFO cudaDriverVersion 12020 +gpub089:160269:160269 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.189<0> +gpub089:160269:160269 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub089:160269:160333 [1] NCCL INFO NET/IB : No device found. +gpub089:160269:160333 [1] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.189<0> [1]hsn0:141.142.145.189<0> +gpub089:160269:160333 [1] NCCL INFO Using network Socket +gpub089:160269:160333 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub089:160269:160333 [1] NCCL INFO Trees [0] 62/-1/-1->61->60 [1] 62/-1/-1->61->60 +gpub089:160269:160333 [1] NCCL INFO Channel 00/0 : 61[46000] -> 62[85000] via P2P/IPC +gpub089:160269:160333 [1] NCCL INFO Channel 01/0 : 61[46000] -> 62[85000] via P2P/IPC +gpub089:160269:160333 [1] NCCL INFO Connected all rings +gpub089:160269:160333 [1] NCCL INFO Channel 00/0 : 61[46000] -> 60[7000] via P2P/IPC +gpub088:1835047:1835047 [1] NCCL INFO cudaDriverVersion 12020 +gpub088:1835047:1835047 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.188<0> +gpub088:1835047:1835047 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub088:1835047:1835109 [1] NCCL INFO NET/IB : No device found. 
+gpub088:1835047:1835109 [1] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.188<0> [1]hsn0:141.142.145.188<0> [2]eth0:fe80::732:b822:3c8e:dec4%eth0<0> +gpub088:1835047:1835109 [1] NCCL INFO Using network Socket +gpub088:1835047:1835109 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub088:1835047:1835109 [1] NCCL INFO Trees [0] 58/52/-1->57->56 [1] 58/-1/-1->57->56 +gpub088:1835047:1835109 [1] NCCL INFO Channel 00/0 : 57[46000] -> 58[85000] via P2P/IPC +gpub088:1835047:1835109 [1] NCCL INFO Channel 01/0 : 57[46000] -> 58[85000] via P2P/IPC +gpub088:1835047:1835109 [1] NCCL INFO Connected all rings +gpub053:3113445:3113445 [3] NCCL INFO cudaDriverVersion 12020 +gpub053:3113445:3113445 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.153<0> +gpub053:3113445:3113445 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub053:3113445:3113496 [3] NCCL INFO NET/IB : No device found. +gpub053:3113445:3113496 [3] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.153<0> [1]hsn0:141.142.145.153<0> [2]eth0:fe80::c2e4:b356:358f:a76e%eth0<0> +gpub053:3113445:3113496 [3] NCCL INFO Using network Socket +gpub053:3113445:3113496 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub053:3113445:3113496 [3] NCCL INFO Trees [0] -1/-1/-1->35->34 [1] -1/-1/-1->35->34 +gpub053:3113445:3113496 [3] NCCL INFO Channel 00/0 : 35[c7000] -> 36[7000] [send] via NET/Socket/1 +gpub053:3113445:3113496 [3] NCCL INFO Channel 01/0 : 35[c7000] -> 36[7000] [send] via NET/Socket/1 +gpub053:3113445:3113496 [3] NCCL INFO Connected all rings +gpub003:2602695:2602695 [1] NCCL INFO cudaDriverVersion 12020 +gpub003:2602695:2602695 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.103<0> +gpub003:2602695:2602695 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub003:2602695:2602754 [1] NCCL INFO NET/IB : No device found. 
+gpub003:2602695:2602754 [1] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.103<0> [1]hsn0:141.142.145.103<0> +gpub003:2602695:2602754 [1] NCCL INFO Using network Socket +gpub003:2602695:2602754 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub003:2602695:2602754 [1] NCCL INFO Trees [0] 2/-1/-1->1->0 [1] 2/-1/-1->1->0 +gpub003:2602695:2602754 [1] NCCL INFO Channel 00/0 : 1[46000] -> 2[85000] via P2P/IPC +gpub003:2602695:2602754 [1] NCCL INFO Channel 01/0 : 1[46000] -> 2[85000] via P2P/IPC +gpub003:2602695:2602754 [1] NCCL INFO Connected all rings +gpub003:2602695:2602754 [1] NCCL INFO Channel 00/0 : 1[46000] -> 0[7000] via P2P/IPC +gpub006:180547:180601 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 6[85000] via P2P/IPC +gpub006:180547:180601 [3] NCCL INFO Connected all trees +gpub006:180547:180601 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub006:180547:180601 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub006:180547:180601 [3] NCCL INFO comm 0x56042f7d6680 rank 7 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub007:822786:822843 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 10[85000] via P2P/IPC +gpub007:822786:822843 [3] NCCL INFO Connected all trees +gpub007:822786:822843 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub007:822786:822843 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub007:822786:822843 [3] NCCL INFO comm 0x559ce753c820 rank 11 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub015:743040:743092 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 14[85000] via P2P/IPC +gpub015:743040:743092 [3] NCCL INFO Connected all trees +gpub015:743040:743092 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub015:743040:743092 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub015:743040:743092 [3] NCCL INFO comm 0x5558c841b250 rank 15 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub016:765654:765717 [2] NCCL INFO Channel 01/0 : 18[85000] -> 17[46000] via P2P/IPC +gpub016:765654:765717 [2] NCCL INFO Connected all trees +gpub016:765654:765717 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub016:765654:765717 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub016:765654:765717 [2] NCCL INFO comm 0x5571d8964b80 rank 18 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub026:836550:836618 [0] NCCL INFO Connected all rings +gpub026:836550:836618 [0] NCCL INFO Channel 01/0 : 21[46000] -> 24[7000] [receive] via NET/Socket/1 +gpub026:836550:836618 [0] NCCL INFO Channel 00/0 : 24[7000] -> 28[7000] [send] via NET/Socket/1 +gpub026:836550:836618 [0] NCCL INFO Channel 00/0 : 16[7000] -> 24[7000] [receive] via NET/Socket/1 +gpub026:836550:836618 [0] NCCL INFO Channel 00/0 : 24[7000] -> 16[7000] [send] via NET/Socket/1 +gpub026:836550:836618 [0] NCCL INFO Channel 00/0 : 28[7000] -> 24[7000] [receive] via NET/Socket/1 +gpub026:836550:836618 [0] NCCL INFO Channel 01/0 : 24[7000] -> 21[46000] [send] via NET/Socket/1 +gpub026:836550:836618 [0] NCCL INFO Connected all trees +gpub026:836550:836618 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub026:836550:836618 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub026:836550:836618 [0] NCCL INFO comm 0x55b65a946640 rank 24 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub083:292602:292662 [2] NCCL INFO Channel 01/0 : 50[85000] -> 49[46000] via P2P/IPC +gpub083:292602:292662 [2] NCCL INFO Connected all trees +gpub083:292602:292662 
[2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub083:292602:292662 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub083:292602:292662 [2] NCCL INFO comm 0x55d210066220 rank 50 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub083:292601:292601 [1] NCCL INFO cudaDriverVersion 12020 +gpub083:292601:292601 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.183<0> +gpub083:292601:292601 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub083:292601:292659 [1] NCCL INFO NET/IB : No device found. +gpub083:292601:292659 [1] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.183<0> [1]hsn0:141.142.145.183<0> +gpub083:292601:292659 [1] NCCL INFO Using network Socket +gpub083:292601:292659 [1] NCCL INFO Setting affinity for GPU 1 to ffffffff,ffffffff +gpub056:1489721:1489782 [3] NCCL INFO Channel 00/0 : 43[c7000] -> 42[85000] via P2P/IPC +gpub056:1489721:1489782 [3] NCCL INFO Channel 01/0 : 43[c7000] -> 42[85000] via P2P/IPC +gpub056:1489721:1489782 [3] NCCL INFO Connected all trees +gpub056:1489721:1489782 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub056:1489721:1489782 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub056:1489721:1489782 [3] NCCL INFO comm 0x5636c57a7230 rank 43 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub022:1029030:1029081 [2] NCCL INFO Channel 01/0 : 22[85000] -> 21[46000] via P2P/IPC +gpub022:1029030:1029081 [2] NCCL INFO Connected all trees +gpub022:1029030:1029081 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub022:1029030:1029081 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub022:1029030:1029081 [2] NCCL INFO comm 0x55bcf862f6d0 rank 22 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub062:525771:525836 [2] NCCL INFO Channel 01/0 : 46[85000] -> 45[46000] via P2P/IPC +gpub062:525771:525836 [2] NCCL INFO Connected all trees +gpub062:525771:525836 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub062:525771:525836 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub062:525771:525836 [2] NCCL INFO comm 0x555eae586520 rank 46 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub085:793247:793310 [2] NCCL INFO Channel 01/0 : 54[85000] -> 53[46000] via P2P/IPC +gpub085:793247:793310 [2] NCCL INFO Connected all trees +gpub085:793247:793310 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub085:793247:793310 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub085:793247:793310 [2] NCCL INFO comm 0x557afc6bed90 rank 54 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub085:793245:793245 [0] NCCL INFO cudaDriverVersion 12020 +gpub085:793245:793245 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.185<0> +gpub085:793245:793245 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub085:793245:793312 [0] NCCL INFO NET/IB : No device found. 
+gpub085:793245:793312 [0] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.185<0> [1]hsn0:141.142.145.185<0> [2]eth0:fe80::1703:8949:2817:7411%eth0<0> +gpub085:793245:793312 [0] NCCL INFO Using network Socket +gpub089:160269:160333 [1] NCCL INFO Channel 01/0 : 61[46000] -> 60[7000] via P2P/IPC +gpub089:160269:160333 [1] NCCL INFO Connected all trees +gpub089:160269:160333 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub089:160269:160333 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub089:160269:160333 [1] NCCL INFO comm 0x5582feb7f990 rank 61 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub089:160271:160271 [3] NCCL INFO cudaDriverVersion 12020 +gpub089:160271:160271 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.189<0> +gpub089:160271:160271 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub089:160271:160330 [3] NCCL INFO NET/IB : No device found. +gpub089:160271:160330 [3] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.189<0> [1]hsn0:141.142.145.189<0> +gpub089:160271:160330 [3] NCCL INFO Using network Socket +gpub089:160271:160330 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub088:1835047:1835109 [1] NCCL INFO Channel 00/0 : 52[7000] -> 57[46000] [receive] via NET/Socket/1 +gpub088:1835047:1835109 [1] NCCL INFO Channel 00/0 : 57[46000] -> 52[7000] [send] via NET/Socket/1 +gpub088:1835047:1835109 [1] NCCL INFO Channel 00/0 : 57[46000] -> 56[7000] via P2P/IPC +gpub088:1835047:1835109 [1] NCCL INFO Channel 01/0 : 57[46000] -> 56[7000] via P2P/IPC +gpub088:1835047:1835109 [1] NCCL INFO Connected all trees +gpub088:1835047:1835109 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub088:1835047:1835109 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub088:1835047:1835109 [1] NCCL INFO comm 0x557df21a1d10 rank 57 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub053:3113445:3113496 [3] NCCL INFO Channel 00/0 : 35[c7000] -> 34[85000] via P2P/IPC +gpub053:3113445:3113496 [3] NCCL INFO Channel 01/0 : 35[c7000] -> 34[85000] via P2P/IPC +gpub053:3113445:3113496 [3] NCCL INFO Connected all trees +gpub053:3113445:3113496 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub053:3113445:3113496 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub053:3113445:3113496 [3] NCCL INFO comm 0x55c6cb4fb180 rank 35 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub003:2602695:2602754 [1] NCCL INFO Channel 01/0 : 1[46000] -> 0[7000] via P2P/IPC +gpub003:2602695:2602754 [1] NCCL INFO Connected all trees +gpub003:2602695:2602754 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub003:2602695:2602754 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub003:2602695:2602754 [1] NCCL INFO comm 0x5593ced369b0 rank 1 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub003:2602694:2602756 [0] NCCL INFO NET/IB : No device found. 
+gpub003:2602694:2602756 [0] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.103<0> [1]hsn0:141.142.145.103<0> +gpub003:2602694:2602756 [0] NCCL INFO Using network Socket +gpub003:2602694:2602756 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub003:2602694:2602756 [0] NCCL INFO Channel 00/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 +gpub006:180544:180544 [0] NCCL INFO cudaDriverVersion 12020 +gpub006:180544:180544 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.106<0> +gpub006:180544:180544 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub006:180544:180599 [0] NCCL INFO NET/IB : No device found. +gpub006:180544:180599 [0] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.106<0> [1]hsn0:141.142.145.106<0> +gpub006:180544:180599 [0] NCCL INFO Using network Socket +gpub006:180544:180599 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub006:180544:180599 [0] NCCL INFO Trees [0] 5/-1/-1->4->9 [1] 5/0/-1->4->12 +gpub006:180544:180599 [0] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [receive] via NET/Socket/1 +gpub006:180544:180599 [0] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [receive] via NET/Socket/1 +gpub006:180544:180599 [0] NCCL INFO Channel 00/0 : 4[7000] -> 5[46000] via P2P/IPC +gpub006:180544:180599 [0] NCCL INFO Channel 01/0 : 4[7000] -> 5[46000] via P2P/IPC +gpub007:822783:822783 [0] NCCL INFO cudaDriverVersion 12020 +gpub007:822783:822783 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.107<0> +gpub007:822783:822783 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub007:822783:822844 [0] NCCL INFO NET/IB : No device found. +gpub007:822783:822844 [0] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.107<0> [1]hsn0:141.142.145.107<0> +gpub007:822783:822844 [0] NCCL INFO Using network Socket +gpub007:822783:822844 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub007:822783:822844 [0] NCCL INFO Trees [0] 9/12/-1->8->17 [1] 9/-1/-1->8->5 +gpub007:822783:822844 [0] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [receive] via NET/Socket/1 +gpub007:822783:822844 [0] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [receive] via NET/Socket/1 +gpub007:822783:822844 [0] NCCL INFO Channel 00/0 : 8[7000] -> 9[46000] via P2P/IPC +gpub007:822783:822844 [0] NCCL INFO Channel 01/0 : 8[7000] -> 9[46000] via P2P/IPC +gpub015:743039:743039 [2] NCCL INFO cudaDriverVersion 12020 +gpub015:743039:743039 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.115<0> +gpub015:743039:743039 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub015:743039:743094 [2] NCCL INFO NET/IB : No device found. 
+gpub015:743039:743094 [2] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.115<0> [1]hsn0:141.142.145.115<0> +gpub015:743039:743094 [2] NCCL INFO Using network Socket +gpub015:743039:743094 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub015:743039:743094 [2] NCCL INFO Trees [0] 15/-1/-1->14->13 [1] 15/-1/-1->14->13 +gpub015:743039:743094 [2] NCCL INFO Channel 00/0 : 14[85000] -> 15[c7000] via P2P/IPC +gpub015:743039:743094 [2] NCCL INFO Channel 01/0 : 14[85000] -> 15[c7000] via P2P/IPC +gpub015:743039:743094 [2] NCCL INFO Connected all rings +gpub015:743039:743094 [2] NCCL INFO Channel 00/0 : 14[85000] -> 13[46000] via P2P/IPC +gpub016:765652:765652 [0] NCCL INFO cudaDriverVersion 12020 +gpub016:765652:765652 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.116<0> +gpub016:765652:765652 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub016:765652:765716 [0] NCCL INFO NET/IB : No device found. +gpub016:765652:765716 [0] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.116<0> [1]hsn0:141.142.145.116<0> +gpub016:765652:765716 [0] NCCL INFO Using network Socket +gpub016:765652:765716 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub016:765652:765716 [0] NCCL INFO Trees [0] 17/24/-1->16->33 [1] 17/-1/-1->16->20 +gpub016:765652:765716 [0] NCCL INFO Channel 00/0 : 15[c7000] -> 16[7000] [receive] via NET/Socket/1 +gpub016:765652:765716 [0] NCCL INFO Channel 01/0 : 15[c7000] -> 16[7000] [receive] via NET/Socket/1 +gpub016:765652:765716 [0] NCCL INFO Channel 00/0 : 16[7000] -> 17[46000] via P2P/IPC +gpub016:765652:765716 [0] NCCL INFO Channel 01/0 : 16[7000] -> 17[46000] via P2P/IPC +gpub026:836552:836552 [2] NCCL INFO cudaDriverVersion 12020 +gpub026:836552:836552 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.126<0> +gpub026:836552:836552 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub026:836552:836621 [2] NCCL INFO NET/IB : No device found. 
+gpub026:836552:836621 [2] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.126<0> [1]hsn0:141.142.145.126<0> +gpub026:836552:836621 [2] NCCL INFO Using network Socket +gpub026:836552:836621 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub026:836552:836621 [2] NCCL INFO Trees [0] 27/-1/-1->26->25 [1] 27/-1/-1->26->25 +gpub026:836552:836621 [2] NCCL INFO Channel 00/0 : 26[85000] -> 27[c7000] via P2P/IPC +gpub026:836552:836621 [2] NCCL INFO Channel 01/0 : 26[85000] -> 27[c7000] via P2P/IPC +gpub026:836552:836621 [2] NCCL INFO Connected all rings +gpub026:836552:836621 [2] NCCL INFO Channel 00/0 : 26[85000] -> 25[46000] via P2P/IPC +gpub083:292601:292659 [1] NCCL INFO Trees [0] 50/40/-1->49->48 [1] 50/-1/-1->49->48 +gpub083:292601:292659 [1] NCCL INFO Channel 00/0 : 49[46000] -> 50[85000] via P2P/IPC +gpub083:292601:292659 [1] NCCL INFO Channel 01/0 : 49[46000] -> 50[85000] via P2P/IPC +gpub083:292601:292659 [1] NCCL INFO Connected all rings +gpub083:292601:292659 [1] NCCL INFO Channel 00/0 : 40[7000] -> 49[46000] [receive] via NET/Socket/1 +gpub083:292601:292659 [1] NCCL INFO Channel 00/0 : 49[46000] -> 40[7000] [send] via NET/Socket/1 +gpub083:292601:292659 [1] NCCL INFO Channel 00/0 : 49[46000] -> 48[7000] via P2P/IPC +gpub083:292601:292659 [1] NCCL INFO Channel 01/0 : 49[46000] -> 48[7000] via P2P/IPC +gpub083:292601:292659 [1] NCCL INFO Connected all trees +gpub083:292601:292659 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub083:292601:292659 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub083:292601:292659 [1] NCCL INFO comm 0x55aec83e9870 rank 49 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub056:1489718:1489718 [0] NCCL INFO cudaDriverVersion 12020 +gpub056:1489718:1489718 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.156<0> +gpub056:1489718:1489718 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub056:1489718:1489781 [0] NCCL INFO NET/IB : No device found. +gpub056:1489718:1489781 [0] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.156<0> [1]hsn0:141.142.145.156<0> [2]eth0:fe80::c8c2:d8a:9a8b:ad5a%eth0<0> +gpub056:1489718:1489781 [0] NCCL INFO Using network Socket +gpub056:1489718:1489781 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub056:1489718:1489781 [0] NCCL INFO Trees [0] 41/44/-1->40->49 [1] 41/-1/-1->40->37 +gpub056:1489718:1489781 [0] NCCL INFO Channel 00/0 : 39[c7000] -> 40[7000] [receive] via NET/Socket/1 +gpub056:1489718:1489781 [0] NCCL INFO Channel 01/0 : 39[c7000] -> 40[7000] [receive] via NET/Socket/1 +gpub056:1489718:1489781 [0] NCCL INFO Channel 00/0 : 40[7000] -> 41[46000] via P2P/IPC +gpub022:1029029:1029029 [1] NCCL INFO cudaDriverVersion 12020 +gpub022:1029029:1029029 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.122<0> +gpub022:1029029:1029029 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub022:1029029:1029083 [1] NCCL INFO NET/IB : No device found. 
+gpub022:1029029:1029083 [1] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.122<0> [1]hsn0:141.142.145.122<0> +gpub022:1029029:1029083 [1] NCCL INFO Using network Socket +gpub022:1029029:1029083 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub022:1029029:1029083 [1] NCCL INFO Trees [0] 22/-1/-1->21->20 [1] 22/24/-1->21->20 +gpub022:1029029:1029083 [1] NCCL INFO Channel 00/0 : 21[46000] -> 22[85000] via P2P/IPC +gpub022:1029029:1029083 [1] NCCL INFO Channel 01/0 : 21[46000] -> 22[85000] via P2P/IPC +gpub022:1029029:1029083 [1] NCCL INFO Connected all rings +gpub022:1029029:1029083 [1] NCCL INFO Channel 01/0 : 21[46000] -> 24[7000] [send] via NET/Socket/1 +gpub062:525770:525770 [1] NCCL INFO cudaDriverVersion 12020 +gpub062:525770:525770 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.162<0> +gpub062:525770:525770 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub062:525770:525837 [1] NCCL INFO NET/IB : No device found. +gpub062:525770:525837 [1] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.162<0> [1]hsn0:141.142.145.162<0> [2]eth0:fe80::221:e9:de39:a135%eth0<0> +gpub062:525770:525837 [1] NCCL INFO Using network Socket +gpub062:525770:525837 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub062:525770:525837 [1] NCCL INFO Trees [0] 46/-1/-1->45->44 [1] 46/52/-1->45->44 +gpub062:525770:525837 [1] NCCL INFO Channel 00/0 : 45[46000] -> 46[85000] via P2P/IPC +gpub062:525770:525837 [1] NCCL INFO Channel 01/0 : 45[46000] -> 46[85000] via P2P/IPC +gpub062:525770:525837 [1] NCCL INFO Connected all rings +gpub062:525770:525837 [1] NCCL INFO Channel 01/0 : 45[46000] -> 52[7000] [send] via NET/Socket/1 +gpub085:793245:793312 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub085:793245:793312 [0] NCCL INFO Trees [0] 53/-1/-1->52->57 [1] 53/48/-1->52->45 +gpub085:793245:793312 [0] NCCL INFO Channel 00/0 : 51[c7000] -> 52[7000] [receive] via NET/Socket/1 +gpub085:793245:793312 [0] NCCL INFO Channel 01/0 : 51[c7000] -> 52[7000] [receive] via NET/Socket/1 +gpub085:793245:793312 [0] NCCL INFO Channel 00/0 : 52[7000] -> 53[46000] via P2P/IPC +gpub085:793245:793312 [0] NCCL INFO Channel 01/0 : 52[7000] -> 53[46000] via P2P/IPC +gpub085:793245:793312 [0] NCCL INFO Connected all rings +gpub085:793245:793312 [0] NCCL INFO Channel 01/0 : 48[7000] -> 52[7000] [receive] via NET/Socket/1 +gpub085:793245:793312 [0] NCCL INFO Channel 00/0 : 52[7000] -> 57[46000] [send] via NET/Socket/1 +gpub085:793245:793312 [0] NCCL INFO Channel 01/0 : 45[46000] -> 52[7000] [receive] via NET/Socket/1 +gpub085:793245:793312 [0] NCCL INFO Channel 01/0 : 52[7000] -> 45[46000] [send] via NET/Socket/1 +gpub089:160271:160330 [3] NCCL INFO Trees [0] -1/-1/-1->63->62 [1] -1/-1/-1->63->62 +gpub089:160271:160330 [3] NCCL INFO Channel 00/0 : 63[c7000] -> 0[7000] [send] via NET/Socket/1 +gpub089:160271:160330 [3] NCCL INFO Channel 01/0 : 63[c7000] -> 0[7000] [send] via NET/Socket/1 +gpub089:160271:160330 [3] NCCL INFO Connected all rings +gpub089:160271:160330 [3] NCCL INFO Channel 00/0 : 63[c7000] -> 62[85000] via P2P/IPC +gpub089:160271:160330 [3] NCCL INFO Channel 01/0 : 63[c7000] -> 62[85000] via P2P/IPC +gpub089:160271:160330 [3] NCCL INFO Connected all trees +gpub089:160271:160330 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub089:160271:160330 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub089:160271:160330 [3] NCCL INFO comm 0x55a116a94e10 rank 63 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE 
+gpub088:1835049:1835049 [3] NCCL INFO cudaDriverVersion 12020 +gpub088:1835049:1835049 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.188<0> +gpub088:1835049:1835049 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub088:1835049:1835107 [3] NCCL INFO NET/IB : No device found. +gpub088:1835049:1835107 [3] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.188<0> [1]hsn0:141.142.145.188<0> [2]eth0:fe80::732:b822:3c8e:dec4%eth0<0> +gpub088:1835049:1835107 [3] NCCL INFO Using network Socket +gpub088:1835049:1835107 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub088:1835049:1835107 [3] NCCL INFO Trees [0] -1/-1/-1->59->58 [1] -1/-1/-1->59->58 +gpub088:1835049:1835107 [3] NCCL INFO Channel 00/0 : 59[c7000] -> 60[7000] [send] via NET/Socket/1 +gpub088:1835049:1835107 [3] NCCL INFO Channel 01/0 : 59[c7000] -> 60[7000] [send] via NET/Socket/1 +gpub088:1835049:1835107 [3] NCCL INFO Connected all rings +gpub053:3113444:3113444 [2] NCCL INFO cudaDriverVersion 12020 +gpub053:3113444:3113444 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.153<0> +gpub053:3113444:3113444 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub053:3113444:3113494 [2] NCCL INFO NET/IB : No device found. +gpub053:3113444:3113494 [2] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.153<0> [1]hsn0:141.142.145.153<0> [2]eth0:fe80::c2e4:b356:358f:a76e%eth0<0> +gpub053:3113444:3113494 [2] NCCL INFO Using network Socket +gpub053:3113444:3113494 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub053:3113444:3113494 [2] NCCL INFO Trees [0] 35/-1/-1->34->33 [1] 35/-1/-1->34->33 +gpub053:3113444:3113494 [2] NCCL INFO Channel 00/0 : 34[85000] -> 35[c7000] via P2P/IPC +gpub053:3113444:3113494 [2] NCCL INFO Channel 01/0 : 34[85000] -> 35[c7000] via P2P/IPC +gpub053:3113444:3113494 [2] NCCL INFO Connected all rings +gpub053:3113444:3113494 [2] NCCL INFO Channel 00/0 : 34[85000] -> 33[46000] via P2P/IPC +gpub003:2602694:2602756 [0] NCCL INFO Channel 01/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 +gpub003:2602694:2602756 [0] NCCL INFO Trees [0] 1/32/-1->0->-1 [1] 1/-1/-1->0->4 +gpub003:2602694:2602756 [0] NCCL INFO Channel 00/0 : 63[c7000] -> 0[7000] [receive] via NET/Socket/1 +gpub003:2602694:2602756 [0] NCCL INFO Channel 01/0 : 63[c7000] -> 0[7000] [receive] via NET/Socket/1 +gpub003:2602694:2602756 [0] NCCL INFO Channel 00/0 : 0[7000] -> 1[46000] via P2P/IPC +gpub003:2602694:2602756 [0] NCCL INFO Channel 01/0 : 0[7000] -> 1[46000] via P2P/IPC +gpub003:2602694:2602756 [0] NCCL INFO Connected all rings +gpub003:2602694:2602756 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [send] via NET/Socket/1 +gpub003:2602694:2602756 [0] NCCL INFO Channel 00/0 : 32[7000] -> 0[7000] [receive] via NET/Socket/1 +gpub003:2602694:2602756 [0] NCCL INFO Channel 00/0 : 0[7000] -> 32[7000] [send] via NET/Socket/1 +gpub006:180544:180599 [0] NCCL INFO Connected all rings +gpub006:180544:180599 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [receive] via NET/Socket/1 +gpub006:180544:180599 [0] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [send] via NET/Socket/1 +gpub006:180544:180599 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [send] via NET/Socket/1 +gpub006:180544:180599 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [receive] via NET/Socket/1 +gpub006:180544:180599 [0] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [receive] via NET/Socket/1 +gpub006:180544:180599 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [send] via NET/Socket/1 
+gpub006:180544:180599 [0] NCCL INFO Connected all trees +gpub006:180544:180599 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub006:180544:180599 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub006:180544:180599 [0] NCCL INFO comm 0x5632ab95f5a0 rank 4 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub007:822783:822844 [0] NCCL INFO Connected all rings +gpub007:822783:822844 [0] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [receive] via NET/Socket/1 +gpub007:822783:822844 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [send] via NET/Socket/1 +gpub007:822783:822844 [0] NCCL INFO Channel 00/0 : 8[7000] -> 17[46000] [send] via NET/Socket/1 +gpub007:822783:822844 [0] NCCL INFO Channel 00/0 : 17[46000] -> 8[7000] [receive] via NET/Socket/1 +gpub007:822783:822844 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [receive] via NET/Socket/1 +gpub007:822783:822844 [0] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [send] via NET/Socket/1 +gpub007:822783:822844 [0] NCCL INFO Connected all trees +gpub007:822783:822844 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub007:822783:822844 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub007:822783:822844 [0] NCCL INFO comm 0x561e0a6b77d0 rank 8 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub015:743039:743094 [2] NCCL INFO Channel 01/0 : 14[85000] -> 13[46000] via P2P/IPC +gpub015:743039:743094 [2] NCCL INFO Connected all trees +gpub015:743039:743094 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub015:743039:743094 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub015:743039:743094 [2] NCCL INFO comm 0x5578dd88a8c0 rank 14 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub016:765652:765716 [0] NCCL INFO Connected all rings +gpub016:765652:765716 [0] NCCL INFO Channel 01/0 : 16[7000] -> 20[7000] [send] via NET/Socket/1 +gpub016:765652:765716 [0] NCCL INFO Channel 00/0 : 16[7000] -> 24[7000] [send] via NET/Socket/1 +gpub016:765652:765716 [0] NCCL INFO Channel 00/0 : 16[7000] -> 33[46000] [send] via NET/Socket/1 +gpub016:765652:765716 [0] NCCL INFO Channel 00/0 : 33[46000] -> 16[7000] [receive] via NET/Socket/1 +gpub016:765652:765716 [0] NCCL INFO Channel 00/0 : 24[7000] -> 16[7000] [receive] via NET/Socket/1 +gpub016:765652:765716 [0] NCCL INFO Channel 01/0 : 20[7000] -> 16[7000] [receive] via NET/Socket/1 +gpub016:765652:765716 [0] NCCL INFO Connected all trees +gpub016:765652:765716 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub016:765652:765716 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub016:765652:765716 [0] NCCL INFO comm 0x564d7f7a2360 rank 16 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub026:836552:836621 [2] NCCL INFO Channel 01/0 : 26[85000] -> 25[46000] via P2P/IPC +gpub026:836552:836621 [2] NCCL INFO Connected all trees +gpub026:836552:836621 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub026:836552:836621 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub026:836552:836621 [2] NCCL INFO comm 0x56104ad03240 rank 26 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub083:292600:292600 [0] NCCL INFO cudaDriverVersion 12020 +gpub083:292600:292600 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.183<0> +gpub083:292600:292600 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub083:292600:292661 [0] NCCL INFO NET/IB : No device found. 
+gpub083:292600:292661 [0] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.183<0> [1]hsn0:141.142.145.183<0> +gpub083:292600:292661 [0] NCCL INFO Using network Socket +gpub083:292600:292661 [0] NCCL INFO Setting affinity for GPU 0 to ffffffff,ffffffff +gpub083:292600:292661 [0] NCCL INFO Trees [0] 49/56/-1->48->32 [1] 49/-1/-1->48->52 +gpub083:292600:292661 [0] NCCL INFO Channel 00/0 : 47[c7000] -> 48[7000] [receive] via NET/Socket/1 +gpub083:292600:292661 [0] NCCL INFO Channel 01/0 : 47[c7000] -> 48[7000] [receive] via NET/Socket/1 +gpub083:292600:292661 [0] NCCL INFO Channel 00/0 : 48[7000] -> 49[46000] via P2P/IPC +gpub083:292600:292661 [0] NCCL INFO Channel 01/0 : 48[7000] -> 49[46000] via P2P/IPC +gpub056:1489718:1489781 [0] NCCL INFO Channel 01/0 : 40[7000] -> 41[46000] via P2P/IPC +gpub056:1489718:1489781 [0] NCCL INFO Connected all rings +gpub056:1489718:1489781 [0] NCCL INFO Channel 01/0 : 37[46000] -> 40[7000] [receive] via NET/Socket/1 +gpub056:1489718:1489781 [0] NCCL INFO Channel 00/0 : 40[7000] -> 44[7000] [send] via NET/Socket/1 +gpub056:1489718:1489781 [0] NCCL INFO Channel 00/0 : 40[7000] -> 49[46000] [send] via NET/Socket/1 +gpub056:1489718:1489781 [0] NCCL INFO Channel 00/0 : 49[46000] -> 40[7000] [receive] via NET/Socket/1 +gpub056:1489718:1489781 [0] NCCL INFO Channel 00/0 : 44[7000] -> 40[7000] [receive] via NET/Socket/1 +gpub056:1489718:1489781 [0] NCCL INFO Channel 01/0 : 40[7000] -> 37[46000] [send] via NET/Socket/1 +gpub056:1489718:1489781 [0] NCCL INFO Connected all trees +gpub056:1489718:1489781 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub056:1489718:1489781 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub022:1029029:1029083 [1] NCCL INFO Channel 01/0 : 24[7000] -> 21[46000] [receive] via NET/Socket/1 +gpub022:1029029:1029083 [1] NCCL INFO Channel 00/0 : 21[46000] -> 20[7000] via P2P/IPC +gpub022:1029029:1029083 [1] NCCL INFO Channel 01/0 : 21[46000] -> 20[7000] via P2P/IPC +gpub022:1029029:1029083 [1] NCCL INFO Connected all trees +gpub022:1029029:1029083 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub022:1029029:1029083 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub022:1029029:1029083 [1] NCCL INFO comm 0x55d290e2a850 rank 21 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub062:525770:525837 [1] NCCL INFO Channel 01/0 : 52[7000] -> 45[46000] [receive] via NET/Socket/1 +gpub062:525770:525837 [1] NCCL INFO Channel 00/0 : 45[46000] -> 44[7000] via P2P/IPC +gpub062:525770:525837 [1] NCCL INFO Channel 01/0 : 45[46000] -> 44[7000] via P2P/IPC +gpub062:525770:525837 [1] NCCL INFO Connected all trees +gpub062:525770:525837 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub062:525770:525837 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub062:525770:525837 [1] NCCL INFO comm 0x55e47c50f100 rank 45 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub085:793245:793312 [0] NCCL INFO Channel 00/0 : 57[46000] -> 52[7000] [receive] via NET/Socket/1 +gpub085:793245:793312 [0] NCCL INFO Channel 01/0 : 52[7000] -> 48[7000] [send] via NET/Socket/1 +gpub085:793245:793312 [0] NCCL INFO Connected all trees +gpub085:793245:793312 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub085:793245:793312 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub085:793245:793312 [0] NCCL INFO comm 0x563429b8d990 rank 52 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub089:160270:160270 [2] NCCL INFO 
cudaDriverVersion 12020 +gpub089:160270:160270 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.189<0> +gpub089:160270:160270 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub089:160270:160332 [2] NCCL INFO NET/IB : No device found. +gpub089:160270:160332 [2] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.189<0> [1]hsn0:141.142.145.189<0> +gpub089:160270:160332 [2] NCCL INFO Using network Socket +gpub089:160270:160332 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub089:160270:160332 [2] NCCL INFO Trees [0] 63/-1/-1->62->61 [1] 63/-1/-1->62->61 +gpub089:160270:160332 [2] NCCL INFO Channel 00/0 : 62[85000] -> 63[c7000] via P2P/IPC +gpub089:160270:160332 [2] NCCL INFO Channel 01/0 : 62[85000] -> 63[c7000] via P2P/IPC +gpub089:160270:160332 [2] NCCL INFO Connected all rings +gpub089:160270:160332 [2] NCCL INFO Channel 00/0 : 62[85000] -> 61[46000] via P2P/IPC +gpub088:1835049:1835107 [3] NCCL INFO Channel 00/0 : 59[c7000] -> 58[85000] via P2P/IPC +gpub088:1835049:1835107 [3] NCCL INFO Channel 01/0 : 59[c7000] -> 58[85000] via P2P/IPC +gpub088:1835049:1835107 [3] NCCL INFO Connected all trees +gpub088:1835049:1835107 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub088:1835049:1835107 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub088:1835049:1835107 [3] NCCL INFO comm 0x562493c82680 rank 59 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub088:1835046:1835046 [0] NCCL INFO cudaDriverVersion 12020 +gpub088:1835046:1835046 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.188<0> +gpub088:1835046:1835046 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub088:1835046:1835108 [0] NCCL INFO NET/IB : No device found. +gpub088:1835046:1835108 [0] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.188<0> [1]hsn0:141.142.145.188<0> [2]eth0:fe80::732:b822:3c8e:dec4%eth0<0> +gpub053:3113444:3113494 [2] NCCL INFO Channel 01/0 : 34[85000] -> 33[46000] via P2P/IPC +gpub053:3113444:3113494 [2] NCCL INFO Connected all trees +gpub053:3113444:3113494 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub053:3113444:3113494 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub053:3113444:3113494 [2] NCCL INFO comm 0x55a144850860 rank 34 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub003:2602694:2602756 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [receive] via NET/Socket/1 +gpub003:2602694:2602756 [0] NCCL INFO Connected all trees +gpub003:2602694:2602756 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub003:2602694:2602756 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub003:2602694:2602756 [0] NCCL INFO comm 0x55de88409120 rank 0 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub006:180546:180546 [2] NCCL INFO cudaDriverVersion 12020 +gpub006:180546:180546 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.106<0> +gpub006:180546:180546 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub006:180546:180600 [2] NCCL INFO NET/IB : No device found. 
+gpub006:180546:180600 [2] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.106<0> [1]hsn0:141.142.145.106<0> +gpub006:180546:180600 [2] NCCL INFO Using network Socket +gpub006:180546:180600 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub006:180546:180600 [2] NCCL INFO Trees [0] 7/-1/-1->6->5 [1] 7/-1/-1->6->5 +gpub006:180546:180600 [2] NCCL INFO Channel 00/0 : 6[85000] -> 7[c7000] via P2P/IPC +gpub006:180546:180600 [2] NCCL INFO Channel 01/0 : 6[85000] -> 7[c7000] via P2P/IPC +gpub006:180546:180600 [2] NCCL INFO Connected all rings +gpub006:180546:180600 [2] NCCL INFO Channel 00/0 : 6[85000] -> 5[46000] via P2P/IPC +gpub006:180546:180600 [2] NCCL INFO Channel 01/0 : 6[85000] -> 5[46000] via P2P/IPC +gpub007:822784:822784 [1] NCCL INFO cudaDriverVersion 12020 +gpub007:822784:822784 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.107<0> +gpub007:822784:822784 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub007:822784:822842 [1] NCCL INFO NET/IB : No device found. +gpub007:822784:822842 [1] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.107<0> [1]hsn0:141.142.145.107<0> +gpub007:822784:822842 [1] NCCL INFO Using network Socket +gpub007:822784:822842 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub007:822784:822842 [1] NCCL INFO Trees [0] 10/4/-1->9->8 [1] 10/-1/-1->9->8 +gpub007:822784:822842 [1] NCCL INFO Channel 00/0 : 9[46000] -> 10[85000] via P2P/IPC +gpub007:822784:822842 [1] NCCL INFO Channel 01/0 : 9[46000] -> 10[85000] via P2P/IPC +gpub007:822784:822842 [1] NCCL INFO Connected all rings +gpub007:822784:822842 [1] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [receive] via NET/Socket/1 +gpub015:743037:743037 [0] NCCL INFO cudaDriverVersion 12020 +gpub015:743037:743037 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.115<0> +gpub015:743037:743037 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub015:743037:743093 [0] NCCL INFO NET/IB : No device found. +gpub015:743037:743093 [0] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.115<0> [1]hsn0:141.142.145.115<0> +gpub015:743037:743093 [0] NCCL INFO Using network Socket +gpub015:743037:743093 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub015:743037:743093 [0] NCCL INFO Trees [0] 13/-1/-1->12->8 [1] 13/4/-1->12->28 +gpub015:743037:743093 [0] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [receive] via NET/Socket/1 +gpub015:743037:743093 [0] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [receive] via NET/Socket/1 +gpub015:743037:743093 [0] NCCL INFO Channel 00/0 : 12[7000] -> 13[46000] via P2P/IPC +gpub015:743037:743093 [0] NCCL INFO Channel 01/0 : 12[7000] -> 13[46000] via P2P/IPC +gpub016:765653:765653 [1] NCCL INFO cudaDriverVersion 12020 +gpub016:765653:765653 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.116<0> +gpub016:765653:765653 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub016:765653:765715 [1] NCCL INFO NET/IB : No device found. 
+gpub016:765653:765715 [1] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.116<0> [1]hsn0:141.142.145.116<0> +gpub016:765653:765715 [1] NCCL INFO Using network Socket +gpub016:765653:765715 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub016:765653:765715 [1] NCCL INFO Trees [0] 18/8/-1->17->16 [1] 18/-1/-1->17->16 +gpub016:765653:765715 [1] NCCL INFO Channel 00/0 : 17[46000] -> 18[85000] via P2P/IPC +gpub016:765653:765715 [1] NCCL INFO Channel 01/0 : 17[46000] -> 18[85000] via P2P/IPC +gpub016:765653:765715 [1] NCCL INFO Connected all rings +gpub016:765653:765715 [1] NCCL INFO Channel 00/0 : 8[7000] -> 17[46000] [receive] via NET/Socket/1 +gpub026:836553:836553 [3] NCCL INFO cudaDriverVersion 12020 +gpub026:836553:836553 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.126<0> +gpub026:836553:836553 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub026:836553:836620 [3] NCCL INFO NET/IB : No device found. +gpub026:836553:836620 [3] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.126<0> [1]hsn0:141.142.145.126<0> +gpub026:836553:836620 [3] NCCL INFO Using network Socket +gpub026:836553:836620 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub026:836553:836620 [3] NCCL INFO Trees [0] -1/-1/-1->27->26 [1] -1/-1/-1->27->26 +gpub026:836553:836620 [3] NCCL INFO Channel 00/0 : 27[c7000] -> 28[7000] [send] via NET/Socket/1 +gpub026:836553:836620 [3] NCCL INFO Channel 01/0 : 27[c7000] -> 28[7000] [send] via NET/Socket/1 +gpub026:836553:836620 [3] NCCL INFO Connected all rings +gpub026:836553:836620 [3] NCCL INFO Channel 00/0 : 27[c7000] -> 26[85000] via P2P/IPC +gpub083:292600:292661 [0] NCCL INFO Connected all rings +gpub083:292600:292661 [0] NCCL INFO Channel 01/0 : 48[7000] -> 52[7000] [send] via NET/Socket/1 +gpub083:292600:292661 [0] NCCL INFO Channel 00/0 : 48[7000] -> 56[7000] [send] via NET/Socket/1 +gpub083:292600:292661 [0] NCCL INFO Channel 00/0 : 32[7000] -> 48[7000] [receive] via NET/Socket/1 +gpub083:292600:292661 [0] NCCL INFO Channel 00/0 : 48[7000] -> 32[7000] [send] via NET/Socket/1 +gpub083:292600:292661 [0] NCCL INFO Channel 00/0 : 56[7000] -> 48[7000] [receive] via NET/Socket/1 +gpub083:292600:292661 [0] NCCL INFO Channel 01/0 : 52[7000] -> 48[7000] [receive] via NET/Socket/1 +gpub083:292600:292661 [0] NCCL INFO Connected all trees +gpub083:292600:292661 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub083:292600:292661 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub083:292600:292661 [0] NCCL INFO comm 0x55e4b0165670 rank 48 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub056:1489718:1489781 [0] NCCL INFO comm 0x5602d3e3ebe0 rank 40 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub022:1029031:1029031 [3] NCCL INFO cudaDriverVersion 12020 +gpub022:1029031:1029031 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.122<0> +gpub022:1029031:1029031 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub022:1029031:1029082 [3] NCCL INFO NET/IB : No device found. 
+gpub022:1029031:1029082 [3] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.122<0> [1]hsn0:141.142.145.122<0> +gpub022:1029031:1029082 [3] NCCL INFO Using network Socket +gpub022:1029031:1029082 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub022:1029031:1029082 [3] NCCL INFO Trees [0] -1/-1/-1->23->22 [1] -1/-1/-1->23->22 +gpub022:1029031:1029082 [3] NCCL INFO Channel 00/0 : 23[c7000] -> 24[7000] [send] via NET/Socket/1 +gpub022:1029031:1029082 [3] NCCL INFO Channel 01/0 : 23[c7000] -> 24[7000] [send] via NET/Socket/1 +gpub022:1029031:1029082 [3] NCCL INFO Connected all rings +gpub022:1029031:1029082 [3] NCCL INFO Channel 00/0 : 23[c7000] -> 22[85000] via P2P/IPC +gpub062:525772:525772 [3] NCCL INFO cudaDriverVersion 12020 +gpub062:525772:525772 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.162<0> +gpub062:525772:525772 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub062:525772:525834 [3] NCCL INFO NET/IB : No device found. +gpub062:525772:525834 [3] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.162<0> [1]hsn0:141.142.145.162<0> [2]eth0:fe80::221:e9:de39:a135%eth0<0> +gpub062:525772:525834 [3] NCCL INFO Using network Socket +gpub062:525772:525834 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub062:525772:525834 [3] NCCL INFO Trees [0] -1/-1/-1->47->46 [1] -1/-1/-1->47->46 +gpub062:525772:525834 [3] NCCL INFO Channel 00/0 : 47[c7000] -> 48[7000] [send] via NET/Socket/1 +gpub062:525772:525834 [3] NCCL INFO Channel 01/0 : 47[c7000] -> 48[7000] [send] via NET/Socket/1 +gpub062:525772:525834 [3] NCCL INFO Connected all rings +gpub062:525772:525834 [3] NCCL INFO Channel 00/0 : 47[c7000] -> 46[85000] via P2P/IPC +gpub085:793246:793246 [1] NCCL INFO cudaDriverVersion 12020 +gpub085:793246:793246 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.185<0> +gpub085:793246:793246 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub085:793246:793311 [1] NCCL INFO NET/IB : No device found. 
+gpub085:793246:793311 [1] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.185<0> [1]hsn0:141.142.145.185<0> [2]eth0:fe80::1703:8949:2817:7411%eth0<0> +gpub085:793246:793311 [1] NCCL INFO Using network Socket +gpub085:793246:793311 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub085:793246:793311 [1] NCCL INFO Trees [0] 54/-1/-1->53->52 [1] 54/56/-1->53->52 +gpub085:793246:793311 [1] NCCL INFO Channel 00/0 : 53[46000] -> 54[85000] via P2P/IPC +gpub085:793246:793311 [1] NCCL INFO Channel 01/0 : 53[46000] -> 54[85000] via P2P/IPC +gpub085:793246:793311 [1] NCCL INFO Connected all rings +gpub085:793246:793311 [1] NCCL INFO Channel 01/0 : 53[46000] -> 56[7000] [send] via NET/Socket/1 +gpub089:160270:160332 [2] NCCL INFO Channel 01/0 : 62[85000] -> 61[46000] via P2P/IPC +gpub089:160270:160332 [2] NCCL INFO Connected all trees +gpub089:160270:160332 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub089:160270:160332 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub089:160270:160332 [2] NCCL INFO comm 0x55936f1bf990 rank 62 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub088:1835046:1835108 [0] NCCL INFO Using network Socket +gpub088:1835046:1835108 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub088:1835046:1835108 [0] NCCL INFO Trees [0] 57/60/-1->56->48 [1] 57/-1/-1->56->53 +gpub088:1835046:1835108 [0] NCCL INFO Channel 00/0 : 55[c7000] -> 56[7000] [receive] via NET/Socket/1 +gpub088:1835046:1835108 [0] NCCL INFO Channel 01/0 : 55[c7000] -> 56[7000] [receive] via NET/Socket/1 +gpub088:1835046:1835108 [0] NCCL INFO Channel 00/0 : 56[7000] -> 57[46000] via P2P/IPC +gpub088:1835046:1835108 [0] NCCL INFO Channel 01/0 : 56[7000] -> 57[46000] via P2P/IPC +gpub088:1835046:1835108 [0] NCCL INFO Connected all rings +gpub088:1835046:1835108 [0] NCCL INFO Channel 01/0 : 53[46000] -> 56[7000] [receive] via NET/Socket/1 +gpub088:1835046:1835108 [0] NCCL INFO Channel 00/0 : 56[7000] -> 60[7000] [send] via NET/Socket/1 +gpub088:1835046:1835108 [0] NCCL INFO Channel 00/0 : 48[7000] -> 56[7000] [receive] via NET/Socket/1 +gpub053:3113443:3113443 [1] NCCL INFO cudaDriverVersion 12020 +gpub053:3113443:3113443 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.153<0> +gpub053:3113443:3113443 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub053:3113443:3113497 [1] NCCL INFO NET/IB : No device found. +gpub053:3113443:3113497 [1] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.153<0> [1]hsn0:141.142.145.153<0> [2]eth0:fe80::c2e4:b356:358f:a76e%eth0<0> +gpub053:3113443:3113497 [1] NCCL INFO Using network Socket +gpub053:3113443:3113497 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub053:3113443:3113497 [1] NCCL INFO Trees [0] 34/16/-1->33->32 [1] 34/-1/-1->33->32 +gpub053:3113443:3113497 [1] NCCL INFO Channel 00/0 : 33[46000] -> 34[85000] via P2P/IPC +gpub053:3113443:3113497 [1] NCCL INFO Channel 01/0 : 33[46000] -> 34[85000] via P2P/IPC +gpub053:3113443:3113497 [1] NCCL INFO Connected all rings +gpub003:2602697:2602697 [3] NCCL INFO cudaDriverVersion 12020 +gpub003:2602697:2602697 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.103<0> +gpub003:2602697:2602697 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub003:2602697:2602755 [3] NCCL INFO NET/IB : No device found. 
+gpub003:2602697:2602755 [3] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.103<0> [1]hsn0:141.142.145.103<0> +gpub003:2602697:2602755 [3] NCCL INFO Using network Socket +gpub003:2602697:2602755 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub003:2602697:2602755 [3] NCCL INFO Trees [0] -1/-1/-1->3->2 [1] -1/-1/-1->3->2 +gpub003:2602697:2602755 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [send] via NET/Socket/1 +gpub003:2602697:2602755 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [send] via NET/Socket/1 +gpub003:2602697:2602755 [3] NCCL INFO Connected all rings +gpub003:2602697:2602755 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 2[85000] via P2P/IPC +gpub006:180546:180600 [2] NCCL INFO Connected all trees +gpub006:180546:180600 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub006:180546:180600 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub006:180546:180600 [2] NCCL INFO comm 0x562b5efa2c00 rank 6 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub007:822784:822842 [1] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [send] via NET/Socket/1 +gpub007:822784:822842 [1] NCCL INFO Channel 00/0 : 9[46000] -> 8[7000] via P2P/IPC +gpub007:822784:822842 [1] NCCL INFO Channel 01/0 : 9[46000] -> 8[7000] via P2P/IPC +gpub007:822784:822842 [1] NCCL INFO Connected all trees +gpub007:822784:822842 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub007:822784:822842 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub007:822784:822842 [1] NCCL INFO comm 0x55b511f8f270 rank 9 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub015:743037:743093 [0] NCCL INFO Connected all rings +gpub015:743037:743093 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [receive] via NET/Socket/1 +gpub015:743037:743093 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [receive] via NET/Socket/1 +gpub015:743037:743093 [0] NCCL INFO Channel 01/0 : 12[7000] -> 28[7000] [send] via NET/Socket/1 +gpub015:743037:743093 [0] NCCL INFO Channel 01/0 : 28[7000] -> 12[7000] [receive] via NET/Socket/1 +gpub015:743037:743093 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [send] via NET/Socket/1 +gpub015:743037:743093 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [send] via NET/Socket/1 +gpub015:743037:743093 [0] NCCL INFO Connected all trees +gpub015:743037:743093 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub015:743037:743093 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub015:743037:743093 [0] NCCL INFO comm 0x55a9d8a31f10 rank 12 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub016:765653:765715 [1] NCCL INFO Channel 00/0 : 17[46000] -> 8[7000] [send] via NET/Socket/1 +gpub016:765653:765715 [1] NCCL INFO Channel 00/0 : 17[46000] -> 16[7000] via P2P/IPC +gpub016:765653:765715 [1] NCCL INFO Channel 01/0 : 17[46000] -> 16[7000] via P2P/IPC +gpub016:765653:765715 [1] NCCL INFO Connected all trees +gpub016:765653:765715 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub016:765653:765715 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub016:765653:765715 [1] NCCL INFO comm 0x562f167dd7d0 rank 17 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub026:836553:836620 [3] NCCL INFO Channel 01/0 : 27[c7000] -> 26[85000] via P2P/IPC +gpub026:836553:836620 [3] NCCL INFO Connected all trees +gpub026:836553:836620 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub026:836553:836620 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per 
peer +gpub026:836553:836620 [3] NCCL INFO comm 0x5578dd6eba20 rank 27 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub056:1489719:1489719 [1] NCCL INFO cudaDriverVersion 12020 +gpub056:1489719:1489719 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.156<0> +gpub056:1489719:1489719 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub056:1489719:1489780 [1] NCCL INFO NET/IB : No device found. +gpub056:1489719:1489780 [1] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.156<0> [1]hsn0:141.142.145.156<0> [2]eth0:fe80::c8c2:d8a:9a8b:ad5a%eth0<0> +gpub056:1489719:1489780 [1] NCCL INFO Using network Socket +gpub056:1489719:1489780 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub056:1489719:1489780 [1] NCCL INFO Trees [0] 42/36/-1->41->40 [1] 42/-1/-1->41->40 +gpub056:1489719:1489780 [1] NCCL INFO Channel 00/0 : 41[46000] -> 42[85000] via P2P/IPC +gpub056:1489719:1489780 [1] NCCL INFO Channel 01/0 : 41[46000] -> 42[85000] via P2P/IPC +gpub056:1489719:1489780 [1] NCCL INFO Connected all rings +gpub022:1029031:1029082 [3] NCCL INFO Channel 01/0 : 23[c7000] -> 22[85000] via P2P/IPC +gpub022:1029031:1029082 [3] NCCL INFO Connected all trees +gpub022:1029031:1029082 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub022:1029031:1029082 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub022:1029031:1029082 [3] NCCL INFO comm 0x555ec36f6510 rank 23 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub062:525772:525834 [3] NCCL INFO Channel 01/0 : 47[c7000] -> 46[85000] via P2P/IPC +gpub062:525772:525834 [3] NCCL INFO Connected all trees +gpub062:525772:525834 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub062:525772:525834 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub062:525772:525834 [3] NCCL INFO comm 0x5648ee921120 rank 47 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub062:525769:525769 [0] NCCL INFO cudaDriverVersion 12020 +gpub062:525769:525769 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.162<0> +gpub062:525769:525769 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub062:525769:525835 [0] NCCL INFO NET/IB : No device found. +gpub062:525769:525835 [0] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.162<0> [1]hsn0:141.142.145.162<0> [2]eth0:fe80::221:e9:de39:a135%eth0<0> +gpub062:525769:525835 [0] NCCL INFO Using network Socket +gpub085:793246:793311 [1] NCCL INFO Channel 01/0 : 56[7000] -> 53[46000] [receive] via NET/Socket/1 +gpub085:793246:793311 [1] NCCL INFO Channel 00/0 : 53[46000] -> 52[7000] via P2P/IPC +gpub085:793246:793311 [1] NCCL INFO Channel 01/0 : 53[46000] -> 52[7000] via P2P/IPC +gpub085:793246:793311 [1] NCCL INFO Connected all trees +gpub085:793246:793311 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub085:793246:793311 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub085:793246:793311 [1] NCCL INFO comm 0x563227e26520 rank 53 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub089:160268:160268 [0] NCCL INFO cudaDriverVersion 12020 +gpub089:160268:160268 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.189<0> +gpub089:160268:160268 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub089:160268:160331 [0] NCCL INFO NET/IB : No device found. 
+gpub089:160268:160331 [0] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.189<0> [1]hsn0:141.142.145.189<0> +gpub089:160268:160331 [0] NCCL INFO Using network Socket +gpub089:160268:160331 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub089:160268:160331 [0] NCCL INFO Trees [0] 61/-1/-1->60->56 [1] 61/28/-1->60->-1 +gpub089:160268:160331 [0] NCCL INFO Channel 00/0 : 59[c7000] -> 60[7000] [receive] via NET/Socket/1 +gpub089:160268:160331 [0] NCCL INFO Channel 01/0 : 59[c7000] -> 60[7000] [receive] via NET/Socket/1 +gpub089:160268:160331 [0] NCCL INFO Channel 00/0 : 60[7000] -> 61[46000] via P2P/IPC +gpub089:160268:160331 [0] NCCL INFO Channel 01/0 : 60[7000] -> 61[46000] via P2P/IPC +gpub088:1835046:1835108 [0] NCCL INFO Channel 00/0 : 56[7000] -> 48[7000] [send] via NET/Socket/1 +gpub088:1835046:1835108 [0] NCCL INFO Channel 00/0 : 60[7000] -> 56[7000] [receive] via NET/Socket/1 +gpub088:1835046:1835108 [0] NCCL INFO Channel 01/0 : 56[7000] -> 53[46000] [send] via NET/Socket/1 +gpub088:1835046:1835108 [0] NCCL INFO Connected all trees +gpub088:1835046:1835108 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub088:1835046:1835108 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub088:1835046:1835108 [0] NCCL INFO comm 0x5626f305c890 rank 56 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub053:3113443:3113497 [1] NCCL INFO Channel 00/0 : 16[7000] -> 33[46000] [receive] via NET/Socket/1 +gpub053:3113443:3113497 [1] NCCL INFO Channel 00/0 : 33[46000] -> 16[7000] [send] via NET/Socket/1 +gpub053:3113443:3113497 [1] NCCL INFO Channel 00/0 : 33[46000] -> 32[7000] via P2P/IPC +gpub053:3113443:3113497 [1] NCCL INFO Channel 01/0 : 33[46000] -> 32[7000] via P2P/IPC +gpub053:3113443:3113497 [1] NCCL INFO Connected all trees +gpub053:3113443:3113497 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub053:3113443:3113497 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub053:3113443:3113497 [1] NCCL INFO comm 0x558e1c37a9b0 rank 33 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub003:2602697:2602755 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 2[85000] via P2P/IPC +gpub003:2602697:2602755 [3] NCCL INFO Connected all trees +gpub003:2602697:2602755 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub003:2602697:2602755 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub003:2602697:2602755 [3] NCCL INFO comm 0x558512fecf10 rank 3 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub006:180545:180545 [1] NCCL INFO cudaDriverVersion 12020 +gpub006:180545:180545 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.106<0> +gpub006:180545:180545 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub006:180545:180602 [1] NCCL INFO NET/IB : No device found. 
+gpub006:180545:180602 [1] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.106<0> [1]hsn0:141.142.145.106<0> +gpub006:180545:180602 [1] NCCL INFO Using network Socket +gpub006:180545:180602 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub006:180545:180602 [1] NCCL INFO Trees [0] 6/-1/-1->5->4 [1] 6/8/-1->5->4 +gpub006:180545:180602 [1] NCCL INFO Channel 00/0 : 5[46000] -> 6[85000] via P2P/IPC +gpub006:180545:180602 [1] NCCL INFO Channel 01/0 : 5[46000] -> 6[85000] via P2P/IPC +gpub006:180545:180602 [1] NCCL INFO Connected all rings +gpub006:180545:180602 [1] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [send] via NET/Socket/1 +gpub007:822785:822785 [2] NCCL INFO cudaDriverVersion 12020 +gpub007:822785:822785 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.107<0> +gpub007:822785:822785 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub007:822785:822841 [2] NCCL INFO NET/IB : No device found. +gpub007:822785:822841 [2] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.107<0> [1]hsn0:141.142.145.107<0> +gpub007:822785:822841 [2] NCCL INFO Using network Socket +gpub007:822785:822841 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub007:822785:822841 [2] NCCL INFO Trees [0] 11/-1/-1->10->9 [1] 11/-1/-1->10->9 +gpub007:822785:822841 [2] NCCL INFO Channel 00/0 : 10[85000] -> 11[c7000] via P2P/IPC +gpub007:822785:822841 [2] NCCL INFO Channel 01/0 : 10[85000] -> 11[c7000] via P2P/IPC +gpub007:822785:822841 [2] NCCL INFO Connected all rings +gpub007:822785:822841 [2] NCCL INFO Channel 00/0 : 10[85000] -> 9[46000] via P2P/IPC +gpub015:743038:743038 [1] NCCL INFO cudaDriverVersion 12020 +gpub015:743038:743038 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.115<0> +gpub015:743038:743038 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub015:743038:743095 [1] NCCL INFO NET/IB : No device found. +gpub015:743038:743095 [1] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.115<0> [1]hsn0:141.142.145.115<0> +gpub015:743038:743095 [1] NCCL INFO Using network Socket +gpub015:743038:743095 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub015:743038:743095 [1] NCCL INFO Trees [0] 14/-1/-1->13->12 [1] 14/20/-1->13->12 +gpub015:743038:743095 [1] NCCL INFO Channel 00/0 : 13[46000] -> 14[85000] via P2P/IPC +gpub015:743038:743095 [1] NCCL INFO Channel 01/0 : 13[46000] -> 14[85000] via P2P/IPC +gpub015:743038:743095 [1] NCCL INFO Connected all rings +gpub015:743038:743095 [1] NCCL INFO Channel 01/0 : 13[46000] -> 20[7000] [send] via NET/Socket/1 +gpub016:765655:765655 [3] NCCL INFO cudaDriverVersion 12020 +gpub016:765655:765655 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.116<0> +gpub016:765655:765655 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub016:765655:765718 [3] NCCL INFO NET/IB : No device found. 
+gpub016:765655:765718 [3] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.116<0> [1]hsn0:141.142.145.116<0> +gpub016:765655:765718 [3] NCCL INFO Using network Socket +gpub016:765655:765718 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub016:765655:765718 [3] NCCL INFO Trees [0] -1/-1/-1->19->18 [1] -1/-1/-1->19->18 +gpub016:765655:765718 [3] NCCL INFO Channel 00/0 : 19[c7000] -> 20[7000] [send] via NET/Socket/1 +gpub016:765655:765718 [3] NCCL INFO Channel 01/0 : 19[c7000] -> 20[7000] [send] via NET/Socket/1 +gpub016:765655:765718 [3] NCCL INFO Connected all rings +gpub016:765655:765718 [3] NCCL INFO Channel 00/0 : 19[c7000] -> 18[85000] via P2P/IPC +gpub026:836551:836551 [1] NCCL INFO cudaDriverVersion 12020 +gpub026:836551:836551 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.126<0> +gpub026:836551:836551 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub026:836551:836619 [1] NCCL INFO NET/IB : No device found. +gpub026:836551:836619 [1] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.126<0> [1]hsn0:141.142.145.126<0> +gpub026:836551:836619 [1] NCCL INFO Using network Socket +gpub026:836551:836619 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub026:836551:836619 [1] NCCL INFO Trees [0] 26/20/-1->25->24 [1] 26/-1/-1->25->24 +gpub026:836551:836619 [1] NCCL INFO Channel 00/0 : 25[46000] -> 26[85000] via P2P/IPC +gpub026:836551:836619 [1] NCCL INFO Channel 01/0 : 25[46000] -> 26[85000] via P2P/IPC +gpub026:836551:836619 [1] NCCL INFO Connected all rings +gpub026:836551:836619 [1] NCCL INFO Channel 00/0 : 20[7000] -> 25[46000] [receive] via NET/Socket/1 +gpub056:1489719:1489780 [1] NCCL INFO Channel 00/0 : 36[7000] -> 41[46000] [receive] via NET/Socket/1 +gpub056:1489719:1489780 [1] NCCL INFO Channel 00/0 : 41[46000] -> 36[7000] [send] via NET/Socket/1 +gpub056:1489719:1489780 [1] NCCL INFO Channel 00/0 : 41[46000] -> 40[7000] via P2P/IPC +gpub056:1489719:1489780 [1] NCCL INFO Channel 01/0 : 41[46000] -> 40[7000] via P2P/IPC +gpub056:1489719:1489780 [1] NCCL INFO Connected all trees +gpub056:1489719:1489780 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub056:1489719:1489780 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub056:1489719:1489780 [1] NCCL INFO comm 0x562d2d32ff80 rank 41 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub022:1029028:1029028 [0] NCCL INFO cudaDriverVersion 12020 +gpub022:1029028:1029028 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.122<0> +gpub022:1029028:1029028 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub022:1029028:1029084 [0] NCCL INFO NET/IB : No device found. 
+gpub022:1029028:1029084 [0] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.122<0> [1]hsn0:141.142.145.122<0> +gpub022:1029028:1029084 [0] NCCL INFO Using network Socket +gpub022:1029028:1029084 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub022:1029028:1029084 [0] NCCL INFO Trees [0] 21/-1/-1->20->25 [1] 21/16/-1->20->13 +gpub022:1029028:1029084 [0] NCCL INFO Channel 00/0 : 19[c7000] -> 20[7000] [receive] via NET/Socket/1 +gpub022:1029028:1029084 [0] NCCL INFO Channel 01/0 : 19[c7000] -> 20[7000] [receive] via NET/Socket/1 +gpub022:1029028:1029084 [0] NCCL INFO Channel 00/0 : 20[7000] -> 21[46000] via P2P/IPC +gpub062:525769:525835 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub062:525769:525835 [0] NCCL INFO Trees [0] 45/-1/-1->44->40 [1] 45/36/-1->44->29 +gpub062:525769:525835 [0] NCCL INFO Channel 00/0 : 43[c7000] -> 44[7000] [receive] via NET/Socket/1 +gpub062:525769:525835 [0] NCCL INFO Channel 01/0 : 43[c7000] -> 44[7000] [receive] via NET/Socket/1 +gpub062:525769:525835 [0] NCCL INFO Channel 00/0 : 44[7000] -> 45[46000] via P2P/IPC +gpub062:525769:525835 [0] NCCL INFO Channel 01/0 : 44[7000] -> 45[46000] via P2P/IPC +gpub062:525769:525835 [0] NCCL INFO Connected all rings +gpub062:525769:525835 [0] NCCL INFO Channel 00/0 : 40[7000] -> 44[7000] [receive] via NET/Socket/1 +gpub062:525769:525835 [0] NCCL INFO Channel 01/0 : 36[7000] -> 44[7000] [receive] via NET/Socket/1 +gpub062:525769:525835 [0] NCCL INFO Channel 01/0 : 29[46000] -> 44[7000] [receive] via NET/Socket/1 +gpub062:525769:525835 [0] NCCL INFO Channel 01/0 : 44[7000] -> 29[46000] [send] via NET/Socket/1 +gpub085:793248:793248 [3] NCCL INFO cudaDriverVersion 12020 +gpub085:793248:793248 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.185<0> +gpub085:793248:793248 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub085:793248:793313 [3] NCCL INFO NET/IB : No device found. 
+gpub085:793248:793313 [3] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.185<0> [1]hsn0:141.142.145.185<0> [2]eth0:fe80::1703:8949:2817:7411%eth0<0> +gpub085:793248:793313 [3] NCCL INFO Using network Socket +gpub085:793248:793313 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub085:793248:793313 [3] NCCL INFO Trees [0] -1/-1/-1->55->54 [1] -1/-1/-1->55->54 +gpub085:793248:793313 [3] NCCL INFO Channel 00/0 : 55[c7000] -> 56[7000] [send] via NET/Socket/1 +gpub085:793248:793313 [3] NCCL INFO Channel 01/0 : 55[c7000] -> 56[7000] [send] via NET/Socket/1 +gpub085:793248:793313 [3] NCCL INFO Connected all rings +gpub085:793248:793313 [3] NCCL INFO Channel 00/0 : 55[c7000] -> 54[85000] via P2P/IPC +gpub089:160268:160331 [0] NCCL INFO Connected all rings +gpub089:160268:160331 [0] NCCL INFO Channel 00/0 : 56[7000] -> 60[7000] [receive] via NET/Socket/1 +gpub089:160268:160331 [0] NCCL INFO Channel 01/0 : 28[7000] -> 60[7000] [receive] via NET/Socket/1 +gpub089:160268:160331 [0] NCCL INFO Channel 01/0 : 60[7000] -> 28[7000] [send] via NET/Socket/1 +gpub089:160268:160331 [0] NCCL INFO Channel 00/0 : 60[7000] -> 56[7000] [send] via NET/Socket/1 +gpub089:160268:160331 [0] NCCL INFO Connected all trees +gpub089:160268:160331 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub089:160268:160331 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub089:160268:160331 [0] NCCL INFO comm 0x55a1884b0e80 rank 60 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub088:1835048:1835048 [2] NCCL INFO cudaDriverVersion 12020 +gpub088:1835048:1835048 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.188<0> +gpub088:1835048:1835048 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub088:1835048:1835110 [2] NCCL INFO NET/IB : No device found. +gpub088:1835048:1835110 [2] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.188<0> [1]hsn0:141.142.145.188<0> [2]eth0:fe80::732:b822:3c8e:dec4%eth0<0> +gpub088:1835048:1835110 [2] NCCL INFO Using network Socket +gpub088:1835048:1835110 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub088:1835048:1835110 [2] NCCL INFO Trees [0] 59/-1/-1->58->57 [1] 59/-1/-1->58->57 +gpub088:1835048:1835110 [2] NCCL INFO Channel 00/0 : 58[85000] -> 59[c7000] via P2P/IPC +gpub088:1835048:1835110 [2] NCCL INFO Channel 01/0 : 58[85000] -> 59[c7000] via P2P/IPC +gpub088:1835048:1835110 [2] NCCL INFO Connected all rings +gpub088:1835048:1835110 [2] NCCL INFO Channel 00/0 : 58[85000] -> 57[46000] via P2P/IPC +gpub053:3113442:3113442 [0] NCCL INFO cudaDriverVersion 12020 +gpub053:3113442:3113442 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.153<0> +gpub053:3113442:3113442 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub053:3113442:3113495 [0] NCCL INFO NET/IB : No device found. 
+gpub053:3113442:3113495 [0] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.153<0> [1]hsn0:141.142.145.153<0> [2]eth0:fe80::c2e4:b356:358f:a76e%eth0<0> +gpub053:3113442:3113495 [0] NCCL INFO Using network Socket +gpub053:3113442:3113495 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub053:3113442:3113495 [0] NCCL INFO Trees [0] 33/48/-1->32->0 [1] 33/-1/-1->32->36 +gpub053:3113442:3113495 [0] NCCL INFO Channel 00/0 : 31[c7000] -> 32[7000] [receive] via NET/Socket/1 +gpub053:3113442:3113495 [0] NCCL INFO Channel 01/0 : 31[c7000] -> 32[7000] [receive] via NET/Socket/1 +gpub053:3113442:3113495 [0] NCCL INFO Channel 00/0 : 32[7000] -> 33[46000] via P2P/IPC +gpub003:2602696:2602696 [2] NCCL INFO cudaDriverVersion 12020 +gpub003:2602696:2602696 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.103<0> +gpub003:2602696:2602696 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub003:2602696:2602753 [2] NCCL INFO NET/IB : No device found. +gpub003:2602696:2602753 [2] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.103<0> [1]hsn0:141.142.145.103<0> +gpub003:2602696:2602753 [2] NCCL INFO Using network Socket +gpub003:2602696:2602753 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub003:2602696:2602753 [2] NCCL INFO Trees [0] 3/-1/-1->2->1 [1] 3/-1/-1->2->1 +gpub003:2602696:2602753 [2] NCCL INFO Channel 00/0 : 2[85000] -> 3[c7000] via P2P/IPC +gpub003:2602696:2602753 [2] NCCL INFO Channel 01/0 : 2[85000] -> 3[c7000] via P2P/IPC +gpub003:2602696:2602753 [2] NCCL INFO Connected all rings +gpub003:2602696:2602753 [2] NCCL INFO Channel 00/0 : 2[85000] -> 1[46000] via P2P/IPC +gpub006:180545:180602 [1] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [receive] via NET/Socket/1 +gpub006:180545:180602 [1] NCCL INFO Channel 00/0 : 5[46000] -> 4[7000] via P2P/IPC +gpub006:180545:180602 [1] NCCL INFO Channel 01/0 : 5[46000] -> 4[7000] via P2P/IPC +gpub006:180545:180602 [1] NCCL INFO Connected all trees +gpub006:180545:180602 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub006:180545:180602 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub006:180545:180602 [1] NCCL INFO comm 0x56364f0cae70 rank 5 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub007:822785:822841 [2] NCCL INFO Channel 01/0 : 10[85000] -> 9[46000] via P2P/IPC +gpub007:822785:822841 [2] NCCL INFO Connected all trees +gpub007:822785:822841 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub007:822785:822841 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub007:822785:822841 [2] NCCL INFO comm 0x55ab02d97f10 rank 10 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub015:743038:743095 [1] NCCL INFO Channel 01/0 : 20[7000] -> 13[46000] [receive] via NET/Socket/1 +gpub015:743038:743095 [1] NCCL INFO Channel 00/0 : 13[46000] -> 12[7000] via P2P/IPC +gpub015:743038:743095 [1] NCCL INFO Channel 01/0 : 13[46000] -> 12[7000] via P2P/IPC +gpub015:743038:743095 [1] NCCL INFO Connected all trees +gpub015:743038:743095 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub015:743038:743095 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub015:743038:743095 [1] NCCL INFO comm 0x559b5e27fcc0 rank 13 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub016:765655:765718 [3] NCCL INFO Channel 01/0 : 19[c7000] -> 18[85000] via P2P/IPC +gpub016:765655:765718 [3] NCCL INFO Connected all trees +gpub016:765655:765718 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 
+gpub016:765655:765718 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub016:765655:765718 [3] NCCL INFO comm 0x55ffcd11ded0 rank 19 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub026:836551:836619 [1] NCCL INFO Channel 00/0 : 25[46000] -> 20[7000] [send] via NET/Socket/1 +gpub026:836551:836619 [1] NCCL INFO Channel 00/0 : 25[46000] -> 24[7000] via P2P/IPC +gpub026:836551:836619 [1] NCCL INFO Channel 01/0 : 25[46000] -> 24[7000] via P2P/IPC +gpub026:836551:836619 [1] NCCL INFO Connected all trees +gpub026:836551:836619 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub026:836551:836619 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub026:836551:836619 [1] NCCL INFO comm 0x55af3590f350 rank 25 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub056:1489720:1489720 [2] NCCL INFO cudaDriverVersion 12020 +gpub056:1489720:1489720 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.156<0> +gpub056:1489720:1489720 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub056:1489720:1489783 [2] NCCL INFO NET/IB : No device found. +gpub056:1489720:1489783 [2] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.156<0> [1]hsn0:141.142.145.156<0> [2]eth0:fe80::c8c2:d8a:9a8b:ad5a%eth0<0> +gpub056:1489720:1489783 [2] NCCL INFO Using network Socket +gpub056:1489720:1489783 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub056:1489720:1489783 [2] NCCL INFO Trees [0] 43/-1/-1->42->41 [1] 43/-1/-1->42->41 +gpub056:1489720:1489783 [2] NCCL INFO Channel 00/0 : 42[85000] -> 43[c7000] via P2P/IPC +gpub056:1489720:1489783 [2] NCCL INFO Channel 01/0 : 42[85000] -> 43[c7000] via P2P/IPC +gpub056:1489720:1489783 [2] NCCL INFO Connected all rings +gpub056:1489720:1489783 [2] NCCL INFO Channel 00/0 : 42[85000] -> 41[46000] via P2P/IPC +gpub022:1029028:1029084 [0] NCCL INFO Channel 01/0 : 20[7000] -> 21[46000] via P2P/IPC +gpub022:1029028:1029084 [0] NCCL INFO Connected all rings +gpub022:1029028:1029084 [0] NCCL INFO Channel 01/0 : 16[7000] -> 20[7000] [receive] via NET/Socket/1 +gpub022:1029028:1029084 [0] NCCL INFO Channel 00/0 : 20[7000] -> 25[46000] [send] via NET/Socket/1 +gpub022:1029028:1029084 [0] NCCL INFO Channel 01/0 : 13[46000] -> 20[7000] [receive] via NET/Socket/1 +gpub022:1029028:1029084 [0] NCCL INFO Channel 01/0 : 20[7000] -> 13[46000] [send] via NET/Socket/1 +gpub022:1029028:1029084 [0] NCCL INFO Channel 00/0 : 25[46000] -> 20[7000] [receive] via NET/Socket/1 +gpub022:1029028:1029084 [0] NCCL INFO Channel 01/0 : 20[7000] -> 16[7000] [send] via NET/Socket/1 +gpub022:1029028:1029084 [0] NCCL INFO Connected all trees +gpub022:1029028:1029084 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub022:1029028:1029084 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub062:525769:525835 [0] NCCL INFO Channel 01/0 : 44[7000] -> 36[7000] [send] via NET/Socket/1 +gpub062:525769:525835 [0] NCCL INFO Channel 00/0 : 44[7000] -> 40[7000] [send] via NET/Socket/1 +gpub062:525769:525835 [0] NCCL INFO Connected all trees +gpub062:525769:525835 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub062:525769:525835 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub062:525769:525835 [0] NCCL INFO comm 0x55a7e3f95c70 rank 44 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub085:793248:793313 [3] NCCL INFO Channel 01/0 : 55[c7000] -> 54[85000] via P2P/IPC +gpub085:793248:793313 [3] NCCL INFO Connected all trees +gpub085:793248:793313 
[3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub085:793248:793313 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub085:793248:793313 [3] NCCL INFO comm 0x55bd533dae00 rank 55 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub088:1835048:1835110 [2] NCCL INFO Channel 01/0 : 58[85000] -> 57[46000] via P2P/IPC +gpub088:1835048:1835110 [2] NCCL INFO Connected all trees +gpub088:1835048:1835110 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub088:1835048:1835110 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub088:1835048:1835110 [2] NCCL INFO comm 0x55e99034dfe0 rank 58 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub053:3113442:3113495 [0] NCCL INFO Channel 01/0 : 32[7000] -> 33[46000] via P2P/IPC +gpub053:3113442:3113495 [0] NCCL INFO Connected all rings +gpub053:3113442:3113495 [0] NCCL INFO Channel 01/0 : 32[7000] -> 36[7000] [send] via NET/Socket/1 +gpub053:3113442:3113495 [0] NCCL INFO Channel 00/0 : 32[7000] -> 48[7000] [send] via NET/Socket/1 +gpub053:3113442:3113495 [0] NCCL INFO Channel 00/0 : 0[7000] -> 32[7000] [receive] via NET/Socket/1 +gpub053:3113442:3113495 [0] NCCL INFO Channel 00/0 : 32[7000] -> 0[7000] [send] via NET/Socket/1 +gpub053:3113442:3113495 [0] NCCL INFO Channel 00/0 : 48[7000] -> 32[7000] [receive] via NET/Socket/1 +gpub053:3113442:3113495 [0] NCCL INFO Channel 01/0 : 36[7000] -> 32[7000] [receive] via NET/Socket/1 +gpub053:3113442:3113495 [0] NCCL INFO Connected all trees +gpub053:3113442:3113495 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub053:3113442:3113495 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub003:2602696:2602753 [2] NCCL INFO Channel 01/0 : 2[85000] -> 1[46000] via P2P/IPC +gpub003:2602696:2602753 [2] NCCL INFO Connected all trees +gpub003:2602696:2602753 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub003:2602696:2602753 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub003:2602696:2602753 [2] NCCL INFO comm 0x560b910b5a60 rank 2 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub056:1489720:1489783 [2] NCCL INFO Channel 01/0 : 42[85000] -> 41[46000] via P2P/IPC +gpub056:1489720:1489783 [2] NCCL INFO Connected all trees +gpub056:1489720:1489783 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub056:1489720:1489783 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub056:1489720:1489783 [2] NCCL INFO comm 0x562afffd3e50 rank 42 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub022:1029028:1029084 [0] NCCL INFO comm 0x55960b41be10 rank 20 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub053:3113442:3113495 [0] NCCL INFO comm 0x55fd64c83ef0 rank 32 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub028:560437:560437 [1] NCCL INFO cudaDriverVersion 12020 +gpub028:560437:560437 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.128<0> +gpub028:560437:560437 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub028:560437:560486 [1] NCCL INFO NET/IB : No device found. 
+gpub028:560437:560486 [1] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.128<0> [1]hsn0:141.142.145.128<0> +gpub028:560437:560486 [1] NCCL INFO Using network Socket +gpub028:560437:560486 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub028:560437:560486 [1] NCCL INFO Trees [0] 30/-1/-1->29->28 [1] 30/44/-1->29->28 +gpub028:560437:560486 [1] NCCL INFO Channel 00/0 : 29[46000] -> 30[85000] via P2P/IPC +gpub028:560437:560486 [1] NCCL INFO Channel 01/0 : 29[46000] -> 30[85000] via P2P/IPC +gpub028:560437:560486 [1] NCCL INFO Connected all rings +gpub028:560437:560486 [1] NCCL INFO Channel 01/0 : 29[46000] -> 44[7000] [send] via NET/Socket/1 +gpub028:560437:560486 [1] NCCL INFO Channel 01/0 : 44[7000] -> 29[46000] [receive] via NET/Socket/1 +gpub028:560437:560486 [1] NCCL INFO Channel 00/0 : 29[46000] -> 28[7000] via P2P/IPC +gpub028:560437:560486 [1] NCCL INFO Channel 01/0 : 29[46000] -> 28[7000] via P2P/IPC +gpub028:560437:560486 [1] NCCL INFO Connected all trees +gpub028:560437:560486 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub028:560437:560486 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub028:560437:560486 [1] NCCL INFO comm 0x5583bcba53e0 rank 29 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub028:560438:560438 [2] NCCL INFO cudaDriverVersion 12020 +gpub028:560438:560438 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.128<0> +gpub028:560438:560438 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub028:560438:560489 [2] NCCL INFO NET/IB : No device found. +gpub028:560438:560489 [2] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.128<0> [1]hsn0:141.142.145.128<0> +gpub028:560438:560489 [2] NCCL INFO Using network Socket +gpub028:560438:560489 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub028:560438:560489 [2] NCCL INFO Trees [0] 31/-1/-1->30->29 [1] 31/-1/-1->30->29 +gpub028:560438:560489 [2] NCCL INFO Channel 00/0 : 30[85000] -> 31[c7000] via P2P/IPC +gpub028:560438:560489 [2] NCCL INFO Channel 01/0 : 30[85000] -> 31[c7000] via P2P/IPC +gpub028:560438:560489 [2] NCCL INFO Connected all rings +gpub028:560438:560489 [2] NCCL INFO Channel 00/0 : 30[85000] -> 29[46000] via P2P/IPC +gpub028:560438:560489 [2] NCCL INFO Channel 01/0 : 30[85000] -> 29[46000] via P2P/IPC +gpub028:560438:560489 [2] NCCL INFO Connected all trees +gpub028:560438:560489 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub028:560438:560489 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub028:560438:560489 [2] NCCL INFO comm 0x559a87b6d3b0 rank 30 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub028:560436:560436 [0] NCCL INFO cudaDriverVersion 12020 +gpub028:560436:560436 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.128<0> +gpub028:560436:560436 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub028:560436:560488 [0] NCCL INFO NET/IB : No device found. 
+gpub028:560436:560488 [0] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.128<0> [1]hsn0:141.142.145.128<0> +gpub028:560436:560488 [0] NCCL INFO Using network Socket +gpub028:560436:560488 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub028:560436:560488 [0] NCCL INFO Trees [0] 29/-1/-1->28->24 [1] 29/12/-1->28->60 +gpub028:560436:560488 [0] NCCL INFO Channel 00/0 : 27[c7000] -> 28[7000] [receive] via NET/Socket/1 +gpub028:560436:560488 [0] NCCL INFO Channel 01/0 : 27[c7000] -> 28[7000] [receive] via NET/Socket/1 +gpub028:560436:560488 [0] NCCL INFO Channel 00/0 : 28[7000] -> 29[46000] via P2P/IPC +gpub028:560436:560488 [0] NCCL INFO Channel 01/0 : 28[7000] -> 29[46000] via P2P/IPC +gpub028:560436:560488 [0] NCCL INFO Connected all rings +gpub028:560436:560488 [0] NCCL INFO Channel 00/0 : 24[7000] -> 28[7000] [receive] via NET/Socket/1 +gpub028:560436:560488 [0] NCCL INFO Channel 01/0 : 12[7000] -> 28[7000] [receive] via NET/Socket/1 +gpub028:560436:560488 [0] NCCL INFO Channel 01/0 : 60[7000] -> 28[7000] [receive] via NET/Socket/1 +gpub028:560436:560488 [0] NCCL INFO Channel 01/0 : 28[7000] -> 60[7000] [send] via NET/Socket/1 +gpub028:560436:560488 [0] NCCL INFO Channel 01/0 : 28[7000] -> 12[7000] [send] via NET/Socket/1 +gpub028:560436:560488 [0] NCCL INFO Channel 00/0 : 28[7000] -> 24[7000] [send] via NET/Socket/1 +gpub028:560436:560488 [0] NCCL INFO Connected all trees +gpub028:560436:560488 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub028:560436:560488 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub028:560436:560488 [0] NCCL INFO comm 0x55d0b9eccaa0 rank 28 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub028:560439:560439 [3] NCCL INFO cudaDriverVersion 12020 +gpub028:560439:560439 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.128<0> +gpub028:560439:560439 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub028:560439:560487 [3] NCCL INFO NET/IB : No device found. +gpub028:560439:560487 [3] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.128<0> [1]hsn0:141.142.145.128<0> +gpub028:560439:560487 [3] NCCL INFO Using network Socket +gpub028:560439:560487 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub028:560439:560487 [3] NCCL INFO Trees [0] -1/-1/-1->31->30 [1] -1/-1/-1->31->30 +gpub028:560439:560487 [3] NCCL INFO Channel 00/0 : 31[c7000] -> 32[7000] [send] via NET/Socket/1 +gpub028:560439:560487 [3] NCCL INFO Channel 01/0 : 31[c7000] -> 32[7000] [send] via NET/Socket/1 +gpub028:560439:560487 [3] NCCL INFO Connected all rings +gpub028:560439:560487 [3] NCCL INFO Channel 00/0 : 31[c7000] -> 30[85000] via P2P/IPC +gpub028:560439:560487 [3] NCCL INFO Channel 01/0 : 31[c7000] -> 30[85000] via P2P/IPC +gpub028:560439:560487 [3] NCCL INFO Connected all trees +gpub028:560439:560487 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub028:560439:560487 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub028:560439:560487 [3] NCCL INFO comm 0x555b8954c350 rank 31 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub054:1687935:1687935 [2] NCCL INFO cudaDriverVersion 12020 +gpub054:1687935:1687935 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.154<0> +gpub054:1687935:1687935 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub054:1687935:1687988 [2] NCCL INFO NET/IB : No device found. 
+gpub054:1687935:1687988 [2] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.154<0> [1]hsn0:141.142.145.154<0> [2]eth0:fe80::4cdc:ea86:1710:c85d%eth0<0> +gpub054:1687935:1687988 [2] NCCL INFO Using network Socket +gpub054:1687935:1687988 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub054:1687935:1687988 [2] NCCL INFO Trees [0] 39/-1/-1->38->37 [1] 39/-1/-1->38->37 +gpub054:1687935:1687988 [2] NCCL INFO Channel 00/0 : 38[85000] -> 39[c7000] via P2P/IPC +gpub054:1687935:1687988 [2] NCCL INFO Channel 01/0 : 38[85000] -> 39[c7000] via P2P/IPC +gpub054:1687935:1687988 [2] NCCL INFO Connected all rings +gpub054:1687935:1687988 [2] NCCL INFO Channel 00/0 : 38[85000] -> 37[46000] via P2P/IPC +gpub054:1687935:1687988 [2] NCCL INFO Channel 01/0 : 38[85000] -> 37[46000] via P2P/IPC +gpub054:1687935:1687988 [2] NCCL INFO Connected all trees +gpub054:1687935:1687988 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub054:1687935:1687988 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub054:1687935:1687988 [2] NCCL INFO comm 0x55bc670cce70 rank 38 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub054:1687934:1687934 [1] NCCL INFO cudaDriverVersion 12020 +gpub054:1687934:1687934 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.154<0> +gpub054:1687934:1687934 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub054:1687934:1687989 [1] NCCL INFO NET/IB : No device found. +gpub054:1687934:1687989 [1] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.154<0> [1]hsn0:141.142.145.154<0> [2]eth0:fe80::4cdc:ea86:1710:c85d%eth0<0> +gpub054:1687934:1687989 [1] NCCL INFO Using network Socket +gpub054:1687934:1687989 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub054:1687934:1687989 [1] NCCL INFO Trees [0] 38/-1/-1->37->36 [1] 38/40/-1->37->36 +gpub054:1687934:1687989 [1] NCCL INFO Channel 00/0 : 37[46000] -> 38[85000] via P2P/IPC +gpub054:1687934:1687989 [1] NCCL INFO Channel 01/0 : 37[46000] -> 38[85000] via P2P/IPC +gpub054:1687934:1687989 [1] NCCL INFO Connected all rings +gpub054:1687934:1687989 [1] NCCL INFO Channel 01/0 : 37[46000] -> 40[7000] [send] via NET/Socket/1 +gpub054:1687934:1687989 [1] NCCL INFO Channel 01/0 : 40[7000] -> 37[46000] [receive] via NET/Socket/1 +gpub054:1687934:1687989 [1] NCCL INFO Channel 00/0 : 37[46000] -> 36[7000] via P2P/IPC +gpub054:1687934:1687989 [1] NCCL INFO Channel 01/0 : 37[46000] -> 36[7000] via P2P/IPC +gpub054:1687934:1687989 [1] NCCL INFO Connected all trees +gpub054:1687934:1687989 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub054:1687934:1687989 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub054:1687934:1687989 [1] NCCL INFO comm 0x559691e3bc50 rank 37 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub054:1687933:1687933 [0] NCCL INFO cudaDriverVersion 12020 +gpub054:1687933:1687933 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.154<0> +gpub054:1687933:1687933 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub054:1687933:1687991 [0] NCCL INFO NET/IB : No device found. 
+gpub054:1687933:1687991 [0] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.154<0> [1]hsn0:141.142.145.154<0> [2]eth0:fe80::4cdc:ea86:1710:c85d%eth0<0> +gpub054:1687933:1687991 [0] NCCL INFO Using network Socket +gpub054:1687933:1687991 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub054:1687933:1687991 [0] NCCL INFO Trees [0] 37/-1/-1->36->41 [1] 37/32/-1->36->44 +gpub054:1687933:1687991 [0] NCCL INFO Channel 00/0 : 35[c7000] -> 36[7000] [receive] via NET/Socket/1 +gpub054:1687933:1687991 [0] NCCL INFO Channel 01/0 : 35[c7000] -> 36[7000] [receive] via NET/Socket/1 +gpub054:1687933:1687991 [0] NCCL INFO Channel 00/0 : 36[7000] -> 37[46000] via P2P/IPC +gpub054:1687933:1687991 [0] NCCL INFO Channel 01/0 : 36[7000] -> 37[46000] via P2P/IPC +gpub054:1687933:1687991 [0] NCCL INFO Connected all rings +gpub054:1687933:1687991 [0] NCCL INFO Channel 01/0 : 32[7000] -> 36[7000] [receive] via NET/Socket/1 +gpub054:1687933:1687991 [0] NCCL INFO Channel 00/0 : 36[7000] -> 41[46000] [send] via NET/Socket/1 +gpub054:1687933:1687991 [0] NCCL INFO Channel 01/0 : 36[7000] -> 44[7000] [send] via NET/Socket/1 +gpub054:1687933:1687991 [0] NCCL INFO Channel 01/0 : 44[7000] -> 36[7000] [receive] via NET/Socket/1 +gpub054:1687933:1687991 [0] NCCL INFO Channel 00/0 : 41[46000] -> 36[7000] [receive] via NET/Socket/1 +gpub054:1687933:1687991 [0] NCCL INFO Channel 01/0 : 36[7000] -> 32[7000] [send] via NET/Socket/1 +gpub054:1687933:1687991 [0] NCCL INFO Connected all trees +gpub054:1687933:1687991 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub054:1687933:1687991 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub054:1687933:1687991 [0] NCCL INFO comm 0x557b83a75e20 rank 36 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub054:1687936:1687936 [3] NCCL INFO cudaDriverVersion 12020 +gpub054:1687936:1687936 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.154<0> +gpub054:1687936:1687936 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub054:1687936:1687990 [3] NCCL INFO NET/IB : No device found. +gpub054:1687936:1687990 [3] NCCL INFO NET/Socket : Using [0]eth1:172.28.23.154<0> [1]hsn0:141.142.145.154<0> [2]eth0:fe80::4cdc:ea86:1710:c85d%eth0<0> +gpub054:1687936:1687990 [3] NCCL INFO Using network Socket +gpub054:1687936:1687990 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub054:1687936:1687990 [3] NCCL INFO Trees [0] -1/-1/-1->39->38 [1] -1/-1/-1->39->38 +gpub054:1687936:1687990 [3] NCCL INFO Channel 00/0 : 39[c7000] -> 40[7000] [send] via NET/Socket/1 +gpub054:1687936:1687990 [3] NCCL INFO Channel 01/0 : 39[c7000] -> 40[7000] [send] via NET/Socket/1 +gpub054:1687936:1687990 [3] NCCL INFO Connected all rings +gpub054:1687936:1687990 [3] NCCL INFO Channel 00/0 : 39[c7000] -> 38[85000] via P2P/IPC +gpub054:1687936:1687990 [3] NCCL INFO Channel 01/0 : 39[c7000] -> 38[85000] via P2P/IPC +gpub054:1687936:1687990 [3] NCCL INFO Connected all trees +gpub054:1687936:1687990 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub054:1687936:1687990 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub054:1687936:1687990 [3] NCCL INFO comm 0x560fe88bb480 rank 39 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +[gpub003:0/64] 2024-01-28 15:50:47,238 (distributed:1027) INFO: Reducer buckets have been rebuilt in this iteration. 
+[gpub003:0/64] 2024-01-28 15:57:15,778 (trainer:753) INFO: 15epoch:train:1-100batch: iter_time=3.181, forward_time=0.336, loss_ctc=91.274, loss_interctc_layer6=97.304, loss_interctc_layer12=82.071, loss_interctc_layer15=75.705, loss_interctc_layer21=93.324, loss=87.935, backward_time=0.724, grad_norm=82.241, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.229, optim0_lr0=1.069e-04, train_time=9.666 +[gpub003:0/64] 2024-01-28 16:05:25,580 (trainer:753) INFO: 15epoch:train:101-200batch: iter_time=9.751e-05, forward_time=0.294, loss_ctc=87.268, loss_interctc_layer6=90.538, loss_interctc_layer12=76.212, loss_interctc_layer15=70.359, loss_interctc_layer21=89.325, loss=82.741, backward_time=0.839, grad_norm=88.127, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.229, optim0_lr0=1.069e-04, train_time=4.911 +[gpub003:0/64] 2024-01-28 16:13:50,064 (trainer:753) INFO: 15epoch:train:201-300batch: iter_time=1.030e-04, forward_time=0.203, loss_ctc=91.617, loss_interctc_layer6=94.735, loss_interctc_layer12=79.404, loss_interctc_layer15=73.140, loss_interctc_layer21=93.951, loss=86.569, backward_time=0.831, grad_norm=95.990, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.226, optim0_lr0=1.068e-04, train_time=5.045 +[gpub003:0/64] 2024-01-28 16:21:38,562 (trainer:753) INFO: 15epoch:train:301-400batch: iter_time=1.006e-04, forward_time=0.299, loss_ctc=90.421, loss_interctc_layer6=102.106, loss_interctc_layer12=87.670, loss_interctc_layer15=81.791, loss_interctc_layer21=92.651, loss=90.928, backward_time=0.767, grad_norm=77.971, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.232, optim0_lr0=1.068e-04, train_time=4.684 +[gpub003:0/64] 2024-01-28 16:28:33,894 (trainer:753) INFO: 15epoch:train:401-500batch: iter_time=9.878e-05, forward_time=0.201, loss_ctc=82.699, loss_interctc_layer6=86.610, loss_interctc_layer12=73.187, loss_interctc_layer15=67.700, loss_interctc_layer21=84.984, loss=79.036, backward_time=0.668, grad_norm=73.608, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.225, optim0_lr0=1.068e-04, train_time=4.154 +[gpub003:0/64] 2024-01-28 16:35:35,349 (trainer:753) INFO: 15epoch:train:501-600batch: iter_time=9.799e-05, forward_time=0.249, loss_ctc=89.249, loss_interctc_layer6=99.024, loss_interctc_layer12=83.595, loss_interctc_layer15=77.380, loss_interctc_layer21=91.433, loss=88.136, backward_time=0.696, grad_norm=69.786, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.227, optim0_lr0=1.068e-04, train_time=4.214 +[gpub003:0/64] 2024-01-28 16:42:24,385 (trainer:753) INFO: 15epoch:train:601-700batch: iter_time=9.361e-05, forward_time=0.258, loss_ctc=99.164, loss_interctc_layer6=105.419, loss_interctc_layer12=89.690, loss_interctc_layer15=83.891, loss_interctc_layer21=102.163, loss=96.065, backward_time=0.686, grad_norm=110.498, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.227, optim0_lr0=1.067e-04, train_time=4.090 +[gpub003:0/64] 2024-01-28 16:49:27,266 (trainer:753) INFO: 15epoch:train:701-800batch: iter_time=9.152e-05, forward_time=0.200, loss_ctc=81.717, loss_interctc_layer6=89.591, loss_interctc_layer12=75.074, loss_interctc_layer15=69.421, loss_interctc_layer21=83.949, loss=79.950, backward_time=0.673, grad_norm=73.191, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.225, optim0_lr0=1.067e-04, train_time=4.228 +[gpub003:0/64] 2024-01-28 16:55:59,537 (trainer:753) INFO: 15epoch:train:801-900batch: iter_time=9.338e-05, forward_time=0.200, loss_ctc=80.821, loss_interctc_layer6=88.086, loss_interctc_layer12=73.859, loss_interctc_layer15=68.195, 
loss_interctc_layer21=82.977, loss=78.787, backward_time=0.641, grad_norm=113.440, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.225, optim0_lr0=1.067e-04, train_time=3.923 +[gpub003:0/64] 2024-01-28 17:02:30,728 (trainer:753) INFO: 15epoch:train:901-1000batch: iter_time=7.816e-04, forward_time=0.244, loss_ctc=83.936, loss_interctc_layer6=95.183, loss_interctc_layer12=79.928, loss_interctc_layer15=74.010, loss_interctc_layer21=85.808, loss=83.773, backward_time=0.609, grad_norm=77.656, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.232, optim0_lr0=1.067e-04, train_time=3.911 +[gpub003:0/64] 2024-01-28 17:09:09,858 (trainer:753) INFO: 15epoch:train:1001-1100batch: iter_time=2.243e-04, forward_time=0.261, loss_ctc=81.338, loss_interctc_layer6=87.617, loss_interctc_layer12=73.602, loss_interctc_layer15=67.777, loss_interctc_layer21=82.950, loss=78.657, backward_time=0.679, grad_norm=80.082, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.227, optim0_lr0=1.066e-04, train_time=3.991 +[gpub003:0/64] 2024-01-28 17:15:38,974 (trainer:753) INFO: 15epoch:train:1101-1200batch: iter_time=8.732e-05, forward_time=0.221, loss_ctc=101.477, loss_interctc_layer6=92.520, loss_interctc_layer12=77.928, loss_interctc_layer15=71.938, loss_interctc_layer21=104.476, loss=89.668, backward_time=0.613, grad_norm=85.788, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.226, optim0_lr0=1.066e-04, train_time=3.891 +[gpub003:0/64] 2024-01-28 17:19:04,390 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpub003:0/64] 2024-01-28 17:19:23,290 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub003:0/64] 2024-01-28 17:19:26,711 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub003:0/64] 2024-01-28 17:19:26,711 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub003:0/64] 2024-01-28 17:19:26,872 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub003:0/64] 2024-01-28 17:30:42,008 (trainer:753) INFO: 15epoch:train:1201-1300batch: iter_time=2.944, forward_time=0.201, loss_ctc=85.573, loss_interctc_layer6=89.212, loss_interctc_layer12=74.879, loss_interctc_layer15=69.047, loss_interctc_layer21=87.530, loss=81.248, backward_time=0.565, grad_norm=72.760, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.226, optim0_lr0=1.066e-04, train_time=9.030 +[gpub003:0/64] 2024-01-28 17:38:57,425 (trainer:753) INFO: 15epoch:train:1301-1400batch: iter_time=8.956e-05, forward_time=0.203, loss_ctc=78.636, loss_interctc_layer6=87.858, loss_interctc_layer12=73.287, loss_interctc_layer15=67.371, loss_interctc_layer21=80.666, loss=77.563, backward_time=0.992, grad_norm=61.383, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.226, optim0_lr0=1.066e-04, train_time=4.954 +[gpub003:0/64] 2024-01-28 17:49:49,706 (trainer:753) INFO: 15epoch:train:1401-1500batch: iter_time=9.386e-05, forward_time=0.202, loss_ctc=83.318, loss_interctc_layer6=90.789, loss_interctc_layer12=75.975, 
loss_interctc_layer15=70.236, loss_interctc_layer21=84.724, loss=81.009, backward_time=1.239, grad_norm=70.524, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.226, optim0_lr0=1.065e-04, train_time=6.523 +[gpub003:0/64] 2024-01-28 17:59:27,155 (trainer:753) INFO: 15epoch:train:1501-1600batch: iter_time=9.623e-05, forward_time=0.229, loss_ctc=104.045, loss_interctc_layer6=106.678, loss_interctc_layer12=90.111, loss_interctc_layer15=83.576, loss_interctc_layer21=107.183, loss=98.319, backward_time=1.172, grad_norm=75.303, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.227, optim0_lr0=1.065e-04, train_time=5.774 +[gpub003:0/64] 2024-01-28 18:08:36,471 (trainer:753) INFO: 15epoch:train:1601-1700batch: iter_time=9.498e-05, forward_time=0.226, loss_ctc=77.227, loss_interctc_layer6=85.242, loss_interctc_layer12=72.218, loss_interctc_layer15=66.779, loss_interctc_layer21=79.037, loss=76.101, backward_time=0.959, grad_norm=74.635, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.228, optim0_lr0=1.065e-04, train_time=5.493 +[gpub003:0/64] 2024-01-28 18:15:23,977 (trainer:753) INFO: 15epoch:train:1701-1800batch: iter_time=9.299e-05, forward_time=0.261, loss_ctc=87.305, loss_interctc_layer6=96.460, loss_interctc_layer12=81.444, loss_interctc_layer15=75.404, loss_interctc_layer21=89.701, loss=86.063, backward_time=0.681, grad_norm=78.901, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.227, optim0_lr0=1.065e-04, train_time=4.075 +[gpub003:0/64] 2024-01-28 18:21:12,342 (trainer:753) INFO: 15epoch:train:1801-1900batch: iter_time=9.059e-05, forward_time=0.201, loss_ctc=89.519, loss_interctc_layer6=96.660, loss_interctc_layer12=81.009, loss_interctc_layer15=74.574, loss_interctc_layer21=91.694, loss=86.691, backward_time=0.560, grad_norm=97.308, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.225, optim0_lr0=1.064e-04, train_time=3.483 +[gpub003:0/64] 2024-01-28 18:27:09,120 (trainer:753) INFO: 15epoch:train:1901-2000batch: iter_time=9.220e-05, forward_time=0.201, loss_ctc=97.623, loss_interctc_layer6=99.705, loss_interctc_layer12=84.164, loss_interctc_layer15=78.562, loss_interctc_layer21=100.376, loss=92.086, backward_time=0.597, grad_norm=200.500, clip=100.000, loss_scale=3.225e+31, optim_step_time=0.225, optim0_lr0=1.064e-04, train_time=3.568 +[gpub003:0/64] 2024-01-28 18:33:16,104 (trainer:753) INFO: 15epoch:train:2001-2100batch: iter_time=8.659e-05, forward_time=0.201, loss_ctc=73.769, loss_interctc_layer6=89.654, loss_interctc_layer12=75.000, loss_interctc_layer15=68.950, loss_interctc_layer21=75.700, loss=76.614, backward_time=0.589, grad_norm=72.876, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.225, optim0_lr0=1.064e-04, train_time=3.669 +[gpub003:0/64] 2024-01-28 18:39:20,880 (trainer:753) INFO: 15epoch:train:2101-2200batch: iter_time=8.270e-05, forward_time=0.201, loss_ctc=82.022, loss_interctc_layer6=89.344, loss_interctc_layer12=75.245, loss_interctc_layer15=69.319, loss_interctc_layer21=84.032, loss=79.992, backward_time=0.568, grad_norm=80.384, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.225, optim0_lr0=1.064e-04, train_time=3.648 +[gpub003:0/64] 2024-01-28 18:45:08,952 (trainer:753) INFO: 15epoch:train:2201-2300batch: iter_time=8.465e-05, forward_time=0.202, loss_ctc=88.483, loss_interctc_layer6=94.046, loss_interctc_layer12=78.812, loss_interctc_layer15=72.716, loss_interctc_layer21=90.389, loss=84.889, backward_time=0.549, grad_norm=82.635, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.225, optim0_lr0=1.063e-04, train_time=3.480 
+[gpub003:0/64] 2024-01-28 18:50:42,551 (trainer:753) INFO: 15epoch:train:2301-2400batch: iter_time=8.474e-05, forward_time=0.202, loss_ctc=91.181, loss_interctc_layer6=89.697, loss_interctc_layer12=75.630, loss_interctc_layer15=69.748, loss_interctc_layer21=93.883, loss=84.028, backward_time=0.550, grad_norm=71.893, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.225, optim0_lr0=1.063e-04, train_time=3.336 +[gpub003:0/64] 2024-01-28 18:56:31,376 (trainer:753) INFO: 15epoch:train:2401-2500batch: iter_time=8.501e-05, forward_time=0.258, loss_ctc=81.284, loss_interctc_layer6=80.424, loss_interctc_layer12=66.961, loss_interctc_layer15=61.547, loss_interctc_layer21=83.563, loss=74.756, backward_time=0.546, grad_norm=63.722, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.226, optim0_lr0=1.063e-04, train_time=3.488 +[gpub003:0/64] 2024-01-28 18:56:46,491 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub003:0/64] 2024-01-28 18:57:04,696 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub003:0/64] 2024-01-28 18:57:08,058 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub003:0/64] 2024-01-28 18:57:08,059 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub003:0/64] 2024-01-28 18:57:08,165 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub003:0/64] 2024-01-28 19:08:03,224 (trainer:753) INFO: 15epoch:train:2501-2600batch: iter_time=3.277, forward_time=0.204, loss_ctc=89.379, loss_interctc_layer6=93.917, loss_interctc_layer12=78.484, loss_interctc_layer15=71.930, loss_interctc_layer21=91.540, loss=85.050, backward_time=0.534, grad_norm=70.945, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.226, optim0_lr0=1.063e-04, train_time=6.918 +[gpub003:0/64] 2024-01-28 19:13:47,518 (trainer:753) INFO: 15epoch:train:2601-2700batch: iter_time=8.242e-05, forward_time=0.202, loss_ctc=93.414, loss_interctc_layer6=89.807, loss_interctc_layer12=75.195, loss_interctc_layer15=69.318, loss_interctc_layer21=96.518, loss=84.851, backward_time=0.571, grad_norm=70.844, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.225, optim0_lr0=1.062e-04, train_time=3.443 +[gpub003:0/64] 2024-01-28 19:20:42,751 (trainer:753) INFO: 15epoch:train:2701-2800batch: iter_time=8.029e-05, forward_time=0.213, loss_ctc=94.446, loss_interctc_layer6=93.842, loss_interctc_layer12=78.363, loss_interctc_layer15=72.282, loss_interctc_layer21=96.955, loss=87.178, backward_time=0.647, grad_norm=69.429, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.225, optim0_lr0=1.062e-04, train_time=4.151 +[gpub003:0/64] 2024-01-28 19:27:34,435 (trainer:753) INFO: 15epoch:train:2801-2900batch: iter_time=8.263e-05, forward_time=0.239, loss_ctc=96.629, loss_interctc_layer6=99.784, loss_interctc_layer12=84.965, loss_interctc_layer15=78.637, loss_interctc_layer21=99.546, loss=91.912, backward_time=0.693, grad_norm=85.751, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.228, optim0_lr0=1.062e-04, 
train_time=4.117 +[gpub003:0/64] 2024-01-28 19:33:22,223 (trainer:753) INFO: 15epoch:train:2901-3000batch: iter_time=8.031e-05, forward_time=0.201, loss_ctc=87.495, loss_interctc_layer6=84.939, loss_interctc_layer12=71.461, loss_interctc_layer15=65.956, loss_interctc_layer21=89.865, loss=79.943, backward_time=0.527, grad_norm=73.586, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.225, optim0_lr0=1.062e-04, train_time=3.478 +[gpub003:0/64] 2024-01-28 19:39:24,950 (trainer:753) INFO: 15epoch:train:3001-3100batch: iter_time=8.274e-05, forward_time=0.204, loss_ctc=94.702, loss_interctc_layer6=97.918, loss_interctc_layer12=82.460, loss_interctc_layer15=76.225, loss_interctc_layer21=96.996, loss=89.660, backward_time=0.572, grad_norm=89.092, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.225, optim0_lr0=1.061e-04, train_time=3.627 +[gpub003:0/64] 2024-01-28 19:45:22,260 (trainer:753) INFO: 15epoch:train:3101-3200batch: iter_time=7.969e-05, forward_time=0.202, loss_ctc=102.319, loss_interctc_layer6=102.588, loss_interctc_layer12=87.472, loss_interctc_layer15=80.613, loss_interctc_layer21=106.018, loss=95.802, backward_time=0.580, grad_norm=87.013, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.225, optim0_lr0=1.061e-04, train_time=3.572 +[gpub003:0/64] 2024-01-28 19:51:15,282 (trainer:753) INFO: 15epoch:train:3201-3300batch: iter_time=7.959e-05, forward_time=0.202, loss_ctc=87.366, loss_interctc_layer6=88.542, loss_interctc_layer12=74.152, loss_interctc_layer15=68.291, loss_interctc_layer21=90.064, loss=81.683, backward_time=0.554, grad_norm=77.221, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.225, optim0_lr0=1.061e-04, train_time=3.531 +[gpub003:0/64] 2024-01-28 19:57:08,944 (trainer:753) INFO: 15epoch:train:3301-3400batch: iter_time=8.061e-05, forward_time=0.201, loss_ctc=85.554, loss_interctc_layer6=87.799, loss_interctc_layer12=73.387, loss_interctc_layer15=67.544, loss_interctc_layer21=87.523, loss=80.361, backward_time=0.595, grad_norm=73.781, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.225, optim0_lr0=1.061e-04, train_time=3.536 +[gpub003:0/64] 2024-01-28 20:03:03,051 (trainer:753) INFO: 15epoch:train:3401-3500batch: iter_time=8.639e-05, forward_time=0.262, loss_ctc=89.455, loss_interctc_layer6=94.213, loss_interctc_layer12=79.389, loss_interctc_layer15=73.115, loss_interctc_layer21=91.917, loss=85.618, backward_time=0.536, grad_norm=82.868, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.227, optim0_lr0=1.060e-04, train_time=3.541 +[gpub003:0/64] 2024-01-28 20:08:55,704 (trainer:753) INFO: 15epoch:train:3501-3600batch: iter_time=8.545e-05, forward_time=0.213, loss_ctc=85.781, loss_interctc_layer6=86.769, loss_interctc_layer12=72.655, loss_interctc_layer15=66.808, loss_interctc_layer21=88.017, loss=80.006, backward_time=0.539, grad_norm=84.049, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.226, optim0_lr0=1.060e-04, train_time=3.526 +[gpub003:0/64] 2024-01-28 20:15:43,075 (trainer:753) INFO: 15epoch:train:3601-3700batch: iter_time=8.575e-05, forward_time=0.205, loss_ctc=106.352, loss_interctc_layer6=92.116, loss_interctc_layer12=77.320, loss_interctc_layer15=71.342, loss_interctc_layer21=109.451, loss=91.316, backward_time=0.709, grad_norm=87.775, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.226, optim0_lr0=1.060e-04, train_time=4.073 +[gpub003:0/64] 2024-01-28 20:18:57,518 (multiple_iter_factory:32) INFO: Building 3th iter-factory... 
+[gpub003:0/64] 2024-01-28 20:19:15,661 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub003:0/64] 2024-01-28 20:19:19,120 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub003:0/64] 2024-01-28 20:19:19,120 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub003:0/64] 2024-01-28 20:19:19,124 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub003:0/64] 2024-01-28 20:34:30,817 (trainer:753) INFO: 15epoch:train:3701-3800batch: iter_time=2.866, forward_time=0.270, loss_ctc=88.988, loss_interctc_layer6=87.953, loss_interctc_layer12=73.593, loss_interctc_layer15=67.775, loss_interctc_layer21=91.138, loss=81.889, backward_time=0.513, grad_norm=63.322, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.229, optim0_lr0=1.060e-04, train_time=11.276 +[gpub003:0/64] 2024-01-28 20:41:00,420 (trainer:753) INFO: 15epoch:train:3801-3900batch: iter_time=8.606e-05, forward_time=0.201, loss_ctc=80.412, loss_interctc_layer6=86.534, loss_interctc_layer12=71.955, loss_interctc_layer15=66.072, loss_interctc_layer21=82.653, loss=77.525, backward_time=0.622, grad_norm=68.586, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.225, optim0_lr0=1.059e-04, train_time=3.897 +[gpub003:0/64] 2024-01-28 20:47:48,859 (trainer:753) INFO: 15epoch:train:3901-4000batch: iter_time=8.505e-05, forward_time=0.202, loss_ctc=85.361, loss_interctc_layer6=89.647, loss_interctc_layer12=74.813, loss_interctc_layer15=68.877, loss_interctc_layer21=87.616, loss=81.263, backward_time=0.650, grad_norm=75.803, clip=100.000, loss_scale=6.450e+31, optim_step_time=0.225, optim0_lr0=1.059e-04, train_time=4.084 +[gpub003:0/64] 2024-01-28 20:54:23,879 (trainer:753) INFO: 15epoch:train:4001-4100batch: iter_time=9.009e-05, forward_time=0.223, loss_ctc=110.092, loss_interctc_layer6=106.557, loss_interctc_layer12=89.798, loss_interctc_layer15=83.223, loss_interctc_layer21=113.158, loss=100.566, backward_time=0.651, grad_norm=79.547, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.227, optim0_lr0=1.059e-04, train_time=3.950 +[gpub003:0/64] 2024-01-28 21:00:23,359 (trainer:753) INFO: 15epoch:train:4101-4200batch: iter_time=8.802e-05, forward_time=0.230, loss_ctc=82.649, loss_interctc_layer6=83.888, loss_interctc_layer12=70.726, loss_interctc_layer15=65.984, loss_interctc_layer21=84.771, loss=77.604, backward_time=0.591, grad_norm=84.995, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.228, optim0_lr0=1.059e-04, train_time=3.595 +[gpub003:0/64] 2024-01-28 21:07:22,032 (trainer:753) INFO: 15epoch:train:4201-4300batch: iter_time=9.044e-05, forward_time=0.212, loss_ctc=93.278, loss_interctc_layer6=94.894, loss_interctc_layer12=79.909, loss_interctc_layer15=74.115, loss_interctc_layer21=95.831, loss=87.606, backward_time=0.657, grad_norm=76.539, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.225, optim0_lr0=1.058e-04, train_time=4.187 +[gpub003:0/64] 2024-01-28 21:13:52,654 (trainer:753) INFO: 
15epoch:train:4301-4400batch: iter_time=8.889e-05, forward_time=0.204, loss_ctc=95.156, loss_interctc_layer6=96.137, loss_interctc_layer12=80.270, loss_interctc_layer15=73.838, loss_interctc_layer21=97.806, loss=88.641, backward_time=0.588, grad_norm=90.276, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.226, optim0_lr0=1.058e-04, train_time=3.906 +[gpub003:0/64] 2024-01-28 21:19:47,382 (trainer:753) INFO: 15epoch:train:4401-4500batch: iter_time=8.945e-05, forward_time=0.203, loss_ctc=104.802, loss_interctc_layer6=98.904, loss_interctc_layer12=83.812, loss_interctc_layer15=77.216, loss_interctc_layer21=107.643, loss=94.475, backward_time=0.543, grad_norm=87.474, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.226, optim0_lr0=1.058e-04, train_time=3.547 +[gpub003:0/64] 2024-01-28 21:25:56,874 (trainer:753) INFO: 15epoch:train:4501-4600batch: iter_time=8.991e-05, forward_time=0.261, loss_ctc=76.419, loss_interctc_layer6=90.087, loss_interctc_layer12=75.167, loss_interctc_layer15=69.162, loss_interctc_layer21=78.287, loss=77.824, backward_time=0.580, grad_norm=90.053, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.227, optim0_lr0=1.058e-04, train_time=3.694 +[gpub003:0/64] 2024-01-28 21:32:50,469 (trainer:753) INFO: 15epoch:train:4601-4700batch: iter_time=8.978e-05, forward_time=0.201, loss_ctc=85.364, loss_interctc_layer6=88.634, loss_interctc_layer12=74.488, loss_interctc_layer15=68.727, loss_interctc_layer21=87.425, loss=80.928, backward_time=0.655, grad_norm=73.996, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.225, optim0_lr0=1.057e-04, train_time=4.137 +[gpub003:0/64] 2024-01-28 21:35:29,871 (trainer:684) WARNING: The grad norm is nan. Skipping updating the model. +[gpub003:0/64] 2024-01-28 21:39:46,358 (trainer:753) INFO: 15epoch:train:4701-4800batch: iter_time=8.840e-05, forward_time=0.202, loss_ctc=98.448, loss_interctc_layer6=94.341, loss_interctc_layer12=79.320, loss_interctc_layer15=73.022, loss_interctc_layer21=101.051, loss=89.236, backward_time=0.671, grad_norm=73.468, clip=100.000, loss_scale=5.654e+31, optim_step_time=0.225, optim0_lr0=1.057e-04, train_time=4.159 +[gpub003:0/64] 2024-01-28 21:46:00,524 (trainer:753) INFO: 15epoch:train:4801-4900batch: iter_time=9.165e-05, forward_time=0.201, loss_ctc=93.746, loss_interctc_layer6=89.175, loss_interctc_layer12=75.083, loss_interctc_layer15=69.388, loss_interctc_layer21=96.131, loss=84.705, backward_time=0.530, grad_norm=67.383, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.225, optim0_lr0=1.057e-04, train_time=3.741 +[gpub003:0/64] 2024-01-28 21:52:06,087 (trainer:753) INFO: 15epoch:train:4901-5000batch: iter_time=8.375e-05, forward_time=0.217, loss_ctc=84.568, loss_interctc_layer6=80.468, loss_interctc_layer12=66.930, loss_interctc_layer15=61.466, loss_interctc_layer21=86.888, loss=76.064, backward_time=0.555, grad_norm=66.813, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.226, optim0_lr0=1.057e-04, train_time=3.655 +[gpub003:0/64] 2024-01-28 21:52:20,307 (multiple_iter_factory:32) INFO: Building 4th iter-factory... 
+[gpub003:0/64] 2024-01-28 21:52:38,809 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub003:0/64] 2024-01-28 21:52:42,247 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub003:0/64] 2024-01-28 21:52:42,247 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub003:0/64] 2024-01-28 21:52:42,282 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub003:0/64] 2024-01-28 22:12:09,925 (trainer:753) INFO: 15epoch:train:5001-5100batch: iter_time=3.032, forward_time=0.213, loss_ctc=89.251, loss_interctc_layer6=94.262, loss_interctc_layer12=78.617, loss_interctc_layer15=72.277, loss_interctc_layer21=91.683, loss=85.218, backward_time=1.200, grad_norm=72.380, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.225, optim0_lr0=1.056e-04, train_time=12.038 +[gpub003:0/64] 2024-01-28 22:20:49,124 (trainer:753) INFO: 15epoch:train:5101-5200batch: iter_time=8.691e-05, forward_time=0.201, loss_ctc=91.783, loss_interctc_layer6=89.445, loss_interctc_layer12=74.951, loss_interctc_layer15=69.191, loss_interctc_layer21=94.187, loss=83.911, backward_time=0.752, grad_norm=66.914, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.225, optim0_lr0=1.056e-04, train_time=5.192 +[gpub003:0/64] 2024-01-28 22:30:03,123 (trainer:753) INFO: 15epoch:train:5201-5300batch: iter_time=8.926e-05, forward_time=0.201, loss_ctc=93.356, loss_interctc_layer6=93.540, loss_interctc_layer12=78.166, loss_interctc_layer15=72.131, loss_interctc_layer21=95.775, loss=86.594, backward_time=0.787, grad_norm=70.785, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.225, optim0_lr0=1.056e-04, train_time=5.540 +[gpub003:0/64] 2024-01-28 22:38:19,318 (trainer:753) INFO: 15epoch:train:5301-5400batch: iter_time=9.077e-05, forward_time=0.202, loss_ctc=94.478, loss_interctc_layer6=98.093, loss_interctc_layer12=83.137, loss_interctc_layer15=77.029, loss_interctc_layer21=96.902, loss=89.928, backward_time=0.782, grad_norm=74.281, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.225, optim0_lr0=1.056e-04, train_time=4.962 +[gpub003:0/64] 2024-01-28 22:48:34,895 (trainer:753) INFO: 15epoch:train:5401-5500batch: iter_time=8.938e-05, forward_time=0.201, loss_ctc=85.651, loss_interctc_layer6=84.413, loss_interctc_layer12=70.412, loss_interctc_layer15=65.238, loss_interctc_layer21=87.934, loss=78.730, backward_time=1.418, grad_norm=75.064, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.225, optim0_lr0=1.055e-04, train_time=6.156 +[gpub003:0/64] 2024-01-28 23:00:46,630 (trainer:753) INFO: 15epoch:train:5501-5600batch: iter_time=9.438e-05, forward_time=0.203, loss_ctc=93.882, loss_interctc_layer6=97.663, loss_interctc_layer12=82.152, loss_interctc_layer15=75.949, loss_interctc_layer21=96.475, loss=89.224, backward_time=1.521, grad_norm=72.628, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.225, optim0_lr0=1.055e-04, train_time=7.317 +[gpub003:0/64] 2024-01-28 23:16:00,879 (trainer:753) INFO: 15epoch:train:5601-5700batch: 
iter_time=9.478e-05, forward_time=0.219, loss_ctc=100.426, loss_interctc_layer6=101.575, loss_interctc_layer12=86.877, loss_interctc_layer15=80.274, loss_interctc_layer21=102.754, loss=94.381, backward_time=1.679, grad_norm=100.434, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.225, optim0_lr0=1.055e-04, train_time=9.142 +[gpub003:0/64] 2024-01-28 23:25:21,684 (trainer:753) INFO: 15epoch:train:5701-5800batch: iter_time=8.829e-05, forward_time=0.201, loss_ctc=85.696, loss_interctc_layer6=87.523, loss_interctc_layer12=73.056, loss_interctc_layer15=67.219, loss_interctc_layer21=88.434, loss=80.386, backward_time=0.883, grad_norm=83.883, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.225, optim0_lr0=1.055e-04, train_time=5.608 +[gpub003:0/64] 2024-01-28 23:31:37,533 (trainer:753) INFO: 15epoch:train:5801-5900batch: iter_time=8.648e-05, forward_time=0.202, loss_ctc=83.686, loss_interctc_layer6=87.437, loss_interctc_layer12=72.940, loss_interctc_layer15=67.438, loss_interctc_layer21=86.007, loss=79.502, backward_time=0.601, grad_norm=77.662, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.225, optim0_lr0=1.054e-04, train_time=3.758 +[gpub003:0/64] 2024-01-28 23:37:56,703 (trainer:753) INFO: 15epoch:train:5901-6000batch: iter_time=9.015e-05, forward_time=0.201, loss_ctc=89.956, loss_interctc_layer6=94.034, loss_interctc_layer12=78.710, loss_interctc_layer15=72.390, loss_interctc_layer21=91.804, loss=85.379, backward_time=0.627, grad_norm=114.354, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.225, optim0_lr0=1.054e-04, train_time=3.791 +[gpub003:0/64] 2024-01-28 23:44:33,338 (trainer:753) INFO: 15epoch:train:6001-6100batch: iter_time=9.562e-05, forward_time=0.201, loss_ctc=85.903, loss_interctc_layer6=86.610, loss_interctc_layer12=72.357, loss_interctc_layer15=66.624, loss_interctc_layer21=87.971, loss=79.893, backward_time=0.631, grad_norm=71.655, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.225, optim0_lr0=1.054e-04, train_time=3.966 +[gpub003:0/64] 2024-01-28 23:50:32,463 (trainer:753) INFO: 15epoch:train:6101-6200batch: iter_time=9.266e-05, forward_time=0.202, loss_ctc=104.362, loss_interctc_layer6=90.634, loss_interctc_layer12=75.843, loss_interctc_layer15=69.989, loss_interctc_layer21=107.427, loss=89.651, backward_time=0.576, grad_norm=77.255, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.225, optim0_lr0=1.054e-04, train_time=3.591 +[gpub003:0/64] 2024-01-28 23:53:58,380 (multiple_iter_factory:32) INFO: Building 5th iter-factory... 
+[gpub003:0/64] 2024-01-28 23:54:16,637 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub003:0/64] 2024-01-28 23:54:20,082 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub003:0/64] 2024-01-28 23:54:20,082 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub003:0/64] 2024-01-28 23:54:20,085 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub003:0/64] 2024-01-29 00:04:32,519 (trainer:753) INFO: 15epoch:train:6201-6300batch: iter_time=1.816, forward_time=0.245, loss_ctc=87.343, loss_interctc_layer6=87.167, loss_interctc_layer12=72.714, loss_interctc_layer15=66.990, loss_interctc_layer21=89.524, loss=80.748, backward_time=0.522, grad_norm=68.873, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.226, optim0_lr0=1.054e-04, train_time=8.399 +[gpub003:0/64] 2024-01-29 00:09:57,981 (trainer:753) INFO: 15epoch:train:6301-6400batch: iter_time=8.497e-05, forward_time=0.201, loss_ctc=80.384, loss_interctc_layer6=86.251, loss_interctc_layer12=71.655, loss_interctc_layer15=65.716, loss_interctc_layer21=82.488, loss=77.299, backward_time=0.533, grad_norm=65.046, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.225, optim0_lr0=1.053e-04, train_time=3.256 +[gpub003:0/64] 2024-01-29 00:15:22,916 (trainer:753) INFO: 15epoch:train:6401-6500batch: iter_time=8.528e-05, forward_time=0.201, loss_ctc=85.581, loss_interctc_layer6=89.501, loss_interctc_layer12=74.888, loss_interctc_layer15=68.869, loss_interctc_layer21=87.779, loss=81.324, backward_time=0.519, grad_norm=66.910, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.225, optim0_lr0=1.053e-04, train_time=3.249 +[gpub003:0/64] 2024-01-29 00:21:12,386 (trainer:753) INFO: 15epoch:train:6501-6600batch: iter_time=8.740e-05, forward_time=0.202, loss_ctc=107.767, loss_interctc_layer6=104.897, loss_interctc_layer12=88.352, loss_interctc_layer15=81.743, loss_interctc_layer21=110.941, loss=98.740, backward_time=0.557, grad_norm=80.537, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.225, optim0_lr0=1.053e-04, train_time=3.494 +[gpub003:0/64] 2024-01-29 00:27:10,156 (trainer:753) INFO: 15epoch:train:6601-6700batch: iter_time=8.709e-05, forward_time=0.201, loss_ctc=81.491, loss_interctc_layer6=83.038, loss_interctc_layer12=69.787, loss_interctc_layer15=64.646, loss_interctc_layer21=83.179, loss=76.428, backward_time=0.544, grad_norm=69.567, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.225, optim0_lr0=1.053e-04, train_time=3.577 +[gpub003:0/64] 2024-01-29 00:33:50,833 (trainer:753) INFO: 15epoch:train:6701-6800batch: iter_time=8.515e-05, forward_time=0.202, loss_ctc=93.312, loss_interctc_layer6=95.102, loss_interctc_layer12=79.955, loss_interctc_layer15=74.030, loss_interctc_layer21=95.786, loss=87.637, backward_time=0.610, grad_norm=72.744, clip=100.000, loss_scale=6.490e+31, optim_step_time=0.225, optim0_lr0=1.052e-04, train_time=4.007 +[gpub003:0/64] 2024-01-29 00:39:31,857 (trainer:753) INFO: 
15epoch:train:6801-6900batch: iter_time=8.894e-05, forward_time=0.213, loss_ctc=94.558, loss_interctc_layer6=95.356, loss_interctc_layer12=79.494, loss_interctc_layer15=73.241, loss_interctc_layer21=97.081, loss=87.946, backward_time=0.532, grad_norm=67.379, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.225, optim0_lr0=1.052e-04, train_time=3.410 +[gpub003:0/64] 2024-01-29 00:45:18,008 (trainer:753) INFO: 15epoch:train:6901-7000batch: iter_time=9.370e-05, forward_time=0.202, loss_ctc=103.708, loss_interctc_layer6=97.677, loss_interctc_layer12=82.201, loss_interctc_layer15=76.312, loss_interctc_layer21=106.738, loss=93.327, backward_time=0.512, grad_norm=78.769, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.225, optim0_lr0=1.052e-04, train_time=3.461 +[gpub003:0/64] 2024-01-29 00:47:19,244 (trainer:684) WARNING: The grad norm is nan. Skipping updating the model. +[gpub003:0/64] 2024-01-29 00:51:01,985 (trainer:753) INFO: 15epoch:train:7001-7100batch: iter_time=8.899e-05, forward_time=0.201, loss_ctc=76.470, loss_interctc_layer6=89.370, loss_interctc_layer12=74.579, loss_interctc_layer15=68.556, loss_interctc_layer21=78.454, loss=77.486, backward_time=0.558, grad_norm=79.373, clip=100.000, loss_scale=5.327e+31, optim_step_time=0.225, optim0_lr0=1.052e-04, train_time=3.440 +[gpub003:0/64] 2024-01-29 00:51:25,900 (trainer:684) WARNING: The grad norm is nan. Skipping updating the model. +[gpub003:0/64] 2024-01-29 00:56:46,189 (trainer:753) INFO: 15epoch:train:7101-7200batch: iter_time=8.895e-05, forward_time=0.217, loss_ctc=85.210, loss_interctc_layer6=88.201, loss_interctc_layer12=73.963, loss_interctc_layer15=68.204, loss_interctc_layer21=87.390, loss=80.594, backward_time=0.514, grad_norm=67.258, clip=100.000, loss_scale=2.151e+31, optim_step_time=0.225, optim0_lr0=1.051e-04, train_time=3.442 +[gpub003:0/64] 2024-01-29 01:03:27,596 (trainer:753) INFO: 15epoch:train:7201-7300batch: iter_time=9.257e-05, forward_time=0.234, loss_ctc=97.098, loss_interctc_layer6=93.934, loss_interctc_layer12=78.524, loss_interctc_layer15=72.396, loss_interctc_layer21=99.278, loss=88.246, backward_time=0.613, grad_norm=78.373, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.228, optim0_lr0=1.051e-04, train_time=4.014 +[gpub003:0/64] 2024-01-29 01:09:53,144 (trainer:753) INFO: 15epoch:train:7301-7400batch: iter_time=8.851e-05, forward_time=0.207, loss_ctc=94.644, loss_interctc_layer6=88.531, loss_interctc_layer12=74.374, loss_interctc_layer15=68.699, loss_interctc_layer21=97.121, loss=84.674, backward_time=0.531, grad_norm=104.913, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.225, optim0_lr0=1.051e-04, train_time=3.855 +[gpub003:0/64] 2024-01-29 01:15:54,592 (trainer:753) INFO: 15epoch:train:7401-7500batch: iter_time=8.480e-05, forward_time=0.211, loss_ctc=83.186, loss_interctc_layer6=79.739, loss_interctc_layer12=66.177, loss_interctc_layer15=60.745, loss_interctc_layer21=85.680, loss=75.105, backward_time=0.549, grad_norm=70.133, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.225, optim0_lr0=1.051e-04, train_time=3.615 +[gpub003:0/64] 2024-01-29 01:16:07,241 (multiple_iter_factory:32) INFO: Building 6th iter-factory... 
+[gpub003:0/64] 2024-01-29 01:16:25,762 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub003:0/64] 2024-01-29 01:16:29,172 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub003:0/64] 2024-01-29 01:16:29,172 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub003:0/64] 2024-01-29 01:16:29,176 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub003:0/64] 2024-01-29 01:25:23,566 (trainer:684) WARNING: The grad norm is nan. Skipping updating the model. +[gpub003:0/64] 2024-01-29 01:29:25,317 (trainer:753) INFO: 15epoch:train:7501-7600batch: iter_time=4.974, forward_time=0.234, loss_ctc=87.038, loss_interctc_layer6=93.225, loss_interctc_layer12=77.447, loss_interctc_layer15=71.279, loss_interctc_layer21=89.264, loss=83.651, backward_time=0.478, grad_norm=67.038, clip=100.000, loss_scale=1.229e+31, optim_step_time=0.226, optim0_lr0=1.050e-04, train_time=8.107 +[gpub003:0/64] 2024-01-29 01:36:12,754 (trainer:753) INFO: 15epoch:train:7601-7700batch: iter_time=8.696e-05, forward_time=0.201, loss_ctc=85.822, loss_interctc_layer6=88.563, loss_interctc_layer12=74.039, loss_interctc_layer15=68.163, loss_interctc_layer21=88.434, loss=81.004, backward_time=0.621, grad_norm=72.860, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.225, optim0_lr0=1.050e-04, train_time=4.074 +[gpub003:0/64] 2024-01-29 01:42:06,319 (trainer:753) INFO: 15epoch:train:7701-7800batch: iter_time=8.762e-05, forward_time=0.203, loss_ctc=89.935, loss_interctc_layer6=92.923, loss_interctc_layer12=77.481, loss_interctc_layer15=71.273, loss_interctc_layer21=92.505, loss=84.823, backward_time=0.511, grad_norm=77.476, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.225, optim0_lr0=1.050e-04, train_time=3.535 +[gpub003:0/64] 2024-01-29 01:48:23,714 (trainer:753) INFO: 15epoch:train:7801-7900batch: iter_time=9.072e-05, forward_time=0.311, loss_ctc=86.799, loss_interctc_layer6=97.230, loss_interctc_layer12=82.165, loss_interctc_layer15=76.473, loss_interctc_layer21=89.133, loss=86.360, backward_time=0.607, grad_norm=75.713, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.239, optim0_lr0=1.050e-04, train_time=3.773 +[gpub003:0/64] 2024-01-29 01:54:54,660 (trainer:753) INFO: 15epoch:train:7901-8000batch: iter_time=9.268e-05, forward_time=0.201, loss_ctc=80.907, loss_interctc_layer6=84.011, loss_interctc_layer12=70.133, loss_interctc_layer15=65.238, loss_interctc_layer21=83.291, loss=76.716, backward_time=0.579, grad_norm=90.707, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.225, optim0_lr0=1.049e-04, train_time=3.910 +[gpub003:0/64] 2024-01-29 02:00:13,060 (trainer:753) INFO: 15epoch:train:8001-8100batch: iter_time=9.065e-05, forward_time=0.202, loss_ctc=87.692, loss_interctc_layer6=97.639, loss_interctc_layer12=81.682, loss_interctc_layer15=75.507, loss_interctc_layer21=90.341, loss=86.572, backward_time=0.497, grad_norm=75.046, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.225, 
optim0_lr0=1.049e-04, train_time=3.184 +[gpub003:0/64] 2024-01-29 02:07:29,830 (trainer:753) INFO: 15epoch:train:8101-8200batch: iter_time=9.019e-05, forward_time=0.202, loss_ctc=93.985, loss_interctc_layer6=100.447, loss_interctc_layer12=84.414, loss_interctc_layer15=78.772, loss_interctc_layer21=97.046, loss=90.933, backward_time=0.604, grad_norm=83.533, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.225, optim0_lr0=1.049e-04, train_time=4.367 +[gpub003:0/64] 2024-01-29 02:14:19,677 (trainer:753) INFO: 15epoch:train:8201-8300batch: iter_time=8.936e-05, forward_time=0.330, loss_ctc=80.044, loss_interctc_layer6=87.746, loss_interctc_layer12=73.079, loss_interctc_layer15=67.189, loss_interctc_layer21=82.194, loss=78.050, backward_time=0.624, grad_norm=63.950, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.229, optim0_lr0=1.049e-04, train_time=4.094 +[gpub003:0/64] 2024-01-29 02:20:22,118 (trainer:753) INFO: 15epoch:train:8301-8400batch: iter_time=8.061e-05, forward_time=0.206, loss_ctc=79.244, loss_interctc_layer6=86.924, loss_interctc_layer12=72.420, loss_interctc_layer15=66.661, loss_interctc_layer21=81.657, loss=77.381, backward_time=0.592, grad_norm=64.151, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.224, optim0_lr0=1.048e-04, train_time=3.628 +[gpub003:0/64] 2024-01-29 02:25:56,911 (trainer:753) INFO: 15epoch:train:8401-8500batch: iter_time=8.514e-05, forward_time=0.201, loss_ctc=82.951, loss_interctc_layer6=93.832, loss_interctc_layer12=78.848, loss_interctc_layer15=72.558, loss_interctc_layer21=84.997, loss=82.637, backward_time=0.513, grad_norm=78.615, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.224, optim0_lr0=1.048e-04, train_time=3.349 +[gpub003:0/64] 2024-01-29 02:31:39,349 (trainer:753) INFO: 15epoch:train:8501-8600batch: iter_time=8.151e-05, forward_time=0.201, loss_ctc=80.367, loss_interctc_layer6=86.477, loss_interctc_layer12=72.356, loss_interctc_layer15=66.554, loss_interctc_layer21=82.603, loss=77.671, backward_time=0.562, grad_norm=73.328, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.224, optim0_lr0=1.048e-04, train_time=3.424 +[gpub003:0/64] 2024-01-29 02:37:45,902 (trainer:753) INFO: 15epoch:train:8601-8700batch: iter_time=8.382e-05, forward_time=0.202, loss_ctc=99.954, loss_interctc_layer6=90.031, loss_interctc_layer12=75.328, loss_interctc_layer15=69.356, loss_interctc_layer21=102.874, loss=87.509, backward_time=0.576, grad_norm=65.870, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.224, optim0_lr0=1.048e-04, train_time=3.665 +[gpub003:0/64] 2024-01-29 02:41:04,866 (multiple_iter_factory:32) INFO: Building 7th iter-factory... 
+[gpub003:0/64] 2024-01-29 02:41:23,751 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub003:0/64] 2024-01-29 02:41:27,194 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub003:0/64] 2024-01-29 02:41:27,194 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub003:0/64] 2024-01-29 02:41:27,197 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub003:0/64] 2024-01-29 02:49:42,653 (trainer:753) INFO: 15epoch:train:8701-8800batch: iter_time=3.046, forward_time=0.201, loss_ctc=82.801, loss_interctc_layer6=86.610, loss_interctc_layer12=72.235, loss_interctc_layer15=66.250, loss_interctc_layer21=85.108, loss=78.601, backward_time=0.496, grad_norm=100.678, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.224, optim0_lr0=1.047e-04, train_time=7.167 +[gpub003:0/64] 2024-01-29 02:55:24,896 (trainer:753) INFO: 15epoch:train:8801-8900batch: iter_time=8.449e-05, forward_time=0.322, loss_ctc=76.990, loss_interctc_layer6=86.089, loss_interctc_layer12=71.407, loss_interctc_layer15=65.442, loss_interctc_layer21=79.143, loss=75.814, backward_time=0.515, grad_norm=65.265, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.229, optim0_lr0=1.047e-04, train_time=3.421 +[gpub003:0/64] 2024-01-29 03:01:48,807 (trainer:753) INFO: 15epoch:train:8901-9000batch: iter_time=8.657e-05, forward_time=0.201, loss_ctc=79.686, loss_interctc_layer6=88.327, loss_interctc_layer12=73.462, loss_interctc_layer15=67.566, loss_interctc_layer21=81.638, loss=78.136, backward_time=0.589, grad_norm=66.010, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.225, optim0_lr0=1.047e-04, train_time=3.839 +[gpub003:0/64] 2024-01-29 03:07:37,598 (trainer:753) INFO: 15epoch:train:9001-9100batch: iter_time=8.709e-05, forward_time=0.202, loss_ctc=102.582, loss_interctc_layer6=104.593, loss_interctc_layer12=88.045, loss_interctc_layer15=81.321, loss_interctc_layer21=105.557, loss=96.419, backward_time=0.507, grad_norm=77.130, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.225, optim0_lr0=1.047e-04, train_time=3.489 +[gpub003:0/64] 2024-01-29 03:13:29,861 (trainer:753) INFO: 15epoch:train:9101-9200batch: iter_time=9.786e-05, forward_time=0.201, loss_ctc=74.606, loss_interctc_layer6=82.720, loss_interctc_layer12=69.395, loss_interctc_layer15=64.149, loss_interctc_layer21=76.473, loss=73.469, backward_time=0.518, grad_norm=78.327, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.225, optim0_lr0=1.047e-04, train_time=3.522 +[gpub003:0/64] 2024-01-29 03:19:41,417 (trainer:753) INFO: 15epoch:train:9201-9300batch: iter_time=9.130e-05, forward_time=0.202, loss_ctc=85.206, loss_interctc_layer6=94.680, loss_interctc_layer12=79.592, loss_interctc_layer15=73.783, loss_interctc_layer21=87.602, loss=84.173, backward_time=0.602, grad_norm=94.962, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.225, optim0_lr0=1.046e-04, train_time=3.715 +[gpub003:0/64] 2024-01-29 03:26:28,296 (trainer:753) INFO: 
15epoch:train:9301-9400batch: iter_time=9.232e-05, forward_time=0.201, loss_ctc=88.064, loss_interctc_layer6=95.021, loss_interctc_layer12=79.192, loss_interctc_layer15=72.725, loss_interctc_layer21=90.085, loss=85.017, backward_time=0.599, grad_norm=87.791, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.225, optim0_lr0=1.046e-04, train_time=4.069 +[gpub003:0/64] 2024-01-29 03:32:06,893 (trainer:753) INFO: 15epoch:train:9401-9500batch: iter_time=8.557e-05, forward_time=0.261, loss_ctc=96.223, loss_interctc_layer6=97.164, loss_interctc_layer12=81.169, loss_interctc_layer15=75.931, loss_interctc_layer21=98.904, loss=89.878, backward_time=0.547, grad_norm=79.009, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.232, optim0_lr0=1.046e-04, train_time=3.386 +[gpub003:0/64] 2024-01-29 03:38:03,924 (trainer:753) INFO: 15epoch:train:9501-9600batch: iter_time=9.240e-05, forward_time=0.225, loss_ctc=72.600, loss_interctc_layer6=88.787, loss_interctc_layer12=73.926, loss_interctc_layer15=67.959, loss_interctc_layer21=74.523, loss=75.559, backward_time=0.575, grad_norm=65.318, clip=100.000, loss_scale=1.805e+31, optim_step_time=0.227, optim0_lr0=1.046e-04, train_time=3.570 +[gpub003:0/64] 2024-01-29 03:43:30,919 (trainer:753) INFO: 15epoch:train:9601-9700batch: iter_time=9.817e-05, forward_time=0.201, loss_ctc=81.282, loss_interctc_layer6=87.347, loss_interctc_layer12=73.070, loss_interctc_layer15=67.437, loss_interctc_layer21=83.297, loss=78.486, backward_time=0.516, grad_norm=69.177, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.225, optim0_lr0=1.045e-04, train_time=3.269 +[gpub003:0/64] 2024-01-29 03:49:20,475 (trainer:753) INFO: 15epoch:train:9701-9800batch: iter_time=9.999e-05, forward_time=0.201, loss_ctc=86.626, loss_interctc_layer6=93.900, loss_interctc_layer12=78.138, loss_interctc_layer15=71.996, loss_interctc_layer21=89.193, loss=83.971, backward_time=0.557, grad_norm=101.183, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.226, optim0_lr0=1.045e-04, train_time=3.494 +[gpub003:0/64] 2024-01-29 03:54:53,433 (trainer:753) INFO: 15epoch:train:9801-9900batch: iter_time=1.022e-04, forward_time=0.201, loss_ctc=89.346, loss_interctc_layer6=87.872, loss_interctc_layer12=73.492, loss_interctc_layer15=67.655, loss_interctc_layer21=91.952, loss=82.064, backward_time=0.498, grad_norm=210.726, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.225, optim0_lr0=1.045e-04, train_time=3.331 +[gpub003:0/64] 2024-01-29 04:00:49,197 (trainer:753) INFO: 15epoch:train:9901-10000batch: iter_time=1.055e-04, forward_time=0.202, loss_ctc=79.195, loss_interctc_layer6=79.589, loss_interctc_layer12=65.789, loss_interctc_layer15=60.472, loss_interctc_layer21=81.803, loss=73.369, backward_time=0.532, grad_norm=57.855, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.225, optim0_lr0=1.045e-04, train_time=3.557 +[gpub003:0/64] 2024-01-29 04:01:09,227 (multiple_iter_factory:32) INFO: Building 8th iter-factory... 
+[gpub003:0/64] 2024-01-29 04:01:27,684 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub003:0/64] 2024-01-29 04:01:31,337 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub003:0/64] 2024-01-29 04:01:31,337 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub003:0/64] 2024-01-29 04:01:31,340 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub003:0/64] 2024-01-29 04:16:30,475 (trainer:753) INFO: 15epoch:train:10001-10100batch: iter_time=3.282, forward_time=0.202, loss_ctc=87.692, loss_interctc_layer6=92.656, loss_interctc_layer12=76.930, loss_interctc_layer15=70.557, loss_interctc_layer21=90.037, loss=83.574, backward_time=0.524, grad_norm=70.298, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.225, optim0_lr0=1.044e-04, train_time=9.413 +[gpub003:0/64] 2024-01-29 04:22:23,605 (trainer:753) INFO: 15epoch:train:10101-10200batch: iter_time=9.093e-05, forward_time=0.208, loss_ctc=91.597, loss_interctc_layer6=88.207, loss_interctc_layer12=73.536, loss_interctc_layer15=67.655, loss_interctc_layer21=93.607, loss=82.920, backward_time=0.591, grad_norm=76.632, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.225, optim0_lr0=1.044e-04, train_time=3.531 +[gpub003:0/64] 2024-01-29 04:28:24,325 (trainer:753) INFO: 15epoch:train:10201-10300batch: iter_time=8.777e-05, forward_time=0.254, loss_ctc=92.794, loss_interctc_layer6=93.194, loss_interctc_layer12=77.611, loss_interctc_layer15=71.357, loss_interctc_layer21=95.315, loss=86.054, backward_time=0.649, grad_norm=86.145, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.228, optim0_lr0=1.044e-04, train_time=3.607 +[gpub003:0/64] 2024-01-29 04:34:31,469 (trainer:753) INFO: 15epoch:train:10301-10400batch: iter_time=9.027e-05, forward_time=0.231, loss_ctc=95.053, loss_interctc_layer6=98.102, loss_interctc_layer12=82.987, loss_interctc_layer15=76.784, loss_interctc_layer21=97.645, loss=90.114, backward_time=0.626, grad_norm=76.072, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.228, optim0_lr0=1.044e-04, train_time=3.671 +[gpub003:0/64] 2024-01-29 04:39:48,738 (trainer:753) INFO: 15epoch:train:10401-10500batch: iter_time=9.658e-05, forward_time=0.202, loss_ctc=85.083, loss_interctc_layer6=83.264, loss_interctc_layer12=69.591, loss_interctc_layer15=64.384, loss_interctc_layer21=87.417, loss=77.948, backward_time=0.476, grad_norm=70.920, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.226, optim0_lr0=1.043e-04, train_time=3.172 +[gpub003:0/64] 2024-01-29 04:46:02,049 (trainer:753) INFO: 15epoch:train:10501-10600batch: iter_time=9.274e-05, forward_time=0.203, loss_ctc=92.519, loss_interctc_layer6=96.266, loss_interctc_layer12=80.888, loss_interctc_layer15=74.660, loss_interctc_layer21=94.796, loss=87.826, backward_time=0.571, grad_norm=149.451, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.226, optim0_lr0=1.043e-04, train_time=3.733 +[gpub003:0/64] 2024-01-29 04:51:45,950 (trainer:753) INFO: 
15epoch:train:10601-10700batch: iter_time=9.404e-05, forward_time=0.202, loss_ctc=99.582, loss_interctc_layer6=100.512, loss_interctc_layer12=84.174, loss_interctc_layer15=78.335, loss_interctc_layer21=102.654, loss=93.052, backward_time=0.562, grad_norm=95.155, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.226, optim0_lr0=1.043e-04, train_time=3.438 +[gpub003:0/64] 2024-01-29 04:57:13,767 (trainer:753) INFO: 15epoch:train:10701-10800batch: iter_time=9.329e-05, forward_time=0.202, loss_ctc=86.037, loss_interctc_layer6=87.427, loss_interctc_layer12=72.686, loss_interctc_layer15=66.927, loss_interctc_layer21=88.256, loss=80.267, backward_time=0.521, grad_norm=74.638, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.226, optim0_lr0=1.043e-04, train_time=3.279 +[gpub003:0/64] 2024-01-29 05:02:55,069 (trainer:753) INFO: 15epoch:train:10801-10900batch: iter_time=9.164e-05, forward_time=0.202, loss_ctc=83.798, loss_interctc_layer6=86.450, loss_interctc_layer12=72.060, loss_interctc_layer15=66.470, loss_interctc_layer21=85.950, loss=78.945, backward_time=0.504, grad_norm=68.675, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.226, optim0_lr0=1.042e-04, train_time=3.413 +[gpub003:0/64] 2024-01-29 05:08:34,316 (trainer:753) INFO: 15epoch:train:10901-11000batch: iter_time=8.993e-05, forward_time=0.202, loss_ctc=88.767, loss_interctc_layer6=93.411, loss_interctc_layer12=77.848, loss_interctc_layer15=71.946, loss_interctc_layer21=90.864, loss=84.567, backward_time=0.537, grad_norm=99.945, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.226, optim0_lr0=1.042e-04, train_time=3.392 +[gpub003:0/64] 2024-01-29 05:14:54,193 (trainer:753) INFO: 15epoch:train:11001-11100batch: iter_time=8.892e-05, forward_time=0.203, loss_ctc=84.565, loss_interctc_layer6=85.867, loss_interctc_layer12=71.590, loss_interctc_layer15=65.783, loss_interctc_layer21=87.035, loss=78.968, backward_time=0.552, grad_norm=72.614, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.226, optim0_lr0=1.042e-04, train_time=3.799 +[gpub003:0/64] 2024-01-29 05:20:36,509 (trainer:753) INFO: 15epoch:train:11101-11200batch: iter_time=8.855e-05, forward_time=0.210, loss_ctc=103.210, loss_interctc_layer6=89.446, loss_interctc_layer12=74.745, loss_interctc_layer15=68.881, loss_interctc_layer21=106.112, loss=88.479, backward_time=0.520, grad_norm=76.777, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.227, optim0_lr0=1.042e-04, train_time=3.423 +[gpub003:0/64] 2024-01-29 05:23:38,346 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub003:0/64] 2024-01-29 05:23:57,188 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub003:0/64] 2024-01-29 05:24:00,888 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub003:0/64] 2024-01-29 05:24:00,888 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub003:0/64] 2024-01-29 05:24:00,892 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub003:0/64] 2024-01-29 05:39:44,879 (trainer:753) INFO: 15epoch:train:11201-11300batch: iter_time=3.044, forward_time=0.224, loss_ctc=84.849, loss_interctc_layer6=85.296, loss_interctc_layer12=71.171, loss_interctc_layer15=65.559, loss_interctc_layer21=87.042, loss=78.784, backward_time=0.507, grad_norm=74.880, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.226, optim0_lr0=1.042e-04, train_time=11.483 +[gpub003:0/64] 2024-01-29 05:49:03,881 (trainer:753) INFO: 15epoch:train:11301-11400batch: iter_time=9.816e-05, forward_time=0.201, loss_ctc=76.634, loss_interctc_layer6=85.542, loss_interctc_layer12=70.966, loss_interctc_layer15=65.020, loss_interctc_layer21=78.525, loss=75.337, backward_time=1.090, grad_norm=76.364, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.225, optim0_lr0=1.041e-04, train_time=5.590 +[gpub003:0/64] 2024-01-29 06:03:36,567 (trainer:753) INFO: 15epoch:train:11401-11500batch: iter_time=9.926e-05, forward_time=0.201, loss_ctc=79.832, loss_interctc_layer6=89.204, loss_interctc_layer12=74.067, loss_interctc_layer15=68.097, loss_interctc_layer21=81.958, loss=78.632, backward_time=1.419, grad_norm=65.047, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.225, optim0_lr0=1.041e-04, train_time=8.727 +[gpub003:0/64] 2024-01-29 06:13:12,258 (trainer:753) INFO: 15epoch:train:11501-11600batch: iter_time=1.009e-04, forward_time=0.202, loss_ctc=102.556, loss_interctc_layer6=104.409, loss_interctc_layer12=87.672, loss_interctc_layer15=81.117, loss_interctc_layer21=105.440, loss=96.239, backward_time=0.876, grad_norm=158.356, clip=100.000, loss_scale=3.610e+31, optim_step_time=0.225, optim0_lr0=1.041e-04, train_time=5.757 +[gpub003:0/64] 2024-01-29 06:25:16,905 (trainer:753) INFO: 15epoch:train:11601-11700batch: iter_time=9.799e-05, forward_time=0.203, loss_ctc=74.667, loss_interctc_layer6=83.181, loss_interctc_layer12=69.646, loss_interctc_layer15=64.673, loss_interctc_layer21=76.673, loss=73.768, backward_time=1.215, grad_norm=63.082, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.225, optim0_lr0=1.041e-04, train_time=7.246 +[gpub003:0/64] 2024-01-29 06:34:39,082 (trainer:753) INFO: 15epoch:train:11701-11800batch: iter_time=9.492e-05, forward_time=0.219, loss_ctc=85.157, loss_interctc_layer6=94.699, loss_interctc_layer12=79.675, loss_interctc_layer15=73.909, loss_interctc_layer21=87.619, loss=84.212, backward_time=0.773, grad_norm=79.690, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.226, optim0_lr0=1.040e-04, train_time=5.621 +[gpub003:0/64] 2024-01-29 06:49:03,583 (trainer:753) INFO: 
15epoch:train:11801-11900batch: iter_time=9.529e-05, forward_time=0.202, loss_ctc=87.479, loss_interctc_layer6=94.880, loss_interctc_layer12=79.020, loss_interctc_layer15=72.635, loss_interctc_layer21=89.899, loss=84.783, backward_time=1.543, grad_norm=79.347, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.225, optim0_lr0=1.040e-04, train_time=8.646 +[gpub003:0/64] 2024-01-29 06:58:05,385 (trainer:753) INFO: 15epoch:train:11901-12000batch: iter_time=9.524e-05, forward_time=0.202, loss_ctc=96.783, loss_interctc_layer6=97.862, loss_interctc_layer12=82.489, loss_interctc_layer15=76.204, loss_interctc_layer21=99.827, loss=90.633, backward_time=1.048, grad_norm=123.007, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.225, optim0_lr0=1.040e-04, train_time=5.418 +[gpub003:0/64] 2024-01-29 07:08:57,443 (trainer:753) INFO: 15epoch:train:12001-12100batch: iter_time=9.130e-05, forward_time=0.202, loss_ctc=71.930, loss_interctc_layer6=87.831, loss_interctc_layer12=73.218, loss_interctc_layer15=67.220, loss_interctc_layer21=73.966, loss=74.833, backward_time=1.749, grad_norm=57.438, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.225, optim0_lr0=1.040e-04, train_time=6.520 +[gpub003:0/64] 2024-01-29 07:13:39,126 (trainer:684) WARNING: The grad norm is nan. Skipping updating the model. +[gpub003:0/64] 2024-01-29 07:18:29,181 (trainer:753) INFO: 15epoch:train:12101-12200batch: iter_time=9.158e-05, forward_time=0.201, loss_ctc=80.534, loss_interctc_layer6=86.623, loss_interctc_layer12=72.532, loss_interctc_layer15=66.776, loss_interctc_layer21=82.437, loss=77.780, backward_time=0.968, grad_norm=73.745, clip=100.000, loss_scale=2.971e+31, optim_step_time=0.225, optim0_lr0=1.039e-04, train_time=5.717 +[gpub003:0/64] 2024-01-29 07:27:20,532 (trainer:753) INFO: 15epoch:train:12201-12300batch: iter_time=9.281e-05, forward_time=0.201, loss_ctc=86.864, loss_interctc_layer6=93.339, loss_interctc_layer12=77.662, loss_interctc_layer15=71.548, loss_interctc_layer21=88.583, loss=83.599, backward_time=0.957, grad_norm=75.182, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.225, optim0_lr0=1.039e-04, train_time=5.313 +[gpub003:0/64] 2024-01-29 07:34:17,578 (trainer:753) INFO: 15epoch:train:12301-12400batch: iter_time=9.454e-05, forward_time=0.203, loss_ctc=88.508, loss_interctc_layer6=87.080, loss_interctc_layer12=72.966, loss_interctc_layer15=67.126, loss_interctc_layer21=91.239, loss=81.384, backward_time=0.707, grad_norm=67.750, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.225, optim0_lr0=1.039e-04, train_time=4.170 +[gpub003:0/64] 2024-01-29 07:41:13,613 (trainer:753) INFO: 15epoch:train:12401-12500batch: iter_time=9.384e-05, forward_time=0.223, loss_ctc=78.741, loss_interctc_layer6=79.300, loss_interctc_layer12=65.693, loss_interctc_layer15=60.267, loss_interctc_layer21=81.109, loss=73.022, backward_time=0.680, grad_norm=58.267, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.228, optim0_lr0=1.039e-04, train_time=4.160 +[gpub003:0/64] 2024-01-29 07:41:33,538 (multiple_iter_factory:32) INFO: Building 10th iter-factory... 
+[gpub003:0/64] 2024-01-29 07:41:51,884 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub003:0/64] 2024-01-29 07:41:55,347 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub003:0/64] 2024-01-29 07:41:55,347 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub003:0/64] 2024-01-29 07:41:55,350 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub003:0/64] 2024-01-29 08:11:05,899 (trainer:753) INFO: 15epoch:train:12501-12600batch: iter_time=6.406, forward_time=3.626, loss_ctc=86.906, loss_interctc_layer6=92.442, loss_interctc_layer12=76.716, loss_interctc_layer15=70.211, loss_interctc_layer21=89.158, loss=83.087, backward_time=1.523, grad_norm=175.287, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.497, optim0_lr0=1.039e-04, train_time=17.921 +[gpub003:0/64] 2024-01-29 08:23:09,171 (trainer:753) INFO: 15epoch:train:12601-12700batch: iter_time=0.007, forward_time=2.194, loss_ctc=90.394, loss_interctc_layer6=88.275, loss_interctc_layer12=73.361, loss_interctc_layer15=67.505, loss_interctc_layer21=93.292, loss=82.565, backward_time=1.523, grad_norm=80.024, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.395, optim0_lr0=1.038e-04, train_time=7.233 +[gpub003:0/64] 2024-01-29 08:36:11,326 (trainer:753) INFO: 15epoch:train:12701-12800batch: iter_time=0.008, forward_time=2.609, loss_ctc=92.350, loss_interctc_layer6=92.753, loss_interctc_layer12=77.228, loss_interctc_layer15=71.155, loss_interctc_layer21=94.959, loss=85.689, backward_time=1.334, grad_norm=86.165, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.369, optim0_lr0=1.038e-04, train_time=7.820 +[gpub003:0/64] 2024-01-29 08:45:39,904 (trainer:753) INFO: 15epoch:train:12801-12900batch: iter_time=0.004, forward_time=1.400, loss_ctc=93.516, loss_interctc_layer6=97.962, loss_interctc_layer12=82.469, loss_interctc_layer15=76.339, loss_interctc_layer21=96.163, loss=89.290, backward_time=0.943, grad_norm=98.127, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.304, optim0_lr0=1.038e-04, train_time=5.687 +[gpub003:0/64] 2024-01-29 08:55:27,802 (trainer:753) INFO: 15epoch:train:12901-13000batch: iter_time=0.004, forward_time=1.495, loss_ctc=84.456, loss_interctc_layer6=83.288, loss_interctc_layer12=69.741, loss_interctc_layer15=64.092, loss_interctc_layer21=87.133, loss=77.742, backward_time=1.079, grad_norm=72.834, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.346, optim0_lr0=1.038e-04, train_time=5.879 +[gpub003:0/64] 2024-01-29 09:05:30,956 (trainer:753) INFO: 15epoch:train:13001-13100batch: iter_time=0.004, forward_time=1.810, loss_ctc=92.935, loss_interctc_layer6=97.072, loss_interctc_layer12=81.549, loss_interctc_layer15=75.386, loss_interctc_layer21=95.532, loss=88.495, backward_time=0.956, grad_norm=70.433, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.302, optim0_lr0=1.037e-04, train_time=6.031 +[gpub003:0/64] 2024-01-29 09:14:26,237 (trainer:753) INFO: 15epoch:train:13101-13200batch: 
iter_time=0.002, forward_time=1.357, loss_ctc=97.832, loss_interctc_layer6=100.516, loss_interctc_layer12=84.235, loss_interctc_layer15=77.956, loss_interctc_layer21=100.965, loss=92.301, backward_time=0.810, grad_norm=85.663, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.264, optim0_lr0=1.037e-04, train_time=5.336 +[gpub003:0/64] 2024-01-29 09:22:44,143 (trainer:753) INFO: 15epoch:train:13201-13300batch: iter_time=0.016, forward_time=1.023, loss_ctc=84.920, loss_interctc_layer6=87.059, loss_interctc_layer12=72.422, loss_interctc_layer15=66.635, loss_interctc_layer21=87.236, loss=79.654, backward_time=0.863, grad_norm=68.399, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.262, optim0_lr0=1.037e-04, train_time=4.995 +[gpub003:0/64] 2024-01-29 09:29:53,717 (trainer:753) INFO: 15epoch:train:13301-13400batch: iter_time=4.270e-04, forward_time=0.674, loss_ctc=82.595, loss_interctc_layer6=86.574, loss_interctc_layer12=72.027, loss_interctc_layer15=66.113, loss_interctc_layer21=84.835, loss=78.429, backward_time=0.668, grad_norm=65.533, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.246, optim0_lr0=1.037e-04, train_time=4.295 +[gpub003:0/64] 2024-01-29 09:37:58,403 (trainer:753) INFO: 15epoch:train:13401-13500batch: iter_time=0.001, forward_time=0.869, loss_ctc=87.913, loss_interctc_layer6=92.721, loss_interctc_layer12=77.365, loss_interctc_layer15=71.219, loss_interctc_layer21=90.259, loss=83.895, backward_time=0.854, grad_norm=71.512, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.254, optim0_lr0=1.036e-04, train_time=4.847 +[gpub003:0/64] 2024-01-29 09:44:56,285 (trainer:753) INFO: 15epoch:train:13501-13600batch: iter_time=4.253e-04, forward_time=0.397, loss_ctc=84.527, loss_interctc_layer6=85.381, loss_interctc_layer12=71.104, loss_interctc_layer15=65.233, loss_interctc_layer21=86.990, loss=78.647, backward_time=0.665, grad_norm=60.491, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.233, optim0_lr0=1.036e-04, train_time=4.177 +[gpub003:0/64] 2024-01-29 09:51:48,631 (trainer:753) INFO: 15epoch:train:13601-13700batch: iter_time=0.001, forward_time=0.411, loss_ctc=102.825, loss_interctc_layer6=88.798, loss_interctc_layer12=74.122, loss_interctc_layer15=68.210, loss_interctc_layer21=106.001, loss=87.991, backward_time=0.728, grad_norm=102.826, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.237, optim0_lr0=1.036e-04, train_time=4.125 +[gpub003:0/64] 2024-01-29 09:55:21,203 (multiple_iter_factory:32) INFO: Building 11th iter-factory... 
+[gpub003:0/64] 2024-01-29 09:55:40,192 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub003:0/64] 2024-01-29 09:55:43,650 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub003:0/64] 2024-01-29 09:55:43,650 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub003:0/64] 2024-01-29 09:55:43,653 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub003:0/64] 2024-01-29 10:09:42,763 (trainer:753) INFO: 15epoch:train:13701-13800batch: iter_time=5.262, forward_time=0.336, loss_ctc=84.474, loss_interctc_layer6=85.300, loss_interctc_layer12=71.102, loss_interctc_layer15=65.406, loss_interctc_layer21=86.581, loss=78.573, backward_time=0.572, grad_norm=74.230, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.236, optim0_lr0=1.036e-04, train_time=10.742 +[gpub003:0/64] 2024-01-29 10:16:09,086 (trainer:753) INFO: 15epoch:train:13801-13900batch: iter_time=8.249e-05, forward_time=0.399, loss_ctc=77.363, loss_interctc_layer6=85.735, loss_interctc_layer12=71.047, loss_interctc_layer15=65.160, loss_interctc_layer21=79.462, loss=75.753, backward_time=0.568, grad_norm=66.565, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.228, optim0_lr0=1.035e-04, train_time=3.864 +[gpub003:0/64] 2024-01-29 10:22:43,439 (trainer:753) INFO: 15epoch:train:13901-14000batch: iter_time=8.026e-05, forward_time=0.202, loss_ctc=78.842, loss_interctc_layer6=88.022, loss_interctc_layer12=73.208, loss_interctc_layer15=67.253, loss_interctc_layer21=80.945, loss=77.654, backward_time=0.690, grad_norm=64.235, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.225, optim0_lr0=1.035e-04, train_time=3.943 +[gpub003:0/64] 2024-01-29 10:29:07,581 (trainer:753) INFO: 15epoch:train:14001-14100batch: iter_time=3.374e-04, forward_time=0.336, loss_ctc=102.447, loss_interctc_layer6=104.429, loss_interctc_layer12=87.555, loss_interctc_layer15=80.743, loss_interctc_layer21=105.420, loss=96.119, backward_time=0.583, grad_norm=102.258, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.230, optim0_lr0=1.035e-04, train_time=3.840 +[gpub003:0/64] 2024-01-29 10:35:03,784 (trainer:753) INFO: 15epoch:train:14101-14200batch: iter_time=8.968e-05, forward_time=0.202, loss_ctc=73.958, loss_interctc_layer6=82.174, loss_interctc_layer12=68.959, loss_interctc_layer15=63.584, loss_interctc_layer21=76.041, loss=72.943, backward_time=0.599, grad_norm=70.567, clip=100.000, loss_scale=3.103e+31, optim_step_time=0.225, optim0_lr0=1.035e-04, train_time=3.564 +[gpub003:0/64] 2024-01-29 10:41:38,921 (trainer:753) INFO: 15epoch:train:14201-14300batch: iter_time=4.150e-04, forward_time=0.330, loss_ctc=84.875, loss_interctc_layer6=93.482, loss_interctc_layer12=78.597, loss_interctc_layer15=72.777, loss_interctc_layer21=87.239, loss=83.394, backward_time=0.629, grad_norm=70.501, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.228, optim0_lr0=1.035e-04, train_time=3.951 +[gpub003:0/64] 2024-01-29 10:47:35,179 (trainer:753) INFO: 
15epoch:train:14301-14400batch: iter_time=9.040e-05, forward_time=0.202, loss_ctc=86.807, loss_interctc_layer6=94.836, loss_interctc_layer12=78.847, loss_interctc_layer15=72.379, loss_interctc_layer21=89.870, loss=84.548, backward_time=0.546, grad_norm=74.904, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.225, optim0_lr0=1.034e-04, train_time=3.562 +[gpub003:0/64] 2024-01-29 10:53:52,037 (trainer:753) INFO: 15epoch:train:14401-14500batch: iter_time=8.621e-05, forward_time=0.202, loss_ctc=95.399, loss_interctc_layer6=96.569, loss_interctc_layer12=81.110, loss_interctc_layer15=75.036, loss_interctc_layer21=97.946, loss=89.212, backward_time=0.552, grad_norm=85.690, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.225, optim0_lr0=1.034e-04, train_time=3.769 +[gpub003:0/64] 2024-01-29 11:00:27,345 (trainer:753) INFO: 15epoch:train:14501-14600batch: iter_time=8.900e-05, forward_time=0.341, loss_ctc=72.608, loss_interctc_layer6=88.906, loss_interctc_layer12=73.842, loss_interctc_layer15=67.845, loss_interctc_layer21=74.527, loss=75.545, backward_time=0.571, grad_norm=69.754, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.230, optim0_lr0=1.034e-04, train_time=3.953 +[gpub003:0/64] 2024-01-29 11:06:46,124 (trainer:753) INFO: 15epoch:train:14601-14700batch: iter_time=2.865e-04, forward_time=0.201, loss_ctc=80.772, loss_interctc_layer6=87.110, loss_interctc_layer12=72.895, loss_interctc_layer15=67.041, loss_interctc_layer21=82.733, loss=78.110, backward_time=0.586, grad_norm=75.456, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.224, optim0_lr0=1.034e-04, train_time=3.787 +[gpub003:0/64] 2024-01-29 11:12:53,912 (trainer:753) INFO: 15epoch:train:14701-14800batch: iter_time=8.553e-05, forward_time=0.203, loss_ctc=85.850, loss_interctc_layer6=93.157, loss_interctc_layer12=77.377, loss_interctc_layer15=71.405, loss_interctc_layer21=88.477, loss=83.253, backward_time=0.588, grad_norm=76.654, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.224, optim0_lr0=1.033e-04, train_time=3.676 +[gpub003:0/64] 2024-01-29 11:18:16,237 (trainer:753) INFO: 15epoch:train:14801-14900batch: iter_time=8.808e-05, forward_time=0.202, loss_ctc=88.346, loss_interctc_layer6=86.954, loss_interctc_layer12=72.825, loss_interctc_layer15=67.113, loss_interctc_layer21=90.829, loss=81.213, backward_time=0.492, grad_norm=92.862, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.225, optim0_lr0=1.033e-04, train_time=3.225 +[gpub003:0/64] 2024-01-29 11:23:56,054 (trainer:753) INFO: 15epoch:train:14901-15000batch: iter_time=8.586e-05, forward_time=0.211, loss_ctc=78.593, loss_interctc_layer6=78.955, loss_interctc_layer12=65.366, loss_interctc_layer15=60.007, loss_interctc_layer21=80.914, loss=72.767, backward_time=0.524, grad_norm=73.339, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.225, optim0_lr0=1.033e-04, train_time=3.398 +[gpub003:0/64] 2024-01-29 12:01:40,837 (trainer:352) INFO: 15epoch results: [train] iter_time=0.288, forward_time=0.322, loss_ctc=88.193, loss_interctc_layer6=91.348, loss_interctc_layer12=76.512, loss_interctc_layer15=70.582, loss_interctc_layer21=90.581, loss=83.443, backward_time=0.699, grad_norm=81.033, clip=100.000, loss_scale=3.086e+31, optim_step_time=0.233, optim0_lr0=1.051e-04, train_time=4.731, time=19 hours, 43 minutes and 14.66 seconds, total_count=225000, gpu_max_cached_mem_GB=34.398, [valid] loss_ctc=54.179, cer_ctc=0.244, loss_interctc_layer6=58.614, cer_interctc_layer6=0.254, loss_interctc_layer12=45.571, cer_interctc_layer12=0.187, 
loss_interctc_layer15=41.077, cer_interctc_layer15=0.161, loss_interctc_layer21=56.677, cer_interctc_layer21=0.252, loss=51.224, time=37 minutes and 18.15 seconds, total_count=70065, gpu_max_cached_mem_GB=34.398 +[gpub003:0/64] 2024-01-29 12:02:11,966 (trainer:407) INFO: The best model has been updated: valid.total_count +[gpub003:0/64] 2024-01-29 12:02:12,099 (trainer:461) INFO: The model files were removed: exp/s2t_train_s2t_multitask-ctc_ebf27_conv2d8_size1024_raw_bpe50000/10epoch.pth +[gpub003:0/64] 2024-01-29 12:02:12,100 (trainer:286) INFO: 16/45epoch started. Estimated time to finish: 3 weeks, 4 days and 10 hours +[gpub003:0/64] 2024-01-29 12:02:12,118 (multiple_iter_factory:32) INFO: Building 0th iter-factory... +[gpub003:0/64] 2024-01-29 12:02:30,691 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub003:0/64] 2024-01-29 12:02:34,054 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub003:0/64] 2024-01-29 12:02:34,054 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub003:0/64] 2024-01-29 12:02:34,058 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub003:0/64] 2024-01-29 12:15:35,580 (trainer:753) INFO: 16epoch:train:1-100batch: iter_time=4.834, forward_time=0.249, loss_ctc=71.391, loss_interctc_layer6=80.934, loss_interctc_layer12=67.602, loss_interctc_layer15=62.463, loss_interctc_layer21=73.461, loss=71.170, backward_time=0.485, grad_norm=64.336, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.230, optim0_lr0=1.033e-04, train_time=8.034 +[gpub003:0/64] 2024-01-29 12:20:44,091 (trainer:753) INFO: 16epoch:train:101-200batch: iter_time=9.161e-05, forward_time=0.203, loss_ctc=96.514, loss_interctc_layer6=102.304, loss_interctc_layer12=87.741, loss_interctc_layer15=82.147, loss_interctc_layer21=98.788, loss=93.499, backward_time=0.468, grad_norm=88.387, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.228, optim0_lr0=1.032e-04, train_time=3.085 +[gpub003:0/64] 2024-01-29 12:26:23,464 (trainer:753) INFO: 16epoch:train:201-300batch: iter_time=7.393e-04, forward_time=0.280, loss_ctc=95.931, loss_interctc_layer6=101.062, loss_interctc_layer12=86.453, loss_interctc_layer15=80.958, loss_interctc_layer21=98.465, loss=92.574, backward_time=0.559, grad_norm=98.595, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.242, optim0_lr0=1.032e-04, train_time=3.392 +[gpub003:0/64] 2024-01-29 12:32:06,587 (trainer:753) INFO: 16epoch:train:301-400batch: iter_time=9.252e-05, forward_time=0.204, loss_ctc=96.132, loss_interctc_layer6=97.584, loss_interctc_layer12=82.163, loss_interctc_layer15=76.088, loss_interctc_layer21=98.531, loss=90.099, backward_time=0.551, grad_norm=77.692, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.228, optim0_lr0=1.032e-04, train_time=3.433 +[gpub003:0/64] 2024-01-29 12:37:58,166 (trainer:753) INFO: 16epoch:train:401-500batch: iter_time=9.837e-05, forward_time=0.203, loss_ctc=98.129, loss_interctc_layer6=96.619, 
loss_interctc_layer12=81.206, loss_interctc_layer15=75.010, loss_interctc_layer21=100.650, loss=90.323, backward_time=0.560, grad_norm=70.911, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.228, optim0_lr0=1.032e-04, train_time=3.516 +[gpub003:0/64] 2024-01-29 12:40:23,888 (trainer:684) WARNING: The grad norm is nan. Skipping updating the model. +[gpub003:0/64] 2024-01-29 12:43:35,505 (trainer:753) INFO: 16epoch:train:501-600batch: iter_time=1.044e-04, forward_time=0.202, loss_ctc=92.405, loss_interctc_layer6=95.244, loss_interctc_layer12=80.032, loss_interctc_layer15=73.761, loss_interctc_layer21=94.902, loss=87.269, backward_time=0.534, grad_norm=93.756, clip=100.000, loss_scale=2.909e+31, optim_step_time=0.228, optim0_lr0=1.032e-04, train_time=3.373 +[gpub003:0/64] 2024-01-29 12:49:21,561 (trainer:753) INFO: 16epoch:train:601-700batch: iter_time=1.044e-04, forward_time=0.202, loss_ctc=92.285, loss_interctc_layer6=93.788, loss_interctc_layer12=78.617, loss_interctc_layer15=72.811, loss_interctc_layer21=94.838, loss=86.468, backward_time=0.558, grad_norm=108.950, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.228, optim0_lr0=1.031e-04, train_time=3.460 +[gpub003:0/64] 2024-01-29 12:51:54,142 (trainer:684) WARNING: The grad norm is nan. Skipping updating the model. +[gpub003:0/64] 2024-01-29 12:55:19,004 (trainer:753) INFO: 16epoch:train:701-800batch: iter_time=1.039e-04, forward_time=0.202, loss_ctc=91.215, loss_interctc_layer6=96.919, loss_interctc_layer12=81.452, loss_interctc_layer15=75.275, loss_interctc_layer21=93.684, loss=87.709, backward_time=0.580, grad_norm=76.426, clip=100.000, loss_scale=1.465e+31, optim_step_time=0.228, optim0_lr0=1.031e-04, train_time=3.574 +[gpub003:0/64] 2024-01-29 13:00:57,121 (trainer:753) INFO: 16epoch:train:801-900batch: iter_time=9.076e-05, forward_time=0.203, loss_ctc=89.652, loss_interctc_layer6=94.465, loss_interctc_layer12=79.170, loss_interctc_layer15=73.022, loss_interctc_layer21=92.271, loss=85.716, backward_time=0.508, grad_norm=71.705, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.228, optim0_lr0=1.031e-04, train_time=3.381 +[gpub003:0/64] 2024-01-29 13:07:39,727 (trainer:753) INFO: 16epoch:train:901-1000batch: iter_time=9.636e-05, forward_time=0.202, loss_ctc=93.240, loss_interctc_layer6=92.691, loss_interctc_layer12=77.789, loss_interctc_layer15=71.980, loss_interctc_layer21=95.844, loss=86.309, backward_time=0.841, grad_norm=93.334, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.228, optim0_lr0=1.031e-04, train_time=4.026 +[gpub003:0/64] 2024-01-29 13:13:56,897 (trainer:753) INFO: 16epoch:train:1001-1100batch: iter_time=6.884e-04, forward_time=0.295, loss_ctc=64.243, loss_interctc_layer6=82.463, loss_interctc_layer12=69.559, loss_interctc_layer15=64.425, loss_interctc_layer21=65.339, loss=69.206, backward_time=0.605, grad_norm=64.331, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.234, optim0_lr0=1.030e-04, train_time=3.770 +[gpub003:0/64] 2024-01-29 13:19:45,487 (trainer:753) INFO: 16epoch:train:1101-1200batch: iter_time=9.538e-05, forward_time=0.204, loss_ctc=86.095, loss_interctc_layer6=94.020, loss_interctc_layer12=79.129, loss_interctc_layer15=73.215, loss_interctc_layer21=88.102, loss=84.112, backward_time=0.516, grad_norm=85.093, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.228, optim0_lr0=1.030e-04, train_time=3.487 +[gpub003:0/64] 2024-01-29 13:22:55,147 (multiple_iter_factory:32) INFO: Building 1th iter-factory... 
+[gpub003:0/64] 2024-01-29 13:23:13,999 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub003:0/64] 2024-01-29 13:23:17,481 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub003:0/64] 2024-01-29 13:23:17,481 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub003:0/64] 2024-01-29 13:23:17,484 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub003:0/64] 2024-01-29 13:36:15,449 (trainer:753) INFO: 16epoch:train:1201-1300batch: iter_time=3.061, forward_time=0.201, loss_ctc=83.194, loss_interctc_layer6=88.059, loss_interctc_layer12=73.698, loss_interctc_layer15=68.372, loss_interctc_layer21=85.478, loss=79.760, backward_time=0.495, grad_norm=68.088, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.228, optim0_lr0=1.030e-04, train_time=9.899 +[gpub003:0/64] 2024-01-29 13:42:00,785 (trainer:753) INFO: 16epoch:train:1301-1400batch: iter_time=8.457e-05, forward_time=0.202, loss_ctc=84.158, loss_interctc_layer6=90.168, loss_interctc_layer12=76.128, loss_interctc_layer15=70.805, loss_interctc_layer21=86.148, loss=81.481, backward_time=0.525, grad_norm=101.010, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.228, optim0_lr0=1.030e-04, train_time=3.453 +[gpub003:0/64] 2024-01-29 13:47:24,408 (trainer:753) INFO: 16epoch:train:1401-1500batch: iter_time=8.188e-05, forward_time=0.204, loss_ctc=86.513, loss_interctc_layer6=86.223, loss_interctc_layer12=72.667, loss_interctc_layer15=67.434, loss_interctc_layer21=88.811, loss=80.330, backward_time=0.494, grad_norm=77.539, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.229, optim0_lr0=1.030e-04, train_time=3.236 +[gpub003:0/64] 2024-01-29 13:53:36,057 (trainer:753) INFO: 16epoch:train:1501-1600batch: iter_time=9.354e-05, forward_time=0.204, loss_ctc=107.831, loss_interctc_layer6=108.538, loss_interctc_layer12=92.522, loss_interctc_layer15=86.435, loss_interctc_layer21=110.402, loss=101.146, backward_time=0.647, grad_norm=93.386, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.229, optim0_lr0=1.029e-04, train_time=3.716 +[gpub003:0/64] 2024-01-29 14:00:06,651 (trainer:753) INFO: 16epoch:train:1601-1700batch: iter_time=9.250e-05, forward_time=0.202, loss_ctc=101.552, loss_interctc_layer6=99.848, loss_interctc_layer12=83.793, loss_interctc_layer15=77.454, loss_interctc_layer21=104.453, loss=93.420, backward_time=0.646, grad_norm=69.841, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.229, optim0_lr0=1.029e-04, train_time=3.906 +[gpub003:0/64] 2024-01-29 14:05:59,942 (trainer:753) INFO: 16epoch:train:1701-1800batch: iter_time=9.020e-05, forward_time=0.202, loss_ctc=89.394, loss_interctc_layer6=89.726, loss_interctc_layer12=75.180, loss_interctc_layer15=69.654, loss_interctc_layer21=91.762, loss=83.143, backward_time=0.588, grad_norm=76.156, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.229, optim0_lr0=1.029e-04, train_time=3.533 +[gpub003:0/64] 2024-01-29 14:11:39,249 (trainer:753) INFO: 
16epoch:train:1801-1900batch: iter_time=8.887e-05, forward_time=0.307, loss_ctc=100.333, loss_interctc_layer6=94.003, loss_interctc_layer12=78.179, loss_interctc_layer15=72.078, loss_interctc_layer21=102.856, loss=89.490, backward_time=0.565, grad_norm=78.193, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.240, optim0_lr0=1.029e-04, train_time=3.392 +[gpub003:0/64] 2024-01-29 14:18:03,822 (trainer:753) INFO: 16epoch:train:1901-2000batch: iter_time=8.633e-05, forward_time=0.202, loss_ctc=103.513, loss_interctc_layer6=97.427, loss_interctc_layer12=81.000, loss_interctc_layer15=74.419, loss_interctc_layer21=106.450, loss=92.562, backward_time=0.632, grad_norm=116.869, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.228, optim0_lr0=1.028e-04, train_time=3.846 +[gpub003:0/64] 2024-01-29 14:24:17,404 (trainer:753) INFO: 16epoch:train:2001-2100batch: iter_time=8.510e-05, forward_time=0.204, loss_ctc=95.887, loss_interctc_layer6=98.225, loss_interctc_layer12=83.027, loss_interctc_layer15=76.728, loss_interctc_layer21=98.568, loss=90.487, backward_time=0.565, grad_norm=80.296, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.228, optim0_lr0=1.028e-04, train_time=3.736 +[gpub003:0/64] 2024-01-29 14:30:00,824 (trainer:753) INFO: 16epoch:train:2101-2200batch: iter_time=8.509e-05, forward_time=0.201, loss_ctc=95.905, loss_interctc_layer6=83.796, loss_interctc_layer12=69.774, loss_interctc_layer15=64.263, loss_interctc_layer21=99.140, loss=82.576, backward_time=0.503, grad_norm=62.597, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.229, optim0_lr0=1.028e-04, train_time=3.434 +[gpub003:0/64] 2024-01-29 14:36:17,210 (trainer:753) INFO: 16epoch:train:2201-2300batch: iter_time=9.089e-05, forward_time=0.202, loss_ctc=81.665, loss_interctc_layer6=90.844, loss_interctc_layer12=76.065, loss_interctc_layer15=70.095, loss_interctc_layer21=83.621, loss=80.458, backward_time=0.549, grad_norm=68.157, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.228, optim0_lr0=1.028e-04, train_time=3.764 +[gpub003:0/64] 2024-01-29 14:42:14,013 (trainer:753) INFO: 16epoch:train:2301-2400batch: iter_time=9.054e-05, forward_time=0.201, loss_ctc=79.570, loss_interctc_layer6=91.490, loss_interctc_layer12=76.863, loss_interctc_layer15=71.182, loss_interctc_layer21=81.718, loss=80.165, backward_time=0.526, grad_norm=144.355, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.228, optim0_lr0=1.027e-04, train_time=3.568 +[gpub003:0/64] 2024-01-29 14:48:30,734 (trainer:753) INFO: 16epoch:train:2401-2500batch: iter_time=8.507e-05, forward_time=0.201, loss_ctc=87.483, loss_interctc_layer6=89.617, loss_interctc_layer12=75.311, loss_interctc_layer15=69.851, loss_interctc_layer21=89.948, loss=82.442, backward_time=0.663, grad_norm=99.622, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.228, optim0_lr0=1.027e-04, train_time=3.767 +[gpub003:0/64] 2024-01-29 14:48:50,664 (multiple_iter_factory:32) INFO: Building 2th iter-factory... 
+[gpub003:0/64] 2024-01-29 14:49:09,383 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub003:0/64] 2024-01-29 14:49:13,053 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub003:0/64] 2024-01-29 14:49:13,053 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub003:0/64] 2024-01-29 14:49:13,057 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub003:0/64] 2024-01-29 14:59:07,147 (trainer:753) INFO: 16epoch:train:2501-2600batch: iter_time=3.036, forward_time=0.202, loss_ctc=70.004, loss_interctc_layer6=79.332, loss_interctc_layer12=66.043, loss_interctc_layer15=60.888, loss_interctc_layer21=72.096, loss=69.673, backward_time=0.496, grad_norm=79.533, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.228, optim0_lr0=1.027e-04, train_time=6.364 +[gpub003:0/64] 2024-01-29 15:04:25,456 (trainer:753) INFO: 16epoch:train:2601-2700batch: iter_time=8.975e-05, forward_time=0.201, loss_ctc=92.878, loss_interctc_layer6=97.911, loss_interctc_layer12=83.033, loss_interctc_layer15=77.586, loss_interctc_layer21=95.567, loss=89.395, backward_time=0.474, grad_norm=103.059, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.227, optim0_lr0=1.027e-04, train_time=3.183 +[gpub003:0/64] 2024-01-29 15:09:37,878 (trainer:753) INFO: 16epoch:train:2701-2800batch: iter_time=8.902e-05, forward_time=0.202, loss_ctc=94.488, loss_interctc_layer6=100.018, loss_interctc_layer12=84.634, loss_interctc_layer15=78.673, loss_interctc_layer21=95.972, loss=90.757, backward_time=0.484, grad_norm=78.404, clip=100.000, loss_scale=1.572e+31, optim_step_time=0.227, optim0_lr0=1.027e-04, train_time=3.124 +[gpub003:0/64] 2024-01-29 15:15:00,589 (trainer:684) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub003:0/64] 2024-01-29 15:15:28,610 (trainer:753) INFO: 16epoch:train:2801-2900batch: iter_time=8.504e-05, forward_time=0.202, loss_ctc=94.757, loss_interctc_layer6=96.067, loss_interctc_layer12=80.890, loss_interctc_layer15=75.119, loss_interctc_layer21=97.384, loss=88.843, backward_time=0.611, grad_norm=72.286, clip=100.000, loss_scale=1.946e+31, optim_step_time=0.228, optim0_lr0=1.026e-04, train_time=3.507 +[gpub003:0/64] 2024-01-29 15:21:08,684 (trainer:753) INFO: 16epoch:train:2901-3000batch: iter_time=9.168e-05, forward_time=0.203, loss_ctc=96.267, loss_interctc_layer6=96.168, loss_interctc_layer12=80.641, loss_interctc_layer15=74.311, loss_interctc_layer21=99.012, loss=89.280, backward_time=0.524, grad_norm=77.762, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.227, optim0_lr0=1.026e-04, train_time=3.401 +[gpub003:0/64] 2024-01-29 15:26:52,268 (trainer:753) INFO: 16epoch:train:3001-3100batch: iter_time=8.869e-05, forward_time=0.201, loss_ctc=91.434, loss_interctc_layer6=94.259, loss_interctc_layer12=78.904, loss_interctc_layer15=72.405, loss_interctc_layer21=93.656, loss=86.132, backward_time=0.523, grad_norm=76.044, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.227, optim0_lr0=1.026e-04, train_time=3.436 +[gpub003:0/64] 2024-01-29 15:32:50,871 (trainer:753) INFO: 16epoch:train:3101-3200batch: iter_time=8.477e-05, forward_time=0.202, loss_ctc=89.744, loss_interctc_layer6=92.018, loss_interctc_layer12=76.527, loss_interctc_layer15=70.690, loss_interctc_layer21=92.511, loss=84.298, backward_time=0.585, grad_norm=67.392, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.227, optim0_lr0=1.026e-04, train_time=3.586 +[gpub003:0/64] 2024-01-29 15:38:42,316 (trainer:753) INFO: 16epoch:train:3201-3300batch: iter_time=9.047e-05, forward_time=0.201, loss_ctc=90.282, loss_interctc_layer6=96.693, loss_interctc_layer12=81.097, loss_interctc_layer15=74.784, loss_interctc_layer21=92.770, loss=87.125, backward_time=0.584, grad_norm=89.734, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.228, optim0_lr0=1.025e-04, train_time=3.514 +[gpub003:0/64] 2024-01-29 15:44:13,014 (trainer:753) INFO: 16epoch:train:3301-3400batch: iter_time=1.034e-04, forward_time=0.202, loss_ctc=88.441, loss_interctc_layer6=94.316, loss_interctc_layer12=78.742, loss_interctc_layer15=72.488, loss_interctc_layer21=90.822, loss=84.962, backward_time=0.505, grad_norm=79.510, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.227, optim0_lr0=1.025e-04, train_time=3.307 +[gpub003:0/64] 2024-01-29 15:49:33,988 (trainer:753) INFO: 16epoch:train:3401-3500batch: iter_time=9.754e-05, forward_time=0.201, loss_ctc=93.528, loss_interctc_layer6=92.078, loss_interctc_layer12=77.017, loss_interctc_layer15=70.900, loss_interctc_layer21=96.277, loss=85.960, backward_time=0.473, grad_norm=69.399, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.227, optim0_lr0=1.025e-04, train_time=3.210 +[gpub003:0/64] 2024-01-29 15:54:56,673 (trainer:753) INFO: 16epoch:train:3501-3600batch: iter_time=9.155e-05, forward_time=0.203, loss_ctc=62.179, loss_interctc_layer6=81.715, loss_interctc_layer12=68.337, loss_interctc_layer15=62.915, loss_interctc_layer21=63.676, loss=67.764, backward_time=0.487, grad_norm=69.545, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.227, optim0_lr0=1.025e-04, train_time=3.227 +[gpub003:0/64] 2024-01-29 16:00:53,052 (trainer:753) INFO: 16epoch:train:3601-3700batch: iter_time=9.548e-05, forward_time=0.317, loss_ctc=84.636, loss_interctc_layer6=92.762, loss_interctc_layer12=77.766, 
loss_interctc_layer15=71.695, loss_interctc_layer21=86.660, loss=82.704, backward_time=0.525, grad_norm=76.767, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.235, optim0_lr0=1.025e-04, train_time=3.563 +[gpub003:0/64] 2024-01-29 16:03:48,009 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub003:0/64] 2024-01-29 16:04:06,319 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub003:0/64] 2024-01-29 16:04:09,740 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub003:0/64] 2024-01-29 16:04:09,740 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub003:0/64] 2024-01-29 16:04:09,744 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub003:0/64] 2024-01-29 16:17:14,302 (trainer:753) INFO: 16epoch:train:3701-3800batch: iter_time=1.939, forward_time=0.226, loss_ctc=82.433, loss_interctc_layer6=87.187, loss_interctc_layer12=73.093, loss_interctc_layer15=67.074, loss_interctc_layer21=84.864, loss=78.930, backward_time=0.518, grad_norm=73.913, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.230, optim0_lr0=1.024e-04, train_time=9.813 +[gpub003:0/64] 2024-01-29 16:22:23,929 (trainer:753) INFO: 16epoch:train:3801-3900batch: iter_time=8.338e-05, forward_time=0.203, loss_ctc=82.447, loss_interctc_layer6=88.990, loss_interctc_layer12=74.663, loss_interctc_layer15=69.384, loss_interctc_layer21=84.969, loss=80.091, backward_time=0.477, grad_norm=83.212, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.228, optim0_lr0=1.024e-04, train_time=3.096 +[gpub003:0/64] 2024-01-29 16:28:16,327 (trainer:753) INFO: 16epoch:train:3901-4000batch: iter_time=8.393e-05, forward_time=0.203, loss_ctc=84.664, loss_interctc_layer6=85.274, loss_interctc_layer12=71.575, loss_interctc_layer15=66.202, loss_interctc_layer21=86.951, loss=78.933, backward_time=0.581, grad_norm=68.608, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.227, optim0_lr0=1.024e-04, train_time=3.524 +[gpub003:0/64] 2024-01-29 16:33:50,913 (trainer:753) INFO: 16epoch:train:4001-4100batch: iter_time=9.220e-05, forward_time=0.203, loss_ctc=106.048, loss_interctc_layer6=106.948, loss_interctc_layer12=90.706, loss_interctc_layer15=84.813, loss_interctc_layer21=108.571, loss=99.417, backward_time=0.511, grad_norm=88.419, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.228, optim0_lr0=1.024e-04, train_time=3.346 +[gpub003:0/64] 2024-01-29 16:39:21,849 (trainer:753) INFO: 16epoch:train:4101-4200batch: iter_time=8.914e-05, forward_time=0.201, loss_ctc=101.390, loss_interctc_layer6=99.332, loss_interctc_layer12=83.390, loss_interctc_layer15=76.790, loss_interctc_layer21=104.377, loss=93.056, backward_time=0.506, grad_norm=72.367, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.228, optim0_lr0=1.023e-04, train_time=3.309 +[gpub003:0/64] 2024-01-29 16:44:59,231 (trainer:753) INFO: 16epoch:train:4201-4300batch: iter_time=9.179e-05, forward_time=0.202, loss_ctc=88.737, loss_interctc_layer6=88.818, 
loss_interctc_layer12=74.368, loss_interctc_layer15=68.625, loss_interctc_layer21=90.852, loss=82.280, backward_time=0.534, grad_norm=79.101, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.227, optim0_lr0=1.023e-04, train_time=3.374 +[gpub003:0/64] 2024-01-29 16:45:55,289 (trainer:684) WARNING: The grad norm is nan. Skipping updating the model. +[gpub003:0/64] 2024-01-29 16:50:52,337 (trainer:753) INFO: 16epoch:train:4301-4400batch: iter_time=8.983e-05, forward_time=0.202, loss_ctc=96.555, loss_interctc_layer6=91.816, loss_interctc_layer12=76.147, loss_interctc_layer15=70.083, loss_interctc_layer21=99.365, loss=86.793, backward_time=0.533, grad_norm=67.148, clip=100.000, loss_scale=5.634e+30, optim_step_time=0.228, optim0_lr0=1.023e-04, train_time=3.531 +[gpub003:0/64] 2024-01-29 16:56:37,138 (trainer:753) INFO: 16epoch:train:4401-4500batch: iter_time=9.686e-05, forward_time=0.204, loss_ctc=102.356, loss_interctc_layer6=96.384, loss_interctc_layer12=79.634, loss_interctc_layer15=72.952, loss_interctc_layer21=105.406, loss=91.346, backward_time=0.539, grad_norm=75.510, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.228, optim0_lr0=1.023e-04, train_time=3.448 +[gpub003:0/64] 2024-01-29 17:02:14,169 (trainer:753) INFO: 16epoch:train:4501-4600batch: iter_time=8.906e-05, forward_time=0.216, loss_ctc=94.207, loss_interctc_layer6=97.249, loss_interctc_layer12=81.606, loss_interctc_layer15=75.306, loss_interctc_layer21=97.050, loss=89.084, backward_time=0.521, grad_norm=1.202e+03, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.229, optim0_lr0=1.023e-04, train_time=3.370 +[gpub003:0/64] 2024-01-29 17:08:06,535 (trainer:753) INFO: 16epoch:train:4601-4700batch: iter_time=9.636e-05, forward_time=0.229, loss_ctc=95.790, loss_interctc_layer6=83.756, loss_interctc_layer12=69.607, loss_interctc_layer15=64.241, loss_interctc_layer21=98.803, loss=82.439, backward_time=0.559, grad_norm=74.825, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.228, optim0_lr0=1.022e-04, train_time=3.523 +[gpub003:0/64] 2024-01-29 17:13:33,384 (trainer:753) INFO: 16epoch:train:4701-4800batch: iter_time=8.944e-05, forward_time=0.308, loss_ctc=81.554, loss_interctc_layer6=90.539, loss_interctc_layer12=75.729, loss_interctc_layer15=69.779, loss_interctc_layer21=83.438, loss=80.208, backward_time=0.511, grad_norm=70.539, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.231, optim0_lr0=1.022e-04, train_time=3.267 +[gpub003:0/64] 2024-01-29 17:19:50,534 (trainer:753) INFO: 16epoch:train:4801-4900batch: iter_time=8.511e-05, forward_time=0.202, loss_ctc=78.134, loss_interctc_layer6=90.744, loss_interctc_layer12=75.908, loss_interctc_layer15=70.265, loss_interctc_layer21=80.236, loss=79.058, backward_time=0.557, grad_norm=91.170, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.227, optim0_lr0=1.022e-04, train_time=3.773 +[gpub003:0/64] 2024-01-29 17:25:19,788 (trainer:753) INFO: 16epoch:train:4901-5000batch: iter_time=8.429e-05, forward_time=0.202, loss_ctc=87.005, loss_interctc_layer6=89.103, loss_interctc_layer12=74.712, loss_interctc_layer15=69.081, loss_interctc_layer21=89.377, loss=81.855, backward_time=0.488, grad_norm=71.686, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.227, optim0_lr0=1.022e-04, train_time=3.292 +[gpub003:0/64] 2024-01-29 17:25:36,994 (multiple_iter_factory:32) INFO: Building 4th iter-factory... 
+[gpub003:0/64] 2024-01-29 17:25:55,598 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub003:0/64] 2024-01-29 17:25:59,236 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub003:0/64] 2024-01-29 17:25:59,237 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub003:0/64] 2024-01-29 17:25:59,240 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub003:0/64] 2024-01-29 17:41:07,004 (trainer:753) INFO: 16epoch:train:5001-5100batch: iter_time=3.405, forward_time=0.202, loss_ctc=69.688, loss_interctc_layer6=79.395, loss_interctc_layer12=65.964, loss_interctc_layer15=60.503, loss_interctc_layer21=71.463, loss=69.402, backward_time=0.511, grad_norm=74.647, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.227, optim0_lr0=1.021e-04, train_time=9.472 +[gpub003:0/64] 2024-01-29 17:46:57,257 (trainer:753) INFO: 16epoch:train:5101-5200batch: iter_time=8.568e-05, forward_time=0.201, loss_ctc=92.084, loss_interctc_layer6=97.013, loss_interctc_layer12=81.880, loss_interctc_layer15=76.395, loss_interctc_layer21=94.540, loss=88.383, backward_time=0.528, grad_norm=80.995, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.227, optim0_lr0=1.021e-04, train_time=3.502 +[gpub003:0/64] 2024-01-29 17:52:41,109 (trainer:753) INFO: 16epoch:train:5201-5300batch: iter_time=9.214e-05, forward_time=0.203, loss_ctc=92.958, loss_interctc_layer6=99.332, loss_interctc_layer12=83.808, loss_interctc_layer15=77.298, loss_interctc_layer21=94.773, loss=89.634, backward_time=0.569, grad_norm=86.414, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.227, optim0_lr0=1.021e-04, train_time=3.438 +[gpub003:0/64] 2024-01-29 17:58:22,631 (trainer:753) INFO: 16epoch:train:5301-5400batch: iter_time=8.562e-05, forward_time=0.202, loss_ctc=93.595, loss_interctc_layer6=94.966, loss_interctc_layer12=79.688, loss_interctc_layer15=73.489, loss_interctc_layer21=96.331, loss=87.614, backward_time=0.497, grad_norm=95.757, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.227, optim0_lr0=1.021e-04, train_time=3.415 +[gpub003:0/64] 2024-01-29 18:04:25,667 (trainer:753) INFO: 16epoch:train:5401-5500batch: iter_time=8.968e-05, forward_time=0.202, loss_ctc=95.704, loss_interctc_layer6=95.566, loss_interctc_layer12=80.080, loss_interctc_layer15=73.721, loss_interctc_layer21=98.393, loss=88.693, backward_time=0.562, grad_norm=70.956, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.227, optim0_lr0=1.021e-04, train_time=3.630 +[gpub003:0/64] 2024-01-29 18:10:04,547 (trainer:753) INFO: 16epoch:train:5501-5600batch: iter_time=9.004e-05, forward_time=0.202, loss_ctc=90.945, loss_interctc_layer6=93.702, loss_interctc_layer12=78.088, loss_interctc_layer15=71.934, loss_interctc_layer21=93.477, loss=85.629, backward_time=0.517, grad_norm=80.544, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.227, optim0_lr0=1.020e-04, train_time=3.389 +[gpub003:0/64] 2024-01-29 18:15:46,211 (trainer:753) INFO: 16epoch:train:5601-5700batch: 
iter_time=9.335e-05, forward_time=0.201, loss_ctc=88.190, loss_interctc_layer6=91.199, loss_interctc_layer12=75.804, loss_interctc_layer15=69.628, loss_interctc_layer21=90.794, loss=83.123, backward_time=0.500, grad_norm=70.810, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.227, optim0_lr0=1.020e-04, train_time=3.416 +[gpub003:0/64] 2024-01-29 18:21:50,249 (trainer:753) INFO: 16epoch:train:5701-5800batch: iter_time=9.181e-05, forward_time=0.228, loss_ctc=88.941, loss_interctc_layer6=95.608, loss_interctc_layer12=79.928, loss_interctc_layer15=73.649, loss_interctc_layer21=91.332, loss=85.892, backward_time=0.542, grad_norm=77.336, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.228, optim0_lr0=1.020e-04, train_time=3.640 +[gpub003:0/64] 2024-01-29 18:27:58,545 (trainer:753) INFO: 16epoch:train:5801-5900batch: iter_time=0.002, forward_time=0.297, loss_ctc=86.626, loss_interctc_layer6=92.508, loss_interctc_layer12=77.213, loss_interctc_layer15=70.934, loss_interctc_layer21=88.991, loss=83.254, backward_time=0.541, grad_norm=102.958, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.239, optim0_lr0=1.020e-04, train_time=3.681 +[gpub003:0/64] 2024-01-29 18:33:45,623 (trainer:753) INFO: 16epoch:train:5901-6000batch: iter_time=8.650e-05, forward_time=0.201, loss_ctc=91.765, loss_interctc_layer6=90.652, loss_interctc_layer12=75.929, loss_interctc_layer15=69.528, loss_interctc_layer21=94.394, loss=84.454, backward_time=0.549, grad_norm=125.771, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.227, optim0_lr0=1.019e-04, train_time=3.472 +[gpub003:0/64] 2024-01-29 18:40:04,674 (trainer:753) INFO: 16epoch:train:6001-6100batch: iter_time=8.717e-05, forward_time=0.201, loss_ctc=61.334, loss_interctc_layer6=80.707, loss_interctc_layer12=67.683, loss_interctc_layer15=62.234, loss_interctc_layer21=62.749, loss=66.941, backward_time=0.577, grad_norm=61.513, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.227, optim0_lr0=1.019e-04, train_time=3.790 +[gpub003:0/64] 2024-01-29 18:45:46,542 (trainer:753) INFO: 16epoch:train:6101-6200batch: iter_time=8.874e-05, forward_time=0.204, loss_ctc=83.954, loss_interctc_layer6=92.286, loss_interctc_layer12=77.191, loss_interctc_layer15=71.317, loss_interctc_layer21=86.166, loss=82.183, backward_time=0.511, grad_norm=74.355, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.227, optim0_lr0=1.019e-04, train_time=3.418 +[gpub003:0/64] 2024-01-29 18:48:44,847 (multiple_iter_factory:32) INFO: Building 5th iter-factory... 
+[gpub003:0/64] 2024-01-29 18:49:03,193 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub003:0/64] 2024-01-29 18:49:06,632 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub003:0/64] 2024-01-29 18:49:06,632 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub003:0/64] 2024-01-29 18:49:06,635 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub003:0/64] 2024-01-29 19:01:57,119 (trainer:753) INFO: 16epoch:train:6201-6300batch: iter_time=6.359, forward_time=0.276, loss_ctc=82.324, loss_interctc_layer6=86.806, loss_interctc_layer12=72.548, loss_interctc_layer15=66.874, loss_interctc_layer21=84.679, loss=78.646, backward_time=0.521, grad_norm=63.111, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.228, optim0_lr0=1.019e-04, train_time=9.704 +[gpub003:0/64] 2024-01-29 19:08:02,938 (trainer:753) INFO: 16epoch:train:6301-6400batch: iter_time=0.002, forward_time=0.386, loss_ctc=81.519, loss_interctc_layer6=87.802, loss_interctc_layer12=73.511, loss_interctc_layer15=68.240, loss_interctc_layer21=83.907, loss=78.996, backward_time=0.614, grad_norm=78.526, clip=100.000, loss_scale=9.533e+30, optim_step_time=0.239, optim0_lr0=1.019e-04, train_time=3.659 +[gpub003:0/64] 2024-01-29 19:13:19,394 (trainer:753) INFO: 16epoch:train:6401-6500batch: iter_time=9.044e-05, forward_time=0.234, loss_ctc=84.618, loss_interctc_layer6=84.989, loss_interctc_layer12=71.267, loss_interctc_layer15=65.895, loss_interctc_layer21=86.880, loss=78.730, backward_time=0.468, grad_norm=69.708, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.228, optim0_lr0=1.018e-04, train_time=3.164 +[gpub003:0/64] 2024-01-29 19:19:03,613 (trainer:753) INFO: 16epoch:train:6501-6600batch: iter_time=9.904e-05, forward_time=0.374, loss_ctc=105.977, loss_interctc_layer6=106.562, loss_interctc_layer12=90.634, loss_interctc_layer15=84.366, loss_interctc_layer21=108.954, loss=99.299, backward_time=0.519, grad_norm=91.965, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.235, optim0_lr0=1.018e-04, train_time=3.440 +[gpub003:0/64] 2024-01-29 19:25:02,531 (trainer:753) INFO: 16epoch:train:6601-6700batch: iter_time=5.518e-04, forward_time=0.320, loss_ctc=100.417, loss_interctc_layer6=98.915, loss_interctc_layer12=82.727, loss_interctc_layer15=76.277, loss_interctc_layer21=103.152, loss=92.298, backward_time=0.568, grad_norm=69.662, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.235, optim0_lr0=1.018e-04, train_time=3.590 +[gpub003:0/64] 2024-01-29 19:31:37,246 (trainer:753) INFO: 16epoch:train:6701-6800batch: iter_time=8.861e-05, forward_time=0.223, loss_ctc=88.351, loss_interctc_layer6=88.666, loss_interctc_layer12=74.288, loss_interctc_layer15=68.878, loss_interctc_layer21=90.733, loss=82.183, backward_time=0.616, grad_norm=75.173, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.228, optim0_lr0=1.018e-04, train_time=3.948 +[gpub003:0/64] 2024-01-29 19:38:04,893 (trainer:753) INFO: 16epoch:train:6801-6900batch: 
iter_time=5.319e-04, forward_time=0.290, loss_ctc=96.853, loss_interctc_layer6=91.575, loss_interctc_layer12=76.271, loss_interctc_layer15=70.004, loss_interctc_layer21=99.870, loss=86.915, backward_time=0.645, grad_norm=75.028, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.234, optim0_lr0=1.017e-04, train_time=3.876 +[gpub003:0/64] 2024-01-29 19:43:51,522 (trainer:753) INFO: 16epoch:train:6901-7000batch: iter_time=9.098e-05, forward_time=0.271, loss_ctc=101.046, loss_interctc_layer6=95.283, loss_interctc_layer12=79.033, loss_interctc_layer15=72.247, loss_interctc_layer21=104.104, loss=90.343, backward_time=0.519, grad_norm=79.324, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.233, optim0_lr0=1.017e-04, train_time=3.466 +[gpub003:0/64] 2024-01-29 19:49:10,958 (trainer:753) INFO: 16epoch:train:7001-7100batch: iter_time=4.310e-04, forward_time=0.251, loss_ctc=94.469, loss_interctc_layer6=97.244, loss_interctc_layer12=81.765, loss_interctc_layer15=75.495, loss_interctc_layer21=97.134, loss=89.221, backward_time=0.490, grad_norm=87.687, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.230, optim0_lr0=1.017e-04, train_time=3.196 +[gpub003:0/64] 2024-01-29 19:54:30,353 (trainer:753) INFO: 16epoch:train:7101-7200batch: iter_time=9.268e-05, forward_time=0.202, loss_ctc=96.189, loss_interctc_layer6=83.899, loss_interctc_layer12=69.931, loss_interctc_layer15=64.424, loss_interctc_layer21=99.384, loss=82.766, backward_time=0.477, grad_norm=68.304, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.227, optim0_lr0=1.017e-04, train_time=3.193 +[gpub003:0/64] 2024-01-29 20:00:38,000 (trainer:753) INFO: 16epoch:train:7201-7300batch: iter_time=0.001, forward_time=0.261, loss_ctc=80.870, loss_interctc_layer6=90.144, loss_interctc_layer12=75.480, loss_interctc_layer15=69.145, loss_interctc_layer21=82.901, loss=79.708, backward_time=0.584, grad_norm=75.277, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.233, optim0_lr0=1.017e-04, train_time=3.675 +[gpub003:0/64] 2024-01-29 20:06:35,957 (trainer:753) INFO: 16epoch:train:7301-7400batch: iter_time=9.481e-05, forward_time=0.290, loss_ctc=77.885, loss_interctc_layer6=90.201, loss_interctc_layer12=75.670, loss_interctc_layer15=69.904, loss_interctc_layer21=79.942, loss=78.720, backward_time=0.572, grad_norm=65.121, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.231, optim0_lr0=1.016e-04, train_time=3.581 +[gpub003:0/64] 2024-01-29 20:12:34,052 (trainer:753) INFO: 16epoch:train:7401-7500batch: iter_time=4.506e-04, forward_time=0.259, loss_ctc=86.483, loss_interctc_layer6=88.317, loss_interctc_layer12=74.180, loss_interctc_layer15=68.307, loss_interctc_layer21=88.403, loss=81.138, backward_time=0.551, grad_norm=79.782, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.231, optim0_lr0=1.016e-04, train_time=3.580 +[gpub003:0/64] 2024-01-29 20:12:53,827 (multiple_iter_factory:32) INFO: Building 6th iter-factory... 
+[gpub003:0/64] 2024-01-29 20:13:12,358 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub003:0/64] 2024-01-29 20:13:15,823 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub003:0/64] 2024-01-29 20:13:15,823 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub003:0/64] 2024-01-29 20:13:15,827 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub003:0/64] 2024-01-29 20:24:11,655 (trainer:753) INFO: 16epoch:train:7501-7600batch: iter_time=3.875, forward_time=0.273, loss_ctc=74.189, loss_interctc_layer6=79.436, loss_interctc_layer12=65.989, loss_interctc_layer15=60.573, loss_interctc_layer21=76.211, loss=71.279, backward_time=0.483, grad_norm=61.759, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.229, optim0_lr0=1.016e-04, train_time=6.976 +[gpub003:0/64] 2024-01-29 20:29:19,161 (trainer:753) INFO: 16epoch:train:7601-7700batch: iter_time=8.271e-05, forward_time=0.201, loss_ctc=92.992, loss_interctc_layer6=95.367, loss_interctc_layer12=80.361, loss_interctc_layer15=74.778, loss_interctc_layer21=95.424, loss=87.785, backward_time=0.473, grad_norm=82.628, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.227, optim0_lr0=1.016e-04, train_time=3.075 +[gpub003:0/64] 2024-01-29 20:34:31,065 (trainer:753) INFO: 16epoch:train:7701-7800batch: iter_time=4.187e-04, forward_time=0.210, loss_ctc=97.915, loss_interctc_layer6=98.712, loss_interctc_layer12=83.526, loss_interctc_layer15=77.359, loss_interctc_layer21=100.518, loss=91.606, backward_time=0.476, grad_norm=83.427, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.251, optim0_lr0=1.015e-04, train_time=3.117 +[gpub003:0/64] 2024-01-29 20:40:17,962 (trainer:753) INFO: 16epoch:train:7801-7900batch: iter_time=8.534e-05, forward_time=0.259, loss_ctc=94.520, loss_interctc_layer6=95.591, loss_interctc_layer12=80.224, loss_interctc_layer15=73.812, loss_interctc_layer21=97.283, loss=88.286, backward_time=0.602, grad_norm=70.435, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.234, optim0_lr0=1.015e-04, train_time=3.471 +[gpub003:0/64] 2024-01-29 20:46:42,386 (trainer:753) INFO: 16epoch:train:7901-8000batch: iter_time=8.746e-05, forward_time=0.202, loss_ctc=98.478, loss_interctc_layer6=94.535, loss_interctc_layer12=78.917, loss_interctc_layer15=72.667, loss_interctc_layer21=101.473, loss=89.214, backward_time=0.634, grad_norm=102.122, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.227, optim0_lr0=1.015e-04, train_time=3.844 +[gpub003:0/64] 2024-01-29 20:52:18,759 (trainer:753) INFO: 16epoch:train:8001-8100batch: iter_time=8.214e-05, forward_time=0.202, loss_ctc=93.691, loss_interctc_layer6=93.854, loss_interctc_layer12=78.189, loss_interctc_layer15=72.279, loss_interctc_layer21=96.006, loss=86.804, backward_time=0.550, grad_norm=117.911, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.227, optim0_lr0=1.015e-04, train_time=3.363 +[gpub003:0/64] 2024-01-29 20:58:06,518 (trainer:753) INFO: 
16epoch:train:8101-8200batch: iter_time=8.938e-05, forward_time=0.201, loss_ctc=93.848, loss_interctc_layer6=90.921, loss_interctc_layer12=75.337, loss_interctc_layer15=69.305, loss_interctc_layer21=96.676, loss=85.217, backward_time=0.540, grad_norm=75.708, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.227, optim0_lr0=1.015e-04, train_time=3.478 +[gpub003:0/64] 2024-01-29 21:03:29,185 (trainer:753) INFO: 16epoch:train:8201-8300batch: iter_time=8.979e-05, forward_time=0.201, loss_ctc=96.170, loss_interctc_layer6=95.352, loss_interctc_layer12=79.520, loss_interctc_layer15=73.190, loss_interctc_layer21=98.875, loss=88.621, backward_time=0.519, grad_norm=94.138, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.227, optim0_lr0=1.014e-04, train_time=3.226 +[gpub003:0/64] 2024-01-29 21:09:18,899 (trainer:753) INFO: 16epoch:train:8301-8400batch: iter_time=2.347e-04, forward_time=0.294, loss_ctc=96.086, loss_interctc_layer6=92.795, loss_interctc_layer12=77.441, loss_interctc_layer15=71.135, loss_interctc_layer21=98.973, loss=87.286, backward_time=0.554, grad_norm=85.647, clip=100.000, loss_scale=1.907e+31, optim_step_time=0.235, optim0_lr0=1.014e-04, train_time=3.488 +[gpub003:0/64] 2024-01-29 21:14:59,553 (trainer:753) INFO: 16epoch:train:8401-8500batch: iter_time=8.339e-05, forward_time=0.201, loss_ctc=97.112, loss_interctc_layer6=90.758, loss_interctc_layer12=75.670, loss_interctc_layer15=69.605, loss_interctc_layer21=100.253, loss=86.680, backward_time=0.523, grad_norm=66.524, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.227, optim0_lr0=1.014e-04, train_time=3.415 +[gpub003:0/64] 2024-01-29 21:15:12,233 (trainer:684) WARNING: The grad norm is nan. Skipping updating the model. +[gpub003:0/64] 2024-01-29 21:20:27,452 (trainer:753) INFO: 16epoch:train:8501-8600batch: iter_time=9.065e-05, forward_time=0.203, loss_ctc=62.170, loss_interctc_layer6=80.196, loss_interctc_layer12=66.875, loss_interctc_layer15=61.508, loss_interctc_layer21=63.632, loss=66.876, backward_time=0.526, grad_norm=88.814, clip=100.000, loss_scale=1.045e+31, optim_step_time=0.227, optim0_lr0=1.014e-04, train_time=3.279 +[gpub003:0/64] 2024-01-29 21:25:59,941 (trainer:753) INFO: 16epoch:train:8601-8700batch: iter_time=9.831e-05, forward_time=0.203, loss_ctc=88.041, loss_interctc_layer6=91.841, loss_interctc_layer12=76.895, loss_interctc_layer15=70.862, loss_interctc_layer21=90.687, loss=83.665, backward_time=0.505, grad_norm=78.501, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.227, optim0_lr0=1.014e-04, train_time=3.325 +[gpub003:0/64] 2024-01-29 21:29:01,440 (multiple_iter_factory:32) INFO: Building 7th iter-factory... 
+[gpub003:0/64] 2024-01-29 21:29:20,151 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub003:0/64] 2024-01-29 21:29:23,560 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub003:0/64] 2024-01-29 21:29:23,560 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub003:0/64] 2024-01-29 21:29:23,563 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub003:0/64] 2024-01-29 21:40:35,974 (trainer:753) INFO: 16epoch:train:8701-8800batch: iter_time=3.157, forward_time=0.242, loss_ctc=81.687, loss_interctc_layer6=86.476, loss_interctc_layer12=72.200, loss_interctc_layer15=66.460, loss_interctc_layer21=83.879, loss=78.140, backward_time=0.487, grad_norm=65.369, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.229, optim0_lr0=1.013e-04, train_time=8.759 +[gpub003:0/64] 2024-01-29 21:46:31,196 (trainer:753) INFO: 16epoch:train:8801-8900batch: iter_time=8.661e-05, forward_time=0.309, loss_ctc=78.600, loss_interctc_layer6=88.079, loss_interctc_layer12=73.926, loss_interctc_layer15=68.389, loss_interctc_layer21=80.560, loss=77.911, backward_time=0.537, grad_norm=80.735, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.240, optim0_lr0=1.013e-04, train_time=3.551 +[gpub003:0/64] 2024-01-29 21:52:33,745 (trainer:753) INFO: 16epoch:train:8901-9000batch: iter_time=8.134e-05, forward_time=0.256, loss_ctc=77.599, loss_interctc_layer6=84.106, loss_interctc_layer12=70.318, loss_interctc_layer15=64.986, loss_interctc_layer21=79.895, loss=75.381, backward_time=0.630, grad_norm=80.975, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.236, optim0_lr0=1.013e-04, train_time=3.627 +[gpub003:0/64] 2024-01-29 21:58:10,643 (trainer:753) INFO: 16epoch:train:9001-9100batch: iter_time=9.051e-05, forward_time=0.204, loss_ctc=99.829, loss_interctc_layer6=106.169, loss_interctc_layer12=89.512, loss_interctc_layer15=83.066, loss_interctc_layer21=102.256, loss=96.166, backward_time=0.577, grad_norm=106.251, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.228, optim0_lr0=1.013e-04, train_time=3.368 +[gpub003:0/64] 2024-01-29 22:04:20,761 (trainer:753) INFO: 16epoch:train:9101-9200batch: iter_time=2.818e-04, forward_time=0.318, loss_ctc=99.051, loss_interctc_layer6=98.771, loss_interctc_layer12=82.800, loss_interctc_layer15=76.207, loss_interctc_layer21=101.909, loss=91.747, backward_time=0.616, grad_norm=90.455, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.236, optim0_lr0=1.012e-04, train_time=3.700 +[gpub003:0/64] 2024-01-29 22:10:07,973 (trainer:753) INFO: 16epoch:train:9201-9300batch: iter_time=9.897e-04, forward_time=0.278, loss_ctc=82.157, loss_interctc_layer6=88.423, loss_interctc_layer12=73.746, loss_interctc_layer15=68.037, loss_interctc_layer21=84.562, loss=79.385, backward_time=0.680, grad_norm=67.464, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.235, optim0_lr0=1.012e-04, train_time=3.473 +[gpub003:0/64] 2024-01-29 22:16:58,465 (trainer:753) INFO: 
16epoch:train:9301-9400batch: iter_time=3.288e-04, forward_time=0.267, loss_ctc=95.408, loss_interctc_layer6=91.415, loss_interctc_layer12=76.034, loss_interctc_layer15=69.662, loss_interctc_layer21=98.114, loss=86.126, backward_time=0.710, grad_norm=73.914, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.239, optim0_lr0=1.012e-04, train_time=4.105 +[gpub003:0/64] 2024-01-29 22:23:00,174 (trainer:753) INFO: 16epoch:train:9401-9500batch: iter_time=6.752e-04, forward_time=0.290, loss_ctc=92.822, loss_interctc_layer6=95.081, loss_interctc_layer12=78.694, loss_interctc_layer15=71.935, loss_interctc_layer21=95.704, loss=86.847, backward_time=0.586, grad_norm=64.069, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.233, optim0_lr0=1.012e-04, train_time=3.613 +[gpub003:0/64] 2024-01-29 22:28:42,481 (trainer:753) INFO: 16epoch:train:9501-9600batch: iter_time=9.472e-05, forward_time=0.205, loss_ctc=84.623, loss_interctc_layer6=96.583, loss_interctc_layer12=81.218, loss_interctc_layer15=74.905, loss_interctc_layer21=87.019, loss=84.870, backward_time=0.540, grad_norm=84.556, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.228, optim0_lr0=1.012e-04, train_time=3.426 +[gpub003:0/64] 2024-01-29 22:35:06,819 (trainer:753) INFO: 16epoch:train:9601-9700batch: iter_time=4.292e-04, forward_time=0.331, loss_ctc=87.568, loss_interctc_layer6=82.764, loss_interctc_layer12=68.940, loss_interctc_layer15=63.146, loss_interctc_layer21=90.257, loss=78.535, backward_time=0.626, grad_norm=120.433, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.233, optim0_lr0=1.011e-04, train_time=3.843 +[gpub003:0/64] 2024-01-29 22:40:40,084 (trainer:753) INFO: 16epoch:train:9701-9800batch: iter_time=9.642e-05, forward_time=0.204, loss_ctc=77.427, loss_interctc_layer6=89.335, loss_interctc_layer12=74.613, loss_interctc_layer15=68.506, loss_interctc_layer21=79.579, loss=77.892, backward_time=0.517, grad_norm=63.804, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.227, optim0_lr0=1.011e-04, train_time=3.333 +[gpub003:0/64] 2024-01-29 22:46:40,493 (trainer:753) INFO: 16epoch:train:9801-9900batch: iter_time=3.629e-04, forward_time=0.326, loss_ctc=74.849, loss_interctc_layer6=89.755, loss_interctc_layer12=75.180, loss_interctc_layer15=69.451, loss_interctc_layer21=76.786, loss=77.204, backward_time=0.557, grad_norm=69.149, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.236, optim0_lr0=1.011e-04, train_time=3.603 +[gpub003:0/64] 2024-01-29 22:52:16,516 (trainer:753) INFO: 16epoch:train:9901-10000batch: iter_time=8.551e-05, forward_time=0.201, loss_ctc=80.906, loss_interctc_layer6=87.914, loss_interctc_layer12=73.694, loss_interctc_layer15=68.064, loss_interctc_layer21=83.060, loss=78.728, backward_time=0.524, grad_norm=67.754, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.228, optim0_lr0=1.011e-04, train_time=3.362 +[gpub003:0/64] 2024-01-29 22:52:29,349 (multiple_iter_factory:32) INFO: Building 8th iter-factory... 
+[gpub003:0/64] 2024-01-29 22:52:47,651 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub003:0/64] 2024-01-29 22:52:51,092 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub003:0/64] 2024-01-29 22:52:51,092 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub003:0/64] 2024-01-29 22:52:51,097 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub003:0/64] 2024-01-29 23:08:43,838 (trainer:753) INFO: 16epoch:train:10001-10100batch: iter_time=3.087, forward_time=0.202, loss_ctc=73.694, loss_interctc_layer6=78.811, loss_interctc_layer12=65.151, loss_interctc_layer15=59.696, loss_interctc_layer21=75.869, loss=70.644, backward_time=0.514, grad_norm=113.753, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.227, optim0_lr0=1.011e-04, train_time=9.873 +[gpub003:0/64] 2024-01-29 23:14:01,464 (trainer:753) INFO: 16epoch:train:10101-10200batch: iter_time=8.859e-05, forward_time=0.201, loss_ctc=92.500, loss_interctc_layer6=95.110, loss_interctc_layer12=80.100, loss_interctc_layer15=74.484, loss_interctc_layer21=95.412, loss=87.521, backward_time=0.485, grad_norm=150.404, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.227, optim0_lr0=1.010e-04, train_time=3.176 +[gpub003:0/64] 2024-01-29 23:20:06,812 (trainer:753) INFO: 16epoch:train:10201-10300batch: iter_time=8.426e-05, forward_time=0.212, loss_ctc=97.334, loss_interctc_layer6=98.606, loss_interctc_layer12=83.060, loss_interctc_layer15=76.571, loss_interctc_layer21=100.427, loss=91.199, backward_time=0.600, grad_norm=120.991, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.227, optim0_lr0=1.010e-04, train_time=3.652 +[gpub003:0/64] 2024-01-29 23:26:24,658 (trainer:753) INFO: 16epoch:train:10301-10400batch: iter_time=8.830e-05, forward_time=0.681, loss_ctc=94.646, loss_interctc_layer6=94.680, loss_interctc_layer12=79.385, loss_interctc_layer15=73.266, loss_interctc_layer21=97.267, loss=87.849, backward_time=0.550, grad_norm=75.254, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.233, optim0_lr0=1.010e-04, train_time=3.780 +[gpub003:0/64] 2024-01-29 23:31:46,881 (trainer:753) INFO: 16epoch:train:10401-10500batch: iter_time=9.031e-05, forward_time=0.210, loss_ctc=98.308, loss_interctc_layer6=94.415, loss_interctc_layer12=78.858, loss_interctc_layer15=72.614, loss_interctc_layer21=100.973, loss=89.034, backward_time=0.481, grad_norm=77.076, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.227, optim0_lr0=1.010e-04, train_time=3.220 +[gpub003:0/64] 2024-01-29 23:37:29,793 (trainer:753) INFO: 16epoch:train:10501-10600batch: iter_time=9.127e-05, forward_time=0.202, loss_ctc=92.649, loss_interctc_layer6=93.059, loss_interctc_layer12=77.631, loss_interctc_layer15=71.431, loss_interctc_layer21=95.053, loss=85.965, backward_time=0.521, grad_norm=70.159, clip=100.000, loss_scale=1.988e+31, optim_step_time=0.227, optim0_lr0=1.009e-04, train_time=3.431 +[gpub003:0/64] 2024-01-29 23:43:14,271 (trainer:753) INFO: 
16epoch:train:10601-10700batch: iter_time=8.538e-05, forward_time=0.205, loss_ctc=93.644, loss_interctc_layer6=89.883, loss_interctc_layer12=74.570, loss_interctc_layer15=68.583, loss_interctc_layer21=96.630, loss=84.662, backward_time=0.553, grad_norm=78.819, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.227, optim0_lr0=1.009e-04, train_time=3.445 +[gpub003:0/64] 2024-01-29 23:49:00,817 (trainer:753) INFO: 16epoch:train:10701-10800batch: iter_time=8.733e-05, forward_time=0.202, loss_ctc=95.556, loss_interctc_layer6=94.420, loss_interctc_layer12=79.021, loss_interctc_layer15=72.616, loss_interctc_layer21=98.254, loss=87.973, backward_time=0.530, grad_norm=76.231, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.227, optim0_lr0=1.009e-04, train_time=3.465 +[gpub003:0/64] 2024-01-29 23:55:18,085 (trainer:753) INFO: 16epoch:train:10801-10900batch: iter_time=9.306e-05, forward_time=0.203, loss_ctc=96.643, loss_interctc_layer6=93.050, loss_interctc_layer12=77.605, loss_interctc_layer15=71.357, loss_interctc_layer21=99.480, loss=87.627, backward_time=0.612, grad_norm=68.250, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.228, optim0_lr0=1.009e-04, train_time=3.772 +[gpub003:0/64] 2024-01-30 00:01:18,399 (trainer:753) INFO: 16epoch:train:10901-11000batch: iter_time=9.220e-05, forward_time=0.202, loss_ctc=97.468, loss_interctc_layer6=90.368, loss_interctc_layer12=75.141, loss_interctc_layer15=69.070, loss_interctc_layer21=100.561, loss=86.522, backward_time=0.542, grad_norm=73.709, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.227, optim0_lr0=1.009e-04, train_time=3.603 +[gpub003:0/64] 2024-01-30 00:06:57,448 (trainer:753) INFO: 16epoch:train:11001-11100batch: iter_time=8.941e-04, forward_time=0.319, loss_ctc=62.218, loss_interctc_layer6=80.415, loss_interctc_layer12=67.111, loss_interctc_layer15=61.649, loss_interctc_layer21=63.696, loss=67.018, backward_time=0.553, grad_norm=67.514, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.232, optim0_lr0=1.008e-04, train_time=3.389 +[gpub003:0/64] 2024-01-30 00:13:00,033 (trainer:753) INFO: 16epoch:train:11101-11200batch: iter_time=9.245e-05, forward_time=0.214, loss_ctc=87.750, loss_interctc_layer6=91.927, loss_interctc_layer12=76.851, loss_interctc_layer15=70.793, loss_interctc_layer21=90.518, loss=83.568, backward_time=0.593, grad_norm=87.436, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.228, optim0_lr0=1.008e-04, train_time=3.626 +[gpub003:0/64] 2024-01-30 00:16:09,062 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub003:0/64] 2024-01-30 00:16:27,761 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub003:0/64] 2024-01-30 00:16:31,215 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub003:0/64] 2024-01-30 00:16:31,215 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub003:0/64] 2024-01-30 00:16:31,278 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub003:0/64] 2024-01-30 00:28:38,627 (trainer:753) INFO: 16epoch:train:11201-11300batch: iter_time=3.151, forward_time=0.202, loss_ctc=83.297, loss_interctc_layer6=85.910, loss_interctc_layer12=71.713, loss_interctc_layer15=66.121, loss_interctc_layer21=85.604, loss=78.529, backward_time=0.496, grad_norm=80.486, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.227, optim0_lr0=1.008e-04, train_time=9.387 +[gpub003:0/64] 2024-01-30 00:34:06,971 (trainer:753) INFO: 16epoch:train:11301-11400batch: iter_time=8.831e-05, forward_time=0.201, loss_ctc=80.205, loss_interctc_layer6=86.419, loss_interctc_layer12=72.706, loss_interctc_layer15=67.242, loss_interctc_layer21=82.609, loss=77.836, backward_time=0.493, grad_norm=76.220, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.228, optim0_lr0=1.008e-04, train_time=3.283 +[gpub003:0/64] 2024-01-30 00:39:59,036 (trainer:753) INFO: 16epoch:train:11401-11500batch: iter_time=8.775e-05, forward_time=0.202, loss_ctc=82.157, loss_interctc_layer6=83.884, loss_interctc_layer12=70.393, loss_interctc_layer15=64.985, loss_interctc_layer21=84.468, loss=77.178, backward_time=0.577, grad_norm=75.650, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.228, optim0_lr0=1.008e-04, train_time=3.520 +[gpub003:0/64] 2024-01-30 00:45:35,078 (trainer:753) INFO: 16epoch:train:11501-11600batch: iter_time=9.101e-05, forward_time=0.202, loss_ctc=101.971, loss_interctc_layer6=105.833, loss_interctc_layer12=89.208, loss_interctc_layer15=83.354, loss_interctc_layer21=104.794, loss=97.032, backward_time=0.524, grad_norm=103.262, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.228, optim0_lr0=1.007e-04, train_time=3.360 +[gpub003:0/64] 2024-01-30 00:51:15,368 (trainer:753) INFO: 16epoch:train:11601-11700batch: iter_time=9.523e-05, forward_time=0.201, loss_ctc=99.648, loss_interctc_layer6=97.991, loss_interctc_layer12=82.064, loss_interctc_layer15=75.491, loss_interctc_layer21=102.585, loss=91.556, backward_time=0.512, grad_norm=68.782, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.228, optim0_lr0=1.007e-04, train_time=3.403 +[gpub003:0/64] 2024-01-30 00:56:41,446 (trainer:753) INFO: 16epoch:train:11701-11800batch: iter_time=9.270e-05, forward_time=0.203, loss_ctc=87.018, loss_interctc_layer6=88.199, loss_interctc_layer12=73.946, loss_interctc_layer15=67.922, loss_interctc_layer21=89.420, loss=81.301, backward_time=0.515, grad_norm=98.370, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.228, optim0_lr0=1.007e-04, train_time=3.261 +[gpub003:0/64] 2024-01-30 01:02:36,235 (trainer:753) INFO: 
16epoch:train:11801-11900batch: iter_time=9.071e-05, forward_time=0.235, loss_ctc=96.414, loss_interctc_layer6=91.009, loss_interctc_layer12=75.738, loss_interctc_layer15=69.265, loss_interctc_layer21=99.377, loss=86.361, backward_time=0.539, grad_norm=82.334, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.232, optim0_lr0=1.007e-04, train_time=3.548 +[gpub003:0/64] 2024-01-30 01:08:25,960 (trainer:753) INFO: 16epoch:train:11901-12000batch: iter_time=9.004e-05, forward_time=0.296, loss_ctc=99.636, loss_interctc_layer6=94.903, loss_interctc_layer12=78.450, loss_interctc_layer15=71.862, loss_interctc_layer21=102.587, loss=89.488, backward_time=0.550, grad_norm=84.241, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.231, optim0_lr0=1.006e-04, train_time=3.495 +[gpub003:0/64] 2024-01-30 01:14:18,639 (trainer:753) INFO: 16epoch:train:12001-12100batch: iter_time=8.951e-05, forward_time=0.203, loss_ctc=94.821, loss_interctc_layer6=96.921, loss_interctc_layer12=81.418, loss_interctc_layer15=75.105, loss_interctc_layer21=97.589, loss=89.171, backward_time=0.547, grad_norm=87.886, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.228, optim0_lr0=1.006e-04, train_time=3.527 +[gpub003:0/64] 2024-01-30 01:20:02,571 (trainer:753) INFO: 16epoch:train:12101-12200batch: iter_time=8.741e-05, forward_time=0.202, loss_ctc=95.036, loss_interctc_layer6=82.840, loss_interctc_layer12=68.833, loss_interctc_layer15=63.175, loss_interctc_layer21=98.201, loss=81.617, backward_time=0.520, grad_norm=92.205, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.228, optim0_lr0=1.006e-04, train_time=3.441 +[gpub003:0/64] 2024-01-30 01:25:26,318 (trainer:753) INFO: 16epoch:train:12201-12300batch: iter_time=9.337e-05, forward_time=0.202, loss_ctc=80.299, loss_interctc_layer6=89.427, loss_interctc_layer12=74.801, loss_interctc_layer15=68.725, loss_interctc_layer21=82.191, loss=79.089, backward_time=0.487, grad_norm=96.200, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.228, optim0_lr0=1.006e-04, train_time=3.237 +[gpub003:0/64] 2024-01-30 01:31:14,939 (trainer:753) INFO: 16epoch:train:12301-12400batch: iter_time=8.839e-05, forward_time=0.201, loss_ctc=77.807, loss_interctc_layer6=89.649, loss_interctc_layer12=75.459, loss_interctc_layer15=69.430, loss_interctc_layer21=79.624, loss=78.394, backward_time=0.533, grad_norm=75.071, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.228, optim0_lr0=1.006e-04, train_time=3.486 +[gpub003:0/64] 2024-01-30 01:36:40,451 (trainer:753) INFO: 16epoch:train:12401-12500batch: iter_time=8.492e-05, forward_time=0.201, loss_ctc=84.988, loss_interctc_layer6=87.975, loss_interctc_layer12=73.789, loss_interctc_layer15=68.186, loss_interctc_layer21=87.239, loss=80.435, backward_time=0.524, grad_norm=105.182, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.228, optim0_lr0=1.005e-04, train_time=3.255 +[gpub003:0/64] 2024-01-30 01:37:00,480 (multiple_iter_factory:32) INFO: Building 10th iter-factory... 
+[gpub003:0/64] 2024-01-30 01:37:19,522 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub003:0/64] 2024-01-30 01:37:22,915 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub003:0/64] 2024-01-30 01:37:22,915 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub003:0/64] 2024-01-30 01:37:22,946 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub003:0/64] 2024-01-30 01:46:51,820 (trainer:753) INFO: 16epoch:train:12501-12600batch: iter_time=2.952, forward_time=0.227, loss_ctc=70.481, loss_interctc_layer6=79.015, loss_interctc_layer12=65.581, loss_interctc_layer15=60.156, loss_interctc_layer21=72.530, loss=69.553, backward_time=0.479, grad_norm=96.360, clip=100.000, loss_scale=3.975e+31, optim_step_time=0.228, optim0_lr0=1.005e-04, train_time=6.113 +[gpub003:0/64] 2024-01-30 01:52:11,516 (trainer:753) INFO: 16epoch:train:12601-12700batch: iter_time=8.345e-05, forward_time=0.202, loss_ctc=90.059, loss_interctc_layer6=94.689, loss_interctc_layer12=80.418, loss_interctc_layer15=74.557, loss_interctc_layer21=91.866, loss=86.318, backward_time=0.485, grad_norm=86.986, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.227, optim0_lr0=1.005e-04, train_time=3.197 +[gpub003:0/64] 2024-01-30 01:57:16,627 (trainer:684) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub003:0/64] 2024-01-30 01:57:16,755 (trainer:753) INFO: 16epoch:train:12701-12800batch: iter_time=9.018e-05, forward_time=0.210, loss_ctc=92.780, loss_interctc_layer6=97.905, loss_interctc_layer12=82.388, loss_interctc_layer15=76.198, loss_interctc_layer21=94.950, loss=88.844, backward_time=0.473, grad_norm=83.036, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.227, optim0_lr0=1.005e-04, train_time=3.051 +[gpub003:0/64] 2024-01-30 02:02:49,585 (trainer:753) INFO: 16epoch:train:12801-12900batch: iter_time=8.850e-05, forward_time=0.201, loss_ctc=93.751, loss_interctc_layer6=94.886, loss_interctc_layer12=79.580, loss_interctc_layer15=73.310, loss_interctc_layer21=96.214, loss=87.548, backward_time=0.547, grad_norm=69.259, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.227, optim0_lr0=1.005e-04, train_time=3.328 +[gpub003:0/64] 2024-01-30 02:09:17,454 (trainer:753) INFO: 16epoch:train:12901-13000batch: iter_time=8.774e-05, forward_time=0.203, loss_ctc=95.495, loss_interctc_layer6=94.228, loss_interctc_layer12=78.846, loss_interctc_layer15=72.576, loss_interctc_layer21=98.148, loss=87.859, backward_time=0.596, grad_norm=95.677, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.227, optim0_lr0=1.004e-04, train_time=3.880 +[gpub003:0/64] 2024-01-30 02:15:23,301 (trainer:753) INFO: 16epoch:train:13001-13100batch: iter_time=9.473e-05, forward_time=0.202, loss_ctc=91.106, loss_interctc_layer6=93.098, loss_interctc_layer12=77.565, loss_interctc_layer15=71.406, loss_interctc_layer21=93.193, loss=85.274, backward_time=0.524, grad_norm=95.694, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.227, optim0_lr0=1.004e-04, train_time=3.658 +[gpub003:0/64] 2024-01-30 02:20:54,864 (trainer:753) INFO: 16epoch:train:13101-13200batch: iter_time=9.436e-05, forward_time=0.203, loss_ctc=87.593, loss_interctc_layer6=90.045, loss_interctc_layer12=74.797, loss_interctc_layer15=68.690, loss_interctc_layer21=90.645, loss=82.354, backward_time=0.493, grad_norm=73.294, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.227, optim0_lr0=1.004e-04, train_time=3.315 +[gpub003:0/64] 2024-01-30 02:26:33,820 (trainer:753) INFO: 16epoch:train:13201-13300batch: iter_time=9.424e-05, forward_time=0.220, loss_ctc=88.199, loss_interctc_layer6=94.878, loss_interctc_layer12=79.206, loss_interctc_layer15=72.819, loss_interctc_layer21=90.953, loss=85.211, backward_time=0.523, grad_norm=70.910, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.227, optim0_lr0=1.004e-04, train_time=3.389 +[gpub003:0/64] 2024-01-30 02:32:22,070 (trainer:753) INFO: 16epoch:train:13301-13400batch: iter_time=8.446e-05, forward_time=0.291, loss_ctc=87.413, loss_interctc_layer6=92.045, loss_interctc_layer12=76.781, loss_interctc_layer15=70.456, loss_interctc_layer21=89.846, loss=83.308, backward_time=0.546, grad_norm=86.372, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.243, optim0_lr0=1.004e-04, train_time=3.482 +[gpub003:0/64] 2024-01-30 02:38:00,482 (trainer:753) INFO: 16epoch:train:13401-13500batch: iter_time=8.866e-05, forward_time=0.202, loss_ctc=89.794, loss_interctc_layer6=90.256, loss_interctc_layer12=75.255, loss_interctc_layer15=68.978, loss_interctc_layer21=92.317, loss=83.320, backward_time=0.526, grad_norm=69.222, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.227, optim0_lr0=1.003e-04, train_time=3.384 +[gpub003:0/64] 2024-01-30 02:41:17,922 (trainer:684) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub003:0/64] 2024-01-30 02:43:49,020 (trainer:753) INFO: 16epoch:train:13501-13600batch: iter_time=8.695e-05, forward_time=0.201, loss_ctc=60.689, loss_interctc_layer6=79.876, loss_interctc_layer12=66.824, loss_interctc_layer15=61.394, loss_interctc_layer21=62.134, loss=66.184, backward_time=0.518, grad_norm=82.243, clip=100.000, loss_scale=1.537e+31, optim_step_time=0.227, optim0_lr0=1.003e-04, train_time=3.485 +[gpub003:0/64] 2024-01-30 02:49:13,366 (trainer:753) INFO: 16epoch:train:13601-13700batch: iter_time=9.210e-05, forward_time=0.212, loss_ctc=82.476, loss_interctc_layer6=90.661, loss_interctc_layer12=75.707, loss_interctc_layer15=69.756, loss_interctc_layer21=84.937, loss=80.707, backward_time=0.474, grad_norm=97.090, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.227, optim0_lr0=1.003e-04, train_time=3.243 +[gpub003:0/64] 2024-01-30 02:52:32,447 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpub003:0/64] 2024-01-30 02:52:51,515 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub003:0/64] 2024-01-30 02:52:55,012 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub003:0/64] 2024-01-30 02:52:55,012 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub003:0/64] 2024-01-30 02:52:55,040 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub003:0/64] 2024-01-30 03:04:50,856 (trainer:753) INFO: 16epoch:train:13701-13800batch: iter_time=3.166, forward_time=0.202, loss_ctc=79.420, loss_interctc_layer6=85.755, loss_interctc_layer12=71.757, loss_interctc_layer15=65.975, loss_interctc_layer21=81.670, loss=76.916, backward_time=0.507, grad_norm=81.365, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.227, optim0_lr0=1.003e-04, train_time=9.374 +[gpub003:0/64] 2024-01-30 03:12:25,277 (trainer:753) INFO: 16epoch:train:13801-13900batch: iter_time=8.006e-05, forward_time=0.204, loss_ctc=76.172, loss_interctc_layer6=85.863, loss_interctc_layer12=71.734, loss_interctc_layer15=66.318, loss_interctc_layer21=78.362, loss=75.690, backward_time=0.691, grad_norm=69.044, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.227, optim0_lr0=1.002e-04, train_time=4.544 +[gpub003:0/64] 2024-01-30 03:20:36,289 (trainer:753) INFO: 16epoch:train:13901-14000batch: iter_time=8.179e-05, forward_time=0.201, loss_ctc=78.217, loss_interctc_layer6=84.144, loss_interctc_layer12=70.536, loss_interctc_layer15=65.070, loss_interctc_layer21=80.427, loss=75.679, backward_time=0.789, grad_norm=97.423, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.227, optim0_lr0=1.002e-04, train_time=4.910 +[gpub003:0/64] 2024-01-30 03:28:34,065 (trainer:684) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub003:0/64] 2024-01-30 03:29:31,152 (trainer:753) INFO: 16epoch:train:14001-14100batch: iter_time=8.672e-05, forward_time=0.234, loss_ctc=99.260, loss_interctc_layer6=105.245, loss_interctc_layer12=88.895, loss_interctc_layer15=82.550, loss_interctc_layer21=101.799, loss=95.550, backward_time=0.996, grad_norm=106.269, clip=100.000, loss_scale=9.731e+30, optim_step_time=0.234, optim0_lr0=1.002e-04, train_time=5.348 +[gpub003:0/64] 2024-01-30 03:38:41,856 (trainer:753) INFO: 16epoch:train:14101-14200batch: iter_time=9.213e-05, forward_time=0.253, loss_ctc=97.858, loss_interctc_layer6=97.837, loss_interctc_layer12=81.682, loss_interctc_layer15=75.055, loss_interctc_layer21=100.717, loss=90.630, backward_time=0.893, grad_norm=82.965, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.228, optim0_lr0=1.002e-04, train_time=5.507 +[gpub003:0/64] 2024-01-30 03:50:37,793 (trainer:753) INFO: 16epoch:train:14201-14300batch: iter_time=9.834e-05, forward_time=0.203, loss_ctc=81.727, loss_interctc_layer6=87.806, loss_interctc_layer12=73.433, loss_interctc_layer15=67.701, loss_interctc_layer21=84.240, loss=78.981, backward_time=1.310, grad_norm=93.424, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.227, optim0_lr0=1.002e-04, train_time=7.159 +[gpub003:0/64] 2024-01-30 04:00:24,622 (trainer:753) INFO: 16epoch:train:14301-14400batch: iter_time=8.990e-05, forward_time=0.202, loss_ctc=94.060, loss_interctc_layer6=90.362, loss_interctc_layer12=75.029, loss_interctc_layer15=68.598, loss_interctc_layer21=96.835, loss=84.977, backward_time=0.988, grad_norm=75.739, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.227, optim0_lr0=1.001e-04, train_time=5.868 +[gpub003:0/64] 2024-01-30 04:10:14,741 (trainer:753) INFO: 16epoch:train:14401-14500batch: iter_time=8.957e-05, forward_time=0.205, loss_ctc=92.242, loss_interctc_layer6=94.673, loss_interctc_layer12=78.303, loss_interctc_layer15=71.713, loss_interctc_layer21=94.750, loss=86.336, backward_time=0.953, grad_norm=78.317, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.227, optim0_lr0=1.001e-04, train_time=5.901 +[gpub003:0/64] 2024-01-30 04:16:21,237 (trainer:753) INFO: 16epoch:train:14501-14600batch: iter_time=8.798e-05, forward_time=0.201, loss_ctc=84.250, loss_interctc_layer6=96.693, loss_interctc_layer12=81.085, loss_interctc_layer15=74.642, loss_interctc_layer21=86.834, loss=84.701, backward_time=0.640, grad_norm=82.227, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.227, optim0_lr0=1.001e-04, train_time=3.665 +[gpub003:0/64] 2024-01-30 04:22:02,790 (trainer:753) INFO: 16epoch:train:14601-14700batch: iter_time=9.007e-05, forward_time=0.204, loss_ctc=88.074, loss_interctc_layer6=82.699, loss_interctc_layer12=68.863, loss_interctc_layer15=63.126, loss_interctc_layer21=91.068, loss=78.766, backward_time=0.499, grad_norm=80.818, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.227, optim0_lr0=1.001e-04, train_time=3.415 +[gpub003:0/64] 2024-01-30 04:27:56,787 (trainer:753) INFO: 16epoch:train:14701-14800batch: iter_time=8.616e-05, forward_time=0.208, loss_ctc=77.085, loss_interctc_layer6=89.193, loss_interctc_layer12=74.454, loss_interctc_layer15=68.273, loss_interctc_layer21=79.095, loss=77.620, backward_time=0.571, grad_norm=73.491, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.227, optim0_lr0=1.001e-04, train_time=3.540 +[gpub003:0/64] 2024-01-30 04:33:10,214 (trainer:753) INFO: 16epoch:train:14801-14900batch: iter_time=8.028e-05, forward_time=0.240, loss_ctc=73.787, loss_interctc_layer6=89.541, 
loss_interctc_layer12=74.820, loss_interctc_layer15=69.036, loss_interctc_layer21=75.759, loss=76.589, backward_time=0.484, grad_norm=76.586, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.227, optim0_lr0=1.000e-04, train_time=3.132 +[gpub003:0/64] 2024-01-30 04:39:15,704 (trainer:753) INFO: 16epoch:train:14901-15000batch: iter_time=8.260e-05, forward_time=0.219, loss_ctc=80.725, loss_interctc_layer6=87.500, loss_interctc_layer12=73.255, loss_interctc_layer15=67.481, loss_interctc_layer21=83.040, loss=78.400, backward_time=0.586, grad_norm=81.108, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.231, optim0_lr0=1.000e-04, train_time=3.656 +[gpub003:0/64] 2024-01-30 05:16:45,197 (trainer:352) INFO: 16epoch results: [train] iter_time=0.280, forward_time=0.228, loss_ctc=88.554, loss_interctc_layer6=91.785, loss_interctc_layer12=76.867, loss_interctc_layer15=70.909, loss_interctc_layer21=91.004, loss=83.824, backward_time=0.562, grad_norm=89.877, clip=100.000, loss_scale=1.308e+31, optim_step_time=0.229, optim0_lr0=1.016e-04, train_time=3.988, time=16 hours, 37 minutes and 26.7 seconds, total_count=240000, gpu_max_cached_mem_GB=34.398, [valid] loss_ctc=51.047, cer_ctc=0.227, loss_interctc_layer6=54.414, cer_interctc_layer6=0.245, loss_interctc_layer12=41.941, cer_interctc_layer12=0.177, loss_interctc_layer15=37.579, cer_interctc_layer15=0.151, loss_interctc_layer21=53.298, cer_interctc_layer21=0.243, loss=47.656, time=37 minutes and 6.35 seconds, total_count=74736, gpu_max_cached_mem_GB=34.398 +[gpub003:0/64] 2024-01-30 05:17:04,934 (trainer:407) INFO: The best model has been updated: valid.cer_ctc, valid.loss_ctc, valid.total_count +[gpub003:0/64] 2024-01-30 05:17:04,993 (trainer:286) INFO: 17/45epoch started. Estimated time to finish: 3 weeks, 1 day and 17 hours +[gpub003:0/64] 2024-01-30 05:17:05,008 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
+[gpub003:0/64] 2024-01-30 05:17:23,349 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub003:0/64] 2024-01-30 05:17:26,724 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub003:0/64] 2024-01-30 05:17:26,724 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub003:0/64] 2024-01-30 05:17:26,727 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub003:0/64] 2024-01-30 05:27:17,734 (trainer:753) INFO: 17epoch:train:1-100batch: iter_time=2.805, forward_time=0.228, loss_ctc=70.549, loss_interctc_layer6=80.993, loss_interctc_layer12=67.735, loss_interctc_layer15=62.612, loss_interctc_layer21=72.168, loss=70.812, backward_time=0.472, grad_norm=63.006, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.228, optim0_lr0=9.999e-05, train_time=6.127 +[gpub003:0/64] 2024-01-30 05:32:36,120 (trainer:753) INFO: 17epoch:train:101-200batch: iter_time=9.370e-05, forward_time=0.201, loss_ctc=66.669, loss_interctc_layer6=77.627, loss_interctc_layer12=64.783, loss_interctc_layer15=59.558, loss_interctc_layer21=68.528, loss=67.433, backward_time=0.491, grad_norm=65.378, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.227, optim0_lr0=9.997e-05, train_time=3.184 +[gpub003:0/64] 2024-01-30 05:38:11,616 (trainer:753) INFO: 17epoch:train:201-300batch: iter_time=9.084e-05, forward_time=0.201, loss_ctc=69.184, loss_interctc_layer6=77.054, loss_interctc_layer12=64.920, loss_interctc_layer15=59.861, loss_interctc_layer21=71.295, loss=68.463, backward_time=0.511, grad_norm=88.537, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.227, optim0_lr0=9.995e-05, train_time=3.355 +[gpub003:0/64] 2024-01-30 05:43:45,565 (trainer:753) INFO: 17epoch:train:301-400batch: iter_time=9.679e-05, forward_time=0.202, loss_ctc=96.572, loss_interctc_layer6=101.821, loss_interctc_layer12=85.643, loss_interctc_layer15=78.816, loss_interctc_layer21=99.131, loss=92.397, backward_time=0.492, grad_norm=85.276, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.227, optim0_lr0=9.993e-05, train_time=3.339 +[gpub003:0/64] 2024-01-30 05:49:17,238 (trainer:753) INFO: 17epoch:train:401-500batch: iter_time=9.445e-05, forward_time=0.201, loss_ctc=74.316, loss_interctc_layer6=83.871, loss_interctc_layer12=71.029, loss_interctc_layer15=65.722, loss_interctc_layer21=76.591, loss=74.306, backward_time=0.503, grad_norm=121.670, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.227, optim0_lr0=9.991e-05, train_time=3.317 +[gpub003:0/64] 2024-01-30 05:55:29,102 (trainer:753) INFO: 17epoch:train:501-600batch: iter_time=1.056e-04, forward_time=0.220, loss_ctc=82.876, loss_interctc_layer6=88.653, loss_interctc_layer12=75.444, loss_interctc_layer15=69.235, loss_interctc_layer21=85.688, loss=80.379, backward_time=0.595, grad_norm=77.425, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.227, optim0_lr0=9.989e-05, train_time=3.718 +[gpub003:0/64] 2024-01-30 06:01:18,011 (trainer:753) INFO: 17epoch:train:601-700batch: 
iter_time=1.106e-04, forward_time=0.202, loss_ctc=104.270, loss_interctc_layer6=105.890, loss_interctc_layer12=89.374, loss_interctc_layer15=82.928, loss_interctc_layer21=107.233, loss=97.939, backward_time=0.531, grad_norm=99.290, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.226, optim0_lr0=9.987e-05, train_time=3.489 +[gpub003:0/64] 2024-01-30 06:06:51,287 (trainer:753) INFO: 17epoch:train:701-800batch: iter_time=9.454e-05, forward_time=0.259, loss_ctc=75.206, loss_interctc_layer6=81.913, loss_interctc_layer12=68.836, loss_interctc_layer15=63.624, loss_interctc_layer21=77.129, loss=73.342, backward_time=0.507, grad_norm=65.861, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.235, optim0_lr0=9.985e-05, train_time=3.332 +[gpub003:0/64] 2024-01-30 06:12:10,540 (trainer:753) INFO: 17epoch:train:801-900batch: iter_time=9.511e-05, forward_time=0.236, loss_ctc=96.175, loss_interctc_layer6=104.171, loss_interctc_layer12=87.082, loss_interctc_layer15=80.465, loss_interctc_layer21=98.802, loss=93.339, backward_time=0.476, grad_norm=73.940, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.228, optim0_lr0=9.983e-05, train_time=3.193 +[gpub003:0/64] 2024-01-30 06:18:08,503 (trainer:753) INFO: 17epoch:train:901-1000batch: iter_time=9.703e-05, forward_time=0.202, loss_ctc=86.311, loss_interctc_layer6=97.704, loss_interctc_layer12=82.571, loss_interctc_layer15=76.743, loss_interctc_layer21=88.500, loss=86.366, backward_time=0.576, grad_norm=95.368, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.227, optim0_lr0=9.981e-05, train_time=3.580 +[gpub003:0/64] 2024-01-30 06:24:01,222 (trainer:753) INFO: 17epoch:train:1001-1100batch: iter_time=9.254e-05, forward_time=0.202, loss_ctc=97.971, loss_interctc_layer6=103.031, loss_interctc_layer12=87.789, loss_interctc_layer15=81.563, loss_interctc_layer21=100.349, loss=94.141, backward_time=0.528, grad_norm=80.700, clip=100.000, loss_scale=5.476e+30, optim_step_time=0.227, optim0_lr0=9.979e-05, train_time=3.527 +[gpub003:0/64] 2024-01-30 06:29:53,789 (trainer:753) INFO: 17epoch:train:1101-1200batch: iter_time=8.959e-05, forward_time=0.201, loss_ctc=72.744, loss_interctc_layer6=83.166, loss_interctc_layer12=69.755, loss_interctc_layer15=64.375, loss_interctc_layer21=74.666, loss=72.941, backward_time=0.545, grad_norm=88.133, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.227, optim0_lr0=9.977e-05, train_time=3.525 +[gpub003:0/64] 2024-01-30 06:33:10,650 (multiple_iter_factory:32) INFO: Building 1th iter-factory... 
+[gpub003:0/64] 2024-01-30 06:33:29,774 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub003:0/64] 2024-01-30 06:33:33,460 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub003:0/64] 2024-01-30 06:33:33,460 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub003:0/64] 2024-01-30 06:33:33,464 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub003:0/64] 2024-01-30 06:43:27,852 (trainer:753) INFO: 17epoch:train:1201-1300batch: iter_time=2.712, forward_time=0.231, loss_ctc=80.817, loss_interctc_layer6=94.351, loss_interctc_layer12=78.747, loss_interctc_layer15=72.634, loss_interctc_layer21=82.914, loss=81.893, backward_time=0.528, grad_norm=76.088, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.228, optim0_lr0=9.975e-05, train_time=8.140 +[gpub003:0/64] 2024-01-30 06:48:27,192 (trainer:753) INFO: 17epoch:train:1301-1400batch: iter_time=8.475e-05, forward_time=0.201, loss_ctc=66.162, loss_interctc_layer6=76.261, loss_interctc_layer12=63.627, loss_interctc_layer15=58.615, loss_interctc_layer21=68.135, loss=66.560, backward_time=0.468, grad_norm=65.486, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.227, optim0_lr0=9.972e-05, train_time=2.994 +[gpub003:0/64] 2024-01-30 06:53:36,419 (trainer:753) INFO: 17epoch:train:1401-1500batch: iter_time=8.098e-05, forward_time=0.202, loss_ctc=63.208, loss_interctc_layer6=75.920, loss_interctc_layer12=63.301, loss_interctc_layer15=58.343, loss_interctc_layer21=64.917, loss=65.138, backward_time=0.477, grad_norm=60.550, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.227, optim0_lr0=9.970e-05, train_time=3.092 +[gpub003:0/64] 2024-01-30 06:58:44,109 (trainer:753) INFO: 17epoch:train:1501-1600batch: iter_time=9.128e-05, forward_time=0.201, loss_ctc=83.778, loss_interctc_layer6=87.185, loss_interctc_layer12=72.877, loss_interctc_layer15=67.035, loss_interctc_layer21=86.353, loss=79.446, backward_time=0.477, grad_norm=67.970, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.227, optim0_lr0=9.968e-05, train_time=3.077 +[gpub003:0/64] 2024-01-30 07:03:59,373 (trainer:753) INFO: 17epoch:train:1601-1700batch: iter_time=8.257e-05, forward_time=0.204, loss_ctc=84.347, loss_interctc_layer6=92.174, loss_interctc_layer12=76.777, loss_interctc_layer15=70.814, loss_interctc_layer21=86.556, loss=82.134, backward_time=0.527, grad_norm=66.142, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.227, optim0_lr0=9.966e-05, train_time=3.152 +[gpub003:0/64] 2024-01-30 07:09:24,507 (trainer:753) INFO: 17epoch:train:1701-1800batch: iter_time=8.412e-05, forward_time=0.202, loss_ctc=85.564, loss_interctc_layer6=90.363, loss_interctc_layer12=76.549, loss_interctc_layer15=71.097, loss_interctc_layer21=88.077, loss=82.330, backward_time=0.505, grad_norm=107.371, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.227, optim0_lr0=9.964e-05, train_time=3.251 +[gpub003:0/64] 2024-01-30 07:15:08,975 (trainer:753) INFO: 17epoch:train:1801-1900batch: 
iter_time=8.619e-05, forward_time=0.201, loss_ctc=89.660, loss_interctc_layer6=97.365, loss_interctc_layer12=81.030, loss_interctc_layer15=74.810, loss_interctc_layer21=91.646, loss=86.902, backward_time=0.530, grad_norm=93.752, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.227, optim0_lr0=9.962e-05, train_time=3.444 +[gpub003:0/64] 2024-01-30 07:20:54,736 (trainer:753) INFO: 17epoch:train:1901-2000batch: iter_time=8.692e-05, forward_time=0.225, loss_ctc=81.436, loss_interctc_layer6=89.260, loss_interctc_layer12=74.864, loss_interctc_layer15=69.102, loss_interctc_layer21=83.818, loss=79.696, backward_time=0.531, grad_norm=168.591, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.227, optim0_lr0=9.960e-05, train_time=3.457 +[gpub003:0/64] 2024-01-30 07:26:50,105 (trainer:753) INFO: 17epoch:train:2001-2100batch: iter_time=8.110e-05, forward_time=0.272, loss_ctc=78.298, loss_interctc_layer6=86.228, loss_interctc_layer12=71.886, loss_interctc_layer15=66.283, loss_interctc_layer21=80.524, loss=76.644, backward_time=0.573, grad_norm=69.005, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.230, optim0_lr0=9.958e-05, train_time=3.552 +[gpub003:0/64] 2024-01-30 07:31:59,991 (trainer:753) INFO: 17epoch:train:2101-2200batch: iter_time=9.134e-05, forward_time=0.202, loss_ctc=102.086, loss_interctc_layer6=108.087, loss_interctc_layer12=91.061, loss_interctc_layer15=84.186, loss_interctc_layer21=104.583, loss=98.001, backward_time=0.467, grad_norm=82.483, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.226, optim0_lr0=9.956e-05, train_time=3.100 +[gpub003:0/64] 2024-01-30 07:37:14,576 (trainer:753) INFO: 17epoch:train:2201-2300batch: iter_time=9.397e-05, forward_time=0.202, loss_ctc=84.766, loss_interctc_layer6=93.466, loss_interctc_layer12=78.653, loss_interctc_layer15=72.633, loss_interctc_layer21=86.734, loss=83.251, backward_time=0.470, grad_norm=72.744, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.226, optim0_lr0=9.954e-05, train_time=3.146 +[gpub003:0/64] 2024-01-30 07:42:36,506 (trainer:753) INFO: 17epoch:train:2301-2400batch: iter_time=9.371e-05, forward_time=0.202, loss_ctc=90.210, loss_interctc_layer6=97.315, loss_interctc_layer12=82.671, loss_interctc_layer15=76.668, loss_interctc_layer21=92.313, loss=87.836, backward_time=0.483, grad_norm=82.755, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.226, optim0_lr0=9.952e-05, train_time=3.219 +[gpub003:0/64] 2024-01-30 07:47:51,238 (trainer:753) INFO: 17epoch:train:2401-2500batch: iter_time=8.613e-05, forward_time=0.201, loss_ctc=76.016, loss_interctc_layer6=91.221, loss_interctc_layer12=76.160, loss_interctc_layer15=70.035, loss_interctc_layer21=78.064, loss=78.299, backward_time=0.503, grad_norm=72.747, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.226, optim0_lr0=9.950e-05, train_time=3.147 +[gpub003:0/64] 2024-01-30 07:48:05,590 (multiple_iter_factory:32) INFO: Building 2th iter-factory... 
+[gpub003:0/64] 2024-01-30 07:48:24,682 (s2t:401) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub003:0/64] 2024-01-30 07:48:28,119 (abs_task:1660) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub003:0/64] 2024-01-30 07:48:28,120 (abs_task:1661) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=19027, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub003:0/64] 2024-01-30 07:48:28,123 (abs_task:1662) INFO: [train] mini-batch sizes summary: N-batch=19027, mean=256.0, min=256, max=257 +[gpub003:0/64] 2024-01-30 07:57:19,939 (trainer:753) INFO: 17epoch:train:2501-2600batch: iter_time=2.472, forward_time=0.229, loss_ctc=69.091, loss_interctc_layer6=79.823, loss_interctc_layer12=66.770, loss_interctc_layer15=61.671, loss_interctc_layer21=70.920, loss=69.655, backward_time=0.477, grad_norm=63.297, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.229, optim0_lr0=9.948e-05, train_time=5.686 +[gpub003:0/64] 2024-01-30 08:02:20,858 (trainer:753) INFO: 17epoch:train:2601-2700batch: iter_time=8.640e-05, forward_time=0.202, loss_ctc=66.463, loss_interctc_layer6=76.079, loss_interctc_layer12=63.177, loss_interctc_layer15=58.060, loss_interctc_layer21=68.370, loss=66.430, backward_time=0.473, grad_norm=57.968, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.226, optim0_lr0=9.946e-05, train_time=3.009 +[gpub003:0/64] 2024-01-30 08:08:15,951 (trainer:753) INFO: 17epoch:train:2701-2800batch: iter_time=8.835e-05, forward_time=0.201, loss_ctc=67.992, loss_interctc_layer6=75.873, loss_interctc_layer12=63.625, loss_interctc_layer15=58.527, loss_interctc_layer21=69.868, loss=67.177, backward_time=0.543, grad_norm=75.343, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.226, optim0_lr0=9.944e-05, train_time=3.551 +[gpub003:0/64] 2024-01-30 08:13:29,316 (trainer:753) INFO: 17epoch:train:2801-2900batch: iter_time=9.256e-05, forward_time=0.202, loss_ctc=95.160, loss_interctc_layer6=99.913, loss_interctc_layer12=83.630, loss_interctc_layer15=77.157, loss_interctc_layer21=98.065, loss=90.785, backward_time=0.491, grad_norm=105.167, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.226, optim0_lr0=9.942e-05, train_time=3.133 +[gpub003:0/64] 2024-01-30 08:18:38,240 (trainer:753) INFO: 17epoch:train:2901-3000batch: iter_time=8.849e-05, forward_time=0.201, loss_ctc=72.883, loss_interctc_layer6=82.026, loss_interctc_layer12=69.026, loss_interctc_layer15=63.739, loss_interctc_layer21=74.993, loss=72.534, backward_time=0.480, grad_norm=58.269, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.226, optim0_lr0=9.940e-05, train_time=3.089 +[gpub003:0/64] 2024-01-30 08:24:19,077 (trainer:753) INFO: 17epoch:train:3001-3100batch: iter_time=9.537e-05, forward_time=0.312, loss_ctc=78.245, loss_interctc_layer6=86.849, loss_interctc_layer12=72.337, loss_interctc_layer15=66.290, loss_interctc_layer21=80.280, loss=76.800, backward_time=0.539, grad_norm=83.364, clip=100.000, loss_scale=1.095e+31, optim_step_time=0.240, optim0_lr0=9.938e-05, train_time=3.408 +[gpub003:0/64] 2024-01-30 08:30:17,430 (trainer:753) INFO: 17epoch:train:3101-3200batch: 
iter_time=9.050e-05, forward_time=0.216, loss_ctc=99.882, loss_interctc_layer6=102.627, loss_interctc_layer12=85.847, loss_interctc_layer15=79.027, loss_interctc_layer21=103.679, loss=94.213, backward_time=0.602, grad_norm=94.769, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.227, optim0_lr0=9.935e-05, train_time=3.583 +[gpub003:0/64] 2024-01-30 08:35:28,599 (trainer:753) INFO: 17epoch:train:3201-3300batch: iter_time=9.542e-05, forward_time=0.201, loss_ctc=73.550, loss_interctc_layer6=81.205, loss_interctc_layer12=68.005, loss_interctc_layer15=62.925, loss_interctc_layer21=75.514, loss=72.240, backward_time=0.491, grad_norm=83.003, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.227, optim0_lr0=9.933e-05, train_time=3.112 +[gpub003:0/64] 2024-01-30 08:41:24,772 (trainer:753) INFO: 17epoch:train:3301-3400batch: iter_time=8.918e-05, forward_time=0.202, loss_ctc=95.746, loss_interctc_layer6=102.732, loss_interctc_layer12=85.525, loss_interctc_layer15=78.688, loss_interctc_layer21=98.668, loss=92.272, backward_time=0.578, grad_norm=102.881, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.227, optim0_lr0=9.931e-05, train_time=3.562 +[gpub003:0/64] 2024-01-30 08:46:41,670 (trainer:753) INFO: 17epoch:train:3401-3500batch: iter_time=9.194e-05, forward_time=0.202, loss_ctc=84.507, loss_interctc_layer6=96.443, loss_interctc_layer12=81.381, loss_interctc_layer15=75.400, loss_interctc_layer21=86.486, loss=84.843, backward_time=0.494, grad_norm=92.656, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.227, optim0_lr0=9.929e-05, train_time=3.169 +[gpub003:0/64] 2024-01-30 08:52:23,380 (trainer:753) INFO: 17epoch:train:3501-3600batch: iter_time=9.239e-05, forward_time=0.203, loss_ctc=96.572, loss_interctc_layer6=102.047, loss_interctc_layer12=85.616, loss_interctc_layer15=79.327, loss_interctc_layer21=98.520, loss=92.416, backward_time=0.516, grad_norm=80.236, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.227, optim0_lr0=9.927e-05, train_time=3.417 +[gpub003:0/64] 2024-01-30 08:58:00,847 (trainer:753) INFO: 17epoch:train:3601-3700batch: iter_time=9.015e-05, forward_time=0.315, loss_ctc=71.232, loss_interctc_layer6=82.341, loss_interctc_layer12=68.777, loss_interctc_layer15=63.537, loss_interctc_layer21=73.127, loss=71.803, backward_time=0.533, grad_norm=68.333, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.233, optim0_lr0=9.925e-05, train_time=3.374 +srun: Job step aborted: Waiting up to 32 seconds for job step to finish. +slurmstepd: error: *** STEP 2903446.0 ON gpub003 CANCELLED AT 2024-01-30T08:58:27 ***